@Override
public StepConfig getEmrStepConfig(Object step) {
    EmrPigStep pigStep = (EmrPigStep) step;

    // Default ActionOnFailure is to cancel the execution and wait
    ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT;
    if (pigStep.isContinueOnError() != null && pigStep.isContinueOnError()) {
        // Override based on user input
        actionOnFailure = ActionOnFailure.CONTINUE;
    }

    // If there are no arguments to the pig script
    if (CollectionUtils.isEmpty(pigStep.getScriptArguments())) {
        // Just build the StepConfig object and return
        return new StepConfig().withName(pigStep.getStepName().trim())
            .withActionOnFailure(actionOnFailure)
            .withHadoopJarStep(new StepFactory().newRunPigScriptStep(pigStep.getScriptLocation().trim()));
    }
    // If there are arguments specified
    else {
        return new StepConfig().withName(pigStep.getStepName().trim())
            .withActionOnFailure(actionOnFailure)
            .withHadoopJarStep(new StepFactory().newRunPigScriptStep(pigStep.getScriptLocation().trim(),
                pigStep.getScriptArguments().toArray(new String[pigStep.getScriptArguments().size()])));
    }
}
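For context, a minimal sketch of submitting a step config like the one built above to a running cluster. The client construction, region, script location, and cluster ID are assumptions for illustration, not part of the original code:

import com.amazonaws.regions.Regions;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClientBuilder;
import com.amazonaws.services.elasticmapreduce.model.ActionOnFailure;
import com.amazonaws.services.elasticmapreduce.model.AddJobFlowStepsRequest;
import com.amazonaws.services.elasticmapreduce.model.AddJobFlowStepsResult;
import com.amazonaws.services.elasticmapreduce.model.StepConfig;
import com.amazonaws.services.elasticmapreduce.util.StepFactory;

public class PigStepSubmitExample {
    public static void main(String[] args) {
        // Assumed region -- replace with the cluster's actual region.
        AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.standard()
            .withRegion(Regions.US_EAST_1)
            .build();

        // Mirrors the no-arguments branch above; the script path is a placeholder.
        StepConfig pigStepConfig = new StepConfig()
            .withName("Run pig script")
            .withActionOnFailure(ActionOnFailure.CANCEL_AND_WAIT)
            .withHadoopJarStep(new StepFactory().newRunPigScriptStep("s3://my-bucket/scripts/example.pig"));

        AddJobFlowStepsResult result = emr.addJobFlowSteps(
            new AddJobFlowStepsRequest("j-XXXXXXXXXXXXX") // placeholder cluster ID
                .withSteps(pigStepConfig));
        System.out.println("Submitted step: " + result.getStepIds().get(0));
    }
}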
@Override
public StepConfig getEmrStepConfig(Object step) {
    EmrHiveStep emrHiveStep = (EmrHiveStep) step;

    // Default ActionOnFailure is to cancel the execution and wait
    ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT;
    if (emrHiveStep.isContinueOnError() != null && emrHiveStep.isContinueOnError()) {
        // Override based on user input
        actionOnFailure = ActionOnFailure.CONTINUE;
    }

    // If there are no arguments to the hive script
    if (CollectionUtils.isEmpty(emrHiveStep.getScriptArguments())) {
        // Just build the StepConfig object and return
        return new StepConfig().withName(emrHiveStep.getStepName().trim())
            .withActionOnFailure(actionOnFailure)
            .withHadoopJarStep(new StepFactory().newRunHiveScriptStep(emrHiveStep.getScriptLocation().trim()));
    }
    // If there are arguments specified
    else {
        // For each argument, add the "-d" option
        List<String> hiveArgs = new ArrayList<>();
        for (String hiveArg : emrHiveStep.getScriptArguments()) {
            hiveArgs.add("-d");
            hiveArgs.add(hiveArg);
        }

        // Return the StepConfig object
        return new StepConfig().withName(emrHiveStep.getStepName().trim())
            .withActionOnFailure(actionOnFailure)
            .withHadoopJarStep(new StepFactory().newRunHiveScriptStep(emrHiveStep.getScriptLocation().trim(),
                hiveArgs.toArray(new String[hiveArgs.size()])));
    }
}
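Note what the "-d" flags accomplish above: Hive treats each pair as a variable definition, so the script arguments are expected in KEY=VALUE form and are referenced inside the script as ${KEY}. A hedged sketch of the equivalent direct StepFactory call (bucket and script paths are hypothetical):

import com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;
import com.amazonaws.services.elasticmapreduce.util.StepFactory;

// Equivalent to passing ["INPUT=s3://my-bucket/input", "OUTPUT=s3://my-bucket/output"]
// as script arguments to the method above:
HadoopJarStepConfig hiveJarStep = new StepFactory().newRunHiveScriptStep(
    "s3://my-bucket/scripts/report.q",
    "-d", "INPUT=s3://my-bucket/input",     // available inside the script as ${INPUT}
    "-d", "OUTPUT=s3://my-bucket/output");  // available inside the script as ${OUTPUT}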
/**
 * Creates the list of step config objects for Hive/Pig installation.
 *
 * @param emrClusterDefinition the EMR cluster definition
 *
 * @return the list of step configurations containing all the steps for the given definition
 */
private List<StepConfig> getStepConfig(EmrClusterDefinition emrClusterDefinition) {
    StepFactory stepFactory = new StepFactory();
    List<StepConfig> appSteps = new ArrayList<>();

    // Create the install Hive step and add it to the StepConfig list
    if (StringUtils.isNotBlank(emrClusterDefinition.getHiveVersion())) {
        StepConfig installHive = new StepConfig().withName("Hive " + emrClusterDefinition.getHiveVersion())
            .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
            .withHadoopJarStep(stepFactory.newInstallHiveStep(emrClusterDefinition.getHiveVersion()));
        appSteps.add(installHive);
    }

    // Create the install Pig step and add it to the StepConfig list
    if (StringUtils.isNotBlank(emrClusterDefinition.getPigVersion())) {
        StepConfig installPig = new StepConfig().withName("Pig " + emrClusterDefinition.getPigVersion())
            .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
            .withHadoopJarStep(stepFactory.newInstallPigStep(emrClusterDefinition.getPigVersion()));
        appSteps.add(installPig);
    }

    // Add the hadoop jar steps that need to be added
    if (!CollectionUtils.isEmpty(emrClusterDefinition.getHadoopJarSteps())) {
        for (HadoopJarStep hadoopJarStep : emrClusterDefinition.getHadoopJarSteps()) {
            StepConfig stepConfig = emrHelper.getEmrHadoopJarStepConfig(hadoopJarStep.getStepName(),
                hadoopJarStep.getJarLocation(), hadoopJarStep.getMainClass(),
                hadoopJarStep.getScriptArguments(), hadoopJarStep.isContinueOnError());
            appSteps.add(stepConfig);
        }
    }

    return appSteps;
}
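For reference, a sketch of how the returned list would typically feed a cluster-creation request. The request name is a placeholder and the emrClusterDefinition instance is assumed to be in scope:

import com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest;
import com.amazonaws.services.elasticmapreduce.model.StepConfig;
import java.util.List;

// getStepConfig(...) returns the install/jar steps built above.
List<StepConfig> appSteps = getStepConfig(emrClusterDefinition);
RunJobFlowRequest request = new RunJobFlowRequest()
    .withName("my-emr-cluster") // hypothetical cluster name
    .withSteps(appSteps);       // withSteps(Collection<StepConfig>) accepts the list directly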
protected String fireEMRJob(String paramsStr, String clusterId) {
    StepFactory stepFactory = new StepFactory();
    AmazonElasticMapReduceClient emr = new AmazonElasticMapReduceClient();
    emr.setRegion(Region.getRegion(Regions.fromName(System.getenv("AWS_REGION"))));

    String[] params = paramsStr.split(",");

    StepConfig enableDebugging = new StepConfig()
        .withName("Enable debugging")
        .withActionOnFailure("TERMINATE_JOB_FLOW")
        .withHadoopJarStep(stepFactory.newEnableDebuggingStep());

    // Run the Spark job through command-runner.jar with the comma-separated arguments
    HadoopJarStepConfig sparkStepConf = new HadoopJarStepConfig()
        .withJar("command-runner.jar")
        .withArgs(params);

    final StepConfig sparkStep = new StepConfig()
        .withName("Spark Step")
        .withActionOnFailure("CONTINUE")
        .withHadoopJarStep(sparkStepConf);

    // Submit the debugging step followed by the Spark step
    AddJobFlowStepsRequest request = new AddJobFlowStepsRequest(clusterId)
        .withSteps(enableDebugging, sparkStep);
    AddJobFlowStepsResult result = emr.addJobFlowSteps(request);

    // Return the ID of the Spark step (the second step submitted)
    return result.getStepIds().get(1);
}
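A hedged usage sketch: because fireEMRJob splits paramsStr on commas and hands the tokens to command-runner.jar, the caller passes a full spark-submit invocation as one comma-separated string. The class name, jar path, and cluster ID below are placeholders:

// Each comma-separated token becomes one argument to command-runner.jar.
String stepId = fireEMRJob(
    "spark-submit,--class,com.example.MySparkJob,s3://my-bucket/jobs/my-spark-job.jar",
    "j-XXXXXXXXXXXXX"); // placeholder cluster ID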
private StepFactory stepFactory() {
    // TODO: configure region
    return new StepFactory();
}
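One way to resolve the TODO, assuming the AWS SDK for Java v1 StepFactory: its one-argument constructor takes the region-specific script bucket, so the factory can be pinned to the client's region. The parameter and method shape are an illustrative assumption:

import com.amazonaws.regions.Regions;
import com.amazonaws.services.elasticmapreduce.util.StepFactory;

private StepFactory stepFactory(Regions region) {
    // StepFactory(String bucket) points the factory at the region-specific
    // script bucket, e.g. "eu-west-1.elasticmapreduce".
    return new StepFactory(region.getName() + ".elasticmapreduce");
}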
/**
 * This method uses the AWS SDK for Java to launch an Apache HBase cluster on Amazon EMR.
 *
 * @param client - AmazonElasticMapReduce client that interfaces directly with the Amazon EMR Web Service
 * @param clusterIdentifier - identifier of an existing cluster
 * @param amiVersion - AMI to use for launching this cluster
 * @param keypair - A keypair for SSHing into the Amazon EMR master node
 * @param masterInstanceType - Master node Amazon EC2 instance type
 * @param coreInstanceType - core nodes Amazon EC2 instance type
 * @param logUri - An Amazon S3 bucket for your logs
 * @param numberOfNodes - total number of nodes in this cluster including master node
 * @return the job flow ID of the running cluster
 */
public static String createCluster(AmazonElasticMapReduce client, String clusterIdentifier, String amiVersion,
    String keypair, String masterInstanceType, String coreInstanceType, String logUri, int numberOfNodes) {

    if (clusterExists(client, clusterIdentifier)) {
        LOG.info("Cluster " + clusterIdentifier + " is available");
        return clusterIdentifier;
    }

    // Error checking
    if (amiVersion == null || amiVersion.isEmpty())
        throw new RuntimeException("ERROR: Please specify an AMI Version");
    if (keypair == null || keypair.isEmpty())
        throw new RuntimeException("ERROR: Please specify a valid Amazon Key Pair");
    if (masterInstanceType == null || masterInstanceType.isEmpty())
        throw new RuntimeException("ERROR: Please specify a Master Instance Type");
    if (logUri == null || logUri.isEmpty())
        throw new RuntimeException("ERROR: Please specify a valid Amazon S3 bucket for your logs.");
    if (numberOfNodes < 1)
        throw new RuntimeException("ERROR: Please specify at least 1 node");

    RunJobFlowRequest request = new RunJobFlowRequest()
        .withAmiVersion(amiVersion)
        .withBootstrapActions(new BootstrapActionConfig()
            .withName("Install HBase")
            .withScriptBootstrapAction(new ScriptBootstrapActionConfig()
                .withPath("s3://elasticmapreduce/bootstrap-actions/setup-hbase")))
        .withName("Job Flow With HBase Actions")
        .withSteps(
            // Enable debugging step
            new StepConfig()
                .withName("Enable debugging")
                .withActionOnFailure("TERMINATE_CLUSTER")
                .withHadoopJarStep(new StepFactory().newEnableDebuggingStep()),
            // Start HBase step - after installing it with a bootstrap action
            createStepConfig("Start HBase", "TERMINATE_CLUSTER", "/home/hadoop/lib/hbase.jar", getHBaseArgs()),
            // Add HBase backup step
            createStepConfig("Modify backup schedule", "TERMINATE_JOB_FLOW", "/home/hadoop/lib/hbase.jar", getHBaseBackupArgs()))
        .withLogUri(logUri)
        .withInstances(new JobFlowInstancesConfig()
            .withEc2KeyName(keypair)
            .withInstanceCount(numberOfNodes)
            .withKeepJobFlowAliveWhenNoSteps(true)
            .withMasterInstanceType(masterInstanceType)
            .withSlaveInstanceType(coreInstanceType));

    RunJobFlowResult result = client.runJobFlow(request);

    // Poll until the cluster reaches the WAITING state
    String state = null;
    while (!(state = clusterState(client, result.getJobFlowId())).equalsIgnoreCase("waiting")) {
        try {
            Thread.sleep(10 * 1000);
            LOG.info(result.getJobFlowId() + " is " + state + ". Waiting for cluster to become available.");
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can observe the interruption
            Thread.currentThread().interrupt();
        }
        if (state.equalsIgnoreCase("TERMINATED_WITH_ERRORS")) {
            LOG.error("Could not create EMR Cluster");
            System.exit(-1);
        }
    }
    LOG.info("Created cluster " + result.getJobFlowId());
    LOG.info("Cluster " + clusterIdentifier + " is available");
    return result.getJobFlowId();
}
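A minimal calling sketch for createCluster, assuming default credentials via AmazonElasticMapReduceClientBuilder; every parameter value below is a placeholder:

import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClientBuilder;

AmazonElasticMapReduce client = AmazonElasticMapReduceClientBuilder.defaultClient();
String jobFlowId = createCluster(client,
    "hbase-cluster",        // clusterIdentifier
    "3.11.0",               // amiVersion (an HBase-era AMI; placeholder)
    "my-keypair",           // keypair
    "m3.xlarge",            // masterInstanceType
    "m3.xlarge",            // coreInstanceType
    "s3://my-bucket/logs/", // logUri
    3);                     // numberOfNodes (1 master + 2 core)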
void init() {
    requireNonNull(this.AWSAccessKey);
    // The original checked AWSAccessKey twice; the secret key was presumably intended
    requireNonNull(this.AWSSecretKey);
    requireNonNull(this.jarLocation);
    requireNonNull(this.jarArguments);
    requireNonNull(this.slavesInstanceType);
    requireNonNull(this.hadoopVersion);
    requireNonNull(this.jobFlowName);

    if (this.nInstances < 1) {
        throw new IllegalArgumentException("the number of instances is lower than 1");
    }

    if (this.masterInstanceType == null) {
        this.masterInstanceType = this.slavesInstanceType;
    }

    // Set the hadoop jar step
    final HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig()
        .withJar(this.jarLocation.trim())
        .withArgs(this.jarArguments);

    // Set step config
    final StepConfig stepConfig = new StepConfig()
        .withName(this.jobFlowName + "-step")
        .withHadoopJarStep(hadoopJarStep)
        .withActionOnFailure("TERMINATE_JOB_FLOW");

    // Set the instances
    final JobFlowInstancesConfig instances = new JobFlowInstancesConfig()
        .withInstanceCount(this.nInstances)
        .withMasterInstanceType(this.masterInstanceType)
        .withSlaveInstanceType(this.slavesInstanceType)
        .withHadoopVersion(this.hadoopVersion);

    // Configure hadoop
    final ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig()
        .withPath("s3n://eu-west-1.elasticmapreduce/bootstrap-actions/configure-hadoop")
        .withArgs("--site-key-value",
            "mapreduce.tasktracker.map.tasks.maximum=" + this.taskTrackerMaxMapTasks);
    final BootstrapActionConfig bootstrapActions = new BootstrapActionConfig()
        .withName("Configure hadoop")
        .withScriptBootstrapAction(scriptBootstrapAction);

    // Enable debugging
    StepFactory stepFactory = new StepFactory();
    StepConfig enableDebugging = new StepConfig()
        .withName("Enable Debugging")
        .withActionOnFailure("TERMINATE_JOB_FLOW")
        .withHadoopJarStep(stepFactory.newEnableDebuggingStep());

    // Build the run flow request
    this.runFlowRequest = new RunJobFlowRequest().withName(this.jobFlowName);

    // Add the debugging step first when debugging is enabled
    if (this.enableDebugging) {
        this.runFlowRequest.withInstances(instances).withSteps(enableDebugging, stepConfig);
    } else {
        this.runFlowRequest.withInstances(instances).withSteps(stepConfig);
    }

    // Limit the number of map tasks per task tracker
    if (this.taskTrackerMaxMapTasks > 0) {
        this.runFlowRequest.withBootstrapActions(bootstrapActions);
    }

    if (this.logPathname != null && !"".equals(this.logPathname)) {
        this.runFlowRequest.withLogUri(this.logPathname);
    }

    // Set EC2 key name
    if (this.ec2KeyName != null) {
        this.runFlowRequest.getInstances().setEc2KeyName(this.ec2KeyName);
    }
}
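After init() has populated runFlowRequest, launching the flow is a single client call. A hedged sketch, with the client construction assumed rather than taken from the original class:

import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClientBuilder;
import com.amazonaws.services.elasticmapreduce.model.RunJobFlowResult;

AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.defaultClient();
RunJobFlowResult result = emr.runJobFlow(this.runFlowRequest);
System.out.println("Started job flow: " + result.getJobFlowId());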