@Override public StepConfig getEmrStepConfig(Object step) { EmrShellStep emrShellStep = (EmrShellStep) step; // Hadoop Jar provided by Amazon for running Shell Scripts String hadoopJarForShellScript = configurationHelper.getProperty(ConfigurationValue.EMR_SHELL_SCRIPT_JAR); // Default ActionOnFailure is to cancel the execution and wait ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT; if (emrShellStep.isContinueOnError() != null && emrShellStep.isContinueOnError()) { // Override based on user input actionOnFailure = ActionOnFailure.CONTINUE; } // Add the script location List<String> argsList = new ArrayList<>(); argsList.add(emrShellStep.getScriptLocation().trim()); // Add the script arguments if (!CollectionUtils.isEmpty(emrShellStep.getScriptArguments())) { for (String argument : emrShellStep.getScriptArguments()) { argsList.add(argument.trim()); } } // Return the StepConfig object HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(hadoopJarForShellScript).withArgs(argsList); return new StepConfig().withName(emrShellStep.getStepName().trim()).withActionOnFailure(actionOnFailure).withHadoopJarStep(jarConfig); }
/** * Builds the StepConfig for the Hadoop jar step. * * @param stepName the step name. * @param jarLocation the location of jar. * @param mainClass the main class. * @param scriptArguments the arguments. * @param isContinueOnError indicate what to do on error. * * @return the stepConfig. */ public StepConfig getEmrHadoopJarStepConfig(String stepName, String jarLocation, String mainClass, List<String> scriptArguments, Boolean isContinueOnError) { // Default ActionOnFailure is to cancel the execution and wait ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT; if (isContinueOnError != null && isContinueOnError) { // Override based on user input actionOnFailure = ActionOnFailure.CONTINUE; } // If there are no arguments if (CollectionUtils.isEmpty(scriptArguments)) { // Build the StepConfig object and return return new StepConfig().withName(stepName.trim()).withActionOnFailure(actionOnFailure) .withHadoopJarStep(new HadoopJarStepConfig().withJar(jarLocation.trim()).withMainClass(mainClass)); } else { // If there are arguments, include the arguments in the StepConfig object return new StepConfig().withName(stepName.trim()).withActionOnFailure(actionOnFailure).withHadoopJarStep( new HadoopJarStepConfig().withJar(jarLocation.trim()).withMainClass(mainClass) .withArgs(scriptArguments.toArray(new String[scriptArguments.size()]))); } }
/** * This is a helper method for creating step configuration information * @param stepName - a custom name to label this step * @param actionOnFailure - options are terminate cluster, terminate job flow, contiunue * @param jarPath - path to jar file - could be on S3 or local file system * @param args list of Java args to configure custom step * @return */ private static StepConfig createStepConfig(String stepName, String actionOnFailure, String jarPath, List<String> args ) { //Start HBase step - after installing it with a bootstrap action StepConfig stepConfig = new StepConfig() .withName(stepName) .withActionOnFailure(actionOnFailure) .withHadoopJarStep(new HadoopJarStepConfig() .withJar(jarPath) .withArgs(args)); return stepConfig; }
protected String fireEMRJob(String paramsStr,String clusterId){ StepFactory stepFactory = new StepFactory(); AmazonElasticMapReduceClient emr = new AmazonElasticMapReduceClient(); emr.setRegion(Region.getRegion(Regions.fromName(System.getenv().get("AWS_REGION")))); Application sparkConfig = new Application() .withName("Spark"); String[] params = paramsStr.split(","); StepConfig enabledebugging = new StepConfig() .withName("Enable debugging") .withActionOnFailure("TERMINATE_JOB_FLOW") .withHadoopJarStep(stepFactory.newEnableDebuggingStep()); HadoopJarStepConfig sparkStepConf = new HadoopJarStepConfig() .withJar("command-runner.jar") .withArgs(params); final StepConfig sparkStep = new StepConfig() .withName("Spark Step") .withActionOnFailure("CONTINUE") .withHadoopJarStep(sparkStepConf); AddJobFlowStepsRequest request = new AddJobFlowStepsRequest(clusterId) .withSteps(new ArrayList<StepConfig>(){{add(sparkStep);}}); AddJobFlowStepsResult result = emr.addJobFlowSteps(request); return result.getStepIds().get(0); }
void init() { requireNonNull(this.AWSAccessKey); requireNonNull(this.AWSAccessKey); requireNonNull(this.jarLocation); requireNonNull(this.jarArguments); requireNonNull(this.slavesInstanceType); requireNonNull(this.hadoopVersion); requireNonNull(this.jobFlowName); if (this.nInstances < 1) { throw new IllegalArgumentException( "the number of instance is lower than 1"); } if (this.masterInstanceType == null) { this.masterInstanceType = this.slavesInstanceType; } // Set the hadoop jar step final HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig() .withJar(this.jarLocation.trim()).withArgs(this.jarArguments); // Set step config final StepConfig stepConfig = new StepConfig() .withName(this.jobFlowName + "-step").withHadoopJarStep(hadoopJarStep) .withActionOnFailure("TERMINATE_JOB_FLOW"); // Set the instance final JobFlowInstancesConfig instances = new JobFlowInstancesConfig().withInstanceCount(this.nInstances) .withMasterInstanceType(this.masterInstanceType) .withSlaveInstanceType(this.slavesInstanceType) .withHadoopVersion(this.hadoopVersion); // Configure hadoop final ScriptBootstrapActionConfig scriptBootstrapAction = new ScriptBootstrapActionConfig() .withPath( "s3n://eu-west-1.elasticmapreduce/bootstrap-actions/configure-hadoop") .withArgs("--site-key-value", "mapreduce.tasktracker.map.tasks.maximum=" + this.taskTrackerMaxMapTasks); final BootstrapActionConfig bootstrapActions = new BootstrapActionConfig().withName("Configure hadoop") .withScriptBootstrapAction(scriptBootstrapAction); // Enable debugging StepFactory stepFactory = new StepFactory(); StepConfig enableDebugging = new StepConfig().withName("Enable Debugging") .withActionOnFailure("TERMINATE_JOB_FLOW") .withHadoopJarStep(stepFactory.newEnableDebuggingStep()); // Run flow this.runFlowRequest = new RunJobFlowRequest().withName(this.jobFlowName); // Enable or not debugging if (this.enableDebugging) { this.runFlowRequest.withInstances(instances).withSteps(enableDebugging, stepConfig); } else { this.runFlowRequest.withInstances(instances).withSteps(stepConfig); } // Limit the number of task in a task tracker if (this.taskTrackerMaxMapTasks > 0) { this.runFlowRequest.withBootstrapActions(bootstrapActions); } if (this.logPathname != null && !"".equals(this.logPathname)) { this.runFlowRequest.withLogUri(this.logPathname); } // Set EC2 Key name if (this.ec2KeyName != null) { this.runFlowRequest.getInstances().setEc2KeyName(this.ec2KeyName); } }