Java 类com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce 实例源码

项目：digdag 文件：EmrOperatorFactory.java

/**
 * Collects the steps of the given cluster whose name contains {@code tag}.
 *
 * Pages through the ListSteps results until at least {@code cluster.steps()} matching
 * steps have been gathered or the service returns no further marker.
 *
 * @param emr     EMR client used to list the steps
 * @param tag     substring a step name must contain to be collected
 * @param cluster the cluster whose steps are listed; supplies the cluster id and expected step count
 * @return the matching steps, reversed relative to the order in which they were listed
 */
private List<StepSummary> listSubmittedSteps(AmazonElasticMapReduce emr, String tag, NewCluster cluster)
{
    List<StepSummary> matched = new ArrayList<>();
    ListStepsRequest pageRequest = new ListStepsRequest().withClusterId(cluster.id());

    while (matched.size() < cluster.steps()) {
        ListStepsResult page = emr.listSteps(pageRequest);

        // Keep only the steps carrying our tag, preserving the listing order.
        page.getSteps().stream()
                .filter(candidate -> candidate.getName().contains(tag))
                .forEachOrdered(matched::add);

        String nextMarker = page.getMarker();
        if (nextMarker == null) {
            // No more pages to fetch.
            break;
        }
        pageRequest.setMarker(nextMarker);
    }

    // The ListSteps api returns steps in reverse order. So reverse them to submission order.
    Collections.reverse(matched);
    return matched;
}

项目：aws-utilization-monitor 文件：AwsScan.java

/**
 * Collect data for ElasticMapReduce.
 *
 * Lists the EMR clusters visible to the given account in the given region and adds one
 * {@link AwsResource} per cluster to {@code stats}. Service errors are logged, not rethrown.
 *
 * @param stats
 *            current statistics object.
 * @param account
 *            currently used credentials object.
 * @param region
 *            currently used aws region.
 */
public static void scanElasticMapReduce(AwsStats stats, AwsAccount account, Regions region) {
    LOG.debug("Scan for MapReduce in region " + region.getName() + " in account " + account.getAccountId());

    try {
        AmazonElasticMapReduce elasticMapReduce = new AmazonElasticMapReduceClient(account.getCredentials());
        elasticMapReduce.setRegion(Region.getRegion(region));

        // NOTE(review): only the result of a single listClusters() call is read here; if the
        // service paginates this listing, later pages are not counted -- confirm and follow the marker.
        List<ClusterSummary> list = elasticMapReduce.listClusters().getClusters();

        int totalItems = list.size();
        for (ClusterSummary cs : list) {
            stats.add(new AwsResource(cs.getName(), account.getAccountId(), AwsResourceType.ElasticMapReduce, region));
        }

        LOG.info(totalItems + " ElasticMapReduce clusters in region " + region.getName() + " in account " + account.getAccountId());
    } catch (AmazonServiceException ase) {
        // The error code is not guaranteed to be set; guard against null so the handler itself cannot throw.
        String errorCode = ase.getErrorCode();
        if (errorCode != null && errorCode.contains("AccessDenied")) {
            LOG.info("Access denied for ElasticMapReduce in region " + region.getName() + " in account " + account.getAccountId());
        } else {
            // Pass the exception along so the stack trace is not lost.
            LOG.error("Exception of ElasticMapReduce: " + ase.getMessage(), ase);
        }
    }
}

项目：digdag 文件：EmrOperatorFactory.java

/**
 * Compiles the configured steps, submits them to EMR and, if any steps were configured,
 * waits for them to finish.
 *
 * The {@code cluster} parameter is first parsed as a nested config (new cluster); if that
 * parse throws a {@link ConfigException} it is read as a string cluster ID (existing cluster).
 *
 * @param tag   tag forwarded to the step compiler and the submitters
 * @param emr   EMR client
 * @param kms   KMS client handed to the parameter compiler
 * @param filer filer handed to the step compiler and the submitters
 * @return the task result built from the submission
 * @throws IOException declared by this method; propagated from the calls below
 */
private TaskResult run(String tag, AmazonElasticMapReduce emr, AWSKMSClient kms, Filer filer)
        throws IOException
{
    ParameterCompiler parameterCompiler = new ParameterCompiler(kms, context);

    // Step compiler over the configured steps
    List<Config> stepConfigs = params.getListOrEmpty("steps", Config.class);
    StepCompiler stepCompiler = new StepCompiler(tag, stepConfigs, filer, parameterCompiler, objectMapper, defaultActionOnFailure);

    // A nested "cluster" config means "create a new cluster"; a parse failure means
    // the parameter is not a nested config and is treated as an existing cluster ID below.
    Config clusterConfig;
    try {
        clusterConfig = params.parseNestedOrGetEmpty("cluster");
    }
    catch (ConfigException ignored) {
        clusterConfig = null;
    }

    // Pick the job submitter
    Submitter submitter = (clusterConfig != null)
            ? newClusterSubmitter(emr, tag, stepCompiler, clusterConfig, filer, parameterCompiler)
            : existingClusterSubmitter(emr, tag, stepCompiler, params.get("cluster", String.class), filer);

    // Submit EMR job
    SubmissionResult submission = submitter.submit();

    // Only wait when there is at least one step to wait for
    if (!stepConfigs.isEmpty()) {
        waitForSteps(emr, submission);
    }

    return result(submission);
}

项目：digdag 文件：EmrOperatorFactory.java

/**
 * Polls until {@code checkStepCompletion} reports a result for the last submitted step.
 *
 * @param emr        EMR client passed to the completion check
 * @param submission the submission whose last step ID is watched
 */
private void waitForSteps(AmazonElasticMapReduce emr, SubmissionResult submission)
{
    // The last step ID in the submission is the one handed to the completion check.
    String finalStepId = Iterables.getLast(submission.stepIds());
    DurationInterval pollInterval = DurationInterval.of(Duration.ofSeconds(15), Duration.ofMinutes(5));

    pollingWaiter(state, "result")
            .withWaitMessage("EMR job still running: %s", submission.clusterId())
            .withPollInterval(pollInterval)
            .awaitOnce(Step.class, waitState -> checkStepCompletion(emr, submission, finalStepId, waitState));
}

项目：digdag 文件：EmrOperatorFactory.java

/**
 * Builds a submitter that adds the compiled steps to an already existing cluster.
 *
 * The submission runs through the polling retry executor under the {@code "submission"} key
 * and is retried on {@link AmazonServiceException} unless {@code Aws::isDeterministicException} matches.
 *
 * @param emr          EMR client used to add the steps
 * @param tag          tag used when preparing the runner
 * @param stepCompiler compiler producing the step configs
 * @param clusterId    ID of the cluster (job flow) the steps are added to
 * @param filer        filer used to prepare the runner and stage files
 * @return a submitter yielding a {@link SubmissionResult} for the existing cluster
 */
private Submitter existingClusterSubmitter(AmazonElasticMapReduce emr, String tag, StepCompiler stepCompiler, String clusterId, Filer filer)
{
    return () -> {
        List<String> submittedIds = pollingRetryExecutor(state, "submission")
                .retryUnless(AmazonServiceException.class, Aws::isDeterministicException)
                .withRetryInterval(DurationInterval.of(Duration.ofSeconds(30), Duration.ofMinutes(5)))
                .runOnce(new TypeReference<List<String>>() {}, retryState -> {

                    // Prepare the runner and compile the steps against it
                    stepCompiler.compile(prepareRunner(filer, tag));

                    // Upload staged files to S3 before the steps reference them
                    filer.stageFiles();

                    AddJobFlowStepsRequest addSteps = new AddJobFlowStepsRequest()
                            .withJobFlowId(clusterId)
                            .withSteps(stepCompiler.stepConfigs());

                    int stepCount = addSteps.getSteps().size();
                    logger.info("Submitting {} EMR step(s) to {}", stepCount, clusterId);

                    AddJobFlowStepsResult response = emr.addJobFlowSteps(addSteps);
                    List<String> returnedIds = response.getStepIds();
                    logSubmittedSteps(clusterId, stepCount, idx -> addSteps.getSteps().get(idx).getName(), idx -> returnedIds.get(idx));
                    return ImmutableList.copyOf(returnedIds);
                });

        return SubmissionResult.ofExistingCluster(clusterId, submittedIds);
    };
}

项目：digdag 文件：EmrOperatorFactory.java

/**
 * Builds a submitter that starts a new cluster, looks up the IDs of the steps submitted
 * with it, and waits for the cluster boot check to complete.
 *
 * @param emr               EMR client
 * @param tag               tag used for the cluster request and for finding the submitted steps
 * @param stepCompiler      compiler producing the step configs
 * @param clusterConfig     configuration of the cluster to create
 * @param filer             filer passed to the cluster request
 * @param parameterCompiler parameter compiler passed to the cluster request
 * @return a submitter yielding a {@link SubmissionResult} for the new cluster
 */
private Submitter newClusterSubmitter(AmazonElasticMapReduce emr, String tag, StepCompiler stepCompiler, Config clusterConfig, Filer filer, ParameterCompiler parameterCompiler)
{
    return () -> {
        // Phase 1: start the cluster
        NewCluster launched = pollingRetryExecutor(state, "submission")
                .withRetryInterval(DurationInterval.of(Duration.ofSeconds(30), Duration.ofMinutes(5)))
                // TODO: EMR requests are not idempotent, thus retrying might produce duplicate cluster submissions.
                .retryUnless(AmazonServiceException.class, Aws::isDeterministicException)
                .runOnce(NewCluster.class, retryState -> submitNewClusterRequest(emr, tag, stepCompiler, clusterConfig, filer, parameterCompiler));

        // Phase 2: resolve the IDs of the steps that were submitted with the cluster
        List<String> submittedIds = pollingRetryExecutor(this.state, "steps")
                .withRetryInterval(DurationInterval.of(Duration.ofSeconds(30), Duration.ofMinutes(5)))
                .retryUnless(AmazonServiceException.class, Aws::isDeterministicException)
                .runOnce(new TypeReference<List<String>>() {}, retryState -> {
                    List<StepSummary> summaries = listSubmittedSteps(emr, tag, launched);
                    logSubmittedSteps(launched.id(), launched.steps(), idx -> summaries.get(idx).getName(), idx -> summaries.get(idx).getId());

                    List<String> ids = new ArrayList<>();
                    for (StepSummary summary : summaries) {
                        ids.add(summary.getId());
                    }
                    return ids;
                });

        // Phase 3: report cluster status until the boot check yields a result
        pollingWaiter(state, "bootstrap")
                .withWaitMessage("EMR cluster still booting")
                .withPollInterval(DurationInterval.of(Duration.ofSeconds(30), Duration.ofMinutes(5)))
                .awaitOnce(String.class, waitState -> checkClusterBootStatus(emr, launched, waitState));

        return SubmissionResult.ofNewCluster(launched.id(), submittedIds);
    };
}

项目：aws-big-data-blog 文件：EMRUtils.java

/**
 * Helper method to determine if an Amazon EMR cluster exists
 *
 * The cluster is looked up in the result of {@code listClusters()}; when found, it is
 * described and reported as existing only if HBase is installed on it and its state is
 * RUNNING or WAITING.
 *
 * @param client
 *        The {@link AmazonElasticMapReduceClient} with read permissions
 * @param clusterIdentifier
 *        The Amazon EMR cluster to check
 * @return true if the Amazon EMR cluster exists, has HBase installed and is RUNNING or WAITING,
 *         otherwise false (including for a null or empty identifier)
 * @throws RuntimeException if the cluster is found but HBase is not installed on it
 */
public static boolean clusterExists(AmazonElasticMapReduce client, String clusterIdentifier) {
    if (clusterIdentifier != null && !clusterIdentifier.isEmpty()) {
        ListClustersResult clustersList = client.listClusters();
        // Iterate every listed cluster; a for-each handles an empty list and does not
        // skip the final element.
        for (ClusterSummary summary : clustersList.getClusters()) {
            if (!summary.getId().equals(clusterIdentifier)) {
                continue;
            }
            DescribeClusterRequest describeClusterRequest = new DescribeClusterRequest().withClusterId(clusterIdentifier);
            DescribeClusterResult result = client.describeCluster(describeClusterRequest);
            if (result != null) {
                Cluster cluster = result.getCluster();
                // A cluster without HBase does not count as an existing (usable) cluster.
                // isHBaseInstalled currently throws instead of returning false; the check
                // is kept so this method stays correct if that ever changes.
                if (!isHBaseInstalled(client, cluster.getId())) {
                    return false;
                }
                String state = cluster.getStatus().getState();
                LOG.info(clusterIdentifier + " is " + state + ". ");
                if (state.equalsIgnoreCase("RUNNING") || state.equalsIgnoreCase("WAITING")) {
                    LOG.info("The cluster with id " + clusterIdentifier + " exists and is " + state);
                    return true;
                }
            }
        }
    }
    LOG.info("The cluster with id " + clusterIdentifier + " does not exist");
    return false;
}

项目：aws-big-data-blog 文件：EMRUtils.java

/**
 * Helper method to determine the master node public DNS of an Amazon EMR cluster
 *
 * Describes the job flow with the given ID and reads the master public DNS name from the
 * first job flow returned.
 *
 * @param client - The {@link AmazonElasticMapReduceClient} with read permissions
 * @param clusterId - unique identifier for this cluster
 * @return public dns url
 * @throws IndexOutOfBoundsException if the service returns no job flow for {@code clusterId}
 */
public static String getPublicDns(AmazonElasticMapReduce client, String clusterId) {
    DescribeJobFlowsResult describeJobFlows = client.describeJobFlows(new DescribeJobFlowsRequest().withJobFlowIds(clusterId));
    List<JobFlowDetail> jobFlows = describeJobFlows.getJobFlows();
    // Only one ID was requested, so the first entry is the cluster of interest.
    JobFlowDetail jobflow = jobFlows.get(0);
    JobFlowInstancesDetail instancesDetail = jobflow.getInstances();
    String masterPublicDns = instancesDetail.getMasterPublicDnsName();
    LOG.info("EMR cluster public DNS is " + masterPublicDns);
    return masterPublicDns;
}

项目：aws-big-data-blog 文件：EMRUtils.java

/**
 * Helper method to verify that HBase is installed on this cluster.
 *
 * Scans the cluster's bootstrap actions for one named "Install HBase" (case-insensitive).
 *
 * @param client - The {@link AmazonElasticMapReduceClient} with read permissions
 * @param clusterId - unique identifier for this cluster
 * @return true when such a bootstrap action is found; this method never returns false
 * @throws RuntimeException if no such bootstrap action is found
 */
private static boolean isHBaseInstalled(AmazonElasticMapReduce client, String clusterId) {
    ListBootstrapActionsRequest actionsRequest = new ListBootstrapActionsRequest().withClusterId(clusterId);
    ListBootstrapActionsResult actionsResult = client.listBootstrapActions(actionsRequest);

    for (Command action : actionsResult.getBootstrapActions()) {
        if (action.getName().equalsIgnoreCase("Install HBase")) {
            return true;
        }
    }
    throw new RuntimeException("ERROR: Apache HBase is not installed on this cluster!!");
}