@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { // Status update calls don't really change the state of the attempt. TaskAttemptStatus newReportedStatus = ((TaskAttemptStatusUpdateEvent) event) .getReportedTaskAttemptStatus(); // Now switch the information in the reportedStatus taskAttempt.reportedStatus = newReportedStatus; taskAttempt.reportedStatus.taskState = taskAttempt.getState(); // send event to speculator about the reported status taskAttempt.eventHandler.handle (new SpeculatorEvent (taskAttempt.reportedStatus, taskAttempt.clock.getTime())); taskAttempt.updateProgressSplits(); //if fetch failures are present, send the fetch failure event to job //this only will happen in reduce attempt type if (taskAttempt.reportedStatus.fetchFailedMaps != null && taskAttempt.reportedStatus.fetchFailedMaps.size() > 0) { taskAttempt.eventHandler.handle(new JobTaskAttemptFetchFailureEvent( taskAttempt.attemptId, taskAttempt.reportedStatus.fetchFailedMaps)); } }
@Override public void transition(JobImpl job, JobEvent event) { //get number of shuffling reduces int shufflingReduceTasks = 0; for (TaskId taskId : job.reduceTasks) { Task task = job.tasks.get(taskId); if (TaskState.RUNNING.equals(task.getState())) { for(TaskAttempt attempt : task.getAttempts().values()) { if(attempt.getPhase() == Phase.SHUFFLE) { shufflingReduceTasks++; break; } } } } JobTaskAttemptFetchFailureEvent fetchfailureEvent = (JobTaskAttemptFetchFailureEvent) event; for (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId mapId : fetchfailureEvent.getMaps()) { Integer fetchFailures = job.fetchFailuresMapping.get(mapId); fetchFailures = (fetchFailures == null) ? 1 : (fetchFailures+1); job.fetchFailuresMapping.put(mapId, fetchFailures); float failureRate = shufflingReduceTasks == 0 ? 1.0f : (float) fetchFailures / shufflingReduceTasks; // declare faulty if fetch-failures >= max-allowed-failures if (fetchFailures >= job.getMaxFetchFailuresNotifications() && failureRate >= job.getMaxAllowedFetchFailuresFraction()) { LOG.info("Too many fetch-failures for output of task attempt: " + mapId + " ... raising fetch failure to map"); job.eventHandler.handle(new TaskAttemptEvent(mapId, TaskAttemptEventType.TA_TOO_MANY_FETCH_FAILURE)); job.fetchFailuresMapping.remove(mapId); } } }
private void sendFetchFailure(MRApp app, TaskAttempt reduceAttempt, TaskAttempt mapAttempt) { app.getContext().getEventHandler().handle( new JobTaskAttemptFetchFailureEvent( reduceAttempt.getID(), Arrays.asList(new TaskAttemptId[] {mapAttempt.getID()}))); }
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { // Status update calls don't really change the state of the attempt. TaskAttemptStatus newReportedStatus = ((TaskAttemptStatusUpdateEvent) event) .getReportedTaskAttemptStatus(); // Now switch the information in the reportedStatus taskAttempt.reportedStatus = newReportedStatus; taskAttempt.reportedStatus.taskState = taskAttempt.getState(); // send event to speculator about the reported status taskAttempt.eventHandler.handle (new SpeculatorEvent (taskAttempt.reportedStatus, taskAttempt.clock.getTime())); taskAttempt.updateProgressSplits(); //if fetch failures are present, send the fetch failure event to job //this only will happen in reduce attempt type if (taskAttempt.reportedStatus.fetchFailedMaps != null && taskAttempt.reportedStatus.fetchFailedMaps.size() > 0) { String hostname = taskAttempt.container == null ? "UNKNOWN" : taskAttempt.container.getNodeId().getHost(); taskAttempt.eventHandler.handle(new JobTaskAttemptFetchFailureEvent( taskAttempt.attemptId, taskAttempt.reportedStatus.fetchFailedMaps, hostname)); } }
@Override public void transition(JobImpl job, JobEvent event) { //get number of shuffling reduces int shufflingReduceTasks = 0; for (TaskId taskId : job.reduceTasks) { Task task = job.tasks.get(taskId); if (TaskState.RUNNING.equals(task.getState())) { for(TaskAttempt attempt : task.getAttempts().values()) { if(attempt.getPhase() == Phase.SHUFFLE) { shufflingReduceTasks++; break; } } } } JobTaskAttemptFetchFailureEvent fetchfailureEvent = (JobTaskAttemptFetchFailureEvent) event; for (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId mapId : fetchfailureEvent.getMaps()) { Integer fetchFailures = job.fetchFailuresMapping.get(mapId); fetchFailures = (fetchFailures == null) ? 1 : (fetchFailures+1); job.fetchFailuresMapping.put(mapId, fetchFailures); float failureRate = shufflingReduceTasks == 0 ? 1.0f : (float) fetchFailures / shufflingReduceTasks; // declare faulty if fetch-failures >= max-allowed-failures if (fetchFailures >= job.getMaxFetchFailuresNotifications() && failureRate >= job.getMaxAllowedFetchFailuresFraction()) { LOG.info("Too many fetch-failures for output of task attempt: " + mapId + " ... raising fetch failure to map"); job.eventHandler.handle(new TaskAttemptTooManyFetchFailureEvent(mapId, fetchfailureEvent.getReduce(), fetchfailureEvent.getHost())); job.fetchFailuresMapping.remove(mapId); } } }
private void sendFetchFailure(MRApp app, TaskAttempt reduceAttempt, TaskAttempt mapAttempt, String hostname) { app.getContext().getEventHandler().handle( new JobTaskAttemptFetchFailureEvent( reduceAttempt.getID(), Arrays.asList(new TaskAttemptId[] {mapAttempt.getID()}), hostname)); }
@Override public void transition(JobImpl job, JobEvent event) { //get number of shuffling reduces int shufflingReduceTasks = 0; for (TaskId taskId : job.reduceTasks) { Task task = job.tasks.get(taskId); if (TaskState.RUNNING.equals(task.getState())) { for(TaskAttempt attempt : task.getAttempts().values()) { if(attempt.getPhase() == Phase.SHUFFLE) { shufflingReduceTasks++; break; } } } } JobTaskAttemptFetchFailureEvent fetchfailureEvent = (JobTaskAttemptFetchFailureEvent) event; for (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId mapId : fetchfailureEvent.getMaps()) { Integer fetchFailures = job.fetchFailuresMapping.get(mapId); fetchFailures = (fetchFailures == null) ? 1 : (fetchFailures+1); job.fetchFailuresMapping.put(mapId, fetchFailures); float failureRate = shufflingReduceTasks == 0 ? 1.0f : (float) fetchFailures / shufflingReduceTasks; // declare faulty if fetch-failures >= max-allowed-failures boolean isMapFaulty = (failureRate >= MAX_ALLOWED_FETCH_FAILURES_FRACTION); if (fetchFailures >= MAX_FETCH_FAILURES_NOTIFICATIONS && isMapFaulty) { LOG.info("Too many fetch-failures for output of task attempt: " + mapId + " ... raising fetch failure to map"); job.eventHandler.handle(new TaskAttemptEvent(mapId, TaskAttemptEventType.TA_TOO_MANY_FETCH_FAILURE)); job.fetchFailuresMapping.remove(mapId); } } }