private static StubbedJob createStubbedJob(Configuration conf, Dispatcher dispatcher, int numSplits, AppContext appContext) { JobID jobID = JobID.forName("job_1234567890000_0001"); JobId jobId = TypeConverter.toYarn(jobID); if (appContext == null) { appContext = mock(AppContext.class); when(appContext.hasSuccessfullyUnregistered()).thenReturn(true); } StubbedJob job = new StubbedJob(jobId, ApplicationAttemptId.newInstance(ApplicationId.newInstance(0, 0), 0), conf,dispatcher.getEventHandler(), true, "somebody", numSplits, appContext); dispatcher.register(JobEventType.class, job); EventHandler mockHandler = mock(EventHandler.class); dispatcher.register(TaskEventType.class, mockHandler); dispatcher.register(org.apache.hadoop.mapreduce.jobhistory.EventType.class, mockHandler); dispatcher.register(JobFinishEvent.Type.class, mockHandler); return job; }
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { //set the finish time taskAttempt.setFinishTime(); taskAttempt.eventHandler.handle( createJobCounterUpdateEventTASucceeded(taskAttempt)); taskAttempt.logAttemptFinishedEvent(TaskAttemptStateInternal.SUCCEEDED); taskAttempt.eventHandler.handle(new TaskTAttemptEvent( taskAttempt.attemptId, TaskEventType.T_ATTEMPT_SUCCEEDED)); taskAttempt.eventHandler.handle (new SpeculatorEvent (taskAttempt.reportedStatus, taskAttempt.clock.getTime())); }
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { // set the finish time taskAttempt.setFinishTime(); if (taskAttempt.getLaunchTime() != 0) { taskAttempt.eventHandler .handle(createJobCounterUpdateEventTAFailed(taskAttempt, false)); TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(taskAttempt, TaskAttemptStateInternal.FAILED); taskAttempt.eventHandler.handle(new JobHistoryEvent( taskAttempt.attemptId.getTaskId().getJobId(), tauce)); // taskAttempt.logAttemptFinishedEvent(TaskAttemptStateInternal.FAILED); Not // handling failed map/reduce events. }else { LOG.debug("Not generating HistoryFinish event since start event not " + "generated for taskAttempt: " + taskAttempt.getID()); } taskAttempt.eventHandler.handle(new TaskTAttemptEvent( taskAttempt.attemptId, TaskEventType.T_ATTEMPT_FAILED)); }
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { // too many fetch failure can only happen for map tasks Preconditions .checkArgument(taskAttempt.getID().getTaskId().getTaskType() == TaskType.MAP); //add to diagnostic taskAttempt.addDiagnosticInfo("Too Many fetch failures.Failing the attempt"); if (taskAttempt.getLaunchTime() != 0) { taskAttempt.eventHandler .handle(createJobCounterUpdateEventTAFailed(taskAttempt, true)); TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(taskAttempt, TaskAttemptStateInternal.FAILED); taskAttempt.eventHandler.handle(new JobHistoryEvent( taskAttempt.attemptId.getTaskId().getJobId(), tauce)); }else { LOG.debug("Not generating HistoryFinish event since start event not " + "generated for taskAttempt: " + taskAttempt.getID()); } taskAttempt.eventHandler.handle(new TaskTAttemptEvent( taskAttempt.attemptId, TaskEventType.T_ATTEMPT_FAILED)); }
@SuppressWarnings("unchecked") @Override public KillTaskResponse killTask(KillTaskRequest request) throws IOException { TaskId taskId = request.getTaskId(); UserGroupInformation callerUGI = UserGroupInformation.getCurrentUser(); String message = "Kill task " + taskId + " received from " + callerUGI + " at " + Server.getRemoteAddress(); LOG.info(message); verifyAndGetTask(taskId, JobACL.MODIFY_JOB); appContext.getEventHandler().handle( new TaskEvent(taskId, TaskEventType.T_KILL)); KillTaskResponse response = recordFactory.newRecordInstance(KillTaskResponse.class); return response; }
@Test public void testJobError() throws Exception { MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true); Job job = app.submit(new Configuration()); app.waitForState(job, JobState.RUNNING); Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size()); Iterator<Task> it = job.getTasks().values().iterator(); Task task = it.next(); app.waitForState(task, TaskState.RUNNING); //send an invalid event on task at current state app.getContext().getEventHandler().handle( new TaskEvent( task.getID(), TaskEventType.T_SCHEDULE)); //this must lead to job error app.waitForState(job, JobState.ERROR); }
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { taskAttempt.setFinishTime(); taskAttempt.taskAttemptListener.unregister( taskAttempt.attemptId, taskAttempt.jvmID); taskAttempt.eventHandler.handle(new ContainerLauncherEvent( taskAttempt.attemptId, taskAttempt.getAssignedContainerID(), taskAttempt.getAssignedContainerMgrAddress(), taskAttempt.container.getContainerToken(), ContainerLauncher.EventType.CONTAINER_REMOTE_CLEANUP)); taskAttempt.eventHandler.handle(new TaskTAttemptEvent( taskAttempt.attemptId, TaskEventType.T_ATTEMPT_KILLED)); }
@SuppressWarnings("unchecked") private static void notifyTaskAttemptFailed(TaskAttemptImpl taskAttempt) { if (taskAttempt.getLaunchTime() == 0) { sendJHStartEventForAssignedFailTask(taskAttempt); } // set the finish time taskAttempt.setFinishTime(); taskAttempt.eventHandler .handle(createJobCounterUpdateEventTAFailed(taskAttempt, false)); TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(taskAttempt, TaskAttemptStateInternal.FAILED); taskAttempt.eventHandler.handle(new JobHistoryEvent( taskAttempt.attemptId.getTaskId().getJobId(), tauce)); taskAttempt.eventHandler.handle(new TaskTAttemptEvent( taskAttempt.attemptId, TaskEventType.T_ATTEMPT_FAILED)); }
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { // too many fetch failure can only happen for map tasks Preconditions .checkArgument(taskAttempt.getID().getTaskId().getTaskType() == TaskType.MAP); //add to diagnostic taskAttempt.addDiagnosticInfo("Too Many fetch failures.Failing the attempt"); //set the finish time taskAttempt.setFinishTime(); if (taskAttempt.getLaunchTime() != 0) { taskAttempt.eventHandler .handle(createJobCounterUpdateEventTAFailed(taskAttempt, true)); TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(taskAttempt, TaskAttemptStateInternal.FAILED); taskAttempt.eventHandler.handle(new JobHistoryEvent( taskAttempt.attemptId.getTaskId().getJobId(), tauce)); }else { LOG.debug("Not generating HistoryFinish event since start event not " + "generated for taskAttempt: " + taskAttempt.getID()); } taskAttempt.eventHandler.handle(new TaskTAttemptEvent( taskAttempt.attemptId, TaskEventType.T_ATTEMPT_FAILED)); }
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { //set the finish time taskAttempt.setFinishTime(); if (taskAttempt.getLaunchTime() != 0) { taskAttempt.eventHandler .handle(createJobCounterUpdateEventTAKilled(taskAttempt, false)); TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(taskAttempt, TaskAttemptStateInternal.KILLED); taskAttempt.eventHandler.handle(new JobHistoryEvent( taskAttempt.attemptId.getTaskId().getJobId(), tauce)); }else { LOG.debug("Not generating HistoryFinish event since start event not " + "generated for taskAttempt: " + taskAttempt.getID()); } // taskAttempt.logAttemptFinishedEvent(TaskAttemptStateInternal.KILLED); Not logging Map/Reduce attempts in case of failure. taskAttempt.eventHandler.handle(new TaskTAttemptEvent( taskAttempt.attemptId, TaskEventType.T_ATTEMPT_KILLED)); }
@Test /** * Kill map attempt for succeeded map task * {@link TaskState#SUCCEEDED}->{@link TaskState#SCHEDULED} */ public void testKillAttemptForSuccessfulTask() { LOG.info("--- START: testKillAttemptForSuccessfulTask ---"); mockTask = createMockTask(TaskType.MAP); TaskId taskId = getNewTaskID(); scheduleTaskAttempt(taskId); launchTaskAttempt(getLastAttempt().getAttemptId()); commitTaskAttempt(getLastAttempt().getAttemptId()); mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), TaskEventType.T_ATTEMPT_SUCCEEDED)); assertTaskSucceededState(); mockTask.handle( new TaskTAttemptKilledEvent(getLastAttempt().getAttemptId(), true)); assertEquals(TaskAttemptEventType.TA_RESCHEDULE, taskAttemptEventHandler.lastTaskAttemptEvent.getType()); assertTaskScheduledState(); }
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent evnt) { TaskAttemptContainerLaunchedEvent event = (TaskAttemptContainerLaunchedEvent) evnt; //set the launch time taskAttempt.launchTime = taskAttempt.clock.getTime(); taskAttempt.shufflePort = event.getShufflePort(); // register it to TaskAttemptListener so that it can start monitoring it. taskAttempt.taskAttemptListener .registerLaunchedTask(taskAttempt.attemptId, taskAttempt.jvmID); //TODO Resolve to host / IP in case of a local address. InetSocketAddress nodeHttpInetAddr = // TODO: Costly to create sock-addr? NetUtils.createSocketAddr(taskAttempt.container.getNodeHttpAddress()); taskAttempt.trackerName = nodeHttpInetAddr.getHostName(); taskAttempt.httpPort = nodeHttpInetAddr.getPort(); taskAttempt.sendLaunchedEvents(); taskAttempt.eventHandler.handle (new SpeculatorEvent (taskAttempt.attemptId, true, taskAttempt.clock.getTime())); //make remoteTask reference as null as it is no more needed //and free up the memory taskAttempt.remoteTask = null; //tell the Task that attempt has started taskAttempt.eventHandler.handle(new TaskTAttemptEvent( taskAttempt.attemptId, TaskEventType.T_ATTEMPT_LAUNCHED)); }
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { taskAttempt.eventHandler.handle(new TaskTAttemptEvent( taskAttempt.attemptId, TaskEventType.T_ATTEMPT_COMMIT_PENDING)); }
@SuppressWarnings("unchecked") @Override public TaskAttemptStateInternal transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { if(taskAttempt.getID().getTaskId().getTaskType() == TaskType.REDUCE) { // after a reduce task has succeeded, its outputs are in safe in HDFS. // logically such a task should not be killed. we only come here when // there is a race condition in the event queue. E.g. some logic sends // a kill request to this attempt when the successful completion event // for this task is already in the event queue. so the kill event will // get executed immediately after the attempt is marked successful and // result in this transition being exercised. // ignore this for reduce tasks LOG.info("Ignoring killed event for successful reduce task attempt" + taskAttempt.getID().toString()); return TaskAttemptStateInternal.SUCCEEDED; } if(event instanceof TaskAttemptKillEvent) { TaskAttemptKillEvent msgEvent = (TaskAttemptKillEvent) event; //add to diagnostic taskAttempt.addDiagnosticInfo(msgEvent.getMessage()); } // not setting a finish time since it was set on success assert (taskAttempt.getFinishTime() != 0); assert (taskAttempt.getLaunchTime() != 0); taskAttempt.eventHandler .handle(createJobCounterUpdateEventTAKilled(taskAttempt, true)); TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent( taskAttempt, TaskAttemptStateInternal.KILLED); taskAttempt.eventHandler.handle(new JobHistoryEvent(taskAttempt.attemptId .getTaskId().getJobId(), tauce)); taskAttempt.eventHandler.handle(new TaskTAttemptEvent( taskAttempt.attemptId, TaskEventType.T_ATTEMPT_KILLED)); return TaskAttemptStateInternal.KILLED; }
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { //set the finish time taskAttempt.setFinishTime(); if (taskAttempt.getLaunchTime() != 0) { taskAttempt.eventHandler .handle(createJobCounterUpdateEventTAKilled(taskAttempt, false)); TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(taskAttempt, TaskAttemptStateInternal.KILLED); taskAttempt.eventHandler.handle(new JobHistoryEvent( taskAttempt.attemptId.getTaskId().getJobId(), tauce)); }else { LOG.debug("Not generating HistoryFinish event since start event not " + "generated for taskAttempt: " + taskAttempt.getID()); } if (event instanceof TaskAttemptKillEvent) { taskAttempt.addDiagnosticInfo( ((TaskAttemptKillEvent) event).getMessage()); } // taskAttempt.logAttemptFinishedEvent(TaskAttemptStateInternal.KILLED); Not logging Map/Reduce attempts in case of failure. taskAttempt.eventHandler.handle(new TaskTAttemptEvent( taskAttempt.attemptId, TaskEventType.T_ATTEMPT_KILLED)); }
protected void scheduleTasks(Set<TaskId> taskIDs, boolean recoverTaskOutput) { for (TaskId taskID : taskIDs) { TaskInfo taskInfo = completedTasksFromPreviousRun.remove(taskID); if (taskInfo != null) { eventHandler.handle(new TaskRecoverEvent(taskID, taskInfo, committer, recoverTaskOutput)); } else { eventHandler.handle(new TaskEvent(taskID, TaskEventType.T_SCHEDULE)); } } }
@Override public void transition(JobImpl job, JobEvent event) { job.addDiagnostic(JOB_KILLED_DIAG); for (Task task : job.tasks.values()) { job.eventHandler.handle( new TaskEvent(task.getID(), TaskEventType.T_KILL)); } job.metrics.endRunningJob(job); }
protected void internalError(TaskEventType type) { LOG.error("Invalid event " + type + " on Task " + this.taskId); eventHandler.handle(new JobDiagnosticsUpdateEvent( this.taskId.getJobId(), "Invalid event " + type + " on Task " + this.taskId)); eventHandler.handle(new JobEvent(this.taskId.getJobId(), JobEventType.INTERNAL_ERROR)); }
@Override public void handle(TaskEvent event) { TaskId taskID = event.getTaskID(); Task task = myJob.getTask(taskID); Assert.assertEquals ("Wrong type event", TaskEventType.T_ADD_SPEC_ATTEMPT, event.getType()); System.out.println("SpeculationRequestEventHandler.handle adds a speculation task for " + taskID); addAttempt(task); }
@Test (timeout=10000) public void testFailAbortDoesntHang() throws IOException { Configuration conf = new Configuration(); conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir); conf.set(MRJobConfig.MR_AM_COMMITTER_CANCEL_TIMEOUT_MS, "1000"); DrainDispatcher dispatcher = new DrainDispatcher(); dispatcher.init(conf); dispatcher.start(); OutputCommitter committer = Mockito.mock(OutputCommitter.class); CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer); commitHandler.init(conf); commitHandler.start(); //Job has only 1 mapper task. No reducers conf.setInt(MRJobConfig.NUM_REDUCES, 0); conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 1); JobImpl job = createRunningStubbedJob(conf, dispatcher, 1, null); //Fail / finish all the tasks. This should land the JobImpl directly in the //FAIL_ABORT state for(Task t: job.tasks.values()) { TaskImpl task = (TaskImpl) t; task.handle(new TaskEvent(task.getID(), TaskEventType.T_SCHEDULE)); for(TaskAttempt ta: task.getAttempts().values()) { task.handle(new TaskTAttemptEvent(ta.getID(), TaskEventType.T_ATTEMPT_FAILED)); } } dispatcher.await(); //Verify abortJob is called once and the job failed Mockito.verify(committer, Mockito.timeout(2000).times(1)) .abortJob((JobContext) Mockito.any(), (State) Mockito.any()); assertJobState(job, JobStateInternal.FAILED); dispatcher.stop(); }
@Test public void testKillSuccessfulTask() { LOG.info("--- START: testKillSuccesfulTask ---"); mockTask = createMockTask(TaskType.MAP); TaskId taskId = getNewTaskID(); scheduleTaskAttempt(taskId); launchTaskAttempt(getLastAttempt().getAttemptId()); commitTaskAttempt(getLastAttempt().getAttemptId()); mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), TaskEventType.T_ATTEMPT_SUCCEEDED)); assertTaskSucceededState(); mockTask.handle(new TaskEvent(taskId, TaskEventType.T_KILL)); assertTaskSucceededState(); }
@Test public void testFailureDuringTaskAttemptCommit() { mockTask = createMockTask(TaskType.MAP); TaskId taskId = getNewTaskID(); scheduleTaskAttempt(taskId); launchTaskAttempt(getLastAttempt().getAttemptId()); updateLastAttemptState(TaskAttemptState.COMMIT_PENDING); commitTaskAttempt(getLastAttempt().getAttemptId()); // During the task attempt commit there is an exception which causes // the attempt to fail updateLastAttemptState(TaskAttemptState.FAILED); failRunningTaskAttempt(getLastAttempt().getAttemptId()); assertEquals(2, taskAttempts.size()); updateLastAttemptState(TaskAttemptState.SUCCEEDED); commitTaskAttempt(getLastAttempt().getAttemptId()); mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), TaskEventType.T_ATTEMPT_SUCCEEDED)); assertFalse("First attempt should not commit", mockTask.canCommit(taskAttempts.get(0).getAttemptId())); assertTrue("Second attempt should commit", mockTask.canCommit(getLastAttempt().getAttemptId())); assertTaskSucceededState(); }
private void runSpeculativeTaskAttemptSucceeds( TaskEventType firstAttemptFinishEvent) { TaskId taskId = getNewTaskID(); scheduleTaskAttempt(taskId); launchTaskAttempt(getLastAttempt().getAttemptId()); updateLastAttemptState(TaskAttemptState.RUNNING); // Add a speculative task attempt that succeeds mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), TaskEventType.T_ADD_SPEC_ATTEMPT)); launchTaskAttempt(getLastAttempt().getAttemptId()); commitTaskAttempt(getLastAttempt().getAttemptId()); mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), TaskEventType.T_ATTEMPT_SUCCEEDED)); // The task should now have succeeded assertTaskSucceededState(); // Now complete the first task attempt, after the second has succeeded mockTask.handle(new TaskTAttemptEvent(taskAttempts.get(0).getAttemptId(), firstAttemptFinishEvent)); // The task should still be in the succeeded state assertTaskSucceededState(); // The task should contain speculative a task attempt assertTaskAttemptAvataar(Avataar.SPECULATIVE); }