@Test public void testJobsQueryStateNone() throws JSONException, Exception { WebResource r = resource(); ArrayList<JobState> JOB_STATES = new ArrayList<JobState>(Arrays.asList(JobState.values())); // find a state that isn't in use Map<JobId, Job> jobsMap = appContext.getAllJobs(); for (Map.Entry<JobId, Job> entry : jobsMap.entrySet()) { JOB_STATES.remove(entry.getValue().getState()); } assertTrue("No unused job states", JOB_STATES.size() > 0); JobState notInUse = JOB_STATES.get(0); ClientResponse response = r.path("ws").path("v1").path("history") .path("mapreduce").path("jobs").queryParam("state", notInUse.toString()) .accept(MediaType.APPLICATION_JSON).get(ClientResponse.class); assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType()); JSONObject json = response.getEntity(JSONObject.class); assertEquals("incorrect number of elements", 1, json.length()); assertEquals("jobs is not null", JSONObject.NULL, json.get("jobs")); }
@Override public JobState getState() { readLock.lock(); try { JobState state = getExternalState(getInternalState()); if (!appContext.hasSuccessfullyUnregistered() && (state == JobState.SUCCEEDED || state == JobState.FAILED || state == JobState.KILLED || state == JobState.ERROR)) { return lastNonFinalState; } else { return state; } } finally { readLock.unlock(); } }
private JobState getExternalState(JobStateInternal smState) { switch (smState) { case KILL_WAIT: case KILL_ABORT: return JobState.KILLED; case SETUP: case COMMITTING: return JobState.RUNNING; case FAIL_WAIT: case FAIL_ABORT: return JobState.FAILED; case REBOOT: if (appContext.isLastAMRetry()) { return JobState.ERROR; } else { // In case of not last retry, return the external state as RUNNING since // otherwise JobClient will exit when it polls the AM for job state return JobState.RUNNING; } default: return JobState.valueOf(smState.name()); } }
@Test public void testJobError() throws Exception { MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true); Job job = app.submit(new Configuration()); app.waitForState(job, JobState.RUNNING); Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size()); Iterator<Task> it = job.getTasks().values().iterator(); Task task = it.next(); app.waitForState(task, TaskState.RUNNING); //send an invalid event on task at current state app.getContext().getEventHandler().handle( new TaskEvent( task.getID(), TaskEventType.T_SCHEDULE)); //this must lead to job error app.waitForState(job, JobState.ERROR); }
@Test public void testJobRebootNotLastRetryOnUnregistrationFailure() throws Exception { MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true); Job job = app.submit(new Configuration()); app.waitForState(job, JobState.RUNNING); Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size()); Iterator<Task> it = job.getTasks().values().iterator(); Task task = it.next(); app.waitForState(task, TaskState.RUNNING); //send an reboot event app.getContext().getEventHandler().handle(new JobEvent(job.getID(), JobEventType.JOB_AM_REBOOT)); // return exteranl state as RUNNING since otherwise the JobClient will // prematurely exit. app.waitForState(job, JobState.RUNNING); }
@Test public void testJobRebootOnLastRetryOnUnregistrationFailure() throws Exception { // make startCount as 2 since this is last retry which equals to // DEFAULT_MAX_AM_RETRY // The last param mocks the unregistration failure MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true, 2, false); Configuration conf = new Configuration(); Job job = app.submit(conf); app.waitForState(job, JobState.RUNNING); Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size()); Iterator<Task> it = job.getTasks().values().iterator(); Task task = it.next(); app.waitForState(task, TaskState.RUNNING); //send an reboot event app.getContext().getEventHandler().handle(new JobEvent(job.getID(), JobEventType.JOB_AM_REBOOT)); app.waitForInternalState((JobImpl) job, JobStateInternal.REBOOT); // return exteranl state as RUNNING if this is the last retry while // unregistration fails app.waitForState(job, JobState.RUNNING); }
private void testMRAppHistory(MRApp app) throws Exception { Configuration conf = new Configuration(); Job job = app.submit(conf); app.waitForState(job, JobState.FAILED); Map<TaskId, Task> tasks = job.getTasks(); Assert.assertEquals("Num tasks is not correct", 1, tasks.size()); Task task = tasks.values().iterator().next(); Assert.assertEquals("Task state not correct", TaskState.FAILED, task .getReport().getTaskState()); Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next() .getAttempts(); Assert.assertEquals("Num attempts is not correct", 4, attempts.size()); Iterator<TaskAttempt> it = attempts.values().iterator(); TaskAttemptReport report = it.next().getReport(); Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED, report.getTaskAttemptState()); Assert.assertEquals("Diagnostic Information is not Correct", "Test Diagnostic Event", report.getDiagnosticInfo()); report = it.next().getReport(); Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED, report.getTaskAttemptState()); }
@Test public void testTransitionsAtFailed() throws IOException { Configuration conf = new Configuration(); AsyncDispatcher dispatcher = new AsyncDispatcher(); dispatcher.init(conf); dispatcher.start(); OutputCommitter committer = mock(OutputCommitter.class); doThrow(new IOException("forcefail")) .when(committer).setupJob(any(JobContext.class)); CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer); commitHandler.init(conf); commitHandler.start(); AppContext mockContext = mock(AppContext.class); when(mockContext.hasSuccessfullyUnregistered()).thenReturn(false); JobImpl job = createStubbedJob(conf, dispatcher, 2, mockContext); JobId jobId = job.getID(); job.handle(new JobEvent(jobId, JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); job.handle(new JobStartEvent(jobId)); assertJobState(job, JobStateInternal.FAILED); job.handle(new JobEvent(jobId, JobEventType.JOB_TASK_COMPLETED)); assertJobState(job, JobStateInternal.FAILED); job.handle(new JobEvent(jobId, JobEventType.JOB_TASK_ATTEMPT_COMPLETED)); assertJobState(job, JobStateInternal.FAILED); job.handle(new JobEvent(jobId, JobEventType.JOB_MAP_TASK_RESCHEDULED)); assertJobState(job, JobStateInternal.FAILED); job.handle(new JobEvent(jobId, JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE)); assertJobState(job, JobStateInternal.FAILED); Assert.assertEquals(JobState.RUNNING, job.getState()); when(mockContext.hasSuccessfullyUnregistered()).thenReturn(true); Assert.assertEquals(JobState.FAILED, job.getState()); dispatcher.stop(); commitHandler.stop(); }
private static void completeJobTasks(JobImpl job) { // complete the map tasks and the reduce tasks so we start committing int numMaps = job.getTotalMaps(); for (int i = 0; i < numMaps; ++i) { job.handle(new JobTaskEvent( MRBuilderUtils.newTaskId(job.getID(), 1, TaskType.MAP), TaskState.SUCCEEDED)); Assert.assertEquals(JobState.RUNNING, job.getState()); } int numReduces = job.getTotalReduces(); for (int i = 0; i < numReduces; ++i) { job.handle(new JobTaskEvent( MRBuilderUtils.newTaskId(job.getID(), 1, TaskType.MAP), TaskState.SUCCEEDED)); Assert.assertEquals(JobState.RUNNING, job.getState()); } }
public void waitForState(Job job, JobState finalState) throws Exception { int timeoutSecs = 0; JobReport report = job.getReport(); while (!finalState.equals(report.getJobState()) && timeoutSecs++ < 20) { System.out.println("Job State is : " + report.getJobState() + " Waiting for state : " + finalState + " map progress : " + report.getMapProgress() + " reduce progress : " + report.getReduceProgress()); report = job.getReport(); Thread.sleep(500); } System.out.println("Job State is : " + report.getJobState()); Assert.assertEquals("Job state is not correct (timedout)", finalState, job.getState()); }
@Test //First attempt is failed and second attempt is passed //The job succeeds. public void testFailTask() throws Exception { MRApp app = new MockFirstFailingAttemptMRApp(1, 0); Configuration conf = new Configuration(); // this test requires two task attempts, but uberization overrides max to 1 conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false); Job job = app.submit(conf); app.waitForState(job, JobState.SUCCEEDED); Map<TaskId,Task> tasks = job.getTasks(); Assert.assertEquals("Num tasks is not correct", 1, tasks.size()); Task task = tasks.values().iterator().next(); Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED, task.getReport().getTaskState()); Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next().getAttempts(); Assert.assertEquals("Num attempts is not correct", 2, attempts.size()); //one attempt must be failed //and another must have succeeded Iterator<TaskAttempt> it = attempts.values().iterator(); Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED, it.next().getReport().getTaskAttemptState()); Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED, it.next().getReport().getTaskAttemptState()); }
@Test //All Task attempts are timed out, leading to Job failure public void testTimedOutTask() throws Exception { MRApp app = new TimeOutTaskMRApp(1, 0); Configuration conf = new Configuration(); int maxAttempts = 2; conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, maxAttempts); // disable uberization (requires entire job to be reattempted, so max for // subtask attempts is overridden to 1) conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false); Job job = app.submit(conf); app.waitForState(job, JobState.FAILED); Map<TaskId,Task> tasks = job.getTasks(); Assert.assertEquals("Num tasks is not correct", 1, tasks.size()); Task task = tasks.values().iterator().next(); Assert.assertEquals("Task state not correct", TaskState.FAILED, task.getReport().getTaskState()); Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next().getAttempts(); Assert.assertEquals("Num attempts is not correct", maxAttempts, attempts.size()); for (TaskAttempt attempt : attempts.values()) { Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED, attempt.getReport().getTaskAttemptState()); } }
@Test(timeout = 20000) public void testComponentStopOrder() throws Exception { @SuppressWarnings("resource") TestMRApp app = new TestMRApp(1, 1, true, this.getClass().getName(), true); JobImpl job = (JobImpl) app.submit(new Configuration()); app.waitForState(job, JobState.SUCCEEDED); app.verifyCompleted(); int waitTime = 20 * 1000; while (waitTime > 0 && app.numStops < 2) { Thread.sleep(100); waitTime -= 100; } // assert JobHistoryEventHandlerStopped and then clientServiceStopped Assert.assertEquals(1, app.JobHistoryEventHandlerStopped); Assert.assertEquals(2, app.clientServiceStopped); }
@Test public void testNotificationOnLastRetryNormalShutdown() throws Exception { HttpServer2 server = startHttpServer(); // Act like it is the second attempt. Default max attempts is 2 MRApp app = spy(new MRAppWithCustomContainerAllocator( 2, 2, true, this.getClass().getName(), true, 2, true)); doNothing().when(app).sysexit(); JobConf conf = new JobConf(); conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL, JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus"); JobImpl job = (JobImpl)app.submit(conf); app.waitForInternalState(job, JobStateInternal.SUCCEEDED); // Unregistration succeeds: successfullyUnregistered is set app.shutDownJob(); Assert.assertTrue(app.isLastAMRetry()); Assert.assertEquals(1, JobEndServlet.calledTimes); Assert.assertEquals("jobid=" + job.getID() + "&status=SUCCEEDED", JobEndServlet.requestUri.getQuery()); Assert.assertEquals(JobState.SUCCEEDED.toString(), JobEndServlet.foundJobState); server.stop(); }
@Test public void testAbsentNotificationOnNotLastRetryUnregistrationFailure() throws Exception { HttpServer2 server = startHttpServer(); MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false, this.getClass().getName(), true, 1, false)); doNothing().when(app).sysexit(); JobConf conf = new JobConf(); conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL, JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus"); JobImpl job = (JobImpl)app.submit(conf); app.waitForState(job, JobState.RUNNING); app.getContext().getEventHandler() .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT)); app.waitForInternalState(job, JobStateInternal.REBOOT); // Now shutdown. // Unregistration fails: isLastAMRetry is recalculated, this is not app.shutDownJob(); // Not the last AM attempt. So user should that the job is still running. app.waitForState(job, JobState.RUNNING); Assert.assertFalse(app.isLastAMRetry()); Assert.assertEquals(0, JobEndServlet.calledTimes); Assert.assertNull(JobEndServlet.requestUri); Assert.assertNull(JobEndServlet.foundJobState); server.stop(); }
@Test(timeout=20000) public void testStagingCleanupOrder() throws Exception { MRAppTestCleanup app = new MRAppTestCleanup(1, 1, true, this.getClass().getName(), true); JobImpl job = (JobImpl)app.submit(new Configuration()); app.waitForState(job, JobState.SUCCEEDED); app.verifyCompleted(); int waitTime = 20 * 1000; while (waitTime > 0 && app.numStops < 2) { Thread.sleep(100); waitTime -= 100; } // assert ContainerAllocatorStopped and then tagingDirCleanedup Assert.assertEquals(1, app.ContainerAllocatorStopped); Assert.assertEquals(2, app.stagingDirCleanedup); }
public ClientServiceDelegate(Configuration conf, ResourceMgrDelegate rm, JobID jobId, MRClientProtocol historyServerProxy) { this.conf = new Configuration(conf); // Cloning for modifying. // For faster redirects from AM to HS. this.conf.setInt( CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, this.conf.getInt(MRJobConfig.MR_CLIENT_TO_AM_IPC_MAX_RETRIES, MRJobConfig.DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES)); this.conf.setInt( CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, this.conf.getInt(MRJobConfig.MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS, MRJobConfig.DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS)); this.rm = rm; this.jobId = jobId; this.historyServerProxy = historyServerProxy; this.appId = TypeConverter.toYarn(jobId).getAppId(); notRunningJobs = new HashMap<JobState, HashMap<String, NotRunningJob>>(); }
private NotRunningJob getNotRunningJob(ApplicationReport applicationReport, JobState state) { synchronized (notRunningJobs) { HashMap<String, NotRunningJob> map = notRunningJobs.get(state); if (map == null) { map = new HashMap<String, NotRunningJob>(); notRunningJobs.put(state, map); } String user = (applicationReport == null) ? UNKNOWN_USER : applicationReport.getUser(); NotRunningJob notRunningJob = map.get(user); if (notRunningJob == null) { notRunningJob = new NotRunningJob(applicationReport, state); map.put(user, notRunningJob); } return notRunningJob; } }
@Override public GetJobReportResponse getJobReport(GetJobReportRequest request) throws IOException { amContact = true; JobReport jobReport = recordFactory.newRecordInstance(JobReport.class); jobReport.setJobId(request.getJobId()); jobReport.setJobState(JobState.RUNNING); jobReport.setJobName("TestClientRedirect-jobname"); jobReport.setUser("TestClientRedirect-user"); jobReport.setStartTime(0L); jobReport.setFinishTime(1L); GetJobReportResponse response = recordFactory .newRecordInstance(GetJobReportResponse.class); response.setJobReport(jobReport); return response; }
public static int fromYarn(JobState state) { switch (state) { case NEW: case INITED: return org.apache.hadoop.mapred.JobStatus.PREP; case RUNNING: return org.apache.hadoop.mapred.JobStatus.RUNNING; case KILLED: return org.apache.hadoop.mapred.JobStatus.KILLED; case SUCCEEDED: return org.apache.hadoop.mapred.JobStatus.SUCCEEDED; case FAILED: case ERROR: return org.apache.hadoop.mapred.JobStatus.FAILED; } throw new YarnRuntimeException("Unrecognized job state: " + state); }
public static JobReport newJobReport(JobId jobId, String jobName, String userName, JobState state, long submitTime, long startTime, long finishTime, float setupProgress, float mapProgress, float reduceProgress, float cleanupProgress, String jobFile, List<AMInfo> amInfos, boolean isUber, String diagnostics) { JobReport report = Records.newRecord(JobReport.class); report.setJobId(jobId); report.setJobName(jobName); report.setUser(userName); report.setJobState(state); report.setSubmitTime(submitTime); report.setStartTime(startTime); report.setFinishTime(finishTime); report.setSetupProgress(setupProgress); report.setCleanupProgress(cleanupProgress); report.setMapProgress(mapProgress); report.setReduceProgress(reduceProgress); report.setJobFile(jobFile); report.setAMInfos(amInfos); report.setIsUber(isUber); report.setDiagnostics(diagnostics); return report; }
@Test public void testEnums() throws Exception { for (YarnApplicationState applicationState : YarnApplicationState.values()) { TypeConverter.fromYarn(applicationState, FinalApplicationStatus.FAILED); } // ad hoc test of NEW_SAVING, which is newly added Assert.assertEquals(State.PREP, TypeConverter.fromYarn( YarnApplicationState.NEW_SAVING, FinalApplicationStatus.FAILED)); for (TaskType taskType : TaskType.values()) { TypeConverter.fromYarn(taskType); } for (JobState jobState : JobState.values()) { TypeConverter.fromYarn(jobState); } for (QueueState queueState : QueueState.values()) { TypeConverter.fromYarn(queueState); } for (TaskState taskState : TaskState.values()) { TypeConverter.fromYarn(taskState); } }
@Test public void testFromYarnJobReport() throws Exception { int jobStartTime = 612354; int jobFinishTime = 612355; JobState state = JobState.RUNNING; JobId jobId = Records.newRecord(JobId.class); JobReport jobReport = Records.newRecord(JobReport.class); ApplicationId applicationId = ApplicationId.newInstance(0, 0); jobId.setAppId(applicationId); jobId.setId(0); jobReport.setJobId(jobId); jobReport.setJobState(state); jobReport.setStartTime(jobStartTime); jobReport.setFinishTime(jobFinishTime); jobReport.setUser("TestTypeConverter-user"); JobStatus jobStatus = TypeConverter.fromYarn(jobReport, "dummy-jobfile"); Assert.assertEquals(jobStartTime, jobStatus.getStartTime()); Assert.assertEquals(jobFinishTime, jobStatus.getFinishTime()); Assert.assertEquals(state.toString(), jobStatus.getState().toString()); }
private Job getJob() { Job job = mock(Job.class); JobId jobId = new JobIdPBImpl(); ApplicationId appId = ApplicationIdPBImpl.newInstance(System.currentTimeMillis(),4); jobId.setAppId(appId); jobId.setId(1); when(job.getID()).thenReturn(jobId); JobReport report = mock(JobReport.class); when(report.getStartTime()).thenReturn(100010L); when(report.getFinishTime()).thenReturn(100015L); when(job.getReport()).thenReturn(report); when(job.getName()).thenReturn("JobName"); when(job.getUserName()).thenReturn("UserName"); when(job.getQueueName()).thenReturn("QueueName"); when(job.getState()).thenReturn(JobState.SUCCEEDED); when(job.getTotalMaps()).thenReturn(3); when(job.getCompletedMaps()).thenReturn(2); when(job.getTotalReduces()).thenReturn(2); when(job.getCompletedReduces()).thenReturn(1); when(job.getCompletedReduces()).thenReturn(1); return job; }
@Test (timeout=100000) public void testCompletedJob() throws Exception { HistoryFileInfo info = mock(HistoryFileInfo.class); when(info.getConfFile()).thenReturn(fullConfPath); //Re-initialize to verify the delayed load. completedJob = new CompletedJob(conf, jobId, fullHistoryPath, loadTasks, "user", info, jobAclsManager); //Verify tasks loaded based on loadTask parameter. assertEquals(loadTasks, completedJob.tasksLoaded.get()); assertEquals(1, completedJob.getAMInfos().size()); assertEquals(10, completedJob.getCompletedMaps()); assertEquals(1, completedJob.getCompletedReduces()); assertEquals(12, completedJob.getTasks().size()); //Verify tasks loaded at this point. assertEquals(true, completedJob.tasksLoaded.get()); assertEquals(10, completedJob.getTasks(TaskType.MAP).size()); assertEquals(2, completedJob.getTasks(TaskType.REDUCE).size()); assertEquals("user", completedJob.getUserName()); assertEquals(JobState.SUCCEEDED, completedJob.getState()); JobReport jobReport = completedJob.getReport(); assertEquals("user", jobReport.getUser()); assertEquals(JobState.SUCCEEDED, jobReport.getJobState()); }