private void waitTillRunState(JobInfo jInfo, JobID jobID, JTProtocol remoteJTClient) throws Exception { int count = 0; while (jInfo != null && jInfo.getStatus().getRunState() != JobStatus.RUNNING) { UtilsForTests.waitFor(10000); count++; jInfo = remoteJTClient.getJobInfo(jobID); //If the count goes beyond 100 seconds, then break; This is to avoid //infinite loop. if (count > 10) { Assert.fail("job has not reached running state for more than" + "100 seconds. Failing at this point"); } } }
/**
 * Verifies the job summary information for a killed job: submits a sleep
 * job, kills it once it has started, checks that its final run state is
 * KILLED and then compares the job summary against the job info.
 */
@Test
public void testJobSummaryInfoOfKilledJob() throws IOException,
    InterruptedException {
  SleepJob job = new SleepJob();
  conf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
  job.setConf(conf);
  conf = job.setupJobConf(2, 1, 4000, 4000, 100, 100);
  JobConf jobConf = new JobConf(conf);
  RunningJob runJob = jobClient.submitJob(jobConf);
  JobID jobId = runJob.getID();
  Assert.assertTrue("Job has not been started for 1 min.",
      jtClient.isJobStarted(jobId));
  jobClient.killJob(jobId);
  LOG.info("Waiting till the job is completed...");
  Assert.assertTrue("Job has not been completed for 1 min.",
      jtClient.isJobStopped(jobId));
  JobInfo jInfo = remoteJTClient.getJobInfo(jobId);
  // Expected value first, so a failure reports expected/actual correctly.
  Assert.assertEquals("Job has not been killed",
      JobStatus.KILLED, jInfo.getStatus().getRunState());
  verifyJobSummaryInfo(jInfo, jobId);
}
/**
 * Compares the job summary fetched from the JobTracker client with the
 * values held by the given job info. Each summary entry is compared as a
 * string against the corresponding job info value.
 *
 * @param jInfo job info providing the expected values
 * @param id id of the job whose summary is fetched
 * @throws IOException declared for the summary lookup
 */
private void verifyJobSummaryInfo(JobInfo jInfo, JobID id)
    throws IOException {
  java.util.HashMap<String, String> summary = jtClient.getJobSummary(id);

  // Identity of the job.
  Assert.assertEquals("Job id has not been matched",
      id.toString(),
      summary.get("jobId"));
  Assert.assertEquals("User name has not been matched in JobSummary",
      jInfo.getStatus().getUsername(),
      summary.get("user"));

  // Timestamps.
  Assert.assertEquals("StartTime has not been matched in JobSummary",
      String.valueOf(jInfo.getStatus().getStartTime()),
      summary.get("startTime"));
  Assert.assertEquals("LaunchTime has not been matched in JobSummary",
      String.valueOf(jInfo.getLaunchTime()),
      summary.get("launchTime"));
  Assert.assertEquals("FinshedTime has not been matched in JobSummary",
      String.valueOf(jInfo.getFinishTime()),
      summary.get("finishTime"));

  // Task counts.
  Assert.assertEquals("Maps are not matched in Job summary",
      String.valueOf(jInfo.numMaps()),
      summary.get("numMaps"));
  Assert.assertEquals("Reducers are not matched in Job summary",
      String.valueOf(jInfo.numReduces()),
      summary.get("numReduces"));

  // Slot usage.
  Assert.assertEquals("Number of slots per map is not matched in Job summary",
      String.valueOf(jInfo.getNumSlotsPerMap()),
      summary.get("numSlotsPerMap"));
  Assert.assertEquals("Number of slots per reduce is not matched in Job summary",
      String.valueOf(jInfo.getNumSlotsPerReduce()),
      summary.get("numSlotsPerReduce"));
}
public JobID runSleepJob(boolean signalJob) throws Exception{ SleepJob job = new SleepJob(); job.setConf(conf); conf = job.setupJobConf(5, 1, 100, 5, 100, 5); JobConf jconf = new JobConf(conf); //Controls the job till all verification is done FinishTaskControlAction.configureControlActionForJob(conf); //Submitting the job RunningJob rJob = cluster.getJTClient().getClient().submitJob(jconf); JobID jobId = rJob.getID(); JobInfo jInfo = remoteJTClient.getJobInfo(jobId); LOG.info("jInfo is :" + jInfo); boolean jobStarted = cluster.getJTClient().isJobStarted(jobId); Assert.assertTrue("Job has not started even after a minute", jobStarted ); if(signalJob) { cluster.signalAllTasks(jobId); Assert.assertTrue("Job has not stopped yet", cluster.getJTClient().isJobStopped(jobId)); } return jobId; }
/**
 * Runs the high RAM sleep job through {@code ToolRunner} and verifies that
 * it succeeded.
 *
 * @param conf configuration for running the job
 * @param jobClient client used to look up the submitted job
 * @param remoteJTClient JobTracker proxy used to fetch the job info
 * @param assertMessage message reported when the job's run state is not
 *        SUCCEEDED
 * @return the job id of the high RAM job
 * @throws Exception is thrown when the method fails to run the high RAM job
 */
public JobID runHighRamJob(Configuration conf, JobClient jobClient,
    JTProtocol remoteJTClient, String assertMessage) throws Exception {
  SleepJob job = new SleepJob();
  String[] jobArgs = {
      "-D", "mapred.cluster.max.map.memory.mb=2048",
      "-D", "mapred.cluster.max.reduce.memory.mb=2048",
      "-D", "mapred.cluster.map.memory.mb=1024",
      "-D", "mapreduce.job.complete.cancel.delegation.tokens=false",
      "-D", "mapred.cluster.reduce.memory.mb=1024",
      "-m", "6",
      "-r", "2",
      "-mt", "2000",
      "-rt", "2000",
      "-recordt", "100"};
  JobConf jobConf = new JobConf(conf);
  jobConf.setMemoryForMapTask(2048);
  jobConf.setMemoryForReduceTask(2048);
  int exitCode = ToolRunner.run(jobConf, job, jobArgs);
  Assert.assertEquals("Exit Code:", 0, exitCode);
  UtilsForTests.waitFor(1000);

  // Fail with a readable message instead of an
  // ArrayIndexOutOfBoundsException when no job is listed.
  JobStatus[] allJobs = jobClient.getAllJobs();
  Assert.assertTrue("No jobs are listed after running the high ram job",
      allJobs != null && allJobs.length > 0);
  JobID jobId = allJobs[0].getJobID();
  JobInfo jInfo = remoteJTClient.getJobInfo(jobId);
  // Expected value first, so a failure reports expected/actual correctly.
  Assert.assertEquals(assertMessage,
      JobStatus.SUCCEEDED, jInfo.getStatus().getRunState());
  return jobId;
}
private TaskInfo getTaskInfoOfRunningStreamJob(JobID jobId) throws IOException { TaskInfo taskInfo = null; wovenClient = cluster.getJTClient().getProxy(); JobInfo jInfo = wovenClient.getJobInfo(jobId); JobStatus jobStatus = jInfo.getStatus(); // Make sure that map is running and start progress 10%. while (jobStatus.mapProgress() < 0.1f) { UtilsForTests.waitFor(100); jobStatus = wovenClient.getJobInfo(jobId).getStatus(); } TaskInfo[] taskInfos = wovenClient.getTaskInfo(jobId); for (TaskInfo taskinfo : taskInfos) { if (!taskinfo.isSetupOrCleanup()) { taskInfo = taskinfo; } } return taskInfo; }
/**
 * Verifies the job summary information for a failed job: submits a job
 * that uses {@code GenerateTaskChildProcess.FailMapper}, checks that its
 * final run state is FAILED and then compares the job summary against the
 * job info.
 */
@Test
public void testJobSummaryInfoOfFailedJob() throws IOException,
    InterruptedException {
  conf = remoteJTClient.getDaemonConf();
  conf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
  JobConf jobConf = new JobConf(conf);
  jobConf.setJobName("Fail Job");
  jobConf.setJarByClass(GenerateTaskChildProcess.class);
  jobConf.setMapperClass(GenerateTaskChildProcess.FailMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setNumMapTasks(1);
  jobConf.setNumReduceTasks(1);
  cleanup(outputDir, conf);
  FileInputFormat.setInputPaths(jobConf, inputDir);
  FileOutputFormat.setOutputPath(jobConf, outputDir);
  RunningJob runJob = jobClient.submitJob(jobConf);
  JobID jobId = runJob.getID();
  Assert.assertTrue("Job has not been started for 1 min.",
      jtClient.isJobStarted(jobId));
  LOG.info("Waiting till the job is completed...");
  Assert.assertTrue("Job has not been completed for 1 min.",
      jtClient.isJobStopped(jobId));
  JobInfo jInfo = remoteJTClient.getJobInfo(jobId);
  // Expected value first, so a failure reports expected/actual correctly.
  Assert.assertEquals("Job has not been failed",
      JobStatus.FAILED, jInfo.getStatus().getRunState());
  verifyJobSummaryInfo(jInfo, jobId);
}
/**
 * Submit the job in a queue other than "default" and verify the job
 * summary after the job is completed. The first queue reported by the job
 * client whose name is not "default" is used.
 */
@Test
public void testJobQueueInfoInJobSummary() throws IOException,
    InterruptedException {
  SleepJob job = new SleepJob();
  job.setConf(conf);
  conf = job.setupJobConf(2, 1, 4000, 4000, 100, 100);
  conf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
  JobConf jobConf = new JobConf(conf);

  JobQueueInfo[] queues = jobClient.getQueues();
  for (JobQueueInfo queueInfo : queues) {
    if (!queueInfo.getQueueName().equals("default")) {
      queueName = queueInfo.getQueueName();
      break;
    }
  }
  Assert.assertNotNull("No multiple queues in the cluster.", queueName);
  LOG.info("queueName:" + queueName);
  jobConf.setQueueName(queueName);

  RunningJob runJob = jobClient.submitJob(jobConf);
  JobID jobId = runJob.getID();
  Assert.assertTrue("Job has not been started for 1 min.",
      jtClient.isJobStarted(jobId));
  LOG.info("Waiting till the job is completed...");
  Assert.assertTrue("Job has not been completed for 1 min.",
      jtClient.isJobStopped(jobId));
  JobInfo jInfo = remoteJTClient.getJobInfo(jobId);
  // Expected value first, so a failure reports expected/actual correctly.
  Assert.assertEquals("Job has not been succeeded",
      JobStatus.SUCCEEDED, jInfo.getStatus().getRunState());
  verifyJobSummaryInfo(jInfo, jobId);
}
/**
 * Runs a high RAM job through {@code HighRamJobHelper} and checks that its
 * job summary matches the job info reported by the JobTracker.
 */
@Test
public void testJobSummaryInfoOfHighMemoryJob()
    throws IOException, Exception {
  final JobID highRamJobId = new HighRamJobHelper().runHighRamJob(
      conf, jobClient, remoteJTClient, "Job did not succeed");
  verifyJobSummaryInfo(remoteJTClient.getJobInfo(highRamJobId),
      highRamJobId);
}
/**
 * Submits a sleep job as the first user from the cluster's multi-user list
 * and verifies the job summary once the job has succeeded. With "simple"
 * authentication the user is created as a remote user, otherwise as a
 * proxy user of the login user.
 *
 * @throws Exception if the job cannot be submitted or queried
 */
@Test
public void testJobSummaryInfoForDifferentUser() throws Exception {
  UserGroupInformation proxyUGI;
  UserGroupInformation ugi = UserGroupInformation.getLoginUser();
  ArrayList<String> users = cluster.getHadoopMultiUsersList();
  Assert.assertTrue("proxy users are not found.", users.size() > 0);

  // Constant-first comparison with a "simple" default: an unset
  // authentication property no longer causes a NullPointerException.
  if ("simple".equals(conf.get("hadoop.security.authentication", "simple"))) {
    proxyUGI = UserGroupInformation.createRemoteUser(users.get(0));
  } else {
    proxyUGI = UserGroupInformation.createProxyUser(users.get(0), ugi);
  }

  SleepJob job = new SleepJob();
  job.setConf(conf);
  final JobConf jobConf = job.setupJobConf(2, 1, 2000, 2000, 100, 100);

  // Both the client creation and the submission run as the other user.
  final JobClient jClient =
      proxyUGI.doAs(new PrivilegedExceptionAction<JobClient>() {
        public JobClient run() throws IOException {
          return new JobClient(jobConf);
        }
      });
  RunningJob runJob =
      proxyUGI.doAs(new PrivilegedExceptionAction<RunningJob>() {
        public RunningJob run() throws IOException {
          return jClient.submitJob(jobConf);
        }
      });

  JobID jobId = runJob.getID();
  Assert.assertTrue("Job has not been started for 1 min.",
      jtClient.isJobStarted(jobId));
  LOG.info("Waiting till the job is completed...");
  Assert.assertTrue("Job has not been completed for 1 min.",
      jtClient.isJobStopped(jobId));
  JobInfo jInfo = remoteJTClient.getJobInfo(jobId);
  // Expected value first, so a failure reports expected/actual correctly.
  Assert.assertEquals("Job has not been succeeded",
      JobStatus.SUCCEEDED, jInfo.getStatus().getRunState());
  verifyJobSummaryInfo(jInfo, jobId);
}
/**
 * Set the invalid mapred local directory location and run the job.
 * Verify the job status. The test fails immediately when the daemon is
 * not configured with the LinuxTaskController.
 * @throws Exception - if an error occurs.
 */
@Test
public void testJobStatusForInvalidTaskControllerConf() throws Exception {
  conf = remoteJTClient.getDaemonConf();

  // Constant-first comparison: an unset task-controller property leads to
  // the assertion failure below instead of a NullPointerException.
  if (!"org.apache.hadoop.mapred.LinuxTaskController".equals(
      conf.get("mapred.task.tracker.task-controller"))) {
    Assert.fail("Linux Task controller not found.");
  }

  LOG.info("JobConf.MAPRED_LOCAL_DIR_PROPERTY:"
      + conf.get(JobConf.MAPRED_LOCAL_DIR_PROPERTY));
  // Append an extra local directory to the configured list.
  String mapredLocalDir =
      conf.get(JobConf.MAPRED_LOCAL_DIR_PROPERTY) + "," + "/mapred/local";
  String[] jobArgs = {
      "-D", "mapred.local.dir=" + mapredLocalDir,
      "-m", "1",
      "-r", "1",
      "-mt", "1000",
      "-rt", "1000",
      "-recordt", "100"};

  SleepJob job = new SleepJob();
  JobConf jobConf = new JobConf(conf);
  int exitStatus = ToolRunner.run(jobConf, job, jobArgs);
  Assert.assertEquals("Exit Code:", 0, exitStatus);
  UtilsForTests.waitFor(100);

  JobClient client = jtClient.getClient();
  JobID jobId = client.getAllJobs()[0].getJobID();
  LOG.info("JobId:" + jobId);
  if (jobId != null) {
    JobInfo jInfo = remoteJTClient.getJobInfo(jobId);
    // Expected value first, so a failure reports expected/actual correctly.
    Assert.assertEquals("Job has not been succeeded",
        JobStatus.SUCCEEDED, jInfo.getStatus().getRunState());
  }
}
/** * Verifying the running job status whether it succeeds or not * after failing some of its tasks. */ @Test public void testFailedTaskJobStatus() throws IOException, InterruptedException { conf = remoteJTClient.getDaemonConf(); TaskInfo taskInfo = null; SleepJob job = new SleepJob(); job.setConf(conf); JobConf jobConf = job.setupJobConf(1, 1, 10000, 4000, 100, 100); RunningJob runJob = jobClient.submitJob(jobConf); JobID jobId = runJob.getID(); JobInfo jInfo = remoteJTClient.getJobInfo(jobId); Assert.assertTrue("Job has not been started for 1 min.", jtClient.isJobStarted(jobId)); TaskInfo[] taskInfos = remoteJTClient.getTaskInfo(jobId); for (TaskInfo taskinfo : taskInfos) { if (!taskinfo.isSetupOrCleanup() && taskinfo.getTaskID().isMap()) { taskInfo = taskinfo; break; } } Assert.assertTrue("Task has not been started for 1 min.", jtClient.isTaskStarted(taskInfo)); // Fail the running task. NetworkedJob networkJob = jobClient.new NetworkedJob(jInfo.getStatus()); TaskID tID = TaskID.downgrade(taskInfo.getTaskID()); TaskAttemptID taskAttID = new TaskAttemptID(tID , 0); networkJob.killTask(taskAttID, true); LOG.info("Waiting till the job is completed..."); while (!jInfo.getStatus().isJobComplete()) { UtilsForTests.waitFor(100); jInfo = remoteJTClient.getJobInfo(jobId); } Assert.assertEquals("JobStatus", JobStatus.SUCCEEDED, jInfo.getStatus().getRunState()); }
public void checkTaskCompletionEvent (TaskAttemptID taskAttemptID, JobInfo jInfo) throws Exception { boolean match = false; int count = 0; while (!match) { TaskCompletionEvent[] taskCompletionEvents = jobClient.new NetworkedJob(jInfo.getStatus()).getTaskCompletionEvents(0); for (TaskCompletionEvent taskCompletionEvent : taskCompletionEvents) { LOG.info("taskCompletionEvent.getTaskAttemptId().toString() is : " + taskCompletionEvent.getTaskAttemptId().toString()); LOG.info("compared to taskAttemptID.toString() :" + taskAttemptID.toString()); if ((taskCompletionEvent.getTaskAttemptId().toString()). equals(taskAttemptID.toString())){ match = true; //Sleeping for 10 seconds giving time for the next task //attempt to run Thread.sleep(10000); break; } } if (!match) { LOG.info("Thread is sleeping for 10 seconds"); Thread.sleep(10000); count++; } //If the count goes beyond a point, then break; This is to avoid //infinite loop under unforeseen circumstances.Testcase will anyway //fail later. if (count > 10) { Assert.fail("Since the task attemptid is not appearing in the" + "TaskCompletionEvent, it seems this task attempt was not killed"); } } }
/**
 * Run the job with two distributed cache files and verify
 * whether job is succeeded or not.
 * @throws Exception if the job cannot be submitted or queried
 */
@Test
public void testCacheFilesLocalization() throws Exception {
  conf = wovenClient.getDaemonConf();
  SleepJob job = new SleepJob();
  job.setConf(conf);
  JobConf jobConf = job.setupJobConf(4, 1, 4000, 4000, 1000, 1000);
  DistributedCache.createSymlink(jobConf);
  DistributedCache.addCacheFile(cacheFileURI1, jobConf);
  DistributedCache.addCacheFile(cacheFileURI2, jobConf);
  RunningJob runJob = jobClient.submitJob(jobConf);
  JobID jobId = runJob.getID();
  Assert.assertTrue("Job has not been started for 1 min.",
      jtClient.isJobStarted(jobId));

  TaskInfo[] taskInfos = wovenClient.getTaskInfo(jobId);
  Assert.assertTrue("Cache File1 has not been localize",
      checkLocalization(taskInfos, cacheFile1));
  Assert.assertTrue("Cache File2 has not been localize",
      checkLocalization(taskInfos, cacheFile2));

  JobInfo jInfo = wovenClient.getJobInfo(jobId);
  LOG.info("Waiting till the job is completed...");
  while (!jInfo.getStatus().isJobComplete()) {
    UtilsForTests.waitFor(100);
    jInfo = wovenClient.getJobInfo(jobId);
  }
  // Expected value first, so a failure reports expected/actual correctly.
  Assert.assertEquals("Job has not been succeeded",
      JobStatus.SUCCEEDED, jInfo.getStatus().getRunState());
}
/**
 * Run the job with a distributed cache file and remove that cache
 * file from the DFS once it is localized. Verify whether the job
 * is failed or not.
 * @throws Exception if the job cannot be submitted or queried
 */
@Test
public void testDeleteCacheFileInDFSAfterLocalized() throws Exception {
  conf = wovenClient.getDaemonConf();
  SleepJob job = new SleepJob();
  job.setConf(conf);
  JobConf jobConf = job.setupJobConf(4, 1, 4000, 4000, 1000, 1000);
  cacheFileURI3 = createCacheFile(tmpFolderPath, cacheFile3);
  DistributedCache.createSymlink(jobConf);
  DistributedCache.addCacheFile(cacheFileURI3, jobConf);
  RunningJob runJob = jobClient.submitJob(jobConf);
  JobID jobId = runJob.getID();
  Assert.assertTrue("Job has not been started for 1 min.",
      jtClient.isJobStarted(jobId));

  TaskInfo[] taskInfos = wovenClient.getTaskInfo(jobId);
  boolean iscacheFileLocalized = checkLocalization(taskInfos, cacheFile3);
  Assert.assertTrue("CacheFile has not been localized",
      iscacheFileLocalized);
  // Remove the file from the DFS only after it was seen as localized.
  deleteCacheFile(new Path(tmpFolderPath, cacheFile3));

  JobInfo jInfo = wovenClient.getJobInfo(jobId);
  LOG.info("Waiting till the job is completed...");
  while (!jInfo.getStatus().isJobComplete()) {
    UtilsForTests.waitFor(100);
    jInfo = wovenClient.getJobInfo(jobId);
  }
  // Expected value first, so a failure reports expected/actual correctly.
  Assert.assertEquals("Job has not been failed",
      JobStatus.FAILED, jInfo.getStatus().getRunState());
}
/**
 * Submit a job and create folders and files in work folder with
 * non-writable permissions under task attempt id folder.
 * Kill the job and verify whether the files and folders
 * are cleaned up or not.
 * @throws IOException if the job or task information cannot be fetched
 */
@Test
public void testJobCleanupAfterJobKill() throws IOException {
  HashMap<TTClient, ArrayList<String>> map =
      new HashMap<TTClient, ArrayList<String>>();
  JobID jobId = createJobAndSubmit().getID();
  Assert.assertTrue("Job has not been started for 1 min",
      jtClient.isJobStarted(jobId));

  TaskInfo[] taskInfos = rtClient.getTaskInfo(jobId);
  for (TaskInfo taskinfo : taskInfos) {
    if (!taskinfo.isSetupOrCleanup()) {
      Assert.assertTrue("Task has not been started for 1 min ",
          jtClient.isTaskStarted(taskinfo));
      String tasktracker = getTaskTracker(taskinfo);
      Assert.assertNotNull("TaskTracker has not been found", tasktracker);
      // Record the directories under the client that was looked up for
      // this task's tracker, i.e. the value returned by getTTClient.
      TTClient ttclient = getTTClient(tasktracker);
      map.put(ttclient,
          getTTClientMapRedLocalDirs(ttclient, taskinfo, jobId));
    }
  }

  jtClient.getClient().killJob(jobId);
  LOG.info("Waiting till the job is completed...");
  Assert.assertTrue("Job has not been completed for 1 min",
      jtClient.isJobStopped(jobId));
  JobInfo jobInfo = rtClient.getJobInfo(jobId);
  // Expected value first, so a failure reports expected/actual correctly.
  Assert.assertEquals("Job has not been killed",
      JobStatus.KILLED, jobInfo.getStatus().getRunState());
  UtilsForTests.waitFor(3000);
  Assert.assertTrue("Job directories have not been cleaned up properly "
      + "after completion of job", verifyJobDirectoryCleanup(map));
}