private void waitTillRunState(JobInfo jInfo, JobID jobID,
    JTProtocol remoteJTClient) throws Exception {
  int count = 0;
  while (jInfo != null
      && jInfo.getStatus().getRunState() != JobStatus.RUNNING) {
    UtilsForTests.waitFor(10000);
    count++;
    jInfo = remoteJTClient.getJobInfo(jobID);
    // If the wait exceeds 100 seconds (10 polls of 10 seconds each),
    // break out to avoid an infinite loop.
    if (count > 10) {
      Assert.fail("Job has not reached the running state for more than "
          + "100 seconds. Failing at this point.");
    }
  }
}
/**
 * Blacklist more than 25% of the task trackers, run the high-RAM
 * job and make sure that no exception is thrown.
 * @throws Exception if it fails to blacklist a TT or to run the high-RAM job
 */
@Test
public void testHiRamJobBlackListedTaskTrackers() throws Exception {
  final HighRamJobHelper hRamHelper = new HighRamJobHelper();
  List<TTClient> bListedTT = new ArrayList<TTClient>();
  List<TTClient> tClient = cluster.getTTClients();
  int count = tClient.size();
  int moreThan25Per = count / 4 + 1;
  LOG.info("More than 25% of TTClients is " + moreThan25Per);
  for (int i = 0; i < moreThan25Per; ++i) {
    TTClient client = tClient.get(i);
    bListedTT.add(client);
    blackListTT(client);
  }
  // Now run the high-RAM job.
  JobClient jobClient = cluster.getJTClient().getClient();
  JTProtocol remoteJTClient = cluster.getJTClient().getProxy();
  Configuration conf = remoteJTClient.getDaemonConf();
  hRamHelper.runHighRamJob(conf, jobClient, remoteJTClient,
      "Job did not succeed");
  // Put the blacklisted task trackers back in a healthy state.
  for (int i = 0; i < bListedTT.size(); ++i) {
    unBlackListTT(bListedTT.get(i));
  }
}
/**
 * Runs the high-RAM job.
 * @param conf configuration for running the job
 * @param jobClient instance
 * @param remoteJTClient instance
 * @param assertMessage message used if the job does not succeed
 * @return the job id of the high-RAM job
 * @throws Exception if the method fails to run the high-RAM job
 */
public JobID runHighRamJob(Configuration conf, JobClient jobClient,
    JTProtocol remoteJTClient, String assertMessage) throws Exception {
  SleepJob job = new SleepJob();
  String jobArgs[] = {
      "-D", "mapred.cluster.max.map.memory.mb=2048",
      "-D", "mapred.cluster.max.reduce.memory.mb=2048",
      "-D", "mapred.cluster.map.memory.mb=1024",
      "-D", "mapreduce.job.complete.cancel.delegation.tokens=false",
      "-D", "mapred.cluster.reduce.memory.mb=1024",
      "-m", "6",
      "-r", "2",
      "-mt", "2000",
      "-rt", "2000",
      "-recordt", "100" };
  JobConf jobConf = new JobConf(conf);
  jobConf.setMemoryForMapTask(2048);
  jobConf.setMemoryForReduceTask(2048);
  int exitCode = ToolRunner.run(jobConf, job, jobArgs);
  Assert.assertEquals("Exit Code:", 0, exitCode);
  UtilsForTests.waitFor(1000);
  JobID jobId = jobClient.getAllJobs()[0].getJobID();
  JobInfo jInfo = remoteJTClient.getJobInfo(jobId);
  // assertEquals takes (message, expected, actual).
  Assert.assertEquals(assertMessage, JobStatus.SUCCEEDED,
      jInfo.getStatus().getRunState());
  return jobId;
}
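// A minimal usage sketch for runHighRamJob, mirroring how the blacklisting
// test above invokes it. `cluster` is assumed to be an already-initialized
// test cluster; the wrapper method name below is hypothetical.
private JobID runHighRamJobExample() throws Exception {
  JobClient jobClient = cluster.getJTClient().getClient();
  JTProtocol remoteJTClient = cluster.getJTClient().getProxy();
  Configuration conf = remoteJTClient.getDaemonConf();
  HighRamJobHelper helper = new HighRamJobHelper();
  return helper.runHighRamJob(conf, jobClient, remoteJTClient,
      "High-RAM job did not succeed");
}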
private TaskInfo getTaskInfo(JobID jobId, boolean isMap)
    throws IOException {
  JTProtocol wovenClient = cluster.getJTClient().getProxy();
  TaskInfo[] taskInfos = wovenClient.getTaskInfo(jobId);
  for (TaskInfo taskinfo : taskInfos) {
    // Skip setup/cleanup tasks; return the first map (or reduce) task.
    if (!taskinfo.isSetupOrCleanup()
        && taskinfo.getTaskID().isMap() == isMap) {
      return taskinfo;
    }
  }
  return null;
}
private void runAndVerify(Configuration job, Tool tool, String[] args)
    throws Exception {
  JTProtocol wovenClient = cluster.getJTClient().getProxy();
  JobStatus[] jobStatus = null;
  JobID id = null;
  RunningJob rJob = null;
  JobInfo jInfo = null;
  // Record the number of jobs submitted before the new job goes in.
  jobStatus = client.getAllJobs();
  int prevJobsNum = jobStatus.length;
  // Run RandomWriter.
  Assert.assertEquals(0, ToolRunner.run(job, tool, args));
  // Wait for the job to appear in the job status list.
  jobStatus = client.getAllJobs();
  while (jobStatus.length - prevJobsNum == 0) {
    LOG.info("Waiting for the job to appear in the jobStatus");
    Thread.sleep(1000);
    jobStatus = client.getAllJobs();
  }
  // Get the id of the just-submitted job; it is always added in the
  // first slot of jobStatus.
  id = jobStatus[0].getJobID();
  rJob = client.getJob(id);
  jInfo = wovenClient.getJobInfo(id);
  // Wait until the job is complete.
  while (jInfo != null && !jInfo.getStatus().isJobComplete()) {
    Thread.sleep(10000);
    jInfo = wovenClient.getJobInfo(id);
  }
  cluster.getJTClient().verifyCompletedJob(id);
}
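// A minimal usage sketch of runAndVerify with RandomWriter, following the
// same argument shape the corrupted-disk test below uses. `conf` and
// `inputDir` are assumed to be the test class's existing fields.
String[] rwArgs = { inputDir.toString() };
runAndVerify(conf, new RandomWriter(), rwArgs);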
/**
 * Tests job execution with a corrupted disk: even if a configured disk
 * does not exist, the job should still run successfully.
 */
@Test
public void testCorruptedDiskJob() throws Exception {
  // Scale down the default settings for RandomWriter for the test-case.
  // Generates NUM_HADOOP_SLAVES * RW_MAPS_PER_HOST * RW_BYTES_PER_MAP bytes.
  conf.setInt("test.randomwrite.bytes_per_map", RW_BYTES_PER_MAP);
  conf.setInt("test.randomwriter.maps_per_host", RW_MAPS_PER_HOST);
  String[] rwArgs = { inputDir.toString() };
  JTProtocol remoteJTClient = cluster.getJTClient().getProxy();
  JobInfo jInfo = null;
  dfs.delete(inputDir, true);
  // Run RandomWriter.
  Assert.assertEquals(0, ToolRunner.run(conf, new RandomWriter(), rwArgs));
  jobStatus = client.getAllJobs();
  // Get the id of the just-submitted job.
  JobID id = jobStatus[0].getJobID();
  LOG.info("jobid is :" + id.toString());
  Assert.assertTrue("Failed to complete the job",
      cluster.getJTClient().isJobStopped(id));
  jInfo = remoteJTClient.getJobInfo(id);
  JobStatus jStatus = jInfo.getStatus();
  if (jStatus != null) {
    Assert.assertEquals("Job has not succeeded...",
        JobStatus.SUCCEEDED, jStatus.getRunState());
  }
}
/**
 * Increase the memory limit for the map task and verify that the
 * task tracker logs the process tree status before killing the task.
 * @throws IOException if an I/O error occurs.
 */
@Test
public void testStreamingJobProcTreeCleanOfMapTask() throws IOException {
  String runtimeArgs[] = {
      "-D", "mapred.job.name=ProcTreeStreamJob",
      "-D", "mapred.map.tasks=1",
      "-D", "mapred.reduce.tasks=0",
      "-D", "mapred.map.max.attempts=1",
      "-D", "mapred.cluster.max.map.memory.mb=2048",
      "-D", "mapred.cluster.reduce.memory.mb=1024",
      "-D", "mapred.cluster.max.reduce.memory.mb=2048",
      "-D", "mapred.cluster.map.memory.mb=1024",
      "-D", "mapred.job.map.memory.mb=512" };
  String[] otherArgs = new String[] {
      "-input", inputDir.toString(),
      "-output", outputDir.toString(),
      "-mapper", "ProcessTree.sh" };
  JobID jobId = getJobId(runtimeArgs, otherArgs);
  LOG.info("Job ID:" + jobId);
  if (jobId == null) {
    jobId = getJobId(runtimeArgs, otherArgs);
  }
  Assert.assertNotNull("Job ID not found for 1 min", jobId);
  Assert.assertTrue("Job has not been started for 1 min.",
      cluster.getJTClient().isJobStarted(jobId));
  TaskInfo taskInfo = getTaskInfo(jobId, true);
  Assert.assertNotNull("TaskInfo is null", taskInfo);
  Assert.assertTrue("Task has not been started for 1 min.",
      cluster.getJTClient().isTaskStarted(taskInfo));
  JTProtocol wovenClient = cluster.getJTClient().getProxy();
  int counter = 0;
  TaskInfo tempTaskInfo;
  // Poll until the task leaves the RUNNING state (or 60 attempts pass),
  // keeping the last non-null snapshot of the task info.
  while (counter++ < 60) {
    if (taskInfo.getTaskStatus().length == 0
        || taskInfo.getTaskStatus()[0].getRunState()
            == TaskStatus.State.RUNNING) {
      UtilsForTests.waitFor(1000);
      tempTaskInfo = taskInfo;
      taskInfo = wovenClient.getTaskInfo(taskInfo.getTaskID());
    } else {
      break;
    }
    if (taskInfo == null) {
      taskInfo = tempTaskInfo;
      break;
    }
  }
  verifyProcessTreeOverLimit(taskInfo, jobId);
  JobInfo jInfo = wovenClient.getJobInfo(jobId);
  LOG.info("Waiting till the job is completed...");
  counter = 0;
  while (counter++ < 60) {
    if (jInfo == null || jInfo.getStatus().isJobComplete()) {
      break;
    }
    UtilsForTests.waitFor(100);
    jInfo = wovenClient.getJobInfo(jobId);
  }
  UtilsForTests.waitFor(1000);
}
/**
 * Increase the memory limit for the reduce task and verify that the
 * task tracker logs the process tree status before killing the task.
 * @throws IOException if an I/O error occurs.
 */
@Test
public void testStreamingJobProcTreeCleanOfReduceTask() throws IOException {
  String runtimeArgs[] = {
      "-D", "mapred.job.name=ProcTreeStreamJob",
      "-D", "mapred.reduce.tasks=1",
      "-D", "mapred.map.tasks=1",
      "-D", "mapred.reduce.max.attempts=1",
      "-D", "mapred.cluster.max.map.memory.mb=2048",
      "-D", "mapred.cluster.map.memory.mb=1024",
      "-D", "mapred.cluster.max.reduce.memory.mb=2048",
      "-D", "mapred.cluster.reduce.memory.mb=1024",
      "-D", "mapred.job.reduce.memory.mb=512" };
  String[] otherArgs = new String[] {
      "-input", inputDir.toString(),
      "-output", outputDir.toString(),
      "-mapper", "/bin/cat",
      "-reducer", "ProcessTree.sh" };
  cleanup(outputDir, conf);
  JobID jobId = getJobId(runtimeArgs, otherArgs);
  if (jobId == null) {
    jobId = getJobId(runtimeArgs, otherArgs);
  }
  Assert.assertNotNull("Job ID not found for 1 min", jobId);
  Assert.assertTrue("Job has not been started for 1 min.",
      cluster.getJTClient().isJobStarted(jobId));
  TaskInfo taskInfo = getTaskInfo(jobId, false);
  Assert.assertNotNull("TaskInfo is null", taskInfo);
  Assert.assertTrue("Task has not been started for 1 min.",
      cluster.getJTClient().isTaskStarted(taskInfo));
  JTProtocol wovenClient = cluster.getJTClient().getProxy();
  int counter = 0;
  TaskInfo tempTaskInfo;
  // Poll until the task leaves the RUNNING state (or 60 attempts pass),
  // keeping the last non-null snapshot of the task info.
  while (counter++ < 60) {
    if (taskInfo.getTaskStatus().length == 0
        || taskInfo.getTaskStatus()[0].getRunState()
            == TaskStatus.State.RUNNING) {
      UtilsForTests.waitFor(1000);
      tempTaskInfo = taskInfo;
      taskInfo = wovenClient.getTaskInfo(taskInfo.getTaskID());
    } else {
      break;
    }
    if (taskInfo == null) {
      taskInfo = tempTaskInfo;
      break;
    }
  }
  verifyProcessTreeOverLimit(taskInfo, jobId);
  JobInfo jInfo = wovenClient.getJobInfo(jobId);
  LOG.info("Waiting till the job is completed...");
  counter = 0;
  while (counter++ < 60) {
    if (jInfo == null || jInfo.getStatus().isJobComplete()) {
      break;
    }
    UtilsForTests.waitFor(1000);
    jInfo = wovenClient.getJobInfo(jobId);
  }
}
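// The task-polling loop above appears verbatim in both streaming tests. A
// minimal refactoring sketch that both could share; the helper name
// waitForTaskTransition is hypothetical, but everything else uses the same
// types and calls as the tests above.
private TaskInfo waitForTaskTransition(JTProtocol wovenClient,
    TaskInfo taskInfo) throws IOException {
  int counter = 0;
  while (counter++ < 60) {
    if (taskInfo.getTaskStatus().length == 0
        || taskInfo.getTaskStatus()[0].getRunState()
            == TaskStatus.State.RUNNING) {
      UtilsForTests.waitFor(1000);
      TaskInfo refreshed = wovenClient.getTaskInfo(taskInfo.getTaskID());
      if (refreshed == null) {
        break; // Keep the last non-null snapshot.
      }
      taskInfo = refreshed;
    } else {
      break;
    }
  }
  return taskInfo;
}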