/**
 * Resolves the {@link TTClient} for the tracker running the given task.
 * Polls for up to ~6 seconds (60 x 100 ms) until the task reports at
 * least one task tracker, refreshing the task info each round.
 *
 * @param taskInfo task whose tracker should be located.
 * @return the tracker's client, or null if no tracker appeared in time.
 * @throws IOException on RPC failure while refreshing the task info.
 */
private TTClient getTTClientIns(TaskInfo taskInfo) throws IOException {
  TTClient client = null;
  String[] trackers = taskInfo.getTaskTrackers();
  for (int attempt = 0; attempt < 60 && trackers.length == 0; attempt++) {
    UtilsForTests.waitFor(100);
    taskInfo = wovenClient.getTaskInfo(taskInfo.getTaskID());
    trackers = taskInfo.getTaskTrackers();
  }
  if (trackers.length != 0) {
    // Tracker id appears to be of the form "tracker_<host>:<port>";
    // pull out the bare host name.
    String hostName = trackers[0].split("_")[1].split(":")[0];
    client = cluster.getTTClient(hostName);
  }
  return client;
}
/**
 * Checks whether the given distributed-cache file has been localized on
 * any tracker that ran a regular (non setup/cleanup) task.
 *
 * @param taskInfos tasks of the job under test.
 * @param cacheFile cache file name to look for.
 * @return true as soon as the file is found on one tracker, else false.
 * @throws Exception propagated from the tracker file check.
 */
private boolean checkLocalization(TaskInfo[] taskInfos, String cacheFile)
    throws Exception {
  for (TaskInfo info : taskInfos) {
    if (info.isSetupOrCleanup()) {
      continue;  // setup/cleanup attempts do not localize user cache files
    }
    for (TTClient tracker : getTTClients(info.getTaskTrackers())) {
      if (checkCacheFile(tracker, cacheFile)) {
        return true;
      }
    }
  }
  return false;
}
/**
 * Returns the first task tracker id reported for the given task,
 * polling for up to ~30 seconds (30 x 1000 ms).
 *
 * @param taskInfo task whose tracker should be located.
 * @return the tracker id, or null if none was reported in time.
 * @throws IOException on RPC failure while refreshing the task info.
 */
private static String getTaskTracker(TaskInfo taskInfo) throws IOException {
  String[] trackers = taskInfo.getTaskTrackers();
  for (int attempt = 0; attempt < 30; attempt++) {
    if (trackers.length != 0) {
      return trackers[0];
    }
    UtilsForTests.waitFor(1000);
    taskInfo = rtClient.getTaskInfo(taskInfo.getTaskID());
    trackers = taskInfo.getTaskTrackers();
  }
  // Timed out: original behavior is to give up even if the very last
  // refresh produced a tracker, so report failure with null here.
  return null;
}
/**
 * Verifies that the task attempt's diagnostics carry the
 * TaskMemoryManager "running beyond memory-limits" kill message for a
 * 512 MB limit.
 *
 * @param taskInfo the memory-exceeding task under test.
 * @param jobId    the job the task belongs to.
 * @throws IOException on RPC failure while fetching the diagnostics.
 */
private void verifyProcessTreeOverLimit(TaskInfo taskInfo, JobID jobId)
    throws IOException {
  String taskOverLimitPatternString =
      "TaskTree \\[pid=[0-9]*,tipID=.*\\] is "
      + "running beyond memory-limits. "
      + "Current usage : [0-9]*bytes. Limit : %sbytes. Killing task.";
  Pattern taskOverLimitPattern = Pattern.compile(String.format(
      taskOverLimitPatternString, String.valueOf(512 * 1024 * 1024L)));
  LOG.info("Task OverLimit Pattern:" + taskOverLimitPattern);
  TaskID tID = TaskID.downgrade(taskInfo.getTaskID());
  TaskAttemptID taskAttID = new TaskAttemptID(tID, 0);
  JobClient jobClient = cluster.getJTClient().getClient();
  RunningJob runJob = jobClient.getJob(jobId);
  String[] taskDiagnostics = runJob.getTaskDiagnostics(taskAttID);
  Assert.assertNotNull("Task diagnostics is null.", taskDiagnostics);
  // The diagnostics array may contain unrelated messages; require that
  // at least one entry matches the over-limit kill message. (The
  // previous loop asserted that EVERY entry matched, and passed
  // vacuously when the array was empty.)
  boolean foundOverLimitMessage = false;
  for (String strVal : taskDiagnostics) {
    Matcher mat = taskOverLimitPattern.matcher(strVal);
    if (mat.find()) {
      foundOverLimitMessage = true;
      break;
    }
  }
  Assert.assertTrue("Taskover limit error message is not matched.",
      foundOverLimitMessage);
}
private TaskInfo getTaskInfoOfRunningStreamJob(JobID jobId) throws IOException { TaskInfo taskInfo = null; wovenClient = cluster.getJTClient().getProxy(); JobInfo jInfo = wovenClient.getJobInfo(jobId); JobStatus jobStatus = jInfo.getStatus(); // Make sure that map is running and start progress 10%. while (jobStatus.mapProgress() < 0.1f) { UtilsForTests.waitFor(100); jobStatus = wovenClient.getJobInfo(jobId).getStatus(); } TaskInfo[] taskInfos = wovenClient.getTaskInfo(jobId); for (TaskInfo taskinfo : taskInfos) { if (!taskinfo.isSetupOrCleanup()) { taskInfo = taskinfo; } } return taskInfo; }
/** * Verifying the running job status whether it succeeds or not * after failing some of its tasks. */ @Test public void testFailedTaskJobStatus() throws IOException, InterruptedException { conf = remoteJTClient.getDaemonConf(); TaskInfo taskInfo = null; SleepJob job = new SleepJob(); job.setConf(conf); JobConf jobConf = job.setupJobConf(1, 1, 10000, 4000, 100, 100); RunningJob runJob = jobClient.submitJob(jobConf); JobID jobId = runJob.getID(); JobInfo jInfo = remoteJTClient.getJobInfo(jobId); Assert.assertTrue("Job has not been started for 1 min.", jtClient.isJobStarted(jobId)); TaskInfo[] taskInfos = remoteJTClient.getTaskInfo(jobId); for (TaskInfo taskinfo : taskInfos) { if (!taskinfo.isSetupOrCleanup() && taskinfo.getTaskID().isMap()) { taskInfo = taskinfo; break; } } Assert.assertTrue("Task has not been started for 1 min.", jtClient.isTaskStarted(taskInfo)); // Fail the running task. NetworkedJob networkJob = jobClient.new NetworkedJob(jInfo.getStatus()); TaskID tID = TaskID.downgrade(taskInfo.getTaskID()); TaskAttemptID taskAttID = new TaskAttemptID(tID , 0); networkJob.killTask(taskAttID, true); LOG.info("Waiting till the job is completed..."); while (!jInfo.getStatus().isJobComplete()) { UtilsForTests.waitFor(100); jInfo = remoteJTClient.getJobInfo(jobId); } Assert.assertEquals("JobStatus", JobStatus.SUCCEEDED, jInfo.getStatus().getRunState()); }
/**
 * Run the job with two distributed cache files and verify
 * whether job is succeeded or not.
 * @throws Exception
 */
@Test
public void testCacheFilesLocalization() throws Exception {
  conf = wovenClient.getDaemonConf();
  SleepJob job = new SleepJob();
  job.setConf(conf);
  JobConf jobConf = job.setupJobConf(4, 1, 4000, 4000, 1000, 1000);
  DistributedCache.createSymlink(jobConf);
  DistributedCache.addCacheFile(cacheFileURI1, jobConf);
  DistributedCache.addCacheFile(cacheFileURI2, jobConf);
  RunningJob runJob = jobClient.submitJob(jobConf);
  JobID jobId = runJob.getID();
  Assert.assertTrue("Job has not been started for 1 min.",
      jtClient.isJobStarted(jobId));
  TaskInfo[] taskInfos = wovenClient.getTaskInfo(jobId);
  Assert.assertTrue("Cache File1 has not been localize",
      checkLocalization(taskInfos, cacheFile1));
  Assert.assertTrue("Cache File2 has not been localize",
      checkLocalization(taskInfos, cacheFile2));
  JobInfo jInfo = wovenClient.getJobInfo(jobId);
  LOG.info("Waiting till the job is completed...");
  while (!jInfo.getStatus().isJobComplete()) {
    UtilsForTests.waitFor(100);
    jInfo = wovenClient.getJobInfo(jobId);
  }
  // assertEquals takes (message, expected, actual); the original passed
  // expected and actual swapped, which yields a misleading failure
  // message ("expected <actual> but was <SUCCEEDED>").
  Assert.assertEquals("Job has not been succeeded",
      JobStatus.SUCCEEDED, jInfo.getStatus().getRunState());
}
/**
 * Run the job with distributed cache files and remove one cache
 * file from the DFS when it is localized.verify whether the job
 * is failed or not.
 * @throws Exception
 */
@Test
public void testDeleteCacheFileInDFSAfterLocalized() throws Exception {
  conf = wovenClient.getDaemonConf();
  SleepJob job = new SleepJob();
  job.setConf(conf);
  JobConf jobConf = job.setupJobConf(4, 1, 4000, 4000, 1000, 1000);
  cacheFileURI3 = createCacheFile(tmpFolderPath, cacheFile3);
  DistributedCache.createSymlink(jobConf);
  DistributedCache.addCacheFile(cacheFileURI3, jobConf);
  RunningJob runJob = jobClient.submitJob(jobConf);
  JobID jobId = runJob.getID();
  Assert.assertTrue("Job has not been started for 1 min.",
      jtClient.isJobStarted(jobId));
  TaskInfo[] taskInfos = wovenClient.getTaskInfo(jobId);
  boolean iscacheFileLocalized = checkLocalization(taskInfos, cacheFile3);
  Assert.assertTrue("CacheFile has not been localized",
      iscacheFileLocalized);
  // Remove the cache file from DFS while the job still needs it, so
  // later attempts fail to localize and the job is expected to fail.
  deleteCacheFile(new Path(tmpFolderPath, cacheFile3));
  JobInfo jInfo = wovenClient.getJobInfo(jobId);
  LOG.info("Waiting till the job is completed...");
  while (!jInfo.getStatus().isJobComplete()) {
    UtilsForTests.waitFor(100);
    jInfo = wovenClient.getJobInfo(jobId);
  }
  // assertEquals takes (message, expected, actual); the original passed
  // expected and actual swapped, which yields a misleading failure
  // message.
  Assert.assertEquals("Job has not been failed",
      JobStatus.FAILED, jInfo.getStatus().getRunState());
}
/**
 * Submit a job and create folders and files in work folder with
 * non-writable permissions under task attempt id folder.
 * Wait till the job completes and verify whether the files
 * and folders are cleaned up or not.
 * @throws IOException
 */
@Test
public void testJobCleanupAfterJobCompletes() throws IOException {
  HashMap<TTClient, ArrayList<String>> map =
      new HashMap<TTClient, ArrayList<String>>();
  JobID jobId = createJobAndSubmit().getID();
  Assert.assertTrue("Job has not been started for 1 min",
      jtClient.isJobStarted(jobId));
  TaskInfo[] taskInfos = rtClient.getTaskInfo(jobId);
  for (TaskInfo taskinfo : taskInfos) {
    if (!taskinfo.isSetupOrCleanup()) {
      Assert.assertTrue("Task has not been started for 1 min ",
          jtClient.isTaskStarted(taskinfo));
      String tasktracker = getTaskTracker(taskinfo);
      Assert.assertNotNull("TaskTracker has not been found", tasktracker);
      // Bug fix: the original stored the per-task lookup in a local
      // "ttclient" but then recorded a different identifier "ttClient"
      // (presumably a field) in the map, so cleanup was verified on the
      // wrong tracker. Use the freshly resolved client throughout.
      TTClient ttClient = getTTClient(tasktracker);
      UtilsForTests.waitFor(100);
      map.put(ttClient,
          getTTClientMapRedLocalDirs(ttClient, taskinfo, jobId));
    }
  }
  LOG.info("Waiting till the job is completed...");
  Assert.assertTrue("Job has not been completed for 1 min",
      jtClient.isJobStopped(jobId));
  // Give the trackers a moment to finish asynchronous cleanup.
  UtilsForTests.waitFor(3000);
  Assert.assertTrue("Job directories have not been cleaned up properly "
      + "after completion of job", verifyJobDirectoryCleanup(map));
}
/**
 * Submit a job and create folders and files in work folder with
 * non-writable permissions under task attempt id folder.
 * Kill the job and verify whether the files and folders
 * are cleaned up or not.
 * @throws IOException
 */
@Test
public void testJobCleanupAfterJobKill() throws IOException {
  HashMap<TTClient, ArrayList<String>> map =
      new HashMap<TTClient, ArrayList<String>>();
  JobID jobId = createJobAndSubmit().getID();
  Assert.assertTrue("Job has not been started for 1 min",
      jtClient.isJobStarted(jobId));
  TaskInfo[] taskInfos = rtClient.getTaskInfo(jobId);
  for (TaskInfo taskinfo : taskInfos) {
    if (!taskinfo.isSetupOrCleanup()) {
      Assert.assertTrue("Task has not been started for 1 min ",
          jtClient.isTaskStarted(taskinfo));
      String tasktracker = getTaskTracker(taskinfo);
      Assert.assertNotNull("TaskTracker has not been found", tasktracker);
      // Bug fix: the original stored the per-task lookup in a local
      // "ttclient" but then recorded a different identifier "ttClient"
      // (presumably a field) in the map, so cleanup was verified on the
      // wrong tracker. Use the freshly resolved client throughout.
      TTClient ttClient = getTTClient(tasktracker);
      map.put(ttClient,
          getTTClientMapRedLocalDirs(ttClient, taskinfo, jobId));
    }
  }
  jtClient.getClient().killJob(jobId);
  LOG.info("Waiting till the job is completed...");
  Assert.assertTrue("Job has not been completed for 1 min",
      jtClient.isJobStopped(jobId));
  JobInfo jobInfo = rtClient.getJobInfo(jobId);
  // assertEquals takes (message, expected, actual); the original passed
  // expected and actual swapped, which yields a misleading failure
  // message.
  Assert.assertEquals("Job has not been killed",
      JobStatus.KILLED, jobInfo.getStatus().getRunState());
  // Give the trackers a moment to finish asynchronous cleanup.
  UtilsForTests.waitFor(3000);
  Assert.assertTrue("Job directories have not been cleaned up properly "
      + "after completion of job", verifyJobDirectoryCleanup(map));
}
/**
 * Creates test files under the task attempt's mapred local dirs on the
 * given tracker and then releases the held-up task.
 *
 * @param ttClient tracker to create the files on; may be null.
 * @param taskinfo the task whose attempt-0 directories are used.
 * @param jobId    the owning job.
 * @return names of the created files, or null when ttClient is null.
 * @throws IOException on RPC failure.
 */
private static ArrayList<String> getTTClientMapRedLocalDirs(
    TTClient ttClient, TaskInfo taskinfo, JobID jobId) throws IOException {
  ArrayList<String> fileList = null;
  TaskID taskId = TaskID.downgrade(taskinfo.getTaskID());
  FinishTaskControlAction action = new FinishTaskControlAction(taskId);
  if (ttClient != null) {
    String localDirs[] = ttClient.getMapredLocalDirs();
    TaskAttemptID taskAttID = new TaskAttemptID(taskId, 0);
    fileList = createFilesInTaskDir(localDirs, jobId, taskAttID, ttClient);
    // Bug fix: sendAction was previously issued OUTSIDE this null
    // check, dereferencing ttClient unconditionally and throwing an
    // NPE whenever the guard above was actually needed. Release the
    // task only when we have a live client.
    ttClient.getProxy().sendAction(action);
  }
  return fileList;
}
/**
 * Returns the first regular (non setup/cleanup) task of the job whose
 * map/reduce kind matches {@code isMap}, or null if none exists.
 *
 * @param jobId job to inspect.
 * @param isMap true to select a map task, false for a reduce task.
 * @return the matching task info, or null.
 * @throws IOException on RPC failure.
 */
private TaskInfo getTaskInfo(JobID jobId, boolean isMap)
    throws IOException {
  JTProtocol wovenClient = cluster.getJTClient().getProxy();
  // Removed an unused getJobInfo() lookup the original performed here;
  // its result was never read.
  for (TaskInfo taskinfo : wovenClient.getTaskInfo(jobId)) {
    if (!taskinfo.isSetupOrCleanup()
        && taskinfo.getTaskID().isMap() == isMap) {
      return taskinfo;
    }
  }
  return null;
}
/** * Verifying the running job status whether it succeeds or not * after failing some of its tasks. */ @Test public void testFailedTaskJobStatus() throws IOException, InterruptedException { conf = remoteJTClient.getDaemonConf(); TaskInfo taskInfo = null; SleepJob job = new SleepJob(); job.setConf(conf); JobConf jobConf = job.setupJobConf(1, 1, 10000, 4000, 100, 100); RunningJob runJob = jobClient.submitJob(jobConf); JobID jobId = runJob.getID(); JobInfo jInfo = remoteJTClient.getJobInfo(jobId); Assert.assertTrue("Job has not been started for 1 min.", jtClient.isJobStarted(jobId)); TaskInfo[] taskInfos = remoteJTClient.getTaskInfo(jobId); for (TaskInfo taskinfo : taskInfos) { if (!taskinfo.isSetupOrCleanup() && taskinfo.getTaskID().isMap()) { taskInfo = taskinfo; break; } } Assert.assertTrue("Task has not been started for 1 min.", jtClient.isTaskStarted(taskInfo)); // Fail the running task. RunningJob networkJob = jobClient.getJob(jobId); TaskID tID = TaskID.downgrade(taskInfo.getTaskID()); TaskAttemptID taskAttID = new TaskAttemptID(tID , 0); networkJob.killTask(taskAttID, true); LOG.info("Waiting till the job is completed..."); while (!jInfo.getStatus().isJobComplete()) { UtilsForTests.waitFor(100); jInfo = remoteJTClient.getJobInfo(jobId); } Assert.assertEquals("JobStatus", JobStatus.SUCCEEDED, jInfo.getStatus().getRunState()); }