private long storedPerAttemptValue (Map<TaskAttempt, AtomicLong> data, TaskAttemptId attemptID) { TaskId taskID = attemptID.getTaskId(); JobId jobID = taskID.getJobId(); Job job = context.getJob(jobID); Task task = job.getTask(taskID); if (task == null) { return -1L; } TaskAttempt taskAttempt = task.getAttempt(attemptID); if (taskAttempt == null) { return -1L; } AtomicLong estimate = data.get(taskAttempt); return estimate == null ? -1L : estimate.get(); }
public JobTaskAttemptCounterInfo(TaskAttempt taskattempt) { this.id = MRApps.toString(taskattempt.getID()); total = taskattempt.getCounters(); taskAttemptCounterGroup = new ArrayList<TaskCounterGroupInfo>(); if (total != null) { for (CounterGroup g : total) { if (g != null) { TaskCounterGroupInfo cginfo = new TaskCounterGroupInfo(g.getName(), g); if (cginfo != null) { taskAttemptCounterGroup.add(cginfo); } } } } }
public TaskAttemptInfo(TaskAttempt ta, TaskType type, Boolean isRunning) { final TaskAttemptReport report = ta.getReport(); this.type = type.toString(); this.id = MRApps.toString(ta.getID()); this.nodeHttpAddress = ta.getNodeHttpAddress(); this.startTime = report.getStartTime(); this.finishTime = report.getFinishTime(); this.assignedContainerId = ConverterUtils.toString(report.getContainerId()); this.assignedContainer = report.getContainerId(); this.progress = report.getProgress() * 100; this.status = report.getStateString(); this.state = report.getTaskAttemptState(); this.elapsedTime = Times .elapsed(this.startTime, this.finishTime, isRunning); if (this.elapsedTime == -1) { this.elapsedTime = 0; } this.diagnostics = report.getDiagnosticInfo(); this.rack = ta.getNodeRackName(); }
public ReduceTaskAttemptInfo(TaskAttempt ta, TaskType type) { super(ta, type, false); this.shuffleFinishTime = ta.getShuffleFinishTime(); this.mergeFinishTime = ta.getSortFinishTime(); this.elapsedShuffleTime = Times.elapsed(this.startTime, this.shuffleFinishTime, false); if (this.elapsedShuffleTime == -1) { this.elapsedShuffleTime = 0; } this.elapsedMergeTime = Times.elapsed(this.shuffleFinishTime, this.mergeFinishTime, false); if (this.elapsedMergeTime == -1) { this.elapsedMergeTime = 0; } this.elapsedReduceTime = Times.elapsed(this.mergeFinishTime, this.finishTime, false); if (this.elapsedReduceTime == -1) { this.elapsedReduceTime = 0; } }
@GET @Path("/jobs/{jobid}/tasks/{taskid}/attempts") @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }) public TaskAttemptsInfo getJobTaskAttempts(@Context HttpServletRequest hsr, @PathParam("jobid") String jid, @PathParam("taskid") String tid) { init(); TaskAttemptsInfo attempts = new TaskAttemptsInfo(); Job job = getJobFromJobIdString(jid, appCtx); checkAccess(job, hsr); Task task = getTaskFromTaskIdString(tid, job); for (TaskAttempt ta : task.getAttempts().values()) { if (ta != null) { if (task.getType() == TaskType.REDUCE) { attempts.add(new ReduceTaskAttemptInfo(ta, task.getType())); } else { attempts.add(new TaskAttemptInfo(ta, task.getType(), true)); } } } return attempts; }
@GET @Path("/jobs/{jobid}/tasks/{taskid}/attempts/{attemptid}") @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }) public TaskAttemptInfo getJobTaskAttemptId(@Context HttpServletRequest hsr, @PathParam("jobid") String jid, @PathParam("taskid") String tid, @PathParam("attemptid") String attId) { init(); Job job = getJobFromJobIdString(jid, appCtx); checkAccess(job, hsr); Task task = getTaskFromTaskIdString(tid, job); TaskAttempt ta = getTaskAttemptFromTaskAttemptString(attId, task); if (task.getType() == TaskType.REDUCE) { return new ReduceTaskAttemptInfo(ta, task.getType()); } else { return new TaskAttemptInfo(ta, task.getType(), true); } }
@Override protected Collection<TaskAttempt> getTaskAttempts() { List<TaskAttempt> fewTaskAttemps = new ArrayList<TaskAttempt>(); String taskTypeStr = $(TASK_TYPE); TaskType taskType = MRApps.taskType(taskTypeStr); String attemptStateStr = $(ATTEMPT_STATE); TaskAttemptStateUI neededState = MRApps .taskAttemptState(attemptStateStr); for (Task task : super.app.getJob().getTasks(taskType).values()) { Map<TaskAttemptId, TaskAttempt> attempts = task.getAttempts(); for (TaskAttempt attempt : attempts.values()) { if (neededState.correspondsTo(attempt.getState())) { fewTaskAttemps.add(attempt); } } } return fewTaskAttemps; }
@Override public Map<TaskAttemptId, TaskAttempt> getAttempts() { readLock.lock(); try { if (attempts.size() <= 1) { return attempts; } Map<TaskAttemptId, TaskAttempt> result = new LinkedHashMap<TaskAttemptId, TaskAttempt>(); result.putAll(attempts); return result; } finally { readLock.unlock(); } }
@Override public Counters getCounters() { Counters counters = null; readLock.lock(); try { TaskAttempt bestAttempt = selectBestAttempt(); if (bestAttempt != null) { counters = bestAttempt.getCounters(); } else { counters = TaskAttemptImpl.EMPTY_COUNTERS; // counters.groups = new HashMap<CharSequence, CounterGroup>(); } return counters; } finally { readLock.unlock(); } }
private void handleTaskAttemptCompletion(TaskAttemptId attemptId, TaskAttemptCompletionEventStatus status) { TaskAttempt attempt = attempts.get(attemptId); //raise the completion event only if the container is assigned // to nextAttemptNumber if (attempt.getNodeHttpAddress() != null) { TaskAttemptCompletionEvent tce = recordFactory .newRecordInstance(TaskAttemptCompletionEvent.class); tce.setEventId(-1); String scheme = (encryptedShuffle) ? "https://" : "http://"; tce.setMapOutputServerAddress(StringInterner.weakIntern(scheme + attempt.getNodeHttpAddress().split(":")[0] + ":" + attempt.getShufflePort())); tce.setStatus(status); tce.setAttemptId(attempt.getID()); int runTime = 0; if (attempt.getFinishTime() != 0 && attempt.getLaunchTime() !=0) runTime = (int)(attempt.getFinishTime() - attempt.getLaunchTime()); tce.setAttemptRunTime(runTime); //raise the event to job so that it adds the completion event to its //data structures eventHandler.handle(new JobTaskAttemptCompletedEvent(tce)); } }
private void writeBadOutput(TaskAttempt attempt, Configuration conf) throws Exception { TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, TypeConverter.fromYarn(attempt.getID())); TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat(); RecordWriter theRecordWriter = theOutputFormat .getRecordWriter(tContext); NullWritable nullWritable = NullWritable.get(); try { theRecordWriter.write(key2, val2); theRecordWriter.write(null, nullWritable); theRecordWriter.write(null, val2); theRecordWriter.write(nullWritable, val1); theRecordWriter.write(key1, nullWritable); theRecordWriter.write(key2, null); theRecordWriter.write(null, null); theRecordWriter.write(key1, val1); } finally { theRecordWriter.close(tContext); } OutputFormat outputFormat = ReflectionUtils.newInstance( tContext.getOutputFormatClass(), conf); OutputCommitter committer = outputFormat.getOutputCommitter(tContext); committer.commitTask(tContext); }
private void writeOutput(TaskAttempt attempt, Configuration conf) throws Exception { TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, TypeConverter.fromYarn(attempt.getID())); TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat(); RecordWriter theRecordWriter = theOutputFormat .getRecordWriter(tContext); NullWritable nullWritable = NullWritable.get(); try { theRecordWriter.write(key1, val1); theRecordWriter.write(null, nullWritable); theRecordWriter.write(null, val1); theRecordWriter.write(nullWritable, val2); theRecordWriter.write(key2, nullWritable); theRecordWriter.write(key1, null); theRecordWriter.write(null, null); theRecordWriter.write(key2, val2); } finally { theRecordWriter.close(tContext); } OutputFormat outputFormat = ReflectionUtils.newInstance( tContext.getOutputFormatClass(), conf); OutputCommitter committer = outputFormat.getOutputCommitter(tContext); committer.commitTask(tContext); }
public void verifyHsTaskAttempts(JSONObject json, Task task) throws JSONException { assertEquals("incorrect number of elements", 1, json.length()); JSONObject attempts = json.getJSONObject("taskAttempts"); assertEquals("incorrect number of elements", 1, json.length()); JSONArray arr = attempts.getJSONArray("taskAttempt"); for (TaskAttempt att : task.getAttempts().values()) { TaskAttemptId id = att.getID(); String attid = MRApps.toString(id); Boolean found = false; for (int i = 0; i < arr.length(); i++) { JSONObject info = arr.getJSONObject(i); if (attid.matches(info.getString("id"))) { found = true; verifyHsTaskAttempt(info, att, task.getType()); } } assertTrue("task attempt with id: " + attid + " not in web service output", found); } }
public void verifyHsJobTaskAttemptCounters(JSONObject info, TaskAttempt att) throws JSONException { assertEquals("incorrect number of elements", 2, info.length()); WebServicesTestUtils.checkStringMatch("id", MRApps.toString(att.getID()), info.getString("id")); // just do simple verification of fields - not data is correct // in the fields JSONArray counterGroups = info.getJSONArray("taskAttemptCounterGroup"); for (int i = 0; i < counterGroups.length(); i++) { JSONObject counterGroup = counterGroups.getJSONObject(i); String name = counterGroup.getString("counterGroupName"); assertTrue("name not set", (name != null && !name.isEmpty())); JSONArray counters = counterGroup.getJSONArray("counter"); for (int j = 0; j < counters.length(); j++) { JSONObject counter = counters.getJSONObject(j); String counterName = counter.getString("name"); assertTrue("name not set", (counterName != null && !counterName.isEmpty())); long value = counter.getLong("value"); assertTrue("value >= 0", value >= 0); } } }
public void verifyAMTaskAttemptXML(Element element, TaskAttempt att, TaskType ttype) { verifyTaskAttemptGeneric(att, ttype, WebServicesTestUtils.getXmlString(element, "id"), WebServicesTestUtils.getXmlString(element, "state"), WebServicesTestUtils.getXmlString(element, "type"), WebServicesTestUtils.getXmlString(element, "rack"), WebServicesTestUtils.getXmlString(element, "nodeHttpAddress"), WebServicesTestUtils.getXmlString(element, "diagnostics"), WebServicesTestUtils.getXmlString(element, "assignedContainerId"), WebServicesTestUtils.getXmlLong(element, "startTime"), WebServicesTestUtils.getXmlLong(element, "finishTime"), WebServicesTestUtils.getXmlLong(element, "elapsedTime"), WebServicesTestUtils.getXmlFloat(element, "progress")); if (ttype == TaskType.REDUCE) { verifyReduceTaskAttemptGeneric(att, WebServicesTestUtils.getXmlLong(element, "shuffleFinishTime"), WebServicesTestUtils.getXmlLong(element, "mergeFinishTime"), WebServicesTestUtils.getXmlLong(element, "elapsedShuffleTime"), WebServicesTestUtils.getXmlLong(element, "elapsedMergeTime"), WebServicesTestUtils.getXmlLong(element, "elapsedReduceTime")); } }
public void verifyAMTaskAttempt(JSONObject info, TaskAttempt att, TaskType ttype) throws JSONException { if (ttype == TaskType.REDUCE) { assertEquals("incorrect number of elements", 17, info.length()); } else { assertEquals("incorrect number of elements", 12, info.length()); } verifyTaskAttemptGeneric(att, ttype, info.getString("id"), info.getString("state"), info.getString("type"), info.getString("rack"), info.getString("nodeHttpAddress"), info.getString("diagnostics"), info.getString("assignedContainerId"), info.getLong("startTime"), info.getLong("finishTime"), info.getLong("elapsedTime"), (float) info.getDouble("progress")); if (ttype == TaskType.REDUCE) { verifyReduceTaskAttemptGeneric(att, info.getLong("shuffleFinishTime"), info.getLong("mergeFinishTime"), info.getLong("elapsedShuffleTime"), info.getLong("elapsedMergeTime"), info.getLong("elapsedReduceTime")); } }
public void verifyAMTaskAttempts(JSONObject json, Task task) throws JSONException { assertEquals("incorrect number of elements", 1, json.length()); JSONObject attempts = json.getJSONObject("taskAttempts"); assertEquals("incorrect number of elements", 1, json.length()); JSONArray arr = attempts.getJSONArray("taskAttempt"); for (TaskAttempt att : task.getAttempts().values()) { TaskAttemptId id = att.getID(); String attid = MRApps.toString(id); Boolean found = false; for (int i = 0; i < arr.length(); i++) { JSONObject info = arr.getJSONObject(i); if (attid.matches(info.getString("id"))) { found = true; verifyAMTaskAttempt(info, att, task.getType()); } } assertTrue("task attempt with id: " + attid + " not in web service output", found); } }
public void verifyAMTaskAttemptsXML(NodeList nodes, Task task) { assertEquals("incorrect number of elements", 1, nodes.getLength()); for (TaskAttempt att : task.getAttempts().values()) { TaskAttemptId id = att.getID(); String attid = MRApps.toString(id); Boolean found = false; for (int i = 0; i < nodes.getLength(); i++) { Element element = (Element) nodes.item(i); if (attid.matches(WebServicesTestUtils.getXmlString(element, "id"))) { found = true; verifyAMTaskAttemptXML(element, att, task.getType()); } } assertTrue("task with id: " + attid + " not in web service output", found); } }
public void verifyAMJobTaskAttemptCounters(JSONObject info, TaskAttempt att) throws JSONException { assertEquals("incorrect number of elements", 2, info.length()); WebServicesTestUtils.checkStringMatch("id", MRApps.toString(att.getID()), info.getString("id")); // just do simple verification of fields - not data is correct // in the fields JSONArray counterGroups = info.getJSONArray("taskAttemptCounterGroup"); for (int i = 0; i < counterGroups.length(); i++) { JSONObject counterGroup = counterGroups.getJSONObject(i); String name = counterGroup.getString("counterGroupName"); assertTrue("name not set", (name != null && !name.isEmpty())); JSONArray counters = counterGroup.getJSONArray("counter"); for (int j = 0; j < counters.length(); j++) { JSONObject counter = counters.getJSONObject(j); String counterName = counter.getString("name"); assertTrue("name not set", (counterName != null && !counterName.isEmpty())); long value = counter.getLong("value"); assertTrue("value >= 0", value >= 0); } } }
private void testMRAppHistory(MRApp app) throws Exception { Configuration conf = new Configuration(); Job job = app.submit(conf); app.waitForState(job, JobState.FAILED); Map<TaskId, Task> tasks = job.getTasks(); Assert.assertEquals("Num tasks is not correct", 1, tasks.size()); Task task = tasks.values().iterator().next(); Assert.assertEquals("Task state not correct", TaskState.FAILED, task .getReport().getTaskState()); Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next() .getAttempts(); Assert.assertEquals("Num attempts is not correct", 4, attempts.size()); Iterator<TaskAttempt> it = attempts.values().iterator(); TaskAttemptReport report = it.next().getReport(); Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED, report.getTaskAttemptState()); Assert.assertEquals("Diagnostic Information is not Correct", "Test Diagnostic Event", report.getDiagnosticInfo()); report = it.next().getReport(); Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED, report.getTaskAttemptState()); }
public void waitForState(TaskAttempt attempt, TaskAttemptState finalState) throws Exception { int timeoutSecs = 0; TaskAttemptReport report = attempt.getReport(); while (!finalState.equals(report.getTaskAttemptState()) && timeoutSecs++ < 20) { System.out.println("TaskAttempt State is : " + report.getTaskAttemptState() + " Waiting for state : " + finalState + " progress : " + report.getProgress()); report = attempt.getReport(); Thread.sleep(500); } System.out.println("TaskAttempt State is : " + report.getTaskAttemptState()); Assert.assertEquals("TaskAttempt state is not correct (timedout)", finalState, report.getTaskAttemptState()); }
public void verifyCompleted() { for (Job job : getContext().getAllJobs().values()) { JobReport jobReport = job.getReport(); System.out.println("Job start time :" + jobReport.getStartTime()); System.out.println("Job finish time :" + jobReport.getFinishTime()); Assert.assertTrue("Job start time is not less than finish time", jobReport.getStartTime() <= jobReport.getFinishTime()); Assert.assertTrue("Job finish time is in future", jobReport.getFinishTime() <= System.currentTimeMillis()); for (Task task : job.getTasks().values()) { TaskReport taskReport = task.getReport(); System.out.println("Task start time : " + taskReport.getStartTime()); System.out.println("Task finish time : " + taskReport.getFinishTime()); Assert.assertTrue("Task start time is not less than finish time", taskReport.getStartTime() <= taskReport.getFinishTime()); for (TaskAttempt attempt : task.getAttempts().values()) { TaskAttemptReport attemptReport = attempt.getReport(); Assert.assertTrue("Attempt start time is not less than finish time", attemptReport.getStartTime() <= attemptReport.getFinishTime()); } } } }
public void verifyHsTaskAttemptsXML(NodeList nodes, Task task) { assertEquals("incorrect number of elements", 1, nodes.getLength()); for (TaskAttempt att : task.getAttempts().values()) { TaskAttemptId id = att.getID(); String attid = MRApps.toString(id); Boolean found = false; for (int i = 0; i < nodes.getLength(); i++) { Element element = (Element) nodes.item(i); if (attid.matches(WebServicesTestUtils.getXmlString(element, "id"))) { found = true; verifyHsTaskAttemptXML(element, att, task.getType()); } } assertTrue("task with id: " + attid + " not in web service output", found); } }
@Test //All Task attempts are timed out, leading to Job failure public void testTimedOutTask() throws Exception { MRApp app = new TimeOutTaskMRApp(1, 0); Configuration conf = new Configuration(); int maxAttempts = 2; conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, maxAttempts); // disable uberization (requires entire job to be reattempted, so max for // subtask attempts is overridden to 1) conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false); Job job = app.submit(conf); app.waitForState(job, JobState.FAILED); Map<TaskId,Task> tasks = job.getTasks(); Assert.assertEquals("Num tasks is not correct", 1, tasks.size()); Task task = tasks.values().iterator().next(); Assert.assertEquals("Task state not correct", TaskState.FAILED, task.getReport().getTaskState()); Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next().getAttempts(); Assert.assertEquals("Num attempts is not correct", maxAttempts, attempts.size()); for (TaskAttempt attempt : attempts.values()) { Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED, attempt.getReport().getTaskAttemptState()); } }
private void updateStatus(MRApp app, TaskAttempt attempt, Phase phase) { TaskAttemptStatusUpdateEvent.TaskAttemptStatus status = new TaskAttemptStatusUpdateEvent.TaskAttemptStatus(); status.counters = new Counters(); status.fetchFailedMaps = new ArrayList<TaskAttemptId>(); status.id = attempt.getID(); status.mapFinishTime = 0; status.phase = phase; status.progress = 0.5f; status.shuffleFinishTime = 0; status.sortFinishTime = 0; status.stateString = "OK"; status.taskState = attempt.getState(); TaskAttemptStatusUpdateEvent event = new TaskAttemptStatusUpdateEvent(attempt.getID(), status); app.getContext().getEventHandler().handle(event); }
@Override protected Collection<TaskAttempt> getTaskAttempts() { List<TaskAttempt> fewTaskAttemps = new ArrayList<TaskAttempt>(); String taskTypeStr = $(TASK_TYPE); TaskType taskType = MRApps.taskType(taskTypeStr); String attemptStateStr = $(ATTEMPT_STATE); TaskAttemptStateUI neededState = MRApps .taskAttemptState(attemptStateStr); Job j = app.getJob(); Map<TaskId, Task> tasks = j.getTasks(taskType); for (Task task : tasks.values()) { Map<TaskAttemptId, TaskAttempt> attempts = task.getAttempts(); for (TaskAttempt attempt : attempts.values()) { if (neededState.correspondsTo(attempt.getState())) { fewTaskAttemps.add(attempt); } } } return fewTaskAttemps; }
@GET @Path("/mapreduce/jobs/{jobid}/tasks/{taskid}/attempts") @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }) public TaskAttemptsInfo getJobTaskAttempts(@Context HttpServletRequest hsr, @PathParam("jobid") String jid, @PathParam("taskid") String tid) { init(); TaskAttemptsInfo attempts = new TaskAttemptsInfo(); Job job = AMWebServices.getJobFromJobIdString(jid, ctx); checkAccess(job, hsr); Task task = AMWebServices.getTaskFromTaskIdString(tid, job); for (TaskAttempt ta : task.getAttempts().values()) { if (ta != null) { if (task.getType() == TaskType.REDUCE) { attempts.add(new ReduceTaskAttemptInfo(ta, task.getType())); } else { attempts.add(new TaskAttemptInfo(ta, task.getType(), false)); } } } return attempts; }
@GET @Path("/mapreduce/jobs/{jobid}/tasks/{taskid}/attempts/{attemptid}") @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }) public TaskAttemptInfo getJobTaskAttemptId(@Context HttpServletRequest hsr, @PathParam("jobid") String jid, @PathParam("taskid") String tid, @PathParam("attemptid") String attId) { init(); Job job = AMWebServices.getJobFromJobIdString(jid, ctx); checkAccess(job, hsr); Task task = AMWebServices.getTaskFromTaskIdString(tid, job); TaskAttempt ta = AMWebServices.getTaskAttemptFromTaskAttemptString(attId, task); if (task.getType() == TaskType.REDUCE) { return new ReduceTaskAttemptInfo(ta, task.getType()); } else { return new TaskAttemptInfo(ta, task.getType(), false); } }
private void constructTaskReport() { loadAllTaskAttempts(); this.report = Records.newRecord(TaskReport.class); report.setTaskId(taskId); long minLaunchTime = Long.MAX_VALUE; for(TaskAttempt attempt: attempts.values()) { minLaunchTime = Math.min(minLaunchTime, attempt.getLaunchTime()); } minLaunchTime = minLaunchTime == Long.MAX_VALUE ? -1 : minLaunchTime; report.setStartTime(minLaunchTime); report.setFinishTime(taskInfo.getFinishTime()); report.setTaskState(getState()); report.setProgress(getProgress()); Counters counters = getCounters(); if (counters == null) { counters = EMPTY_COUNTERS; } report.setCounters(TypeConverter.toYarn(counters)); if (successfulAttempt != null) { report.setSuccessfulAttempt(successfulAttempt); } report.addAllDiagnostics(reportDiagnostics); report .addAllRunningAttempts(new ArrayList<TaskAttemptId>(attempts.keySet())); }
public void verifyHsTaskAttemptXML(Element element, TaskAttempt att, TaskType ttype) { verifyTaskAttemptGeneric(att, ttype, WebServicesTestUtils.getXmlString(element, "id"), WebServicesTestUtils.getXmlString(element, "state"), WebServicesTestUtils.getXmlString(element, "type"), WebServicesTestUtils.getXmlString(element, "rack"), WebServicesTestUtils.getXmlString(element, "nodeHttpAddress"), WebServicesTestUtils.getXmlString(element, "diagnostics"), WebServicesTestUtils.getXmlString(element, "assignedContainerId"), WebServicesTestUtils.getXmlLong(element, "startTime"), WebServicesTestUtils.getXmlLong(element, "finishTime"), WebServicesTestUtils.getXmlLong(element, "elapsedTime"), WebServicesTestUtils.getXmlFloat(element, "progress")); if (ttype == TaskType.REDUCE) { verifyReduceTaskAttemptGeneric(att, WebServicesTestUtils.getXmlLong(element, "shuffleFinishTime"), WebServicesTestUtils.getXmlLong(element, "mergeFinishTime"), WebServicesTestUtils.getXmlLong(element, "elapsedShuffleTime"), WebServicesTestUtils.getXmlLong(element, "elapsedMergeTime"), WebServicesTestUtils.getXmlLong(element, "elapsedReduceTime")); } }
private TaskAttempt getSuccessfulAttempt(Task task) { for (TaskAttempt attempt : task.getAttempts().values()) { if (attempt.getState() == TaskAttemptState.SUCCEEDED) { return attempt; } } return null; }
@GET @Path("/jobs/{jobid}/tasks/{taskid}/attempts/{attemptid}/counters") @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }) public JobTaskAttemptCounterInfo getJobTaskAttemptIdCounters( @Context HttpServletRequest hsr, @PathParam("jobid") String jid, @PathParam("taskid") String tid, @PathParam("attemptid") String attId) { init(); Job job = getJobFromJobIdString(jid, appCtx); checkAccess(job, hsr); Task task = getTaskFromTaskIdString(tid, job); TaskAttempt ta = getTaskAttemptFromTaskAttemptString(attId, task); return new JobTaskAttemptCounterInfo(ta); }
@Override public void transition(JobImpl job, JobEvent event) { //get number of shuffling reduces int shufflingReduceTasks = 0; for (TaskId taskId : job.reduceTasks) { Task task = job.tasks.get(taskId); if (TaskState.RUNNING.equals(task.getState())) { for(TaskAttempt attempt : task.getAttempts().values()) { if(attempt.getPhase() == Phase.SHUFFLE) { shufflingReduceTasks++; break; } } } } JobTaskAttemptFetchFailureEvent fetchfailureEvent = (JobTaskAttemptFetchFailureEvent) event; for (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId mapId : fetchfailureEvent.getMaps()) { Integer fetchFailures = job.fetchFailuresMapping.get(mapId); fetchFailures = (fetchFailures == null) ? 1 : (fetchFailures+1); job.fetchFailuresMapping.put(mapId, fetchFailures); float failureRate = shufflingReduceTasks == 0 ? 1.0f : (float) fetchFailures / shufflingReduceTasks; // declare faulty if fetch-failures >= max-allowed-failures if (fetchFailures >= job.getMaxFetchFailuresNotifications() && failureRate >= job.getMaxAllowedFetchFailuresFraction()) { LOG.info("Too many fetch-failures for output of task attempt: " + mapId + " ... raising fetch failure to map"); job.eventHandler.handle(new TaskAttemptEvent(mapId, TaskAttemptEventType.TA_TOO_MANY_FETCH_FAILURE)); job.fetchFailuresMapping.remove(mapId); } } }
@Override public TaskAttempt getAttempt(TaskAttemptId attemptID) { readLock.lock(); try { return attempts.get(attemptID); } finally { readLock.unlock(); } }
@Override public float getProgress() { readLock.lock(); try { TaskAttempt bestAttempt = selectBestAttempt(); if (bestAttempt == null) { return 0f; } return bestAttempt.getProgress(); } finally { readLock.unlock(); } }
@Test public void testTaskAttemptIdCounters() throws JSONException, Exception { WebResource r = resource(); Map<JobId, Job> jobsMap = appContext.getAllJobs(); for (JobId id : jobsMap.keySet()) { String jobId = MRApps.toString(id); for (Task task : jobsMap.get(id).getTasks().values()) { String tid = MRApps.toString(task.getID()); for (TaskAttempt att : task.getAttempts().values()) { TaskAttemptId attemptid = att.getID(); String attid = MRApps.toString(attemptid); ClientResponse response = r.path("ws").path("v1").path("history") .path("mapreduce").path("jobs").path(jobId).path("tasks") .path(tid).path("attempts").path(attid).path("counters") .accept(MediaType.APPLICATION_JSON).get(ClientResponse.class); assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType()); JSONObject json = response.getEntity(JSONObject.class); assertEquals("incorrect number of elements", 1, json.length()); JSONObject info = json.getJSONObject("jobTaskAttemptCounters"); verifyHsJobTaskAttemptCounters(info, att); } } } }
private long getFinishTime() { if (!isFinished()) { return 0; } long finishTime = 0; for (TaskAttempt at : attempts.values()) { //select the max finish time of all attempts if (finishTime < at.getFinishTime()) { finishTime = at.getFinishTime(); } } return finishTime; }
private long getFinishTime(TaskAttemptId taId) { if (taId == null) { return clock.getTime(); } long finishTime = 0; for (TaskAttempt at : attempts.values()) { //select the max finish time of all attempts if (at.getID().equals(taId)) { return at.getFinishTime(); } } return finishTime; }
private TaskAttempt selectBestAttempt() { if (successfulAttempt != null) { return attempts.get(successfulAttempt); } float progress = 0f; TaskAttempt result = null; for (TaskAttempt at : attempts.values()) { switch (at.getState()) { // ignore all failed task attempts case FAILED: case KILLED: continue; } if (result == null) { result = at; //The first time around } // calculate the best progress float attemptProgress = at.getProgress(); if (attemptProgress > progress) { result = at; progress = attemptProgress; } } return result; }
protected TaskAttempt getSuccessfulAttempt() { readLock.lock(); try { if (null == successfulAttempt) { return null; } return attempts.get(successfulAttempt); } finally { readLock.unlock(); } }