public JobMetrics(Job job, String bytesReplicatedKey) { Builder<String, Long> builder = ImmutableMap.builder(); if (job != null) { Counters counters; try { counters = job.getCounters(); } catch (IOException e) { throw new CircusTrainException("Unable to get counters from job.", e); } if (counters != null) { for (CounterGroup group : counters) { for (Counter counter : group) { builder.put(DotJoiner.join(group.getName(), counter.getName()), counter.getValue()); } } } } metrics = builder.build(); Long bytesReplicatedValue = metrics.get(bytesReplicatedKey); if (bytesReplicatedValue != null) { bytesReplicated = bytesReplicatedValue; } else { bytesReplicated = 0L; } }
@Override protected boolean runJob(Job job) throws ClassNotFoundException, IOException, InterruptedException { PerfCounters perfCounters = new PerfCounters(); perfCounters.startClock(); boolean success = doSubmitJob(job); perfCounters.stopClock(); Counters jobCounters = job.getCounters(); // If the job has been retired, these may be unavailable. if (null == jobCounters) { displayRetiredJobNotice(LOG); } else { perfCounters.addBytes(jobCounters.getGroup("FileSystemCounters") .findCounter("HDFS_BYTES_READ").getValue()); LOG.info("Transferred " + perfCounters.toString()); long numRecords = ConfigurationHelper.getNumMapInputRecords(job); LOG.info("Exported " + numRecords + " records."); } return success; }
@Override public Counters getAllCounters() { readLock.lock(); try { JobStateInternal state = getInternalState(); if (state == JobStateInternal.ERROR || state == JobStateInternal.FAILED || state == JobStateInternal.KILLED || state == JobStateInternal.SUCCEEDED) { this.mayBeConstructFinalFullCounters(); return fullCounters; } Counters counters = new Counters(); counters.incrAllCounters(jobCounters); return incrTaskCounters(counters, tasks.values()); } finally { readLock.unlock(); } }
@Private public void constructFinalFullcounters() { this.fullCounters = new Counters(); this.finalMapCounters = new Counters(); this.finalReduceCounters = new Counters(); this.fullCounters.incrAllCounters(jobCounters); for (Task t : this.tasks.values()) { Counters counters = t.getCounters(); switch (t.getType()) { case MAP: this.finalMapCounters.incrAllCounters(counters); break; case REDUCE: this.finalReduceCounters.incrAllCounters(counters); break; default: throw new IllegalStateException("Task type neither map nor reduce: " + t.getType()); } this.fullCounters.incrAllCounters(counters); } }
@Override public Counters getCounters() { Counters counters = null; readLock.lock(); try { TaskAttempt bestAttempt = selectBestAttempt(); if (bestAttempt != null) { counters = bestAttempt.getCounters(); } else { counters = TaskAttemptImpl.EMPTY_COUNTERS; // counters.groups = new HashMap<CharSequence, CounterGroup>(); } return counters; } finally { readLock.unlock(); } }
@Private public JsonNode countersToJSON(Counters counters) { ObjectMapper mapper = new ObjectMapper(); ArrayNode nodes = mapper.createArrayNode(); if (counters != null) { for (CounterGroup counterGroup : counters) { ObjectNode groupNode = nodes.addObject(); groupNode.put("NAME", counterGroup.getName()); groupNode.put("DISPLAY_NAME", counterGroup.getDisplayName()); ArrayNode countersNode = groupNode.putArray("COUNTERS"); for (Counter counter : counterGroup) { ObjectNode counterNode = countersNode.addObject(); counterNode.put("NAME", counter.getName()); counterNode.put("DISPLAY_NAME", counter.getDisplayName()); counterNode.put("VALUE", counter.getValue()); } } } return nodes; }
private void updateStatus(MRApp app, TaskAttempt attempt, Phase phase) { TaskAttemptStatusUpdateEvent.TaskAttemptStatus status = new TaskAttemptStatusUpdateEvent.TaskAttemptStatus(); status.counters = new Counters(); status.fetchFailedMaps = new ArrayList<TaskAttemptId>(); status.id = attempt.getID(); status.mapFinishTime = 0; status.phase = phase; status.progress = 0.5f; status.shuffleFinishTime = 0; status.sortFinishTime = 0; status.stateString = "OK"; status.taskState = attempt.getState(); TaskAttemptStatusUpdateEvent event = new TaskAttemptStatusUpdateEvent(attempt.getID(), status); app.getContext().getEventHandler().handle(event); }
/** * test a getters of TaskAttemptFinishedEvent and TaskAttemptFinished * * @throws Exception */ @Test(timeout = 10000) public void testTaskAttemptFinishedEvent() throws Exception { JobID jid = new JobID("001", 1); TaskID tid = new TaskID(jid, TaskType.REDUCE, 2); TaskAttemptID taskAttemptId = new TaskAttemptID(tid, 3); Counters counters = new Counters(); TaskAttemptFinishedEvent test = new TaskAttemptFinishedEvent(taskAttemptId, TaskType.REDUCE, "TEST", 123L, "RAKNAME", "HOSTNAME", "STATUS", counters); assertEquals(test.getAttemptId().toString(), taskAttemptId.toString()); assertEquals(test.getCounters(), counters); assertEquals(test.getFinishTime(), 123L); assertEquals(test.getHostname(), "HOSTNAME"); assertEquals(test.getRackName(), "RAKNAME"); assertEquals(test.getState(), "STATUS"); assertEquals(test.getTaskId(), tid); assertEquals(test.getTaskStatus(), "TEST"); assertEquals(test.getTaskType(), TaskType.REDUCE); }
protected void verifySleepJobCounters(Job job) throws InterruptedException, IOException { Counters counters = job.getCounters(); Assert.assertEquals(3, counters.findCounter(JobCounter.OTHER_LOCAL_MAPS) .getValue()); Assert.assertEquals(3, counters.findCounter(JobCounter.TOTAL_LAUNCHED_MAPS) .getValue()); Assert.assertEquals(numSleepReducers, counters.findCounter(JobCounter.TOTAL_LAUNCHED_REDUCES).getValue()); Assert .assertTrue(counters.findCounter(JobCounter.SLOTS_MILLIS_MAPS) != null && counters.findCounter(JobCounter.SLOTS_MILLIS_MAPS).getValue() != 0); Assert .assertTrue(counters.findCounter(JobCounter.SLOTS_MILLIS_MAPS) != null && counters.findCounter(JobCounter.SLOTS_MILLIS_MAPS).getValue() != 0); }
/** * Create an event to record completion of a reduce attempt * @param id Attempt Id * @param taskType Type of task * @param taskStatus Status of the task * @param shuffleFinishTime Finish time of the shuffle phase * @param sortFinishTime Finish time of the sort phase * @param finishTime Finish time of the attempt * @param hostname Name of the host where the attempt executed * @param port RPC port for the tracker host. * @param rackName Name of the rack where the attempt executed * @param state State of the attempt * @param counters Counters for the attempt * @param allSplits the "splits", or a pixelated graph of various * measurable worker node state variables against progress. * Currently there are four; wallclock time, CPU time, * virtual memory and physical memory. */ public ReduceAttemptFinishedEvent (TaskAttemptID id, TaskType taskType, String taskStatus, long shuffleFinishTime, long sortFinishTime, long finishTime, String hostname, int port, String rackName, String state, Counters counters, int[][] allSplits) { this.attemptId = id; this.taskType = taskType; this.taskStatus = taskStatus; this.shuffleFinishTime = shuffleFinishTime; this.sortFinishTime = sortFinishTime; this.finishTime = finishTime; this.hostname = hostname; this.rackName = rackName; this.port = port; this.state = state; this.counters = counters; this.allSplits = allSplits; this.clockSplits = ProgressSplitsBlock.arrayGetWallclockTime(allSplits); this.cpuUsages = ProgressSplitsBlock.arrayGetCPUTime(allSplits); this.gpuUsages = ProgressSplitsBlock.arrayGetGPUTime(allSplits); this.vMemKbytes = ProgressSplitsBlock.arrayGetVMemKbytes(allSplits); this.physMemKbytes = ProgressSplitsBlock.arrayGetPhysMemKbytes(allSplits); }
static JhCounters toAvro(Counters counters, String name) { JhCounters result = new JhCounters(); result.name = new Utf8(name); result.groups = new ArrayList<JhCounterGroup>(0); if (counters == null) return result; for (CounterGroup group : counters) { JhCounterGroup g = new JhCounterGroup(); g.name = new Utf8(group.getName()); g.displayName = new Utf8(group.getDisplayName()); g.counts = new ArrayList<JhCounter>(group.size()); for (Counter counter : group) { JhCounter c = new JhCounter(); c.name = new Utf8(counter.getName()); c.displayName = new Utf8(counter.getDisplayName()); c.value = counter.getValue(); g.counts.add(c); } result.groups.add(g); } return result; }
/** * Create an event to record successful job completion * @param id Job ID * @param finishTime Finish time of the job * @param finishedMaps The number of finished maps * @param finishedReduces The number of finished reduces * @param failedMaps The number of failed maps * @param failedReduces The number of failed reduces * @param mapCounters Map Counters for the job * @param reduceCounters Reduce Counters for the job * @param totalCounters Total Counters for the job */ public JobFinishedEvent(JobID id, long finishTime, int finishedMaps, int finishedReduces, int failedMaps, int failedReduces, Counters mapCounters, Counters reduceCounters, Counters totalCounters) { this.jobId = id; this.finishTime = finishTime; this.finishedMaps = finishedMaps; this.finishedReduces = finishedReduces; this.failedMaps = failedMaps; this.failedReduces = failedReduces; this.mapCounters = mapCounters; this.reduceCounters = reduceCounters; this.totalCounters = totalCounters; }
/** * Create an event for successful completion of map attempts * @param id Task Attempt ID * @param taskType Type of the task * @param taskStatus Status of the task * @param mapFinishTime Finish time of the map phase * @param finishTime Finish time of the attempt * @param hostname Name of the host where the map executed * @param port RPC port for the tracker host. * @param rackName Name of the rack where the map executed * @param state State string for the attempt * @param counters Counters for the attempt * @param allSplits the "splits", or a pixelated graph of various * measurable worker node state variables against progress. * Currently there are four; wallclock time, CPU time, * virtual memory and physical memory. * * If you have no splits data, code {@code null} for this * parameter. */ public MapAttemptFinishedEvent (TaskAttemptID id, TaskType taskType, String taskStatus, long mapFinishTime, long finishTime, String hostname, int port, String rackName, String state, Counters counters, int[][] allSplits) { this.attemptId = id; this.taskType = taskType; this.taskStatus = taskStatus; this.mapFinishTime = mapFinishTime; this.finishTime = finishTime; this.hostname = hostname; this.rackName = rackName; this.port = port; this.state = state; this.counters = counters; this.allSplits = allSplits; this.clockSplits = ProgressSplitsBlock.arrayGetWallclockTime(allSplits); this.cpuUsages = ProgressSplitsBlock.arrayGetCPUTime(allSplits); this.gpuUsages = ProgressSplitsBlock.arrayGetGPUTime(allSplits); this.vMemKbytes = ProgressSplitsBlock.arrayGetVMemKbytes(allSplits); this.physMemKbytes = ProgressSplitsBlock.arrayGetPhysMemKbytes(allSplits); }
private void constructTaskReport() { loadAllTaskAttempts(); this.report = Records.newRecord(TaskReport.class); report.setTaskId(taskId); long minLaunchTime = Long.MAX_VALUE; for(TaskAttempt attempt: attempts.values()) { minLaunchTime = Math.min(minLaunchTime, attempt.getLaunchTime()); } minLaunchTime = minLaunchTime == Long.MAX_VALUE ? -1 : minLaunchTime; report.setStartTime(minLaunchTime); report.setFinishTime(taskInfo.getFinishTime()); report.setTaskState(getState()); report.setProgress(getProgress()); Counters counters = getCounters(); if (counters == null) { counters = EMPTY_COUNTERS; } report.setCounters(TypeConverter.toYarn(counters)); if (successfulAttempt != null) { report.setSuccessfulAttempt(successfulAttempt); } report.addAllDiagnostics(reportDiagnostics); report .addAllRunningAttempts(new ArrayList<TaskAttemptId>(attempts.keySet())); }
protected static Counters parseCounters(String counters) throws ParseException { if (counters == null) { LOG.warn("HistoryEventEmitters: null counter detected:"); return null; } counters = counters.replace("\\.", "\\\\."); counters = counters.replace("\\\\{", "\\{"); counters = counters.replace("\\\\}", "\\}"); counters = counters.replace("\\\\(", "\\("); counters = counters.replace("\\\\)", "\\)"); counters = counters.replace("\\\\[", "\\["); counters = counters.replace("\\\\]", "\\]"); org.apache.hadoop.mapred.Counters depForm = org.apache.hadoop.mapred.Counters.fromEscapedCompactString(counters); return new Counters(depForm); }
@Test public void testSyncTable() throws Exception { String sourceTableName = "testSourceTable"; String targetTableName = "testTargetTable"; Path testDir = TEST_UTIL.getDataTestDirOnTestFS("testSyncTable"); writeTestData(sourceTableName, targetTableName); hashSourceTable(sourceTableName, testDir); Counters syncCounters = syncTables(sourceTableName, targetTableName, testDir); assertEqualTables(90, sourceTableName, targetTableName); assertEquals(60, syncCounters.findCounter(Counter.ROWSWITHDIFFS).getValue()); assertEquals(10, syncCounters.findCounter(Counter.SOURCEMISSINGROWS).getValue()); assertEquals(10, syncCounters.findCounter(Counter.TARGETMISSINGROWS).getValue()); assertEquals(50, syncCounters.findCounter(Counter.SOURCEMISSINGCELLS).getValue()); assertEquals(50, syncCounters.findCounter(Counter.TARGETMISSINGCELLS).getValue()); assertEquals(20, syncCounters.findCounter(Counter.DIFFERENTCELLVALUES).getValue()); TEST_UTIL.deleteTable(sourceTableName); TEST_UTIL.deleteTable(targetTableName); TEST_UTIL.cleanupDataTestDirOnTestFS(); }
public boolean verify(long expectedReferenced) throws Exception { if (job == null) { throw new IllegalStateException("You should call run() first"); } Counters counters = job.getCounters(); // Run through each check, even if we fail one early boolean success = verifyExpectedValues(expectedReferenced, counters); if (!verifyUnexpectedValues(counters)) { // We found counter objects which imply failure success = false; } if (!success) { handleFailure(counters); } return success; }
/** * Verify the values in the Counters against the expected number of entries written. * * @param expectedReferenced * Expected number of referenced entrires * @param counters * The Job's Counters object * @return True if the values match what's expected, false otherwise */ protected boolean verifyExpectedValues(long expectedReferenced, Counters counters) { final Counter referenced = counters.findCounter(Counts.REFERENCED); final Counter unreferenced = counters.findCounter(Counts.UNREFERENCED); boolean success = true; if (expectedReferenced != referenced.getValue()) { LOG.error("Expected referenced count does not match with actual referenced count. " + "expected referenced=" + expectedReferenced + " ,actual=" + referenced.getValue()); success = false; } if (unreferenced.getValue() > 0) { final Counter multiref = counters.findCounter(Counts.EXTRAREFERENCES); boolean couldBeMultiRef = (multiref.getValue() == unreferenced.getValue()); LOG.error("Unreferenced nodes were not expected. Unreferenced count=" + unreferenced.getValue() + (couldBeMultiRef ? "; could be due to duplicate random numbers" : "")); success = false; } return success; }
/** * Verify that the Counters don't contain values which indicate an outright failure from the Reducers. * * @param counters * The Job's counters * @return True if the "bad" counter objects are 0, false otherwise */ protected boolean verifyUnexpectedValues(Counters counters) { final Counter undefined = counters.findCounter(Counts.UNDEFINED); final Counter lostfamilies = counters.findCounter(Counts.LOST_FAMILIES); boolean success = true; if (undefined.getValue() > 0) { LOG.error("Found an undefined node. Undefined count=" + undefined.getValue()); success = false; } if (lostfamilies.getValue() > 0) { LOG.error("Found nodes which lost big or tiny families, count=" + lostfamilies.getValue()); success = false; } return success; }
@Override public TimingResult call() throws Exception { PerformanceEvaluation.TestOptions opts = PerformanceEvaluation.parseOpts(argv); PerformanceEvaluation.checkTable(admin, opts); PerformanceEvaluation.RunResult results[] = null; long numRows = opts.totalRows; long elapsedTime = 0; if (opts.nomapred) { results = PerformanceEvaluation.doLocalClients(opts, admin.getConfiguration()); for (PerformanceEvaluation.RunResult r : results) { elapsedTime = Math.max(elapsedTime, r.duration); } } else { Job job = PerformanceEvaluation.doMapReduce(opts, admin.getConfiguration()); Counters counters = job.getCounters(); numRows = counters.findCounter(PerformanceEvaluation.Counter.ROWS).getValue(); elapsedTime = counters.findCounter(PerformanceEvaluation.Counter.ELAPSED_TIME).getValue(); } return new TimingResult(numRows, elapsedTime, results); }
public static void verifyCounters(Job normalJob, Job nativeJob, boolean hasCombiner) throws IOException { Counters normalCounters = normalJob.getCounters(); Counters nativeCounters = nativeJob.getCounters(); assertEquals("Counter MAP_OUTPUT_RECORDS should be equal", normalCounters.findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue(), nativeCounters.findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue()); assertEquals("Counter REDUCE_INPUT_GROUPS should be equal", normalCounters.findCounter(TaskCounter.REDUCE_INPUT_GROUPS).getValue(), nativeCounters.findCounter(TaskCounter.REDUCE_INPUT_GROUPS).getValue()); if (!hasCombiner) { assertEquals("Counter REDUCE_INPUT_RECORDS should be equal", normalCounters.findCounter(TaskCounter.REDUCE_INPUT_RECORDS).getValue(), nativeCounters.findCounter(TaskCounter.REDUCE_INPUT_RECORDS).getValue()); } }
/** * Create an event to record completion of a reduce attempt * @param id Attempt Id * @param taskType Type of task * @param taskStatus Status of the task * @param shuffleFinishTime Finish time of the shuffle phase * @param sortFinishTime Finish time of the sort phase * @param finishTime Finish time of the attempt * @param hostname Name of the host where the attempt executed * @param port RPC port for the tracker host. * @param rackName Name of the rack where the attempt executed * @param state State of the attempt * @param counters Counters for the attempt * @param allSplits the "splits", or a pixelated graph of various * measurable worker node state variables against progress. * Currently there are four; wallclock time, CPU time, * virtual memory and physical memory. */ public ReduceAttemptFinishedEvent (TaskAttemptID id, TaskType taskType, String taskStatus, long shuffleFinishTime, long sortFinishTime, long finishTime, String hostname, int port, String rackName, String state, Counters counters, int[][] allSplits) { this.attemptId = id; this.taskType = taskType; this.taskStatus = taskStatus; this.shuffleFinishTime = shuffleFinishTime; this.sortFinishTime = sortFinishTime; this.finishTime = finishTime; this.hostname = hostname; this.rackName = rackName; this.port = port; this.state = state; this.counters = counters; this.allSplits = allSplits; this.clockSplits = ProgressSplitsBlock.arrayGetWallclockTime(allSplits); this.cpuUsages = ProgressSplitsBlock.arrayGetCPUTime(allSplits); this.vMemKbytes = ProgressSplitsBlock.arrayGetVMemKbytes(allSplits); this.physMemKbytes = ProgressSplitsBlock.arrayGetPhysMemKbytes(allSplits); }
static Counters fromAvro(JhCounters counters) { Counters result = new Counters(); if(counters != null) { for (JhCounterGroup g : counters.getGroups()) { CounterGroup group = result.addGroup(StringInterner.weakIntern(g.getName().toString()), StringInterner.weakIntern(g.getDisplayName().toString())); for (JhCounter c : g.getCounts()) { group.addCounter(StringInterner.weakIntern(c.getName().toString()), StringInterner.weakIntern(c.getDisplayName().toString()), c.getValue()); } } } return result; }
/** * Create an event to record the unsuccessful completion of attempts * @param id Attempt ID * @param taskType Type of the task * @param status Status of the attempt * @param finishTime Finish time of the attempt * @param hostname Name of the host where the attempt executed * @param port rpc port for for the tracker * @param rackName Name of the rack where the attempt executed * @param error Error string * @param counters Counters for the attempt * @param allSplits the "splits", or a pixelated graph of various * measurable worker node state variables against progress. * Currently there are four; wallclock time, CPU time, * virtual memory and physical memory. */ public TaskAttemptUnsuccessfulCompletionEvent (TaskAttemptID id, TaskType taskType, String status, long finishTime, String hostname, int port, String rackName, String error, Counters counters, int[][] allSplits) { this.attemptId = id; this.taskType = taskType; this.status = status; this.finishTime = finishTime; this.hostname = hostname; this.port = port; this.rackName = rackName; this.error = error; this.counters = counters; this.allSplits = allSplits; this.clockSplits = ProgressSplitsBlock.arrayGetWallclockTime(allSplits); this.cpuUsages = ProgressSplitsBlock.arrayGetCPUTime(allSplits); this.vMemKbytes = ProgressSplitsBlock.arrayGetVMemKbytes(allSplits); this.physMemKbytes = ProgressSplitsBlock.arrayGetPhysMemKbytes(allSplits); }
/** * Create an event for successful completion of map attempts * @param id Task Attempt ID * @param taskType Type of the task * @param taskStatus Status of the task * @param mapFinishTime Finish time of the map phase * @param finishTime Finish time of the attempt * @param hostname Name of the host where the map executed * @param port RPC port for the tracker host. * @param rackName Name of the rack where the map executed * @param state State string for the attempt * @param counters Counters for the attempt * @param allSplits the "splits", or a pixelated graph of various * measurable worker node state variables against progress. * Currently there are four; wallclock time, CPU time, * virtual memory and physical memory. * * If you have no splits data, code {@code null} for this * parameter. */ public MapAttemptFinishedEvent (TaskAttemptID id, TaskType taskType, String taskStatus, long mapFinishTime, long finishTime, String hostname, int port, String rackName, String state, Counters counters, int[][] allSplits) { this.attemptId = id; this.taskType = taskType; this.taskStatus = taskStatus; this.mapFinishTime = mapFinishTime; this.finishTime = finishTime; this.hostname = hostname; this.rackName = rackName; this.port = port; this.state = state; this.counters = counters; this.allSplits = allSplits; this.clockSplits = ProgressSplitsBlock.arrayGetWallclockTime(allSplits); this.cpuUsages = ProgressSplitsBlock.arrayGetCPUTime(allSplits); this.vMemKbytes = ProgressSplitsBlock.arrayGetVMemKbytes(allSplits); this.physMemKbytes = ProgressSplitsBlock.arrayGetPhysMemKbytes(allSplits); }
@Test public void typical() throws Exception { Counters counters = new Counters(); counters.getGroup(GROUP).addCounter(COUNTER, COUNTER, 1L); when(job.getCounters()).thenReturn(counters); JobMetrics jobMetrics = new JobMetrics(job, GROUP, COUNTER); Map<String, Long> metrics = jobMetrics.getMetrics(); assertThat(metrics.size(), is(1)); assertThat(metrics.get("group.counter"), is(1L)); assertThat(jobMetrics.getBytesReplicated(), is(1L)); }