public TaskInProgress(Task task, JobConf conf, TaskLauncher launcher) { this.task = task; this.launcher = launcher; this.lastProgressReport = System.currentTimeMillis(); this.ttConf = conf; localJobConf = null; taskStatus = TaskStatus.createTaskStatus(task.isMapTask(), task.getTaskID(), 0.0f, task.getNumSlotsRequired(), task.getState(), diagnosticInfo.toString(), "initializing", getName(), task.isTaskCleanupTask() ? TaskStatus.Phase.CLEANUP : task.isMapTask()? TaskStatus.Phase.MAP: TaskStatus.Phase.SHUFFLE, task.getCounters()); taskTimeout = (10 * 60 * 1000); }
private void setTaskFailState(boolean wasFailure) { // go FAILED_UNCLEAN -> FAILED and KILLED_UNCLEAN -> KILLED always if (taskStatus.getRunState() == TaskStatus.State.FAILED_UNCLEAN) { taskStatus.setRunState(TaskStatus.State.FAILED); } else if (taskStatus.getRunState() == TaskStatus.State.KILLED_UNCLEAN) { taskStatus.setRunState(TaskStatus.State.KILLED); } else if (task.isMapOrReduce() && taskStatus.getPhase() != TaskStatus.Phase.CLEANUP) { if (wasFailure) { taskStatus.setRunState(TaskStatus.State.FAILED_UNCLEAN); } else { taskStatus.setRunState(TaskStatus.State.KILLED_UNCLEAN); } } else { if (wasFailure) { taskStatus.setRunState(TaskStatus.State.FAILED); } else { taskStatus.setRunState(TaskStatus.State.KILLED); } } }
public void processingRate(TaskAttemptID taskId, Task.Counter counterName, long counterValue, float progress, Phase p) { TaskInProgress tip = jobtracker.taskidToTIPMap.get(taskId); Counters counters = tip.getCounters(); if(tip.isMapTask()) { assert p == Phase.MAP : "Map task but phase is " + p; } else { assert ((p != Phase.SHUFFLE) && (p != Phase.SORT) && (p != Phase.REDUCE)) : "Reduce task, but phase is " + p; } TaskStatus status = TaskStatus.createTaskStatus(tip.isMapTask(), taskId, progress, 1, TaskStatus.State.RUNNING, "", "", tip.machineWhereTaskRan(taskId), p , counters); //need to keep the time TaskStatus oldStatus = tip.getTaskStatus(taskId); status.setStartTime(oldStatus.getStartTime()); if(!tip.isMapTask()) { status.setShuffleFinishTime(oldStatus.getShuffleFinishTime()); status.setSortFinishTime(oldStatus.getSortFinishTime()); } tip.getCounters().findCounter(counterName).setValue(counterValue); updateTaskStatus(tip, status); LOG.info(tip.getCounters().toString()); }
public TaskInProgress(Task task, JobConf conf, TaskLauncher launcher) { this.task = task; this.launcher = launcher; this.lastProgressReport = System.currentTimeMillis(); this.defaultJobConf = conf; localJobConf = null; taskStatus = TaskStatus.createTaskStatus(task.isMapTask(), task.getTaskID(), 0.0f, task.getState(), diagnosticInfo.toString(), "initializing", getName(), task.isTaskCleanupTask() ? TaskStatus.Phase.CLEANUP : task.isMapTask()? TaskStatus.Phase.MAP: TaskStatus.Phase.SHUFFLE, task.getCounters()); taskTimeout = (10 * 60 * 1000); }
public TaskInProgress(Task task, JobConf conf, TaskLauncher launcher) { this.task = task; this.launcher = launcher; this.lastProgressReport = System.currentTimeMillis(); this.defaultJobConf = conf; localJobConf = null; taskStatus = TaskStatus.createTaskStatus(task.isMapTask(), task.getTaskID(), 0.0f, task.getState(), diagnosticInfo.toString(), "initializing", getName(), task.isTaskCleanupTask() ? TaskStatus.Phase.CLEANUP : task.isMapTask()? TaskStatus.Phase.MAP: TaskStatus.Phase.SHUFFLE, task.runOnGPU(), task.getCounters()); taskTimeout = (10 * 60 * 1000); }
private List <FetchStatus> reducesInShuffle() { List <FetchStatus> fList = new ArrayList<FetchStatus>(); for (Map.Entry <JobID, RunningJob> item : runningJobs.entrySet()) { RunningJob rjob = item.getValue(); if (!rjob.localized) { continue; } JobID jobId = item.getKey(); FetchStatus f; synchronized (rjob) { f = rjob.getFetchStatus(); for (TaskInProgress tip : rjob.tasks) { Task task = tip.getTask(); if (!task.isMapTask()) { if (((ReduceTask)task).getPhase() == TaskStatus.Phase.SHUFFLE) { if (rjob.getFetchStatus() == null) { //this is a new job; we start fetching its map events f = new FetchStatus(jobId, ((ReduceTask)task).getNumMaps()); rjob.setFetchStatus(f); } f = rjob.getFetchStatus(); fList.add(f); break; //no need to check any more tasks belonging to this } } } } } //at this point, we have information about for which of //the running jobs do we need to query the jobtracker for map //outputs (actually map events). return fList; }
/**
 * Reports the given attempt as SUCCEEDED with progress 1.0 and fresh, empty
 * counters.
 *
 * @param taskId attempt to finish; must be present in
 *               {@code jobtracker.taskidToTIPMap}
 */
public void finishTask(TaskAttemptID taskId) {
  final TaskInProgress tip = jobtracker.taskidToTIPMap.get(taskId);
  final boolean isMap = tip.isMapTask();
  final String host = tip.machineWhereTaskRan(taskId);

  final Phase finalPhase;
  if (tip.isMapTask()) {
    finalPhase = Phase.MAP;
  } else {
    finalPhase = Phase.REDUCE;
  }

  final TaskStatus succeeded = TaskStatus.createTaskStatus(
      isMap, taskId, 1.0f, 1, TaskStatus.State.SUCCEEDED, "", "",
      host, finalPhase, new Counters());
  updateTaskStatus(tip, succeeded);
}
/**
 * Registers the attempt as running on the given tracker and reports an
 * initial RUNNING status with zero progress.
 *
 * @param taskId attempt to mark as running
 * @param tip task the attempt belongs to
 * @param taskTracker tracker name; its host name is derived through
 *                    {@code JobInProgress.convertTrackerNameToHostName}
 */
private void makeRunning(TaskAttemptID taskId, TaskInProgress tip,
    String taskTracker) {
  final String host = JobInProgress.convertTrackerNameToHostName(taskTracker);
  final TaskTrackerStatus trackerStatus =
      new TaskTrackerStatus(taskTracker, host);
  addRunningTaskToTIP(tip, taskId, trackerStatus, true);

  final boolean isMap = tip.isMapTask();
  final Phase startPhase;
  if (tip.isMapTask()) {
    startPhase = Phase.MAP;
  } else {
    startPhase = Phase.REDUCE;
  }

  final TaskStatus running = TaskStatus.createTaskStatus(
      isMap, taskId, 0.0f, 1, TaskStatus.State.RUNNING, "", "",
      taskTracker, startPhase, new Counters());
  updateTaskStatus(tip, running);
}
/**
 * Reports a RUNNING status carrying the given progress for an attempt.
 *
 * @param taskId attempt to update; must be present in
 *               {@code jobtracker.taskidToTIPMap}
 * @param progress progress value placed in the reported status
 */
public void progressMade(TaskAttemptID taskId, float progress) {
  final TaskInProgress tip = jobtracker.taskidToTIPMap.get(taskId);
  final boolean isMap = tip.isMapTask();
  final String host = tip.machineWhereTaskRan(taskId);

  final Phase currentPhase;
  if (tip.isMapTask()) {
    currentPhase = Phase.MAP;
  } else {
    currentPhase = Phase.REDUCE;
  }

  final TaskStatus running = TaskStatus.createTaskStatus(
      isMap, taskId, progress, 1, TaskStatus.State.RUNNING, "", "",
      host, currentPhase, new Counters());
  updateTaskStatus(tip, running);
}
/**
 * Returns the stored rate for the given phase.
 *
 * @param p one of MAP, SHUFFLE, SORT or REDUCE
 * @return {@code mapRate}, {@code copyRate}, {@code sortRate} or
 *         {@code reduceRate} respectively
 * @throws RuntimeException if {@code p} is any other value (including
 *         {@code null})
 */
public double getRate(Phase p) {
  if (p == Phase.MAP) {
    return this.mapRate;
  }
  if (p == Phase.SHUFFLE) {
    return this.copyRate;
  }
  if (p == Phase.SORT) {
    return this.sortRate;
  }
  if (p == Phase.REDUCE) {
    return this.reduceRate;
  }
  throw new RuntimeException("Invalid phase " + p);
}
/** * Get the processing rate for this task (e.g. bytes/ms in reduce) */ public double getProcessingRate(TaskStatus.Phase phase) { // we don't have processing rate information for the starting and cleaning // up phase if (phase != TaskStatus.Phase.MAP && phase != TaskStatus.Phase.SHUFFLE && phase != TaskStatus.Phase.SORT && phase != TaskStatus.Phase.REDUCE) { return 0; } return processingRates.getRate(getProcessingPhase()); }
/** * For the map task, using the bytes processed/sec as the processing rate * For the reduce task, using different rate for different phase: * copy: using the bytes copied/sec as the processing rate * sort: using the accumulated progress rate as the processing rate * reduce: using the the bytes processed/sec as the processing rate * @param currentTime * @return */ boolean canBeSpeculatedUsingProcessingRate(long currentTime) { TaskStatus.Phase p = getProcessingPhase(); // check if the task is on one of following four phases if ((p != TaskStatus.Phase.MAP) && (p != TaskStatus.Phase.SHUFFLE) && (p != TaskStatus.Phase.SORT) && (p != TaskStatus.Phase.REDUCE)) { return false; } DataStatistics taskStats = job.getRunningTaskStatistics(p); if (LOG.isDebugEnabled()) { LOG.debug("TaskID: " + this.id + "processing phase is " + p + " and processing rate for this phase is " + getProcessingRate(p)); } // Find if task should be speculated based on standard deviation // the max difference allowed between the tasks's progress rate // and the mean progress rate of sibling tasks. double maxDiff = (taskStats.std() == 0 ? taskStats.mean()/3 : job.getSlowTaskThreshold() * taskStats.std()); // if stddev > mean - we are stuck. cap the max difference at a // more meaningful number. maxDiff = Math.min(maxDiff, taskStats.mean() * job.getStddevMeanRatioMax()); return (taskStats.mean() - processingRates.getRate(p) > maxDiff); }
/**
 * Registers the attempt as running on the given tracker and reports an
 * initial RUNNING status with zero progress and an explicit start time.
 *
 * @param taskId attempt to mark as running
 * @param tip task the attempt belongs to
 * @param taskTracker tracker name; its host name is derived through
 *                    {@code JobInProgress.convertTrackerNameToHostName}
 * @param startTime start time written into the reported status
 */
private void makeRunning(TaskAttemptID taskId, TaskInProgress tip,
    String taskTracker, long startTime) {
  final Phase startPhase;
  if (tip.isMapTask()) {
    startPhase = Phase.MAP;
  } else {
    startPhase = Phase.REDUCE;
  }

  final String host = JobInProgress.convertTrackerNameToHostName(taskTracker);
  final TaskTrackerStatus trackerStatus =
      new TaskTrackerStatus(taskTracker, host);
  addRunningTaskToTIP(tip, taskId, trackerStatus, true);

  final TaskStatus running = TaskStatus.createTaskStatus(
      tip.isMapTask(), taskId, 0.0f, 1, TaskStatus.State.RUNNING, "", "",
      taskTracker, startPhase, new Counters());
  running.setStartTime(startTime);
  updateTaskStatus(tip, running);
}
/**
 * Reports the given attempt as FAILED with progress 1.0 and fresh, empty
 * counters.
 *
 * @param taskId attempt to fail; must be present in
 *               {@code jobtracker.taskidToTIPMap}
 */
public void failTask(TaskAttemptID taskId) {
  final TaskInProgress tip = jobtracker.taskidToTIPMap.get(taskId);
  final boolean isMap = tip.isMapTask();
  final String host = tip.machineWhereTaskRan(taskId);

  final Phase lastPhase;
  if (tip.isMapTask()) {
    lastPhase = Phase.MAP;
  } else {
    lastPhase = Phase.REDUCE;
  }

  final TaskStatus failed = TaskStatus.createTaskStatus(
      isMap, taskId, 1.0f, 1, TaskStatus.State.FAILED, "", "",
      host, lastPhase, new Counters());
  updateTaskStatus(tip, failed);
}
/**
 * Reports the given attempt as KILLED with progress 1.0 and fresh, empty
 * counters.
 *
 * @param taskId attempt to kill; must be present in
 *               {@code jobtracker.taskidToTIPMap}
 */
public void killTask(TaskAttemptID taskId) {
  final TaskInProgress tip = jobtracker.taskidToTIPMap.get(taskId);
  final boolean isMap = tip.isMapTask();
  final String host = tip.machineWhereTaskRan(taskId);

  final Phase lastPhase;
  if (tip.isMapTask()) {
    lastPhase = Phase.MAP;
  } else {
    lastPhase = Phase.REDUCE;
  }

  final TaskStatus killed = TaskStatus.createTaskStatus(
      isMap, taskId, 1.0f, 1, TaskStatus.State.KILLED, "", "",
      host, lastPhase, new Counters());
  updateTaskStatus(tip, killed);
}
/**
 * Populates this object from a decoded tracker description.
 *
 * <p>Reads the keys {@code active}, {@code last_seen},
 * {@code map_tasks_max}, {@code reduce_tasks_max} and {@code tasks}. Each
 * entry of {@code tasks} is itself a map with the keys {@code job_id},
 * {@code task_id}, {@code attempt}, {@code type}, {@code progress},
 * {@code start_time}, {@code running_time}, {@code state} and
 * {@code phase}; one {@code TaskInfo} per entry is appended to
 * {@code localTasksInfo}.
 *
 * <p>{@code totalMapTasks} is incremented for map tasks that are SUCCEEDED
 * or RUNNING. The original condition lacked parentheses and therefore also
 * counted every RUNNING reduce task.
 *
 * @param trackerInfo decoded tracker description; numeric values are
 *                    expected as {@code Long}, progress as {@code Double}
 */
public void parseMap(Map<String, Object> trackerInfo) {
  active = (Boolean) trackerInfo.get("active");
  lastSeen = (Long) trackerInfo.get("last_seen");
  maxMapTasks = ((Long) trackerInfo.get("map_tasks_max")).intValue();
  maxReduceTasks = ((Long) trackerInfo.get("reduce_tasks_max")).intValue();

  Object[] tasks = (Object[]) trackerInfo.get("tasks");
  for (Object task : tasks) {
    // The element type cannot be checked at runtime; entries are expected
    // to be String-keyed maps as described above.
    @SuppressWarnings("unchecked")
    Map<String, Object> taskMap = (Map<String, Object>) task;

    int jobId = ((Long) taskMap.get("job_id")).intValue();
    int taskId = ((Long) taskMap.get("task_id")).intValue();
    int attempt = ((Long) taskMap.get("attempt")).intValue();
    boolean map = taskMap.get("type").equals("map");
    double taskProgress = (Double) taskMap.get("progress");
    long startTime = (Long) taskMap.get("start_time");
    long runningTime = (Long) taskMap.get("running_time");
    TaskStatus.State taskState =
        TaskStatus.State.valueOf(taskMap.get("state").toString());
    TaskStatus.Phase taskPhase =
        TaskStatus.Phase.valueOf(taskMap.get("phase").toString());

    TaskInfo taskInfo = new TaskInfo(jobId, taskId, attempt, map,
        startTime, runningTime, taskProgress, taskPhase, taskState);

    // && binds tighter than ||, so the state test must be grouped
    // explicitly to apply the "map" requirement to both states.
    if (map && (taskState == TaskStatus.State.SUCCEEDED
        || taskState == TaskStatus.State.RUNNING)) {
      totalMapTasks++;
    }
    localTasksInfo.add(taskInfo);
  }
}
/**
 * Creates a task description; every argument is stored in the field of the
 * same name.
 *
 * @param jobId job identifier
 * @param taskId task identifier
 * @param attempt attempt number
 * @param map value for the {@code map} field
 * @param startTime value for the {@code startTime} field
 * @param runningTime value for the {@code runningTime} field
 * @param taskProgress value for the {@code taskProgress} field
 * @param taskPhase phase of the task
 * @param taskState state of the task
 */
public TaskInfo(int jobId, int taskId, int attempt, boolean map,
    long startTime, long runningTime, double taskProgress,
    Phase taskPhase, State taskState) {
  // identity
  this.jobId = jobId;
  this.taskId = taskId;
  this.attempt = attempt;
  this.map = map;

  // timing
  this.startTime = startTime;
  this.runningTime = runningTime;

  // progress and lifecycle
  this.taskProgress = taskProgress;
  this.taskPhase = taskPhase;
  this.taskState = taskState;
}
/** * Kills a task attempt. * * @param action contains the task attempt to kill * @param now current simulation time * @return new events generated in response, empty */ private List<SimulatorEvent> handleKillTaskAction(KillTaskAction action, long now) { TaskAttemptID taskId = action.getTaskID(); // we don't have a nice(r) toString() in Hadoop's TaskActions if (LOG.isDebugEnabled()) { LOG.debug("Handling kill task action, taskId=" + taskId + ", now=" + now); } SimulatorTaskInProgress tip = tasks.get(taskId); // Safety check: We might get a KillTaskAction even for completed reduces if (tip == null) { return SimulatorEngine.EMPTY_EVENTS; } progressTaskStatus(tip, now); // make progress up to date TaskStatus finalStatus = (TaskStatus)tip.getTaskStatus().clone(); finalStatus.setFinishTime(now); finalStatus.setRunState(State.KILLED); finishRunningTask(finalStatus, now); if (finalStatus.getIsMap() || finalStatus.getPhase() == Phase.REDUCE) { // if we have already created a task attempt completion event we remember // the task id, so that we can safely ignore the event when its delivered orphanTaskCompletions.add(taskId); } return SimulatorEngine.EMPTY_EVENTS; }
/** * Starts "running" the REDUCE phase of reduce upon being notified that * all map tasks are (successfully) done. * * @param action contains the notification for one of the reduce tasks * @param now current simulation time * @return new events generated, a single TaskAttemptCompletionEvent for the * reduce */ private List<SimulatorEvent> handleAllMapsCompletedTaskAction( AllMapsCompletedTaskAction action, long now) { if (LOG.isDebugEnabled()) { LOG.debug("Handling all maps completed task action " + action); } TaskAttemptID taskId = action.getTaskID(); SimulatorTaskInProgress tip = tasks.get(taskId); // If tip is null here it is because the task attempt to be notified is // unknown to this TaskTracker. TaskStatus status = tip.getTaskStatus(); if (status.getIsMap()) { throw new IllegalStateException( "Invalid AllMapsCompletedTaskAction, task attempt " + "to be notified is a map: " + taskId + " " + status); } if (status.getPhase() != Phase.SHUFFLE) { throw new IllegalArgumentException( "Reducer task attempt already notified: " + taskId + " " + status); } // Warning: setPhase() uses System.currentTimeMillis() internally to // set shuffle and sort times, but we overwrite that manually anyway status.setPhase(Phase.REDUCE); status.setShuffleFinishTime(now); status.setSortFinishTime(now); // Forecast the completion of this reduce TaskAttemptCompletionEvent e = createTaskAttemptCompletionEvent(tip, now); return Collections.<SimulatorEvent>singletonList(e); }
/**
 * Registers the expectation that a map attempt produces a completion event.
 *
 * <p>The expected status is SUCCEEDED in phase MAP with finish time
 * {@code mapStart + mapRuntime}; the expectation is recorded under
 * {@code mapStart}.
 *
 * @param taskTracker tracker the completion event is created for
 * @param taskId attempt id; downgraded to the old-API id type
 * @param mapStart time the expectation is registered under
 * @param mapRuntime added to {@code mapStart} to obtain the finish time
 */
public void expectMapTask(SimulatorTaskTracker taskTracker,
    TaskAttemptID taskId, long mapStart, long mapRuntime) {
  final org.apache.hadoop.mapred.TaskAttemptID oldApiId =
      org.apache.hadoop.mapred.TaskAttemptID.downgrade(taskId);

  final MapTaskStatus finished = new MapTaskStatus(oldApiId, 1.0f, 1,
      State.SUCCEEDED, null, null, null, Phase.MAP, null);
  finished.setFinishTime(mapStart + mapRuntime);

  addExpected(mapStart,
      new TaskAttemptCompletionEvent(taskTracker, finished));
}
/**
 * Registers the expectation that a reduce attempt produces a completion
 * event.
 *
 * <p>The expected status is SUCCEEDED in phase REDUCE with finish time
 * {@code mapDone + reduceRuntime}; the expectation is recorded under
 * {@code mapDone}.
 *
 * @param taskTracker tracker the completion event is created for
 * @param taskId attempt id; downgraded to the old-API id type
 * @param mapDone time the expectation is registered under
 * @param reduceRuntime added to {@code mapDone} to obtain the finish time
 */
public void expectReduceTask(SimulatorTaskTracker taskTracker,
    TaskAttemptID taskId, long mapDone, long reduceRuntime) {
  final org.apache.hadoop.mapred.TaskAttemptID oldApiId =
      org.apache.hadoop.mapred.TaskAttemptID.downgrade(taskId);

  final ReduceTaskStatus finished = new ReduceTaskStatus(oldApiId, 1.0f, 1,
      State.SUCCEEDED, null, null, null, Phase.REDUCE, null);
  finished.setFinishTime(mapDone + reduceRuntime);

  addExpected(mapDone,
      new TaskAttemptCompletionEvent(taskTracker, finished));
}
int findLaunchTaskActions(HeartbeatResponse response) { TaskTrackerAction[] actions = response.getActions(); int numLaunchTaskActions = 0; // HashSet<> numLaunchTaskActions for (TaskTrackerAction action : actions) { if (action instanceof SimulatorLaunchTaskAction) { Task task = ((SimulatorLaunchTaskAction) action).getTask(); numLaunchTaskActions++; TaskAttemptID taskId = task.getTaskID(); if (tasks.containsKey(taskId)) { // already have this task..do not need to generate new status continue; } TaskStatus status; if (task.isMapTask()) { status = new MapTaskStatus(taskId, 0f, 1, State.RUNNING, "", "", taskTrackerName, Phase.MAP, new Counters()); } else { status = new ReduceTaskStatus(taskId, 0f, 1, State.RUNNING, "", "", taskTrackerName, Phase.SHUFFLE, new Counters()); } status.setRunState(State.SUCCEEDED); status.setStartTime(this.now); SimulatorTaskInProgress tip = new SimulatorTaskInProgress( (SimulatorLaunchTaskAction) action, status, this.now); tasks.put(taskId, tip); } } return numLaunchTaskActions; }
boolean fetchFailureNotification( TaskAttemptID reportingAttempt, TaskInProgress tip, TaskAttemptID mapAttemptId, String trackerName) { jobStats.incNumMapFetchFailures(); synchronized (lockObject) { Integer fetchFailures = mapTaskIdToFetchFailuresMap.get(mapAttemptId); fetchFailures = (fetchFailures == null) ? 1 : (fetchFailures + 1); mapTaskIdToFetchFailuresMap.put(mapAttemptId, fetchFailures); LOG.info("Failed fetch notification #" + fetchFailures + " by " + reportingAttempt + " for task " + mapAttemptId + " tracker " + trackerName); float failureRate = (float)fetchFailures / runningReduceTasks; // declare faulty if fetch-failures >= max-allowed-failures final boolean isMapFaulty = (failureRate >= MAX_ALLOWED_FETCH_FAILURES_PERCENT) || fetchFailures > maxFetchFailuresPerMapper; if (fetchFailures >= MAX_FETCH_FAILURES_NOTIFICATIONS && isMapFaulty) { String reason = "Too many fetch-failures (" + fetchFailures + ") at " + new Date(); LOG.info(reason + " for " + mapAttemptId + " ... killing it"); final boolean isFailed = true; TaskTrackerInfo ttStatus = null; jobStats.incNumMapTasksFailedByFetchFailures(); failedTask(tip, mapAttemptId, reason, (tip.isMapTask() ? TaskStatus.Phase.MAP : TaskStatus.Phase.REDUCE), isFailed, trackerName, ttStatus); mapTaskIdToFetchFailuresMap.remove(mapAttemptId); return true; } } return false; }