/** * Get the active task tracker statuses in the cluster * * @return {@link Collection} of active {@link TaskTrackerStatus} */ // This method is synchronized to make sure that the locking order // "taskTrackers lock followed by faultyTrackers.potentiallyFaultyTrackers // lock" is under JobTracker lock to avoid deadlocks. synchronized public Collection<TaskTrackerStatus> activeTaskTrackers() { Collection<TaskTrackerStatus> activeTrackers = new ArrayList<TaskTrackerStatus>(); synchronized (taskTrackers) { for ( TaskTracker tt : taskTrackers.values()) { TaskTrackerStatus status = tt.getStatus(); if (!faultyTrackers.isBlacklisted(status.getHost())) { activeTrackers.add(status); } } } return activeTrackers; }
/** * Get the active and blacklisted task tracker names in the cluster. The first * element in the returned list contains the list of active tracker names. * The second element in the returned list contains the list of blacklisted * tracker names. */ // This method is synchronized to make sure that the locking order // "taskTrackers lock followed by faultyTrackers.potentiallyFaultyTrackers // lock" is under JobTracker lock to avoid deadlocks. synchronized public List<List<String>> taskTrackerNames() { List<String> activeTrackers = new ArrayList<String>(); List<String> blacklistedTrackers = new ArrayList<String>(); synchronized (taskTrackers) { for (TaskTracker tt : taskTrackers.values()) { TaskTrackerStatus status = tt.getStatus(); if (!faultyTrackers.isBlacklisted(status.getHost())) { activeTrackers.add(status.getTrackerName()); } else { blacklistedTrackers.add(status.getTrackerName()); } } } List<List<String>> result = new ArrayList<List<String>>(2); result.add(activeTrackers); result.add(blacklistedTrackers); return result; }
/** * Get the blacklisted task tracker statuses in the cluster * * @return {@link Collection} of blacklisted {@link TaskTrackerStatus} */ // This method is synchronized to make sure that the locking order // "taskTrackers lock followed by faultyTrackers.potentiallyFaultyTrackers // lock" is under JobTracker lock to avoid deadlocks. synchronized public Collection<TaskTrackerStatus> blacklistedTaskTrackers() { Collection<TaskTrackerStatus> blacklistedTrackers = new ArrayList<TaskTrackerStatus>(); synchronized (taskTrackers) { for (TaskTracker tt : taskTrackers.values()) { TaskTrackerStatus status = tt.getStatus(); if (faultyTrackers.isBlacklisted(status.getHost())) { blacklistedTrackers.add(status); } } } return blacklistedTrackers; }
/** * Adds a new node to the jobtracker. It involves adding it to the expiry * thread and adding it for resolution * * Assumes JobTracker, taskTrackers and trackerExpiryQueue is locked on entry * * @param status Task Tracker's status */ private void addNewTracker(TaskTracker taskTracker) { TaskTrackerStatus status = taskTracker.getStatus(); trackerExpiryQueue.add(status); // Register the tracker if its not registered String hostname = status.getHost(); if (getNode(status.getTrackerName()) == null) { // Making the network location resolution inline .. resolveAndAddToTopology(hostname); } // add it to the set of tracker per host Set<TaskTracker> trackers = hostnameToTaskTracker.get(hostname); if (trackers == null) { trackers = Collections.synchronizedSet(new HashSet<TaskTracker>()); hostnameToTaskTracker.put(hostname, trackers); } statistics.taskTrackerAdded(status.getTrackerName()); getInstrumentation().addTrackers(1); LOG.info("Adding tracker " + status.getTrackerName() + " to host " + hostname); trackers.add(taskTracker); }
private synchronized void refreshHosts() throws IOException { // Reread the config to get mapred.hosts and mapred.hosts.exclude filenames. // Update the file names and refresh internal includes and excludes list LOG.info("Refreshing hosts information"); Configuration conf = new Configuration(); hostsReader.updateFileNames(conf.get("mapred.hosts",""), conf.get("mapred.hosts.exclude", "")); hostsReader.refresh(); Set<String> excludeSet = new HashSet<String>(); for(Map.Entry<String, TaskTracker> eSet : taskTrackers.entrySet()) { String trackerName = eSet.getKey(); TaskTrackerStatus status = eSet.getValue().getStatus(); // Check if not include i.e not in host list or in hosts list but excluded if (!inHostsList(status) || inExcludedHostsList(status)) { excludeSet.add(status.getHost()); // add to rejected trackers } } decommissionNodes(excludeSet); }
synchronized void decommissionNodes(Set<String> hosts) throws IOException { LOG.info("Decommissioning " + hosts.size() + " nodes"); // create a list of tracker hostnames synchronized (taskTrackers) { synchronized (trackerExpiryQueue) { int trackersDecommissioned = 0; for (String host : hosts) { LOG.info("Decommissioning host " + host); Set<TaskTracker> trackers = hostnameToTaskTracker.remove(host); if (trackers != null) { for (TaskTracker tracker : trackers) { LOG.info("Decommission: Losing tracker " + tracker.getTrackerName() + " on host " + host); removeTracker(tracker); } trackersDecommissioned += trackers.size(); } LOG.info("Host " + host + " is ready for decommissioning"); } getInstrumentation().setDecommissionedTrackers(trackersDecommissioned); } } }
/**
 * Builds a fake manager with a queue manager created from a default
 * {@link JobConf} and two pre-registered trackers, "tt1" and "tt2".
 */
public FakeTaskTrackerManager() {
  queueManager = new QueueManager(new JobConf());

  // First fake tracker, reported on host "tt1.host".
  TaskTracker first = new TaskTracker("tt1");
  TaskTrackerStatus firstStatus =
      new TaskTrackerStatus("tt1", "http", "tt1.host", 1,
                            new ArrayList<TaskStatus>(), 0, 0,
                            maxMapTasksPerTracker, maxReduceTasksPerTracker);
  first.setStatus(firstStatus);
  trackers.put("tt1", first);

  // Second fake tracker, reported on host "tt2.host".
  TaskTracker second = new TaskTracker("tt2");
  TaskTrackerStatus secondStatus =
      new TaskTrackerStatus("tt2", "http", "tt2.host", 2,
                            new ArrayList<TaskStatus>(), 0, 0,
                            maxMapTasksPerTracker, maxReduceTasksPerTracker);
  second.setStatus(secondStatus);
  trackers.put("tt2", second);
}
/**
 * Fake scheduling step that never assigns tasks. Depending on the
 * unreserveSlots flag it either releases the fake job's map and reduce slot
 * reservations on the tracker, or reserves slots for the fake job using a
 * per-tracker count that grows by one on every call (starting at 1).
 *
 * @param tt tracker asking for tasks
 * @return always a new, empty list
 */
@Override
public List<Task> assignTasks(TaskTracker tt) {
  if (unreserveSlots) {
    tt.unreserveSlots(TaskType.MAP, fakeJob);
    tt.unreserveSlots(TaskType.REDUCE, fakeJob);
    return new ArrayList<Task>();
  }

  // First call for a tracker reserves 1 slot; later calls one more each time.
  int nextCount =
      reservedCounts.containsKey(tt) ? reservedCounts.get(tt) + 1 : 1;
  reservedCounts.put(tt, nextCount);
  tt.reserveSlots(TaskType.MAP, fakeJob, nextCount);
  tt.reserveSlots(TaskType.REDUCE, fakeJob, nextCount);
  return new ArrayList<Task>();
}
/** * Test that verifies default values are configured and reported correctly. * * @throws Exception */ public void testDefaultResourceValues() throws Exception { JobConf conf = new JobConf(); try { // Memory values are disabled by default. conf.setClass( org.apache.hadoop.mapred.TaskTracker.TT_RESOURCE_CALCULATOR_PLUGIN, DummyResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class); setUpCluster(conf); JobConf jobConf = miniMRCluster.createJobConf(); jobConf.setClass( org.apache.hadoop.mapred.TaskTracker.TT_RESOURCE_CALCULATOR_PLUGIN, DummyResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class); runSleepJob(jobConf); verifyTestResults(); } finally { tearDownCluster(); } }
/**
 * Builds a fake manager with two pre-registered trackers, "tt1" and "tt2".
 */
public FakeTaskTrackerManager() {
  // First fake tracker, reported on host "tt1.host".
  TaskTracker first = new TaskTracker("tt1");
  TaskTrackerStatus firstStatus =
      new TaskTrackerStatus("tt1", "http", "tt1.host", 1,
                            new ArrayList<TaskStatus>(), 0, 0,
                            maxMapTasksPerTracker, maxReduceTasksPerTracker);
  first.setStatus(firstStatus);
  trackers.put("tt1", first);

  // Second fake tracker, reported on host "tt2.host".
  TaskTracker second = new TaskTracker("tt2");
  TaskTrackerStatus secondStatus =
      new TaskTrackerStatus("tt2", "http", "tt2.host", 2,
                            new ArrayList<TaskStatus>(), 0, 0,
                            maxMapTasksPerTracker, maxReduceTasksPerTracker);
  second.setStatus(secondStatus);
  trackers.put("tt2", second);
}
/** * Adds a new node to the jobtracker. It involves adding it to the expiry * thread and adding it for resolution * * Assumes JobTracker, taskTrackers and trackerExpiryQueue is locked on entry * * @param taskTracker Task Tracker */ void addNewTracker(TaskTracker taskTracker) { TaskTrackerStatus status = taskTracker.getStatus(); trackerExpiryQueue.add(status); // Register the tracker if its not registered String hostname = status.getHost(); if (getNode(status.getTrackerName()) == null) { // Making the network location resolution inline .. resolveAndAddToTopology(hostname); } // add it to the set of tracker per host Set<TaskTracker> trackers = hostnameToTaskTracker.get(hostname); if (trackers == null) { trackers = Collections.synchronizedSet(new HashSet<TaskTracker>()); hostnameToTaskTracker.put(hostname, trackers); } statistics.taskTrackerAdded(status.getTrackerName()); getInstrumentation().addTrackers(1); LOG.info("Adding tracker " + status.getTrackerName() + " to host " + hostname); trackers.add(taskTracker); }
private synchronized void refreshHosts() throws IOException { // Reread the config to get mapred.hosts and mapred.hosts.exclude filenames. // Update the file names and refresh internal includes and excludes list LOG.info("Refreshing hosts information"); Configuration conf = new Configuration(); hostsReader.updateFileNames(conf.get("mapred.hosts",""), conf.get("mapred.hosts.exclude", "")); hostsReader.refresh(); Set<String> excludeSet = new HashSet<String>(); for(Map.Entry<String, TaskTracker> eSet : taskTrackers.entrySet()) { String trackerName = eSet.getKey(); TaskTrackerStatus status = eSet.getValue().getStatus(); // Check if not include i.e not in host list or in hosts list but excluded if (!inHostsList(status) || inExcludedHostsList(status)) { excludeSet.add(status.getHost()); // add to rejected trackers } } decommissionNodes(excludeSet); int totalExcluded = hostsReader.getExcludedHosts().size(); getInstrumentation().setDecommissionedTrackers(totalExcluded); }
synchronized void decommissionNodes(Set<String> hosts) throws IOException { LOG.info("Decommissioning " + hosts.size() + " nodes"); // create a list of tracker hostnames synchronized (taskTrackers) { synchronized (trackerExpiryQueue) { int trackersDecommissioned = 0; for (String host : hosts) { LOG.info("Decommissioning host " + host); Set<TaskTracker> trackers = hostnameToTaskTracker.remove(host); if (trackers != null) { for (TaskTracker tracker : trackers) { LOG.info("Decommission: Losing tracker " + tracker.getTrackerName() + " on host " + host); removeTracker(tracker); } trackersDecommissioned += trackers.size(); } LOG.info("Host " + host + " is ready for decommissioning"); } } } }
/** * Returns a set of dead nodes. (nodes that are expected to be alive) */ public Collection<String> getDeadNodes() { List<String> activeHosts = new ArrayList<String>(); synchronized(taskTrackers) { for (TaskTracker tt : taskTrackers.values()) { activeHosts.add(tt.getStatus().getHost()); } } // dead hosts are the difference between active and known hosts // We don't consider a blacklisted host to be dead. Set<String> knownHosts = new HashSet<String>(hostsReader.getHosts()); knownHosts.removeAll(activeHosts); // Also remove the excluded nodes as getHosts() returns them as well knownHosts.removeAll(hostsReader.getExcludedHosts()); Set<String> deadHosts = knownHosts; return deadHosts; }
/**
 * Builds a fake manager with a queue manager created from a default
 * {@link JobConf} and two pre-registered trackers, "tt1" and "tt2".
 */
public FakeTaskTrackerManager() {
  queueManager = new QueueManager(new JobConf());

  // First fake tracker, reported on host "tt1.host".
  TaskTracker first = new TaskTracker("tt1");
  TaskTrackerStatus firstStatus =
      new TaskTrackerStatus("tt1", "tt1.host", 1,
                            new ArrayList<TaskStatus>(), 0,
                            maxMapTasksPerTracker, maxReduceTasksPerTracker);
  first.setStatus(firstStatus);
  trackers.put("tt1", first);

  // Second fake tracker, reported on host "tt2.host".
  TaskTracker second = new TaskTracker("tt2");
  TaskTrackerStatus secondStatus =
      new TaskTrackerStatus("tt2", "tt2.host", 2,
                            new ArrayList<TaskStatus>(), 0,
                            maxMapTasksPerTracker, maxReduceTasksPerTracker);
  second.setStatus(secondStatus);
  trackers.put("tt2", second);
}
/** * Test that verifies default values are configured and reported correctly. * * @throws Exception */ @Test(timeout=60000) public void testDefaultResourceValues() throws Exception { JobConf conf = new JobConf(); try { // Memory values are disabled by default. conf.setClass( org.apache.hadoop.mapred.TaskTracker.MAPRED_TASKTRACKER_MEMORY_CALCULATOR_PLUGIN_PROPERTY, DummyResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class); setUpCluster(conf); JobConf jobConf = miniMRCluster.createJobConf(); jobConf.setClass( org.apache.hadoop.mapred.TaskTracker.MAPRED_TASKTRACKER_MEMORY_CALCULATOR_PLUGIN_PROPERTY, DummyResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class); runSleepJob(jobConf); verifyTestResults(); } finally { tearDownCluster(); } }
/**
 * Computes the smallest topology distance between the tracker's node and any
 * of the split's locations, capped at 2.
 *
 * @param taskTracker tracker whose host is looked up in the topology
 * @param split       split whose locations are compared against the tracker
 * @return the minimum of 2 and the distances to every split location; 2 when
 *         the split has no locations
 * @throws IllegalArgumentException if no topology node is known for the
 *         tracker's host or for one of the split locations
 */
private int getClosestLocality(TaskTracker taskTracker, RawSplit split) {
  String trackerHost = taskTracker.getStatus().getHost();
  Node trackerNode = jobtracker.getNode(trackerHost);
  if (trackerNode == null) {
    throw new IllegalArgumentException(
        "Cannot determine network topology node for TaskTracker "
            + taskTracker.getTrackerName());
  }

  // Start from the cap of 2; NOTE(review): presumably the off-rack
  // distance -- confirm against the topology implementation.
  int closest = 2;
  for (String location : split.getLocations()) {
    Node splitNode = jobtracker.getNode(location);
    if (splitNode == null) {
      throw new IllegalArgumentException(
          "Cannot determine network topology node for split location "
              + location);
    }
    int distance = jobtracker.clusterMap.getDistance(trackerNode, splitNode);
    if (distance < closest) {
      closest = distance;
    }
  }
  return closest;
}
/**
 * Looks up the {@link TaskAttemptInfo} for a reduce task attempt. The attempt
 * id is broken down into task type, task id and attempt id, which are then
 * used to query the job story.
 *
 * @param taskTracker
 *          tasktracker (not consulted by this method)
 * @param taskAttemptID
 *          task-attempt; asserted to be a reduce attempt
 * @return TaskAttemptInfo for the reduce task-attempt
 */
private TaskAttemptInfo getReduceTaskAttemptInfo(TaskTracker taskTracker,
    TaskAttemptID taskAttemptID) {
  assert (!taskAttemptID.isMap());

  TaskID owningTask = taskAttemptID.getTaskID();
  // The type is still derived from the id, so a map attempt is handled
  // consistently when assertions are disabled.
  TaskType attemptType =
      taskAttemptID.isMap() ? TaskType.MAP : TaskType.REDUCE;

  TaskAttemptInfo attemptInfo = jobStory.getTaskAttemptInfo(
      attemptType, owningTask.getId(), taskAttemptID.getId());

  if (LOG.isDebugEnabled()) {
    LOG.debug("get an attempt: "
        + taskAttemptID.toString()
        + ", state="
        + attemptInfo.getRunState()
        + ", runtime="
        + ((taskAttemptID.isMap())
            ? attemptInfo.getRuntime()
            : ((ReduceTaskAttemptInfo) attemptInfo).getReduceRuntime()));
  }
  return attemptInfo;
}
/**
 * Collects the statuses of all task trackers whose host is on the graylist
 * or on the blacklist, depending on the flag.
 *
 * @param gray {@code true} to select graylisted hosts, {@code false} to
 *             select blacklisted hosts
 * @return {@link Collection} of {@link TaskTrackerStatus} for the selected
 *         trackers
 */
private synchronized Collection<TaskTrackerStatus>
    blackOrGraylistedTaskTrackers(boolean gray) {
  Collection<TaskTrackerStatus> result = new ArrayList<TaskTrackerStatus>();
  synchronized (taskTrackers) {
    for (TaskTracker tracker : taskTrackers.values()) {
      TaskTrackerStatus trackerStatus = tracker.getStatus();
      String host = trackerStatus.getHost();

      boolean onRequestedList;
      if (gray) {
        onRequestedList = faultyTrackers.isGraylisted(host);
      } else {
        onRequestedList = faultyTrackers.isBlacklisted(host);
      }

      if (onRequestedList) {
        result.add(trackerStatus);
      }
    }
  }
  return result;
}
/** * Adds a new node to the jobtracker. It involves adding it to the expiry * thread and adding it for resolution * * Assumes JobTracker, taskTrackers and trackerExpiryQueue is locked on entry * * @param status Task Tracker's status */ private void addNewTracker(TaskTracker taskTracker) throws UnknownHostException { TaskTrackerStatus status = taskTracker.getStatus(); trackerExpiryQueue.add(status); // Register the tracker if its not registered String hostname = status.getHost(); if (getNode(status.getTrackerName()) == null) { // Making the network location resolution inline .. resolveAndAddToTopology(hostname); } // add it to the set of tracker per host Set<TaskTracker> trackers = hostnameToTaskTracker.get(hostname); if (trackers == null) { trackers = Collections.synchronizedSet(new HashSet<TaskTracker>()); hostnameToTaskTracker.put(hostname, trackers); } statistics.taskTrackerAdded(status.getTrackerName()); getInstrumentation().addTrackers(1); LOG.info("Adding tracker " + status.getTrackerName() + " to host " + hostname); trackers.add(taskTracker); }
private void removeTracker(TaskTracker tracker) { String trackerName = tracker.getTrackerName(); String hostName = JobInProgress.convertTrackerNameToHostName(trackerName); // Remove completely after marking the tasks as 'KILLED' lostTaskTracker(tracker); // tracker is lost; if it is blacklisted and/or graylisted, remove // it from the relevant count(s) of trackers in the cluster if (isBlacklisted(trackerName)) { LOG.info("Removing " + hostName + " from blacklist"); faultyTrackers.decrBlacklistedTrackers(1); } if (isGraylisted(trackerName)) { LOG.info("Removing " + hostName + " from graylist"); faultyTrackers.decrGraylistedTrackers(1); } updateTaskTrackerStatus(trackerName, null); statistics.taskTrackerRemoved(trackerName); getInstrumentation().decTrackers(1); }
/**
 * Classifies the failure of a task attempt and increments the matching error
 * count for the tracker.
 *
 * Only the most recent diagnostic string of the attempt is examined, with
 * newlines replaced by spaces. It is compared against the known error
 * patterns in iteration order and the first pattern that matches the whole
 * string wins. Attempts with no diagnostics, or whose diagnostic matches no
 * known pattern, are counted as UNKNOWN_ERROR.
 *
 * @param tip         task the attempt belongs to; source of the diagnostics
 * @param taskId      id of the failed attempt
 * @param taskTracker tracker the error is attributed to
 * @param now         timestamp passed through to the error counter
 */
public synchronized void collect(TaskInProgress tip, TaskAttemptID taskId,
    TaskTracker taskTracker, long now) {
  List<String> diagnostics = tip.getDiagnosticInfo(taskId);
  if (diagnostics == null || diagnostics.isEmpty()) {
    incErrorCounts(UNKNOWN_ERROR, taskTracker, now);
    return;
  }

  // Flatten to a single line so whole-string matching is not defeated by
  // embedded newlines.
  String latestDiagnostic =
      diagnostics.get(diagnostics.size() - 1).replace("\n", " ");

  for (TaskError error : knownErrors.values()) {
    if (error.pattern.matcher(latestDiagnostic).matches()) {
      incErrorCounts(error, taskTracker, now);
      return;
    }
  }

  LOG.info("Undefined diagnostic info:" + latestDiagnostic);
  incErrorCounts(UNKNOWN_ERROR, taskTracker, now);
}