/**
 * Returns the count for the given counter name in the counter group
 * 'MultiStoreCounters'
 *
 * @param job the MR job
 * @param jobClient the Hadoop job client
 * @param counterName the counter name
 * @return the count of the given counter name
 */
@SuppressWarnings("deprecation")
public static long getMultiStoreCount(Job job, JobClient jobClient, String counterName) {
    long value = -1;
    try {
        RunningJob rj = jobClient.getJob(job.getAssignedJobID());
        if (rj != null) {
            Counters.Counter counter = rj.getCounters().getGroup(
                    MULTI_STORE_COUNTER_GROUP).getCounterForName(counterName);
            value = counter.getValue();
        }
    } catch (IOException e) {
        LOG.warn("Failed to get the counter for " + counterName, e);
    }
    return value;
}
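// A minimal usage sketch (assumptions: the method lives in a stats utility
// class, referred to here as 'MRPigStatsUtil' for illustration, and the
// counter name below is hypothetical). A return value of -1 means the
// counter could not be read.
long records = MRPigStatsUtil.getMultiStoreCount(job, jobClient,
        "Output records in: output"); // hypothetical multi-store counter name
if (records >= 0) {
    LOG.info("Records written by this store: " + records);
}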
@SuppressWarnings("deprecation") JobStats addJobStats(Job job) { MapReduceOper mro = jobMroMap.get(job); if (mro == null) { LOG.warn("unable to get MR oper for job: " + job.toString()); return null; } JobStats js = mroJobMap.get(mro); JobID jobId = job.getAssignedJobID(); js.setId(jobId); js.setAlias(mro); js.setConf(job.getJobConf()); return js; }
void setBackendException(Job job, Exception e) {
    if (e instanceof PigException) {
        LOG.error("ERROR " + ((PigException)e).getErrorCode() + ": "
                + e.getLocalizedMessage());
    } else if (e != null) {
        LOG.error("ERROR: " + e.getLocalizedMessage());
    }

    if (job.getAssignedJobID() == null || e == null) {
        LOG.debug("unable to set backend exception");
        return;
    }
    String id = job.getAssignedJobID().toString();
    Iterator<JobStats> iter = jobPlan.iterator();
    while (iter.hasNext()) {
        JobStats js = iter.next();
        if (id.equals(js.getJobId())) {
            js.setBackendException(e);
            break;
        }
    }
}
/**
 * Moves all the results of a collection of MR jobs to the final
 * output directory. Some of the results may have been put into a
 * temp location to work around restrictions on multiple outputs
 * from a single MapReduce job.
 *
 * This method should always be called after the job execution
 * completes.
 */
public void moveResults(List<Job> completedJobs) throws IOException {
    for (Job job : completedJobs) {
        Pair<List<POStore>, Path> pair = jobStoreMap.get(job);
        if (pair != null && pair.second != null) {
            Path tmp = pair.second;
            Path abs = new Path(tmp, "abs");
            Path rel = new Path(tmp, "rel");
            FileSystem fs = tmp.getFileSystem(conf);

            if (fs.exists(abs)) {
                moveResults(abs, abs.toUri().getPath(), fs);
            }
            if (fs.exists(rel)) {
                moveResults(rel, rel.toUri().getPath() + "/", fs);
            }
        }
    }
}
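// Hypothetical sketch of the per-path overload used above (not shown in the
// snippet): every file under 'tmpDir' is renamed to the path obtained by
// stripping the temp prefix 'rmPrefix', so files staged under 'abs' land at
// an absolute destination and files under 'rel' at a relative one. The exact
// destination-mapping logic is an assumption, not taken from the source.
private void moveResults(Path tmpDir, String rmPrefix, FileSystem fs) throws IOException {
    for (FileStatus stat : fs.listStatus(tmpDir)) {
        Path src = stat.getPath();
        if (stat.isDirectory()) {
            moveResults(src, rmPrefix, fs);
        } else {
            Path dst = new Path(src.toUri().getPath().substring(rmPrefix.length()));
            fs.mkdirs(dst.getParent());   // ensure the destination directory exists
            fs.rename(src, dst);
        }
    }
}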
@Override
public void run() {
    try {
        log.debug("Received kill signal");
        if (jc != null) {
            for (Job job : jc.getRunningJobs()) {
                RunningJob runningJob = job.getJobClient().getJob(job.getAssignedJobID());
                if (runningJob != null) {
                    runningJob.killJob();
                }
                log.info("Job " + job.getJobID() + " killed");
            }
        }
    } catch (Exception e) {
        log.warn("Encountered exception during cleanup: " + e);
    }
}
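// A minimal sketch of how such a kill handler is typically registered
// (assumption: the run() method above belongs to a Runnable implementation,
// called 'HangingJobKiller' here purely for illustration). A JVM shutdown
// hook lets still-running Hadoop jobs be killed when the client is interrupted.
Runtime.getRuntime().addShutdownHook(new Thread(new HangingJobKiller()));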
/**
 * If stop_on_failure is enabled and any job has failed, an ExecException is thrown.
 * @param stop_on_failure whether it's enabled.
 * @throws ExecException if stop_on_failure is enabled and any job has failed
 */
private void checkStopOnFailure(boolean stop_on_failure) throws ExecException {
    if (jc.getFailedJobs().isEmpty())
        return;

    if (stop_on_failure) {
        int errCode = 6017;
        StringBuilder msg = new StringBuilder();

        for (int i = 0; i < jc.getFailedJobs().size(); i++) {
            Job j = jc.getFailedJobs().get(i);
            msg.append(j.getMessage());
            if (i != jc.getFailedJobs().size() - 1) {
                msg.append("\n");
            }
        }

        throw new ExecException(msg.toString(), errCode,
                PigException.REMOTE_ENVIRONMENT);
    }
}
@Test
public void testDefaultParallel() throws Throwable {
    pc.defaultParallel = 100;

    String query = "a = load 'input';" +
            "b = group a by $0;" +
            "store b into 'output';";
    PigServer ps = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    PhysicalPlan pp = Util.buildPp(ps, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);
    JobControl jobControl = jcc.compile(mrPlan, "Test");
    Job job = jobControl.getWaitingJobs().get(0);
    int parallel = job.getJobConf().getNumReduceTasks();

    assertEquals(100, parallel);
    Util.assertParallelValues(100, -1, -1, 100, job.getJobConf());

    pc.defaultParallel = -1;
}
/**
 * Test parallelism for group by constant
 * @throws Throwable
 */
@Test
public void testGroupConstWithParallel() throws Throwable {
    PigContext pc = new PigContext(ExecType.MAPREDUCE, cluster.getProperties());
    pc.defaultParallel = 100;
    pc.connect();

    String query = "a = load 'input';\n" +
            "b = group a by 1;" +
            "store b into 'output';";
    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    PhysicalPlan pp = Util.buildPp(pigServer, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);
    JobControl jobControl = jcc.compile(mrPlan, "Test");
    Job job = jobControl.getWaitingJobs().get(0);
    int parallel = job.getJobConf().getNumReduceTasks();

    // grouping by a constant key collapses to a single reducer regardless of defaultParallel
    assertEquals("parallelism", 1, parallel);
}
/**
 * Test parallelism for group by column
 * @throws Throwable
 */
@Test
public void testGroupNonConstWithParallel() throws Throwable {
    PigContext pc = new PigContext(ExecType.MAPREDUCE, cluster.getProperties());
    pc.defaultParallel = 100;
    pc.connect();

    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    String query = "a = load 'input';\n" +
            "b = group a by $0;" +
            "store b into 'output';";
    PhysicalPlan pp = Util.buildPp(pigServer, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);
    JobControl jobControl = jcc.compile(mrPlan, "Test");
    Job job = jobControl.getWaitingJobs().get(0);
    int parallel = job.getJobConf().getNumReduceTasks();

    // grouping by a non-constant key should honor defaultParallel
    assertEquals("parallelism", 100, parallel);
}
private static MRJobStats addSuccessJobStats(SimplePigStats ps, Job job) {
    if (ps.isJobSeen(job)) return null;

    MRJobStats js = ps.addMRJobStats(job);
    if (js == null) {
        LOG.warn("unable to add job stats");
    } else {
        js.setSuccessful(true);
        js.addMapReduceStatistics(job);
        js.addCounters(job);
        js.addOutputStatistics();
        js.addInputStatistics();
    }
    return js;
}
private boolean okToRunLocal(org.apache.hadoop.mapreduce.Job job, MapReduceOper mro,
        List<POLoad> lds) throws IOException {
    Configuration conf = job.getConfiguration();
    if (!conf.getBoolean(PigConfiguration.PIG_AUTO_LOCAL_ENABLED, false)) {
        return false;
    }

    long inputByteMax = conf.getLong(PigConfiguration.PIG_AUTO_LOCAL_INPUT_MAXBYTES,
            100 * 1000 * 1000L);
    long totalInputFileSize = InputSizeReducerEstimator.getTotalInputFileSize(
            conf, lds, job, inputByteMax);
    log.info("Size of input: " + totalInputFileSize + " bytes. Small job threshold: "
            + inputByteMax);
    if (totalInputFileSize < 0 || totalInputFileSize > inputByteMax) {
        return false;
    }

    int reducers = conf.getInt(MRConfiguration.REDUCE_TASKS, 1);
    log.info("No of reducers: " + reducers);
    if (reducers > 1) {
        return false;
    }

    return true;
}
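// A minimal sketch of how a job might opt in to this auto-local check, using
// only the PigConfiguration constants referenced above (the concrete property
// key strings are not shown in the snippet, so none are assumed here). Jobs
// whose total input is below the byte threshold and which need at most one
// reducer can then be converted to local mode.
Configuration conf = job.getConfiguration();
conf.setBoolean(PigConfiguration.PIG_AUTO_LOCAL_ENABLED, true);
conf.setLong(PigConfiguration.PIG_AUTO_LOCAL_INPUT_MAXBYTES, 50 * 1000 * 1000L); // 50 MB threshold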
public static void setOutputFormat(org.apache.hadoop.mapreduce.Job job) {
    // the OutputFormat we report to Hadoop is always PigOutputFormat, which
    // can be wrapped with LazyOutputFormat, provided it is supported by the
    // Hadoop version and PigConfiguration.PIG_OUTPUT_LAZY is set
    if ("true".equalsIgnoreCase(job.getConfiguration().get(PigConfiguration.PIG_OUTPUT_LAZY))) {
        try {
            Class<?> clazz = PigContext
                    .resolveClassName("org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat");
            Method method = clazz.getMethod("setOutputFormatClass",
                    org.apache.hadoop.mapreduce.Job.class, Class.class);
            method.invoke(null, job, PigOutputFormat.class);
        } catch (Exception e) {
            job.setOutputFormatClass(PigOutputFormat.class);
            log.warn(PigConfiguration.PIG_OUTPUT_LAZY
                    + " is set but LazyOutputFormat couldn't be loaded."
                    + " Default PigOutputFormat will be used");
        }
    } else {
        job.setOutputFormatClass(PigOutputFormat.class);
    }
}
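// On Hadoop versions where LazyOutputFormat is available on the classpath,
// the reflective lookup above is equivalent to the direct call below (a
// sketch, assuming the standard org.apache.hadoop.mapreduce.lib.output API;
// reflection is only needed so the code still compiles against older Hadoop).
org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat
        .setOutputFormatClass(job, PigOutputFormat.class);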
/**
 * If stop_on_failure is enabled and any job has failed, an ExecException is thrown.
 * @param stop_on_failure whether it's enabled.
 * @throws ExecException if stop_on_failure is enabled and any job has failed
 */
private void checkStopOnFailure(boolean stop_on_failure) throws ExecException {
    if (jc.getFailedJobs().isEmpty())
        return;

    if (stop_on_failure) {
        int errCode = 6017;
        StringBuilder msg = new StringBuilder();

        for (int i = 0; i < jc.getFailedJobs().size(); i++) {
            Job j = jc.getFailedJobs().get(i);
            msg.append("JobID: " + j.getAssignedJobID() + " Reason: " + j.getMessage());
            if (i != jc.getFailedJobs().size() - 1) {
                msg.append("\n");
            }
        }

        throw new ExecException(msg.toString(), errCode,
                PigException.REMOTE_ENVIRONMENT);
    }
}
private void createSuccessFile(Job job, POStore store) throws IOException {
    if (shouldMarkOutputDir(job)) {
        Path outputPath = new Path(store.getSFile().getFileName());
        String scheme = outputPath.toUri().getScheme();
        if (HadoopShims.hasFileSystemImpl(outputPath, job.getJobConf())) {
            FileSystem fs = outputPath.getFileSystem(job.getJobConf());
            if (fs.exists(outputPath)) {
                // create a file in the folder to mark it
                Path filePath = new Path(outputPath, SUCCEEDED_FILE_NAME);
                if (!fs.exists(filePath)) {
                    fs.create(filePath).close();
                }
            }
        } else {
            log.warn("No FileSystem for scheme: " + scheme + ". Not creating success file");
        }
    }
}
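// shouldMarkOutputDir(job) is not shown above; a plausible minimal sketch,
// assuming it simply mirrors Hadoop's standard 'mark successful jobs' switch
// (both the property key and the default value below are assumptions, not
// taken from the snippet), could look like this:
private static boolean shouldMarkOutputDir(Job job) {
    return job.getJobConf().getBoolean(
            "mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
}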
public static Iterator<TaskReport> getTaskReports(Job job, TaskType type) throws IOException {
    if (job.getJobConf().getBoolean(PigConfiguration.PIG_NO_TASK_REPORT, false)) {
        LOG.info("TaskReports are disabled for job: " + job.getAssignedJobID());
        return null;
    }
    Cluster cluster = new Cluster(job.getJobConf());
    try {
        org.apache.hadoop.mapreduce.Job mrJob = cluster.getJob(job.getAssignedJobID());
        if (mrJob == null) { // In local mode, mrJob will be null
            mrJob = job.getJob();
        }
        org.apache.hadoop.mapreduce.TaskReport[] reports = mrJob.getTaskReports(type);
        return DowngradeHelper.downgradeTaskReports(reports);
    } catch (InterruptedException ir) {
        throw new IOException(ir);
    }
}
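// A small usage sketch (assumption: the caller sits in the same class as the
// static helper above, and must handle the null returned when task reports
// are disabled). It walks the map-task reports of a completed job and sums
// their runtimes.
Iterator<TaskReport> reports = getTaskReports(job, TaskType.MAP);
if (reports != null) {
    long totalMillis = 0;
    while (reports.hasNext()) {
        TaskReport report = reports.next();
        totalMillis += report.getFinishTime() - report.getStartTime();
    }
    LOG.info("Total map task time (ms): " + totalMillis);
}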
@Test
public void testDefaultParallel() throws Throwable {
    pc.defaultParallel = 100;

    String query = "a = load 'input';" +
            "b = group a by $0;" +
            "store b into 'output';";
    PigServer ps = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    PhysicalPlan pp = Util.buildPp(ps, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);
    JobControl jobControl = jcc.compile(mrPlan, "Test");
    Job job = jobControl.getWaitingJobs().get(0);
    int parallel = job.getJobConf().getNumReduceTasks();

    assertTrue(parallel == 100);

    pc.defaultParallel = -1;
}
public static JobControl createValueAggregatorJobs(String args[],
        Class<? extends ValueAggregatorDescriptor>[] descriptors) throws IOException {
    JobControl theControl = new JobControl("ValueAggregatorJobs");
    ArrayList<Job> dependingJobs = new ArrayList<Job>();
    JobConf aJobConf = createValueAggregatorJob(args);
    if (descriptors != null) {
        setAggregatorDescriptors(aJobConf, descriptors);
    }
    Job aJob = new Job(aJobConf, dependingJobs);
    theControl.addJob(aJob);
    return theControl;
}
public static JobControl createValueAggregatorJobs(String args[],
        Class<? extends ValueAggregatorDescriptor>[] descriptors) throws IOException {
    JobControl theControl = new JobControl("ValueAggregatorJobs");
    ArrayList<Job> dependingJobs = new ArrayList<Job>();
    JobConf aJobConf = createValueAggregatorJob(args, (Class<?>) null);
    if (descriptors != null) {
        setAggregatorDescriptors(aJobConf, descriptors);
    }
    Job aJob = new Job(aJobConf, dependingJobs);
    theControl.addJob(aJob);
    return theControl;
}
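// A short usage sketch for the JobControl returned above, following the
// standard Hadoop jobcontrol pattern of running the controller on its own
// thread and polling until all jobs complete (the wrapper method name is
// chosen here purely for illustration).
public static void runValueAggregatorJobs(String[] args,
        Class<? extends ValueAggregatorDescriptor>[] descriptors)
        throws IOException, InterruptedException {
    JobControl control = createValueAggregatorJobs(args, descriptors);
    Thread controller = new Thread(control);   // JobControl implements Runnable
    controller.start();
    while (!control.allFinished()) {
        Thread.sleep(5000);                    // poll every 5 seconds
    }
    control.stop();
    if (!control.getFailedJobs().isEmpty()) {
        System.err.println(control.getFailedJobs().size() + " aggregator job(s) failed");
    }
}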