public int run(Path outputDir, int numReducers) throws Exception { LOG.info("Running Verify with outputDir=" + outputDir +", numReducers=" + numReducers); job = Job.getInstance(getConf()); job.setJobName("Link Verifier"); job.setNumReduceTasks(numReducers); job.setJarByClass(getClass()); setJobScannerConf(job); Scan scan = new Scan(); scan.addColumn(FAMILY_NAME, COLUMN_PREV); scan.setCaching(10000); scan.setCacheBlocks(false); if (isMultiUnevenColumnFamilies(getConf())) { scan.addColumn(BIG_FAMILY_NAME, BIG_FAMILY_NAME); scan.addColumn(TINY_FAMILY_NAME, TINY_FAMILY_NAME); } TableMapReduceUtil.initTableMapperJob(getTableName(getConf()).getName(), scan, VerifyMapper.class, BytesWritable.class, BytesWritable.class, job); TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class); job.getConfiguration().setBoolean("mapreduce.map.speculative", false); job.setReducerClass(VerifyReducer.class); job.setOutputFormatClass(SequenceFileAsBinaryOutputFormat.class); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(BytesWritable.class); TextOutputFormat.setOutputPath(job, outputDir); boolean success = job.waitForCompletion(true); if (success) { Counters counters = job.getCounters(); if (null == counters) { LOG.warn("Counters were null, cannot verify Job completion"); // We don't have access to the counters to know if we have "bad" counts return 0; } // If we find no unexpected values, the job didn't outright fail if (verifyUnexpectedValues(counters)) { // We didn't check referenced+unreferenced counts, leave that to visual inspection return 0; } } // We failed return 1; }
public int run(Path outputDir, int numReducers) throws Exception { LOG.info("Running Verify with outputDir=" + outputDir +", numReducers=" + numReducers); job = Job.getInstance(getConf()); job.setJobName("Link Verifier"); job.setNumReduceTasks(numReducers); job.setJarByClass(getClass()); setJobScannerConf(job); Scan scan = new Scan(); scan.addColumn(FAMILY_NAME, COLUMN_PREV); scan.setCaching(10000); scan.setCacheBlocks(false); if (isMultiUnevenColumnFamilies(getConf())) { scan.addColumn(BIG_FAMILY_NAME, BIG_FAMILY_NAME); scan.addColumn(TINY_FAMILY_NAME, TINY_FAMILY_NAME); } TableMapReduceUtil.initTableMapperJob(getTableName(getConf()).getName(), scan, VerifyMapper.class, BytesWritable.class, BytesWritable.class, job); TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class); job.getConfiguration().setBoolean("mapreduce.map.speculative", false); job.setReducerClass(VerifyReducer.class); job.setOutputFormatClass(SequenceFileAsBinaryOutputFormat.class); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(BytesWritable.class); TextOutputFormat.setOutputPath(job, outputDir); boolean success = job.waitForCompletion(true); if (success) { Counters counters = job.getCounters(); if (null == counters) { LOG.warn("Counters were null, cannot verify Job completion." + " This is commonly a result of insufficient YARN configuration."); // We don't have access to the counters to know if we have "bad" counts return 0; } // If we find no unexpected values, the job didn't outright fail if (verifyUnexpectedValues(counters)) { // We didn't check referenced+unreferenced counts, leave that to visual inspection return 0; } } // We failed return 1; }
/**
 * JavaScript-facing delegate for
 * {@link SequenceFileAsBinaryOutputFormat#getOutputCompressorClass(org.apache.hadoop.mapreduce.JobContext, Class)}.
 *
 * @param ctx the JavaScript context
 * @param thisObj the 'this' object
 * @param args the function arguments
 * @param func the function being called
 *
 * @return the configured output compressor class
 */
@JSStaticFunction
public static Object getOutputCompressorClass(final Context ctx, final Scriptable thisObj,
    final Object[] args, final Function func) {
  return FileOutputFormatHelper.getOutputCompressorClass(
      SequenceFileAsBinaryOutputFormat.class, ctx, thisObj, args);
}
/**
 * JavaScript-facing delegate for
 * {@link SequenceFileAsBinaryOutputFormat#getOutputCompressionType(org.apache.hadoop.mapreduce.JobContext)}.
 *
 * @param ctx the JavaScript context
 * @param thisObj the 'this' object
 * @param args the function arguments
 * @param func the function being called
 *
 * @return the configured output compression type
 */
@JSStaticFunction
public static Object getOutputCompressionType(final Context ctx, final Scriptable thisObj,
    final Object[] args, final Function func) {
  return SequenceFileOutputFormatHelper.getOutputCompressionType(
      SequenceFileAsBinaryOutputFormat.class, ctx, thisObj, args);
}
/**
 * JavaScript-facing delegate for {@link SequenceFileAsBinaryOutputFormat
 * #setOutputCompressionType(org.apache.hadoop.mapreduce.Job,
 * org.apache.hadoop.io.SequenceFile.CompressionType)}.
 *
 * @param ctx the JavaScript context
 * @param thisObj the 'this' object
 * @param args the function arguments
 * @param func the function being called
 */
@JSStaticFunction
public static void setOutputCompressionType(final Context ctx, final Scriptable thisObj,
    final Object[] args, final Function func) {
  SequenceFileOutputFormatHelper.setOutputCompressionType(
      SequenceFileAsBinaryOutputFormat.class, ctx, thisObj, args);
}
/**
 * JavaScript-facing delegate for
 * {@link SequenceFileAsBinaryOutputFormat#getCompressOutput(org.apache.hadoop.mapreduce.JobContext)}.
 *
 * @param ctx the JavaScript context
 * @param thisObj the 'this' object
 * @param args the function arguments
 * @param func the function being called
 *
 * @return whether or not output compression is enabled
 */
@JSStaticFunction
public static Object getCompressOutput(final Context ctx, final Scriptable thisObj,
    final Object[] args, final Function func) {
  return FileOutputFormatHelper.getCompressOutput(
      SequenceFileAsBinaryOutputFormat.class, ctx, thisObj, args);
}
/**
 * JavaScript-facing delegate for
 * {@link SequenceFileAsBinaryOutputFormat#getOutputPath(org.apache.hadoop.mapreduce.JobContext)}.
 *
 * @param ctx the JavaScript context
 * @param thisObj the 'this' object
 * @param args the function arguments
 * @param func the function being called
 *
 * @return the configured output path
 */
@JSStaticFunction
public static Object getOutputPath(final Context ctx, final Scriptable thisObj,
    final Object[] args, final Function func) {
  return FileOutputFormatHelper.getOutputPath(
      SequenceFileAsBinaryOutputFormat.class, ctx, thisObj, args);
}
/**
 * JavaScript-facing delegate for
 * {@link SequenceFileAsBinaryOutputFormat
 * #getPathForWorkFile(org.apache.hadoop.mapreduce.TaskInputOutputContext, String, String)}.
 *
 * @param ctx the JavaScript context
 * @param thisObj the 'this' object
 * @param args the function arguments
 * @param func the function being called
 *
 * @return the path for the work file
 */
@JSStaticFunction
public static Object getPathForWorkFile(final Context ctx, final Scriptable thisObj,
    final Object[] args, final Function func) {
  return FileOutputFormatHelper.getPathForWorkFile(
      SequenceFileAsBinaryOutputFormat.class, ctx, thisObj, args);
}
/**
 * JavaScript-facing delegate for
 * {@link SequenceFileAsBinaryOutputFormat#getWorkOutputPath(org.apache.hadoop.mapreduce.TaskInputOutputContext)}.
 *
 * @param ctx the JavaScript context
 * @param thisObj the 'this' object
 * @param args the function arguments
 * @param func the function being called
 *
 * @return the work output path for the task
 */
@JSStaticFunction
public static Object getWorkOutputPath(final Context ctx, final Scriptable thisObj,
    final Object[] args, final Function func) {
  return FileOutputFormatHelper.getWorkOutputPath(
      SequenceFileAsBinaryOutputFormat.class, ctx, thisObj, args);
}
/**
 * JavaScript-facing delegate for
 * {@link SequenceFileAsBinaryOutputFormat#setCompressOutput(org.apache.hadoop.mapreduce.Job, boolean)}.
 *
 * @param ctx the JavaScript context
 * @param thisObj the 'this' object
 * @param args the function arguments
 * @param func the function being called
 */
@JSStaticFunction
public static void setCompressOutput(final Context ctx, final Scriptable thisObj,
    final Object[] args, final Function func) {
  FileOutputFormatHelper.setCompressOutput(
      SequenceFileAsBinaryOutputFormat.class, ctx, thisObj, args);
}
/**
 * JavaScript-facing delegate for
 * {@link SequenceFileAsBinaryOutputFormat#setOutputCompressorClass(org.apache.hadoop.mapreduce.Job, Class)}.
 *
 * @param ctx the JavaScript context
 * @param thisObj the 'this' object
 * @param args the function arguments
 * @param func the function being called
 */
@JSStaticFunction
public static void setOutputCompressorClass(final Context ctx, final Scriptable thisObj,
    final Object[] args, final Function func) {
  FileOutputFormatHelper.setOutputCompressorClass(
      SequenceFileAsBinaryOutputFormat.class, ctx, thisObj, args);
}
/**
 * JavaScript-facing delegate for
 * {@link SequenceFileAsBinaryOutputFormat#setOutputPath(org.apache.hadoop.mapreduce.Job,
 * org.apache.hadoop.fs.Path)}.
 *
 * @param ctx the JavaScript context
 * @param thisObj the 'this' object
 * @param args the function arguments
 * @param func the function being called
 */
@JSStaticFunction
public static void setOutputPath(final Context ctx, final Scriptable thisObj,
    final Object[] args, final Function func) {
  FileOutputFormatHelper.setOutputPath(
      SequenceFileAsBinaryOutputFormat.class, ctx, thisObj, args);
}