public static void run(Configuration conf, Path inputPath, Path output, double params)
    throws IOException, ClassNotFoundException, InterruptedException {
  String jobName = "calculating parameter";
  conf.set("params", String.valueOf(params));
  Job job = new Job(conf, jobName);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(indexToCountWritable.class);
  job.setOutputKeyClass(twoDimensionIndexWritable.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setMapperClass(CalParamsMapper.class);
  job.setReducerClass(CalParamsReducer.class);
  FileInputFormat.addInputPath(job, inputPath);
  FileOutputFormat.setOutputPath(job, output);
  job.setJarByClass(LDADriver.class);
  if (!job.waitForCompletion(true)) {
    throw new InterruptedException("calculating parameter failed");
  }
}
private Job doVerify(Configuration conf, HTableDescriptor htd, String... auths)
    throws IOException, InterruptedException, ClassNotFoundException {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(job);
  Scan scan = new Scan();
  scan.setAuthorizations(new Authorizations(auths));
  TableMapReduceUtil.initTableMapperJob(htd.getTableName().getNameAsString(), scan,
      VerifyMapper.class, NullWritable.class, NullWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));
  return job;
}
public static void cut(String name, String in) throws Exception {
  Job job = createJob(name, in);
  job.setNumReduceTasks(0);
  boolean success = job.waitForCompletion(true);
  if (success) {
    // MapReduce reads an input and writes the result to a new location.
    // Hence it cannot modify data in place, and we have to replace the input
    // with the output.
    Path output = FileOutputFormat.getOutputPath(job);
    FileSystem fs = output.getFileSystem(job.getConfiguration());
    Path[] inputs = FileInputFormat.getInputPaths(job);
    for (int i = 0; i < inputs.length; i++) {
      fs.delete(inputs[i], true);
    }
    fs.rename(output, inputs[0]);
  }
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  conf.set("xmlinput.start", "<page>");
  conf.set("xmlinput.end", "</page>");
  Job job = Job.getInstance(conf);
  job.setJobName("TermFrequencyCount");
  job.setJarByClass(TF.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntArrayWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);
  job.setMapperClass(TFMap.class);
  job.setReducerClass(TFReduce.class);
  job.setInputFormatClass(XmlInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  boolean wait = job.waitForCompletion(true);
  System.exit(wait ? 0 : 1);
}
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
  String tableName = args[0];
  Path outputDir = new Path(args[1]);
  String reportSeparatorString = (args.length > 2) ? args[2] : ":";
  conf.set("ReportSeparator", reportSeparatorString);
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(CellCounter.class);
  Scan scan = getConfiguredScanForJob(conf, args);
  TableMapReduceUtil.initTableMapperJob(tableName, scan,
      CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(1);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setReducerClass(IntSumReducer.class);
  return job;
}
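// A minimal driver sketch for the method above: build the job with
// createSubmittableJob(...) and block until it finishes. The main method and
// exit-code handling are illustrative assumptions, not part of the original
// snippet.
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  Job job = createSubmittableJob(conf, args);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}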
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  conf.set("xmlinput.start", "<page>");
  conf.set("xmlinput.end", "</page>");
  Job job = Job.getInstance(conf);
  job.setJobName("PageWordCount");
  job.setJarByClass(PageWordCount.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setMapperClass(PageWordCountMap.class);
  job.setCombinerClass(PageWordCountReduce.class);
  job.setReducerClass(PageWordCountReduce.class);
  job.setInputFormatClass(XmlInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.waitForCompletion(true);
}
public static void runJob(Path input, Path output, String vectorClassName, Configuration config)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = config;
  conf.set("vector.implementation.class.name", vectorClassName);
  Job job = new Job(conf, "Input Driver running over input: " + input);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(VectorWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setMapperClass(InputMapper.class);
  job.setNumReduceTasks(0);
  job.setJarByClass(InputDriver.class);
  FileInputFormat.addInputPath(job, input);
  FileOutputFormat.setOutputPath(job, output);
  job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "maxaverage");
  job.setMapperClass(MaximumAverageMapper.class);
  job.setReducerClass(MaximumAverageReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(FloatWritable.class);
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  if (!job.waitForCompletion(true)) return;
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "systemuser");
  //job.setMapperClass(SystemUserMapper.class);
  job.setMapperClass(DailyCount.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  if (!job.waitForCompletion(true)) return;
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "maxtemp");
  job.setMapperClass(MaxTempMapper.class);
  job.setReducerClass(MaxTempReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(FloatWritable.class);
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  if (!job.waitForCompletion(true)) return;
}
@Override
protected void jobSetup(Job job) throws IOException, ImportException {
  super.jobSetup(job);
  // We shouldn't have gotten here if the bulk load dir is not set,
  // so throw an ImportException.
  if (getContext().getDestination() == null) {
    throw new ImportException("Can't run HBaseBulkImportJob without a "
        + "valid destination directory.");
  }
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), Preconditions.class);
  FileOutputFormat.setOutputPath(job, getContext().getDestination());
  HTable hTable = new HTable(job.getConfiguration(), options.getHBaseTable());
  HFileOutputFormat.configureIncrementalLoad(job, hTable);
}
public int runRandomInputGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMultiplier) throws Exception {
  LOG.info("Running RandomInputGenerator with numMappers=" + numMappers
      + ", numNodes=" + numNodes);
  Job job = Job.getInstance(getConf());
  job.setJobName("Random Input Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());
  job.setInputFormatClass(GeneratorInputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(NullWritable.class);
  setJobConf(job, numMappers, numNodes, width, wrapMultiplier);
  job.setMapperClass(Mapper.class); // identity mapper
  FileOutputFormat.setOutputPath(job, tmpOutput);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  boolean success = jobCompletion(job);
  return success ? 0 : 1;
}
private Job jobListFriends(String inputPath, String outputPath)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job();
  job.setJarByClass(WordCount.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  job.setInputFormatClass(KeyValueTextInputFormat.class); // Need to change the import
  job.setOutputFormatClass(TextOutputFormat.class);
  FileInputFormat.addInputPath(job, new Path(inputPath));
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  job.waitForCompletion(true);
  return job;
}
protected void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  LOG.info("Verify output dir: " + outputDir);
  Job job = Job.getInstance(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(job);
  Scan scan = new Scan();
  TableMapReduceUtil.initTableMapperJob(htd.getTableName().getNameAsString(), scan,
      VerifyMapper.class, BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);
  job.setReducerClass(VerifyReducer.class);
  job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));
  long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
  assertEquals(0, numOutputRecords);
}
public static void main(String[] args) throws Exception {
  BasicConfigurator.configure();
  Configuration conf = new Configuration();
  conf.setQuietMode(true);
  Job job = Job.getInstance(conf, "WordCount");
  job.setJarByClass(HadoopWordCount.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setMapperClass(Map.class);
  job.setCombinerClass(Reduce.class);
  job.setReducerClass(Reduce.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1] + "_" + System.currentTimeMillis()));
  long t = System.currentTimeMillis();
  job.waitForCompletion(true);
  System.out.println("TotalTime=" + (System.currentTimeMillis() - t));
}
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: MaxTemperatureWithCombiner <input path> <output path>");
    System.exit(-1);
  }
  Job job = new Job();
  job.setJarByClass(MaxTemperatureWithCombiner.class);
  job.setJobName("Max Temperature With Combiner");
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setMapperClass(MaxTemperatureMapper.class);
  job.setCombinerClass(MaxTemperatureReducer.class);
  job.setReducerClass(MaxTemperatureReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
/**
 * Creates a simple copy job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Input directories.
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir, Path... indirs)
    throws Exception {
  conf.setInt(MRJobConfig.NUM_MAPS, 3);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("DataMoveJob");
  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(DataCopyMapper.class);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  theJob.setReducerClass(DataCopyReducer.class);
  theJob.setNumReduceTasks(1);
  return theJob;
}
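// Hypothetical caller for createCopyJob(...): the returned Job is configured
// but not yet submitted, so the caller launches it and waits. The paths and
// wrapper-method name are illustrative assumptions, not part of the original.
static boolean runCopy(Configuration conf) throws Exception {
  Job copyJob = createCopyJob(conf, new Path("/tmp/copy-out"),
      new Path("/tmp/in1"), new Path("/tmp/in2"));
  return copyJob.waitForCompletion(true);
}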
/**
 * Creates a simple fail job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Input directories.
 * @return Job initialized for a simple fail job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createFailJob(Configuration conf, Path outdir, Path... indirs)
    throws Exception {
  FileSystem fs = outdir.getFileSystem(conf);
  if (fs.exists(outdir)) {
    fs.delete(outdir, true);
  }
  conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("Fail-Job");
  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(FailMapper.class);
  theJob.setReducerClass(Reducer.class);
  theJob.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  return theJob;
}
protected Job doLoad(Configuration conf, HTableDescriptor htd) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "load-output");
  LOG.info("Load output dir: " + outputDir);
  NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
  conf.set(TABLE_NAME_KEY, htd.getTableName().getNameAsString());
  Job job = Job.getInstance(conf);
  job.setJobName(TEST_NAME + " Load for " + htd.getTableName());
  job.setJarByClass(this.getClass());
  setMapperClass(job);
  job.setInputFormatClass(NMapInputFormat.class);
  job.setNumReduceTasks(0);
  setJobScannerConf(job);
  FileOutputFormat.setOutputPath(job, outputDir);
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  TableMapReduceUtil.initCredentials(job);
  assertTrue(job.waitForCompletion(true));
  return job;
}
public static Job createJob(Configuration conf, Path inDir, Path outDir,
    int numInputFiles, int numReds, String input) throws IOException {
  Job job = Job.getInstance(conf);
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (fs.exists(inDir)) {
    fs.delete(inDir, true);
  }
  fs.mkdirs(inDir);
  for (int i = 0; i < numInputFiles; ++i) {
    DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
    file.writeBytes(input);
    file.close();
  }
  FileInputFormat.setInputPaths(job, inDir);
  FileOutputFormat.setOutputPath(job, outDir);
  job.setNumReduceTasks(numReds);
  return job;
}
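// Hypothetical usage sketch for createJob(...): seed two small input files,
// run with one reducer, and wait for completion. The paths, file contents, and
// wrapper-method name are illustrative assumptions, not part of the original.
static boolean runSampleJob(Configuration conf) throws Exception {
  Job job = createJob(conf, new Path("/tmp/sample-in"),
      new Path("/tmp/sample-out"), 2, 1, "hello world\n");
  return job.waitForCompletion(true);
}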
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  conf.setFloat("beta", Float.parseFloat(args[3]));
  Job job = Job.getInstance(conf);
  job.setJarByClass(UnitSum.class);
  ChainMapper.addMapper(job, PassMapper.class, Object.class, Text.class,
      Text.class, DoubleWritable.class, conf);
  ChainMapper.addMapper(job, BetaMapper.class, Text.class, DoubleWritable.class,
      Text.class, DoubleWritable.class, conf);
  job.setReducerClass(SumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);
  MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, PassMapper.class);
  MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, BetaMapper.class);
  FileOutputFormat.setOutputPath(job, new Path(args[2]));
  job.waitForCompletion(true);
}
private static void runTestLazyOutput(Configuration conf, Path output,
    int numReducers, boolean createLazily) throws Exception {
  Job job = Job.getInstance(conf, "Test-Lazy-Output");
  FileInputFormat.setInputPaths(job, INPUT);
  FileOutputFormat.setOutputPath(job, output);
  job.setJarByClass(TestMapReduceLazyOutput.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(numReducers);
  job.setMapperClass(TestMapper.class);
  job.setReducerClass(TestReducer.class);
  if (createLazily) {
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
  } else {
    job.setOutputFormatClass(TextOutputFormat.class);
  }
  assertTrue(job.waitForCompletion(true));
}
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.printf("Usage: %s [generic options] <input> <output>\n",
        getClass().getSimpleName());
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  }
  Job job = new Job(getConf(), "Text to Parquet");
  job.setJarByClass(getClass());
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setMapperClass(TextToParquetMapper.class);
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(AvroParquetOutputFormat.class);
  AvroParquetOutputFormat.setSchema(job, SCHEMA);
  job.setOutputKeyClass(Void.class);
  job.setOutputValueClass(Group.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
public void testEmptyJoin() throws Exception {
  Configuration conf = new Configuration();
  Path base = cluster.getFileSystem().makeQualified(new Path("/empty"));
  Path[] src = { new Path(base, "i0"), new Path("i1"), new Path("i2") };
  conf.set(CompositeInputFormat.JOIN_EXPR,
      CompositeInputFormat.compose("outer", MapReduceTestUtil.Fake_IF.class, src));
  MapReduceTestUtil.Fake_IF.setKeyClass(conf, MapReduceTestUtil.IncomparableKey.class);
  Job job = Job.getInstance(conf);
  job.setInputFormatClass(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);
  job.setOutputKeyClass(MapReduceTestUtil.IncomparableKey.class);
  job.setOutputValueClass(NullWritable.class);
  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());
  base.getFileSystem(conf).delete(base, true);
}
/**
 * Run a test with a misconfigured number of mappers.
 * Expect failure.
 */
@Test
public void testInvalidMultiMapParallelism() throws Exception {
  Job job = Job.getInstance();
  Path inputPath = createMultiMapsInput();
  Path outputPath = getOutputPath();
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }
  job.setMapperClass(StressMapper.class);
  job.setReducerClass(CountingReducer.class);
  job.setNumReduceTasks(1);
  LocalJobRunner.setLocalMaxRunningMaps(job, -6);
  FileInputFormat.addInputPath(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);
  boolean success = job.waitForCompletion(true);
  assertFalse("Job succeeded somehow", success);
}
/** Test case for zero mappers */
@Test
public void testEmptyMaps() throws Exception {
  Job job = Job.getInstance();
  Path outputPath = getOutputPath();
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }
  job.setInputFormatClass(EmptyInputFormat.class);
  job.setNumReduceTasks(1);
  FileOutputFormat.setOutputPath(job, outputPath);
  boolean success = job.waitForCompletion(true);
  assertTrue("Empty job should work", success);
}
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    return printUsage();
  }
  Job job = createJob(getConf());
  FileOutputFormat.setOutputPath(job, new Path(args[0]));
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took "
      + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
  return ret;
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  conf.setFloat("beta", Float.parseFloat(args[3]));
  Job job = Job.getInstance(conf);
  job.setJarByClass(UnitMultiplication.class);
  ChainMapper.addMapper(job, TransitionMapper.class, Object.class, Text.class,
      Text.class, Text.class, conf);
  ChainMapper.addMapper(job, PRMapper.class, Object.class, Text.class,
      Text.class, Text.class, conf);
  job.setReducerClass(MultiplicationReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class,
      TransitionMapper.class);
  MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class,
      PRMapper.class);
  FileOutputFormat.setOutputPath(job, new Path(args[2]));
  job.waitForCompletion(true);
}
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: wordstddev <in> <out>");
    return 0;
  }
  Configuration conf = getConf();
  Job job = Job.getInstance(conf, "word stddev");
  job.setJarByClass(WordStandardDeviation.class);
  job.setMapperClass(WordStandardDeviationMapper.class);
  job.setCombinerClass(WordStandardDeviationReducer.class);
  job.setReducerClass(WordStandardDeviationReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  Path outputpath = new Path(args[1]);
  FileOutputFormat.setOutputPath(job, outputpath);
  boolean result = job.waitForCompletion(true);
  // read output and calculate standard deviation
  stddev = readAndCalcStdDev(outputpath, conf);
  return (result ? 0 : 1);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("Usage: wordcount <in> [<in>...] <out>");
    System.exit(2);
  }
  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(WordCount.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  for (int i = 0; i < otherArgs.length - 1; ++i) {
    FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
  }
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: wordmean <in> <out>");
    return 0;
  }
  Configuration conf = getConf();
  Job job = Job.getInstance(conf, "word mean");
  job.setJarByClass(WordMean.class);
  job.setMapperClass(WordMeanMapper.class);
  job.setCombinerClass(WordMeanReducer.class);
  job.setReducerClass(WordMeanReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  Path outputpath = new Path(args[1]);
  FileOutputFormat.setOutputPath(job, outputpath);
  boolean result = job.waitForCompletion(true);
  mean = readAndCalcMean(outputpath, conf);
  return (result ? 0 : 1);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  job.setJarByClass(UnitMultiplication.class);
  ChainMapper.addMapper(job, TransitionMapper.class, Object.class, Text.class,
      Text.class, Text.class, conf);
  ChainMapper.addMapper(job, PRMapper.class, Object.class, Text.class,
      Text.class, Text.class, conf);
  job.setReducerClass(MultiplicationReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class,
      TransitionMapper.class);
  MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class,
      PRMapper.class);
  FileOutputFormat.setOutputPath(job, new Path(args[2]));
  job.waitForCompletion(true);
}
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());
  if (args.length != 2) {
    usage();
    return 2;
  }
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSum");
  job.setJarByClass(TeraChecksum.class);
  job.setMapperClass(ChecksumMapper.class);
  job.setReducerClass(ChecksumReducer.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Unsigned16.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  job.setInputFormatClass(TeraInputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());
  if (args.length != 2) {
    usage();
    return 1;
  }
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split
  FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
  job.setInputFormatClass(TeraInputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * Configure the {@link Job} for enabling compression emulation.
 */
static void configure(final Job job)
    throws IOException, InterruptedException, ClassNotFoundException {
  // set the random text mapper
  job.setMapperClass(RandomTextDataMapper.class);
  job.setNumReduceTasks(0);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setInputFormatClass(GenDataFormat.class);
  job.setJarByClass(GenerateData.class);
  // enable output compression
  FileOutputFormat.setCompressOutput(job, true);
  try {
    FileInputFormat.addInputPath(job, new Path("ignored"));
  } catch (IOException e) {
    LOG.error("Error while adding input path ", e);
  }
}
public Job call() throws IOException, InterruptedException, ClassNotFoundException {
  ugi.doAs(new PrivilegedExceptionAction<Job>() {
    public Job run() throws IOException, ClassNotFoundException, InterruptedException {
      job.setMapperClass(LoadMapper.class);
      job.setReducerClass(LoadReducer.class);
      job.setNumReduceTasks(jobdesc.getNumberReduces());
      job.setMapOutputKeyClass(GridmixKey.class);
      job.setMapOutputValueClass(GridmixRecord.class);
      job.setSortComparatorClass(LoadSortComparator.class);
      job.setGroupingComparatorClass(SpecGroupingComparator.class);
      job.setInputFormatClass(LoadInputFormat.class);
      job.setOutputFormatClass(RawBytesOutputFormat.class);
      job.setPartitionerClass(DraftPartitioner.class);
      job.setJarByClass(LoadJob.class);
      job.getConfiguration().setBoolean(Job.USED_GENERIC_PARSER, true);
      FileOutputFormat.setOutputPath(job, outdir);
      job.submit();
      return job;
    }
  });
  return job;
}
/**
 * Runs a GridMix data-generation job.
 */
private static void runDataGenJob(Configuration conf, Path tempDir)
    throws IOException, ClassNotFoundException, InterruptedException {
  // get the local job runner
  JobClient client = new JobClient(conf);
  conf.setInt(MRJobConfig.NUM_MAPS, 1);
  Job job = Job.getInstance(conf);
  CompressionEmulationUtil.configure(job);
  job.setInputFormatClass(CustomInputFormat.class);
  // set the output path
  FileOutputFormat.setOutputPath(job, tempDir);
  // submit and wait for completion
  job.submit();
  int ret = job.waitForCompletion(true) ? 0 : 1;
  assertEquals("Job Failed", 0, ret);
}
public static void main(String[] args) throws Exception {
  Configuration con = new Configuration();
  Job bookJob = Job.getInstance(con, "Average Page Count");
  bookJob.setJarByClass(AveragePageCount.class);
  bookJob.setMapperClass(TextMapper.class);
  bookJob.setReducerClass(AverageReduce.class);
  bookJob.setOutputKeyClass(Text.class);
  bookJob.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(bookJob, new Path("C:/Hadoop/books.txt"));
  FileOutputFormat.setOutputPath(bookJob, new Path("C:/Hadoop/BookOutput"));
  if (bookJob.waitForCompletion(true)) {
    System.exit(0);
  }
}
/**
 * Tests a MR scan using specific start and stop rows.
 *
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
protected void testScan(String start, String stop, String last)
    throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "Scan" + (start != null ? start.toUpperCase() : "Empty")
      + "To" + (stop != null ? stop.toUpperCase() : "Empty");
  LOG.info("Before map/reduce startup - job " + jobName);
  Configuration c = new Configuration(TEST_UTIL.getConfiguration());
  Scan scan = new Scan();
  scan.addFamily(INPUT_FAMILY);
  if (start != null) {
    scan.setStartRow(Bytes.toBytes(start));
  }
  c.set(KEY_STARTROW, start != null ? start : "");
  if (stop != null) {
    scan.setStopRow(Bytes.toBytes(stop));
  }
  c.set(KEY_LASTROW, last != null ? last : "");
  LOG.info("scan before: " + scan);
  Job job = new Job(c, jobName);
  TableMapReduceUtil.initTableMapperJob(Bytes.toString(TABLE_NAME), scan,
      ScanMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
  job.setReducerClass(ScanReducer.class);
  job.setNumReduceTasks(1); // one to get final "first" and "last" key
  FileOutputFormat.setOutputPath(job, new Path(TEST_UTIL.getDataTestDir(), job.getJobName()));
  LOG.info("Started " + job.getJobName());
  assertTrue(job.waitForCompletion(true));
  LOG.info("After map/reduce completion - job " + jobName);
}