public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "systemuser");
    // job.setJarByClass(...) would normally be set here so the job jar ships to the cluster.
    //job.setMapperClass(SystemUserMapper.class);
    job.setMapperClass(DailyCount.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // Exit nonzero if the job fails so callers can detect the failure.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
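// The DailyCount mapper referenced above is not part of this collection. The sketch below
// is a guess at its shape, assuming it extracts a date token from each input line and
// emits (date, 1) pairs for IntSumReducer to sum; only the class name and the
// Text/IntWritable output types come from the driver above, the parsing is an assumption.
// (Requires org.apache.hadoop.io.{LongWritable, Text, IntWritable} and
// org.apache.hadoop.mapreduce.Mapper.)
public static class DailyCount extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text date = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Assumption: the first whitespace-separated field of each record is the date.
        String[] fields = value.toString().split("\\s+", 2);
        if (fields.length > 0 && !fields[0].isEmpty()) {
            date.set(fields[0]);
            context.write(date, ONE);
        }
    }
}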
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException {
    String tableName = args[0];
    Path outputDir = new Path(args[1]);
    String reportSeparatorString = (args.length > 2) ? args[2] : ":";
    conf.set("ReportSeparator", reportSeparatorString);
    Job job = Job.getInstance(conf, NAME + "_" + tableName);
    job.setJarByClass(CellCounter.class);
    Scan scan = getConfiguredScanForJob(conf, args);
    TableMapReduceUtil.initTableMapperJob(tableName, scan,
        CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(1);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setReducerClass(IntSumReducer.class);
    return job;
}
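// A minimal driver sketch for the factory method above. HBase's actual CellCounter tool
// has its own entry point; the usage message and argument check here are illustrative
// assumptions.
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: CellCounter <tablename> <outputDir> [reportSeparator]");
        System.exit(2);
    }
    Configuration conf = HBaseConfiguration.create();
    Job job = createSubmittableJob(conf, args);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}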
@Test
public void testNoChain() throws Exception {
    Path inDir = new Path(localPathRoot, "testing/chain/input");
    Path outDir = new Path(localPathRoot, "testing/chain/output");
    String input = "a\nb\na\n";
    String expectedOutput = "a\t2\nb\t1\n";
    Configuration conf = createJobConf();
    Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 1, input);
    job.setJobName("chain");
    ChainMapper.addMapper(job, TokenCounterMapper.class, Object.class, Text.class,
        Text.class, IntWritable.class, null);
    ChainReducer.setReducer(job, IntSumReducer.class, Text.class, IntWritable.class,
        Text.class, IntWritable.class, null);
    job.waitForCompletion(true);
    assertTrue("Job failed", job.isSuccessful());
    assertEquals("Outputs don't match", expectedOutput,
        MapReduceTestUtil.readOutput(outDir, conf));
}
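// For contrast with the single-stage pipeline tested above, further map stages can be
// appended after the reducer. A sketch, assuming the stock InverseMapper (which swaps
// key and value) as the extra stage; purely illustrative:
ChainReducer.addMapper(job, InverseMapper.class, Text.class, IntWritable.class,
    IntWritable.class, Text.class, null);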
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName("test TextWithPath Input");
    job.setJarByClass(DemoRun.class);
    TWPInputFormat.addInputPath(job, new Path(args[0]));
    job.setInputFormatClass(TWPInputFormat.class);
    job.setMapperClass(TestMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(IntSumReducer.class);
    job.setNumReduceTasks(1);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // waitForCompletion submits the job itself; a separate submit() call is redundant.
    return job.waitForCompletion(true) ? 0 : 1;
}
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName("CombineFile Demo");
    job.setJarByClass(TestMain.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    job.setInputFormatClass(CFInputFormat.class);
    job.setMapperClass(TestMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(IntSumReducer.class);
    job.setNumReduceTasks(13);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // waitForCompletion submits the job itself; a separate submit() call is redundant.
    return job.waitForCompletion(true) ? 0 : 1;
}
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return 2;
    }
    Job job = Job.getInstance(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);
    // set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);
    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);
    // use the defined mapper
    job.setMapperClass(MapClass.class);
    // use the WordCount reducer as both combiner and reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}
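// Since MultiFileWordCount implements Tool (note the getConf() call above), the standard
// entry point runs it through ToolRunner so that generic Hadoop options (-D, -files, ...)
// are parsed before run() is invoked. A minimal sketch of that idiom:
public static void main(String[] args) throws Exception {
    int exitCode = ToolRunner.run(new Configuration(), new MultiFileWordCount(), args);
    System.exit(exitCode);
}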
public int run(String[] args) throws Exception {
    GfxdDataSerializable.initTypes();
    Configuration conf = getConf();
    Path outputPath = new Path(args[0]);
    String hdfsHomeDir = args[1];
    String tableName = args[2];
    outputPath.getFileSystem(conf).delete(outputPath, true);
    conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
    conf.set(RowInputFormat.INPUT_TABLE, tableName);
    conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
    Job job = Job.getInstance(conf, "Busy Airport Count");
    job.setInputFormatClass(RowInputFormat.class);
    // configure mapper and reducer
    job.setMapperClass(SampleMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    // configure output
    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
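// SampleMapper is not shown here. A hedged sketch of its likely shape, following the
// GemFire XD sample pattern: RowInputFormat delivers each table row as a Row value whose
// getRowAsResultSet() exposes the columns. The ORIG_AIRPORT column name is an assumption
// for this airport-count job.
public static class SampleMapper extends Mapper<Object, Row, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);

    @Override
    protected void map(Object key, Row row, Context context)
            throws IOException, InterruptedException {
        try {
            ResultSet rs = row.getRowAsResultSet();
            // Count one flight per origin airport (column name assumed).
            context.write(new Text(rs.getString("ORIG_AIRPORT")), ONE);
        } catch (SQLException e) {
            throw new IOException(e);
        }
    }
}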
public int run(String[] args) throws Exception {
    GfxdDataSerializable.initTypes();
    Configuration conf = getConf();
    Path outputPath = new Path(args[0]);
    String hdfsHomeDir = args[1];
    String tableName = args[2];
    outputPath.getFileSystem(conf).delete(outputPath, true);
    conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
    conf.set(RowInputFormat.INPUT_TABLE, tableName);
    conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
    Job job = Job.getInstance(conf, "Busy Leg Count");
    job.setInputFormatClass(RowInputFormat.class);
    // configure mapper and reducer
    job.setMapperClass(SampleMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    // configure output
    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count1");
    job.setJarByClass(WordCountPredefined.class);
    job.setMapperClass(TokenCounterMapper.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
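// TokenCounterMapper ships with Hadoop (org.apache.hadoop.mapreduce.lib.map), which is
// why this word count needs no custom mapper: it tokenizes each line and emits (token, 1).
// An equivalent hand-written form, paraphrasing the stock implementation:
public static class TokenCounterMapper extends Mapper<Object, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, ONE);
        }
    }
}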
private boolean runJob(String inDir, String outDir, boolean compressOutput) throws Exception {
    Configuration conf = getConf();
    // Modern key names; the deprecated "mapred.output.compress*" pair maps to these.
    conf.setBoolean("mapreduce.output.fileoutputformat.compress", compressOutput);
    conf.set("mapreduce.output.fileoutputformat.compress.codec",
        "org.apache.hadoop.io.compress.GzipCodec");
    Job job = Job.getInstance(conf);
    job.setJarByClass(HadoopMain.class);
    FileInputFormat.addInputPath(job, new Path(inDir));
    FileOutputFormat.setOutputPath(job, new Path(outDir));
    job.setMapperClass(HadoopMap.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(MultiOutputIntSumReducer.class);

    // Turn off the default output ("part-..."); only the named outputs below are needed.
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, "W", TextOutputFormat.class, Text.class, IntWritable.class);
    MultipleOutputs.addNamedOutput(job, "CoocF", TextOutputFormat.class, Text.class, IntWritable.class);
    MultipleOutputs.addNamedOutput(job, "CoocWF", TextOutputFormat.class, Text.class, IntWritable.class);
    MultipleOutputs.addNamedOutput(job, "F", TextOutputFormat.class, Text.class, IntWritable.class);
    MultipleOutputs.addNamedOutput(job, "WF", TextOutputFormat.class, Text.class, IntWritable.class);

    // Ship the MWE vocabulary through the distributed cache if one is configured.
    String[] mwePaths = conf.getStrings("holing.mwe.vocabulary", "");
    String mwePath = "";
    if (mwePaths != null && mwePaths.length > 0 && mwePaths[0] != null) {
        mwePath = mwePaths[0];
    }
    if (!mwePath.isEmpty()) {
        job.addCacheFile(new URI(mwePath + "#mwe_voc"));
    }

    job.setJobName("lefex: Feature Extraction");
    return job.waitForCompletion(true);
}
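// MultiOutputIntSumReducer is referenced above but not shown. A sketch of the pattern it
// presumably follows: sum the counts, then route each result to one of the named outputs
// ("W", "CoocF", ...) registered in runJob. The key-prefix routing rule is an assumption;
// MultipleOutputs.write and the cleanup-time close() are the standard API.
public static class MultiOutputIntSumReducer
        extends Reducer<Text, IntWritable, Text, IntWritable> {
    private MultipleOutputs<Text, IntWritable> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable v : values) {
            sum += v.get();
        }
        // Assumption: keys arrive tagged with their target output, e.g. "W\tword".
        String[] parts = key.toString().split("\t", 2);
        if (parts.length == 2) {
            mos.write(parts[0], new Text(parts[1]), new IntWritable(sum));
        } else {
            context.write(key, new IntWritable(sum));
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        mos.close(); // without this the named outputs are never flushed
    }
}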