private void doMapReduce(final Class<? extends Test> cmd)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
private void doMapReduce(final Class<? extends Test> cmd)
    throws IOException, InterruptedException, ClassNotFoundException {
  Path inputDir = writeInputFile(this.conf);
  this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(this.conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
  job.waitForCompletion(true);
}
private void doMapReduce(final Class<? extends Test> cmd)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
public int run(String[] args)
    throws IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = super.getConf();
  // TODO: Add to the configuration the postcode index in customer csv file
  // TODO: Add to the configuration the referential file name
  Job job = Job.getInstance(conf, JOB_NAME);
  // TODO: Add the cache file URI to the job
  job.setJarByClass(Main.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setMapperClass(CsvFieldCountMapper.class);
  job.setCombinerClass(LongSumReducer.class);
  job.setReducerClass(LongSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  FileInputFormat.setInputPaths(job, new Path(args[1]));
  FileOutputFormat.setOutputPath(job, new Path(args[2]));
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
public int run(String[] args)
    throws IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = super.getConf();
  conf.setInt(CsvFieldCountMapper.CSV_FIELD_IDX, 2);
  conf.set(CsvFieldCountMapper.FILTER_CACHE_FILE_NAME, "fr_urban_postcodes.txt");
  Job job = Job.getInstance(conf, JOB_NAME);
  job.addCacheFile(new Path(args[0]).toUri());
  job.setJarByClass(Main.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setMapperClass(CsvFieldCountMapper.class);
  job.setCombinerClass(LongSumReducer.class);
  job.setReducerClass(LongSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  FileInputFormat.setInputPaths(job, new Path(args[1]));
  FileOutputFormat.setOutputPath(job, new Path(args[2]));
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
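The driver above registers a cache file and two configuration values, but the CsvFieldCountMapper that consumes them is not shown. The sketch below is a hypothetical mapper illustrating the usual pattern, not the actual class: the configuration keys, the ';' CSV separator, and all class and field names are assumptions; only the Hadoop Mapper API and the cache-file mechanics (cached files are symlinked into the task working directory under their own name) are standard.

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper; not the actual CsvFieldCountMapper.
public class CachedFilterMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

  private static final LongWritable ONE = new LongWritable(1L);
  private final Set<String> postcodes = new HashSet<String>();
  private final Text outKey = new Text();
  private int fieldIdx;

  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    // Assumed configuration keys; the real mapper exposes them as constants.
    fieldIdx = context.getConfiguration().getInt("csv.field.idx", 0);
    String cacheFileName = context.getConfiguration().get("filter.cache.file.name");
    // A file registered with job.addCacheFile(...) can be opened by name from the
    // task's working directory.
    BufferedReader reader = new BufferedReader(new FileReader(cacheFileName));
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        postcodes.add(line.trim());
      }
    } finally {
      reader.close();
    }
  }

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    String[] fields = value.toString().split(";");   // assumed CSV separator
    if (fieldIdx < fields.length && postcodes.contains(fields[fieldIdx].trim())) {
      outKey.set(fields[fieldIdx].trim());
      context.write(outKey, ONE);   // LongSumReducer totals the counts per postcode
    }
  }
}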
@Override
public int run(String[] args) throws Exception {
  Configuration conf = this.getConf();
  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(WordCountImproved.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(LongSumReducer.class);
  job.setReducerClass(LongSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextInputFormat.addInputPath(job, new Path(args[0]));
  TextOutputFormat.setOutputPath(job, new Path(args[1]));
  return job.waitForCompletion(true) ? 0 : 1;
}
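This driver (and the log-parsing example further down) references a TokenizerMapper without showing it. As a point of reference, a minimal word-count mapper compatible with LongSumReducer might look like the following; the class name and whitespace tokenization are assumptions, and only the (Text, LongWritable) output contract comes from the drivers above.

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TokenizerMapper extends Mapper<Object, Text, Text, LongWritable> {

  private static final LongWritable ONE = new LongWritable(1L);
  private final Text word = new Text();

  @Override
  public void map(Object key, Text value, Context context)
      throws IOException, InterruptedException {
    StringTokenizer itr = new StringTokenizer(value.toString());
    while (itr.hasMoreTokens()) {
      word.set(itr.nextToken());
      context.write(word, ONE);   // LongSumReducer (as combiner and reducer) sums the 1s per word
    }
  }
}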
@Override
public int run(String[] args) throws Exception {
  Configuration conf = this.getConf();
  Job job = Job.getInstance(conf, "Extract server type");
  job.setJarByClass(ServerType.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setMapperClass(ServerTypeExtracter.class);
  job.setReducerClass(LongSumReducer.class);
  job.setInputFormatClass(WarcInputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  // Execute job and return status
  return job.waitForCompletion(true) ? 0 : 1;
}
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  Job job = new Job(conf, "invwordcount");
  job.setJarByClass(InverseWCJob.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(InverseWordCountMapper.class);
  job.setReducerClass(LongSumReducer.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.waitForCompletion(true);
}
/**
 * Builds and runs the Hadoop job.
 * @return 0 if the Hadoop job completes successfully and 1 otherwise.
 */
@Override
public int run(String[] arg0) throws Exception {
  Configuration conf = getConf();
  Job job = new Job(conf);
  job.setJarByClass(WATServerType.class);
  job.setNumReduceTasks(1);

  String inputPath = "data/*.warc.wat.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
  LOG.info("Input path: " + inputPath);
  FileInputFormat.addInputPath(job, new Path(inputPath));

  String outputPath = "/tmp/cc/";
  FileSystem fs = FileSystem.newInstance(conf);
  if (fs.exists(new Path(outputPath))) {
    fs.delete(new Path(outputPath), true);
  }
  FileOutputFormat.setOutputPath(job, new Path(outputPath));

  job.setInputFormatClass(WARCFileInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(ServerTypeMap.ServerMapper.class);
  job.setReducerClass(LongSumReducer.class);

  if (job.waitForCompletion(true)) {
    return 0;
  } else {
    return 1;
  }
}
/**
 * Builds and runs the Hadoop job.
 * @return 0 if the Hadoop job completes successfully and 1 otherwise.
 */
@Override
public int run(String[] arg0) throws Exception {
  Configuration conf = getConf();
  Job job = new Job(conf);
  job.setJarByClass(WETWordCount.class);
  job.setNumReduceTasks(1);

  String inputPath = "data/*.warc.wet.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
  LOG.info("Input path: " + inputPath);
  FileInputFormat.addInputPath(job, new Path(inputPath));

  String outputPath = "/tmp/cc/";
  FileSystem fs = FileSystem.newInstance(conf);
  if (fs.exists(new Path(outputPath))) {
    fs.delete(new Path(outputPath), true);
  }
  FileOutputFormat.setOutputPath(job, new Path(outputPath));

  job.setInputFormatClass(WARCFileInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(org.commoncrawl.examples.mapreduce.WordCounterMap.WordCountMapper.class);
  // The reducer is quite useful in the word frequency task
  job.setReducerClass(LongSumReducer.class);

  if (job.waitForCompletion(true)) {
    return 0;
  } else {
    return 1;
  }
}
/**
 * Builds and runs the Hadoop job.
 * @return 0 if the Hadoop job completes successfully and -1 otherwise.
 */
@Override
public int run(String[] arg0) throws Exception {
  Configuration conf = getConf();
  Job job = new Job(conf);
  job.setJarByClass(WARCTagCounter.class);
  job.setNumReduceTasks(1);

  String inputPath = "data/*.warc.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
  LOG.info("Input path: " + inputPath);
  FileInputFormat.addInputPath(job, new Path(inputPath));

  String outputPath = "/tmp/cc/";
  FileSystem fs = FileSystem.newInstance(conf);
  if (fs.exists(new Path(outputPath))) {
    fs.delete(new Path(outputPath), true);
  }
  FileOutputFormat.setOutputPath(job, new Path(outputPath));

  job.setInputFormatClass(WARCFileInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(TagCounterMap.TagCounterMapper.class);
  job.setReducerClass(LongSumReducer.class);

  return job.waitForCompletion(true) ? 0 : -1;
}
private void doMapReduce(final Class<? extends Test> cmd)
    throws IOException, InterruptedException, ClassNotFoundException {
  Path inputDir = writeInputFile(this.conf);
  this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(this.conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  // Add a Class from the hbase.jar so it gets registered too.
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      org.apache.hadoop.hbase.util.Bytes.class);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
private void doMapReduce(final Class<? extends Test> cmd, TestOptions opts)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf, opts);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(NLineInputFormat.class);
  NLineInputFormat.setInputPaths(job, inputDir);
  // this is default, but be explicit about it just in case.
  NLineInputFormat.setNumLinesPerSplit(job, 1);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      DescriptiveStatistics.class, // commons-math
      ObjectMapper.class);         // jackson-mapper-asl
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
@Before
public void before() throws URISyntaxException {
  CsvFieldCountMapper mapper = new CsvFieldCountMapper();
  LongSumReducer<Text> combiner = new LongSumReducer<Text>();
  LongSumReducer<Text> reducer = new LongSumReducer<Text>();
  mapReduceDriver = MapReduceDriver.newMapReduceDriver(mapper, reducer, combiner);
  Configuration conf = mapReduceDriver.getConfiguration();
  conf.setInt(CsvFieldCountMapper.CSV_FIELD_IDX, 2);
  conf.set(CsvFieldCountMapper.FILTER_CACHE_FILE_NAME, "fr_urban_postcodes.txt");
  mapReduceDriver.addCacheFile(
      new File("target/test-classes/referential/fr_urban_postcodes.txt").toURI());
}
@Before
public void before() throws URISyntaxException {
  CsvFieldCountMapper mapper = new CsvFieldCountMapper();
  LongSumReducer<Text> combiner = new LongSumReducer<Text>();
  LongSumReducer<Text> reducer = new LongSumReducer<Text>();
  mapReduceDriver = MapReduceDriver.newMapReduceDriver(mapper, reducer, combiner);
  Configuration conf = mapReduceDriver.getConfiguration();
  conf.setInt(CsvFieldCountMapper.CSV_FIELD_IDX, 2);
}
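Neither setup shows an actual test method. A hypothetical test living in the same test class as the @Before method above could look like this; the CSV layout, the ';' separator, standard (LongWritable, Text) text-input records, and the expectation that "75001" appears in the referential file are all assumptions for illustration. Only the MRUnit withInput/withOutput/runTest calls are the library's API.

@Test
public void countsPostcodesPresentInTheReferential() throws IOException {
  // Hypothetical CSV lines: field index 2 (configured above) is assumed to hold
  // the postcode, and "75001" is assumed to appear in fr_urban_postcodes.txt.
  mapReduceDriver
      .withInput(new LongWritable(0L), new Text("1;Dupont;75001;Paris"))
      .withInput(new LongWritable(1L), new Text("2;Durand;75001;Paris"))
      .withOutput(new Text("75001"), new LongWritable(2L))
      .runTest();
}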
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.out.printf("Usage: " + this.getClass().getName() + " <input dir> <output dir>\n");
    return -1;
  }

  Job job = new Job(getConf());
  job.setJarByClass(StringPairTestDriver.class);
  job.setJobName("Custom Writable Comparable");
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  /*
   * LongSumReducer is a Hadoop API class that sums values into
   * a LongWritable. It works with any key and value type, and therefore
   * supports the new StringPairWritable as a key type.
   */
  job.setReducerClass(LongSumReducer.class);
  job.setMapperClass(StringPairMapper.class);

  /* Set the key output class for the job */
  job.setOutputKeyClass(StringPairWritable.class);

  /* Set the value output class for the job */
  job.setOutputValueClass(LongWritable.class);

  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.out.printf("Usage: " + this.getClass().getName() + " <input dir> <output dir>\n");
    return -1;
  }

  Job job = new Job(getConf());
  job.setJarByClass(StringPairTestDriver.class);
  job.setJobName("Custom Writable Comparable");
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  /*
   * LongSumReducer is a Hadoop API class that sums values into
   * a LongWritable. It works with any key and value type, and therefore
   * supports the new StringPairWritable as a key type.
   */
  job.setReducerClass(LongSumReducer.class);
  job.setMapperClass(StringPairMapper.class);

  /* Set the key output class for the job */
  /* TODO: implement */

  /* Set the value output class for the job */
  job.setOutputValueClass(LongWritable.class);

  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
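Both variants rely on a custom StringPairWritable key type whose implementation is not shown here. A minimal sketch of such a key, assuming it simply wraps two strings as a WritableComparable, is given below; the real class in the exercise may differ.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;

// Hypothetical sketch of a two-string composite key; not the exercise's actual class.
public class StringPairWritable implements WritableComparable<StringPairWritable> {
  private String left;
  private String right;

  // Hadoop requires a no-argument constructor for deserialization.
  public StringPairWritable() {
  }

  public StringPairWritable(String left, String right) {
    this.left = left;
    this.right = right;
  }

  @Override
  public void write(DataOutput out) throws IOException {
    out.writeUTF(left);
    out.writeUTF(right);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    left = in.readUTF();
    right = in.readUTF();
  }

  @Override
  public int compareTo(StringPairWritable other) {
    int cmp = left.compareTo(other.left);
    return cmp != 0 ? cmp : right.compareTo(other.right);
  }

  // hashCode/equals matter when more than one reducer partitions on this key.
  @Override
  public int hashCode() {
    return 31 * left.hashCode() + right.hashCode();
  }

  @Override
  public boolean equals(Object o) {
    if (!(o instanceof StringPairWritable)) {
      return false;
    }
    StringPairWritable other = (StringPairWritable) o;
    return left.equals(other.left) && right.equals(other.right);
  }

  // TextOutputFormat writes keys using toString().
  @Override
  public String toString() {
    return "(" + left + "," + right + ")";
  }
}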
private void doMapReduce(final Class<? extends Test> cmd, TestOptions opts)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf, opts);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(NLineInputFormat.class);
  NLineInputFormat.setInputPaths(job, inputDir);
  // this is default, but be explicit about it just in case.
  NLineInputFormat.setNumLinesPerSplit(job, 1);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      Histogram.class,      // yammer metrics
      ObjectMapper.class);  // jackson-mapper-asl
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
private void doMapReduce(final Class<? extends Test> cmd)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  // Add a Class from the hbase.jar so it gets registered too.
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      org.apache.hadoop.hbase.util.Bytes.class);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
/**
 * Builds and runs the Hadoop job.
 * @return 0 if the Hadoop job completes successfully and 1 otherwise.
 */
@Override
public int run(String[] arg0) throws Exception {
  Configuration conf = getConf();
  Job job = new Job(conf);
  job.setJarByClass(WETWordCount.class);
  job.setNumReduceTasks(1);

  String inputPath = "data/*.warc.wet.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
  LOG.info("Input path: " + inputPath);
  FileInputFormat.addInputPath(job, new Path(inputPath));

  String outputPath = "/tmp/cc/";
  FileSystem fs = FileSystem.newInstance(conf);
  if (fs.exists(new Path(outputPath))) {
    fs.delete(new Path(outputPath), true);
  }
  FileOutputFormat.setOutputPath(job, new Path(outputPath));

  job.setInputFormatClass(WARCFileInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(WordCounterMap.WordCountMapper.class);
  // The reducer is quite useful in the word frequency task
  job.setReducerClass(LongSumReducer.class);

  if (job.waitForCompletion(true)) {
    return 0;
  } else {
    return 1;
  }
}
@Override
public int run(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: wordcount <in> <out>");
    return 2;
  }

  conf.set("nl.basjes.parse.apachehttpdlogline.format", logFormat);
  // A ',' separated list of fields
  conf.set("nl.basjes.parse.apachehttpdlogline.fields", "STRING:request.status.last");

  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(Wordcount.class);
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  job.setInputFormatClass(ApacheHttpdLogfileInputFormat.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(LongSumReducer.class);
  job.setReducerClass(LongSumReducer.class);

  // configuration should contain reference to your namenode
  FileSystem fs = FileSystem.get(conf);
  // true stands for recursively deleting the folder you gave
  Path outputPath = new Path(otherArgs[1]);
  fs.delete(outputPath, true);
  FileOutputFormat.setOutputPath(job, outputPath);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);

  if (job.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}