/**
 * Sets up a job conf for the given job using the given config object. Ensures
 * that the correct input format is set, along with the mapper and reducer
 * classes, the input and output key and value classes, and any other job
 * configuration.
 *
 * @param config the extractor holding the job's configuration values
 * @return JobConf representing the job to be run
 * @throws IOException
 */
private JobConf getJob(ConfigExtractor config) throws IOException {
  JobConf job = new JobConf(config.getConfig(), SliveTest.class);
  job.setInputFormat(DummyInputFormat.class);
  FileOutputFormat.setOutputPath(job, config.getOutputPath());
  job.setMapperClass(SliveMapper.class);
  job.setPartitionerClass(SlivePartitioner.class);
  job.setReducerClass(SliveReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormat(TextOutputFormat.class);
  TextOutputFormat.setCompressOutput(job, false);
  job.setNumReduceTasks(config.getReducerAmount());
  job.setNumMapTasks(config.getMapAmount());
  return job;
}
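// Usage sketch (illustrative assumption, not part of the original code):
// the returned JobConf is handed to JobClient.runJob, which submits the
// job and blocks until it completes.
JobConf job = getJob(config);
RunningJob running = JobClient.runJob(job);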
/**
 * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
 */
public static void addDependencyJars(JobConf job) throws IOException {
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job);
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
      job,
      // when making changes here, consider also mapreduce.TableMapReduceUtil
      // pull job classes
      job.getMapOutputKeyClass(),
      job.getMapOutputValueClass(),
      job.getOutputKeyClass(),
      job.getOutputValueClass(),
      job.getPartitionerClass(),
      job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
      job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
      job.getCombinerClass());
}
private static JobConf getOldAPIJobconf(Configuration configuration, String name,
    String input, String output) throws Exception {
  final JobConf jobConf = new JobConf(configuration);
  final FileSystem fs = FileSystem.get(configuration);
  if (fs.exists(new Path(output))) {
    fs.delete(new Path(output), true);
  }
  fs.close();
  jobConf.setJobName(name);
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(IntWritable.class);
  jobConf.setMapperClass(WordCountWithOldAPI.TokenizerMapperWithOldAPI.class);
  jobConf.setCombinerClass(WordCountWithOldAPI.IntSumReducerWithOldAPI.class);
  jobConf.setReducerClass(WordCountWithOldAPI.IntSumReducerWithOldAPI.class);
  jobConf.setInputFormat(SequenceFileInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.setInputPaths(jobConf, new Path(input));
  FileOutputFormat.setOutputPath(jobConf, new Path(output));
  return jobConf;
}
public static void main(String[] args) throws Exception {
  JobConf conf = new JobConf(WeatherData.class);
  conf.setJobName("temp");
  // Note: the mapper's output types differ from the defaults, so they
  // must be set explicitly.
  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(Text.class);
  conf.setMapperClass(MaxTemperatureMapper.class);
  conf.setReducerClass(MaxTemperatureReducer.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.setInputPaths(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));
  JobClient.runJob(conf);
}
@SuppressWarnings("rawtypes") @Override public void configure() { super.configure(); outputFormat = new TextOutputFormat(); Class<? extends CompressionCodec> codecClass = null; if (CompressEnum.NONE.name().equalsIgnoreCase(compress)) { codecClass = null; } else if (CompressEnum.GZIP.name().equalsIgnoreCase(compress)) { codecClass = org.apache.hadoop.io.compress.GzipCodec.class; } else if (CompressEnum.BZIP2.name().equalsIgnoreCase(compress)) { codecClass = org.apache.hadoop.io.compress.BZip2Codec.class; } else { throw new IllegalArgumentException("Unsupported compress format: " + compress); } if (codecClass != null) { this.outputFormat.setOutputCompressorClass(jobConf, codecClass); } }
public static void main(String[] args) throws Exception {
  JobConf conf = new JobConf(WordCountOldAPI.class);
  conf.setJobName("old wordcount");
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);
  conf.setMapperClass(Map.class);
  conf.setCombinerClass(Reduce.class);
  conf.setReducerClass(Reduce.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.setInputPaths(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));
  JobClient.runJob(conf);
}
public void run(String[] args) throws Exception {
  JobConf conf = new JobConf(this.getClass());
  conf.setJobName("wordcount");
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);
  conf.setMapperClass(Map.class);
  conf.setCombinerClass(Reduce.class);
  conf.setReducerClass(Reduce.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.setInputPaths(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));
  JobClient.runJob(conf);
}
public static void main(String[] args) throws IOException {
  if (args.length != 1) {
    System.err.println("Usage: mapred1 outputpath");
    System.exit(1); // non-zero exit signals the usage error
  }
  JobConf conf = new JobConf(mapred1.class);
  conf.setJobName("mapred1");
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);
  conf.setMapperClass(Map.class);
  conf.setReducerClass(Reduce.class);
  conf.set("es.nodes", "10.149.3.3:9200");
  conf.setInputFormat(EsInputFormat.class);
  conf.set("es.resource", "kb/doc");
  conf.set("es.query",
      "{\"query\":{\"query_string\":{\"fields\":[\"article_dc_title\"],\"query\":\"IN HET ZUIDEN\"}}}");
  conf.setOutputFormat(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(conf, new Path(args[0]));
  JobClient.runJob(conf);
}
public static void fillInWordCountMRJobConf(JobConf conf) {
  String input = "select n_comment from tpch.nation";
  conf.setJobName("samplejob-wordcount");
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);
  conf.setMapperClass(Map.class);
  conf.setCombinerClass(Reduce.class);
  conf.setReducerClass(Reduce.class);
  conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);
  com.cloudera.recordservice.mr.RecordServiceConfig.setInputQuery(conf, input);
  setRandomOutputDir(conf);
}
public static void IDMappingJob(String[] args) throws IOException {
  JobConf job = new JobConf();
  new GenericOptionsParser(job, args);
  job.setJarByClass(HybridDriver.class);
  job.setJobName("Converting binary similarity scores to text");
  job.setMapperClass(IDMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  Path inputPath = new Path(OUTPUT_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  SequenceFileInputFormat.setInputPaths(job, inputPath);
  Path outputPath = new Path("SimilarityScores");
  job.setOutputFormat(TextOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, outputPath);
  FileSystem.get(job).delete(outputPath, true);
  HashPagesDriver.prepareDistribCache(job, HashPagesDriver.IDS_FILE2); // remove? not sure
  JobSubmitter.run(job, "BINARY TO TEXT",
      job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}
public static void main(String[] args) throws IOException {
  JobConf job = new JobConf(DuplicateGraph.class);
  job.setJobName(DuplicateGraph.class.getSimpleName());
  job.setMapperClass(MapRecordOnly.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setInputFormat(TextInputFormat.class);
  job.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.setInputPaths(job, args[0]);
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setNumReduceTasks(0);
  JobClient.runJob(job);
}
public void runParsing(String inputPath, String outputPath) throws IOException {
  JobConf conf = new JobConf(Hits.class);
  // Delete folders
  FileSystem.get(conf).delete(new Path("wiki"), true);
  // Input / Mapper
  FileInputFormat.setInputPaths(conf, new Path(inputPath));
  conf.setInputFormat(DataInputFormat.class);
  conf.setMapperClass(DataParserMapper.class);
  // Output / Reducer
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));
  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setReducerClass(DataParserReducer.class);
  JobClient.runJob(conf);
}
public void getLinkIn(String inputPath, String outputPath) throws IOException {
  JobConf conf = new JobConf(Hits.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.setInputPaths(conf, new Path(inputPath));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));
  conf.setMapperClass(FromPagesMapper.class);
  conf.setReducerClass(FromPagesReducer.class);
  JobClient.runJob(conf);
}
private void initialize(String inputPath1, String inputPath2, String outputPath)
    throws IOException {
  JobConf conf = new JobConf(Hits.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.addInputPath(conf, new Path(inputPath1));
  FileInputFormat.addInputPath(conf, new Path(inputPath2));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));
  conf.setMapperClass(InitAuthHubMapper.class);
  conf.setReducerClass(InitAuthHubReducer.class);
  JobClient.runJob(conf);
}
private void runCalculation(String inputPath, String outputPath) throws IOException {
  JobConf conf = new JobConf(Hits.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.setInputPaths(conf, new Path(inputPath));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));
  conf.setMapperClass(CalculateMapper.class);
  conf.setReducerClass(CalculateReducer.class);
  JobClient.runJob(conf);
}
public int run(final String[] args) throws Exception {
  log.info("run starting");
  final Configuration conf = getConf();
  JobConf job = new JobConf(conf, WordCountInput.class);
  job.setJobName("AerospikeWordCountInput");
  job.setInputFormat(AerospikeInputFormat.class);
  job.setMapperClass(Map.class);
  job.setCombinerClass(Reduce.class);
  job.setReducerClass(Reduce.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setOutputFormat(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(args[0]));
  JobClient.runJob(job);
  log.info("finished");
  return 0;
}
public static void main(String[] args) throws Exception {
  JobConf conf = new JobConf(InvertedIndex.class);
  conf.setJobName("inverted index");
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setMapperClass(Map.class);
  conf.setCombinerClass(Reduce.class);
  conf.setReducerClass(Reduce.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.setInputPaths(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));
  JobClient.runJob(conf);
}
public static void main(String[] args) throws Exception {
  JobConf conf = new JobConf(WordCount.class);
  conf.setJobName("wordcount");
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);
  conf.setMapperClass(Map.class);
  conf.setCombinerClass(Reduce.class);
  conf.setReducerClass(Reduce.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.setInputPaths(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));
  JobClient.runJob(conf);
}
public static void main(String[] args) throws Exception {
  JobConf conf = new JobConf(WordLenght.class);
  conf.setJobName("wordlenght");
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);
  conf.setMapperClass(Map.class);
  conf.setCombinerClass(Reduce.class);
  conf.setReducerClass(Reduce.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.setInputPaths(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));
  JobClient.runJob(conf);
}
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: CartesianCommentComparison <in> <out>");
    ToolRunner.printGenericCommandUsage(System.err);
    System.exit(2);
  }
  // Configure the join type
  JobConf conf = new JobConf("Cartesian Product");
  conf.setJarByClass(CartesianCommentComparison.class);
  conf.setMapperClass(CartesianMapper.class);
  conf.setNumReduceTasks(0);
  conf.setInputFormat(CartesianInputFormat.class);
  // Configure the input format
  CartesianInputFormat.setLeftInputInfo(conf, TextInputFormat.class, args[0]);
  CartesianInputFormat.setRightInputInfo(conf, TextInputFormat.class, args[0]);
  TextOutputFormat.setOutputPath(conf, new Path(args[1]));
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  // JobClient.runJob blocks until the job completes, so the original
  // isComplete() polling loop was dead code and has been dropped.
  RunningJob job = JobClient.runJob(conf);
  return job.isSuccessful() ? 0 : 1;
}
protected JobConf createJobConf() throws Exception {
  JobConf jobConf = KafkaETLJob.createJobConf("SimpleKafkaETL", _topic, _props, getClass());
  jobConf.setMapperClass(SimpleKafkaETLMapper.class);
  KafkaETLInputFormat.setInputPaths(jobConf, new Path(_input));
  jobConf.setOutputKeyClass(LongWritable.class);
  jobConf.setOutputValueClass(Text.class);
  jobConf.setOutputFormat(TextOutputFormat.class);
  TextOutputFormat.setCompressOutput(jobConf, false);
  Path output = new Path(_output);
  FileSystem fs = output.getFileSystem(jobConf);
  if (fs.exists(output)) {
    // recursive delete replaces the deprecated single-argument delete(Path)
    fs.delete(output, true);
  }
  TextOutputFormat.setOutputPath(jobConf, output);
  jobConf.setNumReduceTasks(0);
  return jobConf;
}
/**
 * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(Job)
 */
public static void addDependencyJars(JobConf job) throws IOException {
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
      job,
      org.apache.zookeeper.ZooKeeper.class,
      com.google.common.base.Function.class,
      com.google.protobuf.Message.class,
      job.getMapOutputKeyClass(),
      job.getMapOutputValueClass(),
      job.getOutputKeyClass(),
      job.getOutputValueClass(),
      job.getPartitionerClass(),
      job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
      job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
      job.getCombinerClass());
}
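// Usage sketch (illustrative assumption): a driver calls the helper above
// only after the job is fully configured, so that every class the job
// references is shipped to the cluster before submission. MyDriver,
// MyMapper and MyReducer are hypothetical placeholders.
JobConf job = new JobConf(getConf(), MyDriver.class);
job.setMapperClass(MyMapper.class);
job.setReducerClass(MyReducer.class);
addDependencyJars(job);
JobClient.runJob(job);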