public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJobName("DocumentFrequencyCount");
    job.setJarByClass(DF.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(DFMap.class);
    job.setReducerClass(DFReduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean wait = job.waitForCompletion(true);
    System.exit(wait ? 0 : 1);
}
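// DFMap and DFReduce are referenced by the driver above but not shown in this
// listing. The following is a minimal sketch of what such a pair could look
// like (standard org.apache.hadoop.io and org.apache.hadoop.mapreduce imports
// assumed), under the assumptions that each input file is one document, the
// file name serves as the document id, and each document is processed by a
// single map task. The real classes may differ.
public static class DFMap extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Set<String> seen = new HashSet<>();

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String doc = ((FileSplit) context.getInputSplit()).getPath().getName();
        for (String term : value.toString().split("\\s+")) {
            // Emit each (term, document) pair at most once, so the reducer's
            // sum equals the number of documents containing the term.
            if (!term.isEmpty() && seen.add(term + "\u0001" + doc)) {
                context.write(new Text(term), ONE);
            }
        }
    }
}

public static class DFReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    public void reduce(Text term, Iterable<IntWritable> counts, Context context)
            throws IOException, InterruptedException {
        int df = 0;
        for (IntWritable c : counts) {
            df += c.get();
        }
        context.write(term, new IntWritable(df));
    }
}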
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJobName("MaxThreeLabel");
    job.setJarByClass(MaxThreeLabel.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(TextArrayWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(MaxThreeLabelMap.class);
    job.setReducerClass(MaxThreeLabelReduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean wait = job.waitForCompletion(true);
    System.exit(wait ? 0 : 1);
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJobName("TF-IDFCount");
    job.setJarByClass(TF_IDF.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(TextArrayWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setMapperClass(TF_IDFMap.class);
    job.setReducerClass(TF_IDFReduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileInputFormat.addInputPath(job, new Path(args[1]));
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    boolean wait = job.waitForCompletion(true);
    System.exit(wait ? 0 : 1);
}
public Job createJob(boolean failMappers, boolean failReducers, Path inputFile) throws IOException {
    Configuration conf = getConf();
    conf.setBoolean(FAIL_MAP, failMappers);
    conf.setBoolean(FAIL_REDUCE, failReducers);
    Job job = Job.getInstance(conf, "fail");
    job.setJarByClass(FailJob.class);
    job.setMapperClass(FailMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(FailReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setJobName("Fail job");
    FileInputFormat.addInputPath(job, inputFile);
    return job;
}
private static void runTestLazyOutput(Configuration conf, Path output, int numReducers,
        boolean createLazily) throws Exception {
    Job job = Job.getInstance(conf, "Test-Lazy-Output");
    FileInputFormat.setInputPaths(job, INPUT);
    FileOutputFormat.setOutputPath(job, output);
    job.setJarByClass(TestMapReduceLazyOutput.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(numReducers);
    job.setMapperClass(TestMapper.class);
    job.setReducerClass(TestReducer.class);
    if (createLazily) {
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    } else {
        job.setOutputFormatClass(TextOutputFormat.class);
    }
    assertTrue(job.waitForCompletion(true));
}
public static void main(String[] args) throws Exception {
    Path outDir = new Path("output");
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "user name check");
    job.setJarByClass(UserNamePermission.class);
    job.setMapperClass(UserNamePermission.UserNameMapper.class);
    job.setCombinerClass(UserNamePermission.UserNameReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(UserNamePermission.UserNameReducer.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, outDir);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    this.fieldSeparator = conf.get(FieldSelectionHelper.DATA_FIELD_SEPERATOR, "\t");
    this.mapOutputKeyValueSpec = conf.get(FieldSelectionHelper.MAP_OUTPUT_KEY_VALUE_SPEC, "0-:");
    try {
        this.ignoreInputKey = TextInputFormat.class.getCanonicalName().equals(
            context.getInputFormatClass().getCanonicalName());
    } catch (ClassNotFoundException e) {
        throw new IOException("Input format class not found", e);
    }
    allMapValueFieldsFrom = FieldSelectionHelper.parseOutputKeyValueSpec(
        mapOutputKeyValueSpec, mapOutputKeyFieldList, mapOutputValueFieldList);
    LOG.info(FieldSelectionHelper.specToString(fieldSeparator, mapOutputKeyValueSpec,
        allMapValueFieldsFrom, mapOutputKeyFieldList, mapOutputValueFieldList)
        + "\nignoreInputKey:" + ignoreInputKey);
}
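// For context, FieldSelectionHelper's key/value spec has the form
// "keyFields:valueFields", where a field is an index or an open range such
// as "0-". A hypothetical driver-side configuration for the mapper above;
// the separator and field indices below are made-up examples:
Configuration conf = new Configuration();
conf.set(FieldSelectionHelper.DATA_FIELD_SEPERATOR, ",");
// Key = fields 3 and 4; value = field 0 and everything after it.
conf.set(FieldSelectionHelper.MAP_OUTPUT_KEY_VALUE_SPEC, "3,4:0-");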
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJarByClass(Multiplication.class);
    job.setReducerClass(MultiplicationReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    // MultipleInputs assigns one mapper per input path; job-level
    // setMapperClass() or ChainMapper wiring would override this, so the
    // mappers are configured through MultipleInputs alone.
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, CooccurrenceMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, RatingMapper.class);
    TextOutputFormat.setOutputPath(job, new Path(args[2]));
    job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setMapperClass(SumMapper.class);
    job.setReducerClass(SumReducer.class);
    job.setJarByClass(Sum.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    TextInputFormat.setInputPaths(job, new Path(args[0]));
    TextOutputFormat.setOutputPath(job, new Path(args[1]));
    job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setMapperClass(DataDividerMapper.class);
    job.setReducerClass(DataDividerReducer.class);
    job.setJarByClass(DataDividerByUser.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    TextInputFormat.setInputPaths(job, new Path(args[0]));
    TextOutputFormat.setOutputPath(job, new Path(args[1]));
    job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setMapperClass(NormalizeMapper.class);
    job.setReducerClass(NormalizeReducer.class);
    job.setJarByClass(Normalize.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    TextInputFormat.setInputPaths(job, new Path(args[0]));
    TextOutputFormat.setOutputPath(job, new Path(args[1]));
    job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJarByClass(UnitMultiplication.class);
    job.setReducerClass(MultiplicationReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // MultipleInputs assigns one mapper per input path, so no job-level
    // setMapperClass or ChainMapper wiring is needed.
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class);
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.setFloat("beta", Float.parseFloat(args[3]));
    Job job = Job.getInstance(conf);
    job.setJarByClass(UnitMultiplication.class);
    job.setReducerClass(MultiplicationReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class);
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.setFloat("beta", Float.parseFloat(args[3]));
    Job job = Job.getInstance(conf);
    job.setJarByClass(UnitSum.class);
    job.setReducerClass(SumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, PassMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, BetaMapper.class);
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    job.waitForCompletion(true);
}
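// The three drivers above route two disjoint inputs through MultipleInputs,
// so each record passes through exactly one mapper. ChainMapper addresses a
// different need: piping every record through several mappers in sequence,
// with each stage's output types matching the next stage's input types. A
// minimal sketch with hypothetical FirstMapper and SecondMapper classes:
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "chained mappers");
ChainMapper.addMapper(job, FirstMapper.class,
        LongWritable.class, Text.class,    // stage 1 input
        Text.class, Text.class,            // stage 1 output
        new Configuration(false));
ChainMapper.addMapper(job, SecondMapper.class,
        Text.class, Text.class,            // must match stage 1's output
        Text.class, DoubleWritable.class,  // final map output
        new Configuration(false));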
public static void main(String[] args) throws Exception {
    BasicConfigurator.configure();
    Configuration conf = new Configuration();
    conf.setQuietMode(true);

    Job job = Job.getInstance(conf, "WordCount");
    job.setJarByClass(HadoopWordCount.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1] + "_" + System.currentTimeMillis()));

    long t = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("TotalTime=" + (System.currentTimeMillis() - t));
}
@Override
public void setupJob(Job job) {
    job.setJarByClass(SortJob.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(SortMap.class);
    job.setReducerClass(SortReducer.class);
    job.setMapOutputKeyClass(ReadKey.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setSortComparatorClass(ReadKeyComparator.class);
    job.setPartitionerClass(ReadKeyPartitioner.class);
    job.setGroupingComparatorClass(ReadKeyGroupingComparator.class);
}
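// The comparator, partitioner, and grouping-comparator settings above form
// the classic secondary-sort pattern: the sort comparator orders composite
// keys, while the partitioner and grouping comparator keep all records that
// share a natural key in one reduce() call. ReadKey's fields are not shown in
// this listing; a minimal sketch of what such a grouping comparator could
// look like, assuming a hypothetical getNaturalKey() accessor on ReadKey:
public static class ReadKeyGroupingComparator extends WritableComparator {
    protected ReadKeyGroupingComparator() {
        super(ReadKey.class, true); // instantiate keys for deserialization
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        // Group solely by the natural key, ignoring the secondary sort fields.
        return ((ReadKey) a).getNaturalKey().compareTo(((ReadKey) b).getNaturalKey());
    }
}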
public static void main(String[] args) throws IOException, InterruptedException,
        ClassNotFoundException, URISyntaxException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: Question4 <cacheFile> <in> <out>");
        System.exit(3);
    }
    // Set the cache file path before the Job snapshots the configuration;
    // changes to conf after Job creation would not reach the job.
    conf.set("cachefile", otherArgs[0]);

    Job job = Job.getInstance(conf, "Question4");
    job.addCacheFile(new URI(otherArgs[0]));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(FloatWritable.class);
    job.setJarByClass(Question4.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
    job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "KNN");
    job.setJarByClass(KNN.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setMapperClass(KnnMapper.class);
    job.setReducerClass(KnnReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Reduce-side join");
    job.setJarByClass(ReduceJoin.class);
    job.setReducerClass(ReduceJoinReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, SalesRecordMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, AccountRecordMapper.class);
    Path outputPath = new Path(args[2]);
    FileOutputFormat.setOutputPath(job, outputPath);
    // Remove any stale output from a previous run before the job starts.
    outputPath.getFileSystem(conf).delete(outputPath, true);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
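// SalesRecordMapper and AccountRecordMapper are not shown. In a reduce-side
// join, each mapper emits the join key and tags its value with the record's
// source so the reducer can match the two sides. A hypothetical sketch of the
// sales side; the record layout is assumed, not taken from the source:
public static class SalesRecordMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t", 2);
        // fields[0] = account id (join key); the "sales" tag identifies the side.
        context.write(new Text(fields[0]), new Text("sales\t" + fields[1]));
    }
}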
@Override
public int run(String[] args) throws Exception {
    Opts opts = new Opts();
    opts.parseArgs(WordCount.class.getName(), args);

    Job job = Job.getInstance(getConf());
    job.setJobName(WordCount.class.getName());
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, new Path(opts.inputDirectory));
    job.setMapperClass(MapClass.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Mutation.class);
    opts.setAccumuloConfigs(job);
    return job.waitForCompletion(true) ? 0 : 1;
}
@Test
public void readInputFormat() throws Exception {
    Map<String, Object> paramMap = new HashMap<>();
    paramMap.put(FileSystemInput.FORMAT_CONFIG, "input-format");
    paramMap.put(FileSystemInput.PATH_CONFIG, FileSystemInput.class.getResource(CSV_DATA).getPath());
    paramMap.put(FileSystemInput.INPUT_FORMAT_TYPE_CONFIG, TextInputFormat.class.getCanonicalName());
    paramMap.put(FileSystemInput.INPUT_FORMAT_KEY_CONFIG, LongWritable.class.getCanonicalName());
    paramMap.put(FileSystemInput.INPUT_FORMAT_VALUE_CONFIG, Text.class.getCanonicalName());
    paramMap.put("translator.type", DummyInputFormatTranslator.class.getCanonicalName());
    config = ConfigFactory.parseMap(paramMap);

    FileSystemInput formatInput = new FileSystemInput();
    formatInput.configure(config);
    Dataset<Row> results = formatInput.read();

    assertEquals("Invalid number of rows", 4, results.count());
    assertEquals("Invalid first row key", 0L, results.first().getLong(0));
    assertEquals("Invalid first row value", "One,Two,Three,Four", results.first().getString(1));
}
private boolean bigItemCount(String output) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(this.getConf(), "Counting items from " + this.input);
    job.setJarByClass(TopPIoverHadoop.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(this.input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    job.setMapperClass(ItemBigCountingMapper.class);
    job.setReducerClass(ItemBigCountingReducer.class);

    boolean success = job.waitForCompletion(true);
    if (success) {
        // Each reducer output record is one distinct item, so the record
        // count bounds the rebased item IDs.
        Counter rebasingMaxID = job.getCounters().findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS);
        this.getConf().setInt(KEY_REBASING_MAX_ID, (int) rebasingMaxID.getValue());
    }
    return success;
}
private boolean filterInput(String output, String rebasingMapPath)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(this.getConf(), "Computing items remapping for " + this.input);
    job.setJarByClass(TopPIoverHadoop.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(ConcatenatedTransactionsWritable.class);
    DistCache.copyToCache(job, rebasingMapPath);
    FileInputFormat.addInputPath(job, new Path(this.input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    job.setMapperClass(FilteringMapper.class);
    job.setNumReduceTasks(0);
    return job.waitForCompletion(true);
}
private static void runMapReduceJob(String featureName, Map<String, String> dsConf,
        Path mapredCSVFilePath) throws Exception {
    Job job = Job.getInstance(new Configuration());
    job.setJobName("GeoMesa GDELT Ingest");
    job.setJarByClass(GDELTIngest.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(GDELTIngestMapper.class);
    job.setOutputFormatClass(GeoMesaOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SimpleFeature.class);
    job.setNumReduceTasks(0);
    FileInputFormat.setInputPaths(job, mapredCSVFilePath);
    GeoMesaOutputFormat.configureDataStore(job, dsConf);
    job.getConfiguration().set(FEATURE_NAME, featureName);
    // waitForCompletion() submits the job itself; no separate submit() call is needed.
    if (!job.waitForCompletion(true)) {
        throw new Exception("Job failed");
    }
}
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: WordCount <input path> <result path>");
        return;
    }
    final String inputPath = args[0];
    final String outputPath = args[1];
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Set up the Hadoop input format.
    Job job = Job.getInstance();
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat =
        new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
    TextInputFormat.addInputPath(job, new Path(inputPath));

    // Create a Flink DataSet backed by the Hadoop input format.
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

    // Tokenize the line and convert from Writable "Text" to String for better handling.
    DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

    // Sum up the words.
    DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

    // Convert String back to Writable "Text" for use with the Hadoop output format.
    DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

    // Set up the Hadoop output format. The separator is set under both the
    // new and the old configuration key, since either may apply depending on
    // the Hadoop version in use.
    HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat =
        new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
    hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
    hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " ");
    TextOutputFormat.setOutputPath(job, new Path(outputPath));

    // Output and execute.
    hadoopResult.output(hadoopOutputFormat);
    env.execute("Word Count");
}
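// The Tokenizer referenced above is not shown. A minimal sketch, assuming a
// plain Flink FlatMapFunction over the Tuple2 types used by the driver:
public static final class Tokenizer
        implements FlatMapFunction<Tuple2<LongWritable, Text>, Tuple2<String, Integer>> {
    @Override
    public void flatMap(Tuple2<LongWritable, Text> value, Collector<Tuple2<String, Integer>> out) {
        // Split the Hadoop Text line into lowercase word tokens and emit (word, 1).
        for (String token : value.f1.toString().toLowerCase().split("\\W+")) {
            if (!token.isEmpty()) {
                out.collect(new Tuple2<>(token, 1));
            }
        }
    }
}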
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCountImproved.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextInputFormat.addInputPath(job, new Path(args[0]));
    TextOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}
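// TokenizerMapper is not shown above; a minimal sketch of a mapper compatible
// with this driver's Text/LongWritable output types and the stock
// LongSumReducer combiner/reducer (the actual class may differ):
public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    private static final LongWritable ONE = new LongWritable(1);
    private final Text word = new Text();

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Emit (token, 1) for every whitespace-delimited token in the line.
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, ONE);
        }
    }
}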