@SuppressWarnings("unchecked") public void testAddInputPathWithMapper() throws IOException { final Job conf = Job.getInstance(); MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class, MapClass.class); MultipleInputs.addInputPath(conf, new Path("/bar"), KeyValueTextInputFormat.class, KeyValueMapClass.class); final Map<Path, InputFormat> inputs = MultipleInputs .getInputFormatMap(conf); final Map<Path, Class<? extends Mapper>> maps = MultipleInputs .getMapperTypeMap(conf); assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass()); assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar")) .getClass()); assertEquals(MapClass.class, maps.get(new Path("/foo"))); assertEquals(KeyValueMapClass.class, maps.get(new Path("/bar"))); }
private Job jobListFriends(String inputPath, String outputPath)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job();
  job.setJarByClass(WordCount.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  job.setInputFormatClass(KeyValueTextInputFormat.class); // Need to change the import
  job.setOutputFormatClass(TextOutputFormat.class);

  FileInputFormat.addInputPath(job, new Path(inputPath));
  FileOutputFormat.setOutputPath(job, new Path(outputPath));

  job.waitForCompletion(true);
  return job;
}
private Job jobRecommendFriends(String inputPath, String outputPath)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job1 = new Job();
  job1.setJarByClass(WordCount.class);
  job1.setOutputKeyClass(Text.class);
  job1.setOutputValueClass(Text.class);
  job1.setMapperClass(MapRecommendation.class);
  job1.setReducerClass(ReduceRecommendation.class);
  job1.setOutputFormatClass(TextOutputFormat.class);
  job1.setInputFormatClass(KeyValueTextInputFormat.class);

  FileInputFormat.addInputPath(job1, new Path(inputPath));
  FileOutputFormat.setOutputPath(job1, new Path(outputPath));

  job1.waitForCompletion(true);
  return job1;
}
@SuppressWarnings("unchecked") public void testAddInputPathWithMapper() throws IOException { final Job conf = new Job(); MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class, MapClass.class); MultipleInputs.addInputPath(conf, new Path("/bar"), KeyValueTextInputFormat.class, KeyValueMapClass.class); final Map<Path, InputFormat> inputs = MultipleInputs .getInputFormatMap(conf); final Map<Path, Class<? extends Mapper>> maps = MultipleInputs .getMapperTypeMap(conf); assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass()); assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar")) .getClass()); assertEquals(MapClass.class, maps.get(new Path("/foo"))); assertEquals(KeyValueMapClass.class, maps.get(new Path("/bar"))); }
public static boolean runCalcJob(Configuration conf, Path input,
    Path outputPath) throws Exception {
  Job job = new Job(conf);
  job.setJarByClass(Main.class);
  job.setMapperClass(CalcMapReduce.Map.class);
  job.setReducerClass(CalcMapReduce.Reduce.class);
  job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setMapOutputKeyClass(CalcMapReduce.TextPair.class);
  job.setMapOutputValueClass(IntWritable.class);

  FileInputFormat.setInputPaths(job, input);
  FileOutputFormat.setOutputPath(job, outputPath);

  return job.waitForCompletion(true);
}
public static void runSortJob(Configuration conf, Path input,
    Path outputPath) throws Exception {
  Job job = new Job(conf);
  job.setJarByClass(Main.class);
  job.setMapperClass(SortMapReduce.Map.class);
  job.setReducerClass(SortMapReduce.Reduce.class);
  job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setMapOutputKeyClass(Person.class);
  job.setMapOutputValueClass(Person.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  job.setPartitionerClass(PersonNamePartitioner.class);
  job.setSortComparatorClass(PersonComparator.class);
  job.setGroupingComparatorClass(PersonNameComparator.class);

  FileInputFormat.setInputPaths(job, input);
  FileOutputFormat.setOutputPath(job, outputPath);

  job.waitForCompletion(true);
}
public static void runJob(Configuration conf, Path inputPath,
    Path outputPath) throws Exception {
  Job job = new Job(conf);
  job.setJarByClass(UniqueHashedKeyJob.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);

  outputPath.getFileSystem(conf).delete(outputPath, true);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  if (!job.waitForCompletion(true)) {
    throw new Exception("Job failed");
  }
}
private static void startSSSPJob(String inputPath, String outputPath)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  Job job = new Job(conf, "SSSP Job");
  job.setJarByClass(Driver.class);
  job.setMapperClass(SSSPMapper.class);
  job.setReducerClass(SSSPReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  // Add paths via the FileInputFormat/FileOutputFormat base classes; the job
  // itself reads with KeyValueTextInputFormat and writes with TextOutputFormat.
  FileInputFormat.addInputPath(job, new Path(inputPath));
  FileOutputFormat.setOutputPath(job, new Path(outputPath));

  job.waitForCompletion(true);
}
@Test
public void testAddInputPathWithMapper() throws IOException {
  final Job conf = Job.getInstance();
  MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
      MapClass.class);
  MultipleInputs.addInputPath(conf, new Path("/bar"),
      KeyValueTextInputFormat.class, KeyValueMapClass.class);
  final Map<Path, InputFormat> inputs = MultipleInputs
      .getInputFormatMap(conf);
  final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
      .getMapperTypeMap(conf);

  assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
  assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
      .getClass());
  assertEquals(MapClass.class, maps.get(new Path("/foo")));
  assertEquals(KeyValueMapClass.class, maps.get(new Path("/bar")));
}
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.out.printf("Usage: InvertedIndex <input dir> <output dir>\n");
    return -1;
  }

  Job job = new Job(getConf());
  job.setJarByClass(InvertedIndex.class);
  job.setJobName("Inverted Index");

  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  job.setMapperClass(IndexMapper.class);
  job.setReducerClass(IndexReducer.class);

  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
public static void main(String[] args) throws Exception {
  Job job = Job.getInstance(new Configuration());

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(TextArrayWritable.class);

  job.setMapperClass(TopWordsMap.class);
  job.setReducerClass(TopWordsReduce.class);
  job.setNumReduceTasks(1);

  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  job.setJarByClass(TopWords2.class);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = new Job(conf, "tweetLanguage");
  job.setJarByClass(TweetLanguage.class);
  job.setMapperClass(TweetLanguageMapper.class);
  job.setReducerClass(MapReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(KeyValueTextInputFormat.class);

  FileInputFormat.addInputPath(job,
      new Path("/path/to/twitter_sample.*.queue"));
  FileOutputFormat.setOutputPath(job, new Path("output_tweetLang_clean"));

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = new Job(conf, "userDescriptionLanguage");
  job.setJarByClass(UserDescriptionLanguage.class);
  job.setMapperClass(UserDescriptionLanguageMapper.class);
  job.setReducerClass(MapReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(KeyValueTextInputFormat.class);

  FileInputFormat.addInputPath(job,
      new Path("/path/to/twitter_sample.*.queue"));
  FileOutputFormat.setOutputPath(job, new Path("output_userLang"));

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = new Job(conf, "userMentions");
  job.setJarByClass(UserMentions.class);
  job.setMapperClass(UserMentionMapper.class);
  job.setReducerClass(SumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setInputFormatClass(KeyValueTextInputFormat.class);

  FileInputFormat.addInputPath(job,
      new Path("/path/to/twitter_sample.*.queue"));
  FileOutputFormat.setOutputPath(job, new Path("output_userMentions"));

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = new Job(conf, "userMentions");
  job.setJarByClass(GeotagExtract.class);
  job.setMapperClass(GeotagMapper.class);
  // job.setReducerClass(...); // Identity reducer
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(KeyValueTextInputFormat.class);

  FileInputFormat.addInputPath(job,
      new Path("/path/to/twitter_sample.*.queue"));
  FileOutputFormat.setOutputPath(job, new Path("output_geotag"));

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void runJob(String input, String output) throws Exception {
  Configuration conf = new Configuration();
  conf.set("keep.failed.task.files", "true");

  Job job = new Job(conf);
  job.setJarByClass(OptimizedMRForDebugging.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);

  Path outputPath = new Path(output);
  FileInputFormat.setInputPaths(job, input);
  FileOutputFormat.setOutputPath(job, outputPath);
  outputPath.getFileSystem(conf).delete(outputPath, true);

  job.waitForCompletion(true);
}
@SuppressWarnings("unchecked") public void testAddInputPathWithFormat() throws IOException { final Job conf = Job.getInstance(); MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class); MultipleInputs.addInputPath(conf, new Path("/bar"), KeyValueTextInputFormat.class); final Map<Path, InputFormat> inputs = MultipleInputs .getInputFormatMap(conf); assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass()); assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar")) .getClass()); }
@Test(expected = SparkException.class)
public void readInputFormatMismatchTranslator() throws Exception {
  Map<String, Object> paramMap = new HashMap<>();
  paramMap.put(FileSystemInput.FORMAT_CONFIG, "input-format");
  paramMap.put(FileSystemInput.PATH_CONFIG,
      FileSystemInput.class.getResource(CSV_DATA).getPath());
  paramMap.put(FileSystemInput.INPUT_FORMAT_TYPE_CONFIG,
      KeyValueTextInputFormat.class.getCanonicalName());
  paramMap.put(FileSystemInput.INPUT_FORMAT_KEY_CONFIG,
      Text.class.getCanonicalName());
  paramMap.put(FileSystemInput.INPUT_FORMAT_VALUE_CONFIG,
      Text.class.getCanonicalName());
  paramMap.put("translator.type",
      DummyInputFormatTranslator.class.getCanonicalName());
  config = ConfigFactory.parseMap(paramMap);

  FileSystemInput formatInput = new FileSystemInput();
  formatInput.configure(config);
  formatInput.read().show();
}
@SuppressWarnings("unchecked") public void testAddInputPathWithFormat() throws IOException { final Job conf = new Job(); MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class); MultipleInputs.addInputPath(conf, new Path("/bar"), KeyValueTextInputFormat.class); final Map<Path, InputFormat> inputs = MultipleInputs .getInputFormatMap(conf); assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass()); assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar")) .getClass()); }
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
  Cli cli = Cli.builder().setArgs(args)
      .addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();
  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  Job job = new Job(conf);
  job.setJarByClass(XmlMapReduceWriter.class);
  job.setReducerClass(Reduce.class);
  job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  if (job.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
public static double calcPageRank(Path inputPath, Path outputPath,
    int numNodes) throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(Reduce.CONF_NUM_NODES_GRAPH, numNodes);

  Job job = new Job(conf);
  job.setJarByClass(Main.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  if (!job.waitForCompletion(true)) {
    throw new Exception("Job failed");
  }

  long summedConvergence = job.getCounters().findCounter(
      Reduce.Counter.CONV_DELTAS).getValue();
  double convergence =
      ((double) summedConvergence / Reduce.CONVERGENCE_SCALING_FACTOR)
          / (double) numNodes;

  System.out.println("======================================");
  System.out.println("= Num nodes: " + numNodes);
  System.out.println("= Summed convergence: " + summedConvergence);
  System.out.println("= Convergence: " + convergence);
  System.out.println("======================================");

  return convergence;
}
public static void runJob(Configuration conf, Path userLogsPath,
    Path usersPath, Path outputPath) throws Exception {
  FileSystem fs = usersPath.getFileSystem(conf);
  FileStatus usersStatus = fs.getFileStatus(usersPath);

  if (usersStatus.isDir()) {
    for (FileStatus f : fs.listStatus(usersPath)) {
      if (f.getPath().getName().startsWith("part")) {
        DistributedCache.addCacheFile(f.getPath().toUri(), conf);
      }
    }
  } else {
    DistributedCache.addCacheFile(usersPath.toUri(), conf);
  }

  Job job = new Job(conf);
  job.setJarByClass(FinalJoinJob.class);
  job.setMapperClass(GenericReplicatedJoin.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(KeyValueTextInputFormat.class);

  outputPath.getFileSystem(conf).delete(outputPath, true);

  FileInputFormat.setInputPaths(job, userLogsPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  if (!job.waitForCompletion(true)) {
    throw new Exception("Job failed");
  }
}
public static void runJob(Configuration conf, Path usersPath,
    Path uniqueUsersPath, Path outputPath) throws Exception {
  FileSystem fs = uniqueUsersPath.getFileSystem(conf);
  FileStatus uniqueUserStatus = fs.getFileStatus(uniqueUsersPath);

  if (uniqueUserStatus.isDir()) {
    for (FileStatus f : fs.listStatus(uniqueUsersPath)) {
      if (f.getPath().getName().startsWith("part")) {
        DistributedCache.addCacheFile(f.getPath().toUri(), conf);
      }
    }
  } else {
    DistributedCache.addCacheFile(uniqueUsersPath.toUri(), conf);
  }

  Job job = new Job(conf);
  job.setJarByClass(ReplicatedFilterJob.class);
  job.setMapperClass(ReplicatedFilterJob.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(KeyValueTextInputFormat.class);

  outputPath.getFileSystem(conf).delete(outputPath, true);

  FileInputFormat.setInputPaths(job, usersPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  if (!job.waitForCompletion(true)) {
    throw new Exception("Job failed");
  }
}
public static void runJob(Path inputPath, Path smallFilePath,
    Path outputPath) throws Exception {
  Configuration conf = new Configuration();

  FileSystem fs = smallFilePath.getFileSystem(conf);
  FileStatus smallFilePathStatus = fs.getFileStatus(smallFilePath);

  if (smallFilePathStatus.isDir()) {
    for (FileStatus f : fs.listStatus(smallFilePath)) {
      if (f.getPath().getName().startsWith("part")) {
        DistributedCache.addCacheFile(f.getPath().toUri(), conf);
      }
    }
  } else {
    DistributedCache.addCacheFile(smallFilePath.toUri(), conf);
  }

  Job job = new Job(conf);
  job.setJarByClass(Main.class);
  job.setMapperClass(GenericReplicatedJoin.class);
  job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setNumReduceTasks(0);

  outputPath.getFileSystem(conf).delete(outputPath, true);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  job.waitForCompletion(true);
}
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
  Cli cli = Cli.builder().setArgs(args).addOptions(Options.values()).build();
  int result = cli.runCmd();
  if (result != 0) {
    return result;
  }

  Path usersPath = new Path(cli.getArgValueAsString(Options.USERS));
  Path userLogsPath = new Path(cli.getArgValueAsString(Options.USER_LOGS));
  Path outputPath = new Path(cli.getArgValueAsString(Options.OUTPUT));

  Configuration conf = super.getConf();

  Job job = new Job(conf);
  job.setJarByClass(CompositeJoin.class);
  job.setMapperClass(JoinMap.class);

  job.setInputFormatClass(CompositeInputFormat.class);
  job.getConfiguration().set(CompositeInputFormat.JOIN_EXPR,
      CompositeInputFormat.compose("inner", KeyValueTextInputFormat.class,
          usersPath, userLogsPath));

  job.setNumReduceTasks(0);

  FileInputFormat.setInputPaths(job, userLogsPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
  Cli cli = Cli.builder().setArgs(args)
      .addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();
  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  Job job = new Job(conf);
  job.setJarByClass(SortMapReduce.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setMapOutputKeyClass(Person.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  job.setPartitionerClass(PersonNamePartitioner.class);
  job.setSortComparatorClass(PersonComparator.class);
  job.setGroupingComparatorClass(PersonNameComparator.class);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  if (job.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
public Job runJob(Configuration conf, Path inputPath, Path outputPath)
    throws ClassNotFoundException, IOException, InterruptedException {
  Job job = new Job(conf);
  job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setMapOutputKeyClass(Text.class);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  job.waitForCompletion(false);
  return job;
}