@Override
public void configureInputFormat(Job job, String tableName,
    String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {
  // Write a line of text into a file so that we can get
  // a record to the map task.
  Path dir = new Path(this.options.getTempDir());
  Path p = new Path(dir, "sqoop-dummy-import-job-file.txt");
  FileSystem fs = FileSystem.getLocal(this.options.getConf());
  if (fs.exists(p)) {
    boolean result = fs.delete(p, false);
    assertTrue("Couldn't delete temp file!", result);
  }

  BufferedWriter w = new BufferedWriter(
      new OutputStreamWriter(fs.create(p)));
  w.append("This is a line!");
  w.close();

  FileInputFormat.addInputPath(job, p);

  // And set the InputFormat itself.
  super.configureInputFormat(job, tableName, tableClassName, splitByCol);
}
public static void run(Configuration conf, Path inputPath, Path output,
    double params)
    throws IOException, ClassNotFoundException, InterruptedException {
  String jobName = "calculating parameter";
  conf.set("params", String.valueOf(params));

  Job job = new Job(conf, jobName);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(indexToCountWritable.class);
  job.setOutputKeyClass(twoDimensionIndexWritable.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setMapperClass(CalParamsMapper.class);
  job.setReducerClass(CalParamsReducer.class);

  FileInputFormat.addInputPath(job, inputPath);
  FileOutputFormat.setOutputPath(job, output);

  job.setJarByClass(LDADriver.class);
  if (!job.waitForCompletion(true)) {
    throw new InterruptedException("calculating parameter failed");
  }
}
public static void cut(String name, String in) throws Exception {
  Job job = createJob(name, in);
  job.setNumReduceTasks(0);
  boolean success = job.waitForCompletion(true);
  if (success) {
    // MapReduce reads an input and writes the result to a new location.
    // Hence it cannot modify data in place, so we have to replace the
    // input with the output.
    Path output = FileOutputFormat.getOutputPath(job);
    FileSystem fs = output.getFileSystem(job.getConfiguration());
    Path[] inputs = FileInputFormat.getInputPaths(job);
    for (int i = 0; i < inputs.length; i++) {
      fs.delete(inputs[i], true);
    }
    fs.rename(output, inputs[0]);
  }
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  conf.set("xmlinput.start", "<page>");
  conf.set("xmlinput.end", "</page>");

  Job job = Job.getInstance(conf);
  job.setJobName("TermFrequencyCount");
  job.setJarByClass(TF.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntArrayWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);

  job.setMapperClass(TFMap.class);
  job.setReducerClass(TFReduce.class);

  job.setInputFormatClass(XmlInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  boolean wait = job.waitForCompletion(true);
  System.exit(wait ? 0 : 1);
}
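// The IntArrayWritable used above (and the TextArrayWritable used by the
// MaxThreeLabel and TF-IDF drivers below) is not shown in this file. A
// minimal sketch, assuming it follows the standard ArrayWritable
// subclassing idiom; the real class may add a toString() override or
// other helpers.
public class IntArrayWritable extends ArrayWritable {
  public IntArrayWritable() {
    super(IntWritable.class);
  }

  public IntArrayWritable(IntWritable[] values) {
    super(IntWritable.class, values);
  }
}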
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  conf.set("xmlinput.start", "<page>");
  conf.set("xmlinput.end", "</page>");

  Job job = Job.getInstance(conf);
  job.setJobName("ExtractPages");
  job.setJarByClass(ExtractPage.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  job.setMapperClass(ExtractPageMap.class);
  job.setReducerClass(ExtractPageReduce.class);

  job.setInputFormatClass(XmlInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  boolean wait = job.waitForCompletion(true);
  System.exit(wait ? 0 : 1);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();

  Job job = Job.getInstance(conf);
  job.setJobName("DocumentFrequencyCount");
  job.setJarByClass(DF.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  job.setMapperClass(DFMap.class);
  job.setReducerClass(DFReduce.class);

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  boolean wait = job.waitForCompletion(true);
  System.exit(wait ? 0 : 1);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();

  Job job = Job.getInstance(conf);
  job.setJobName("MaxThreeLabel");
  job.setJarByClass(MaxThreeLabel.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(TextArrayWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  job.setMapperClass(MaxThreeLabelMap.class);
  job.setReducerClass(MaxThreeLabelReduce.class);

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  boolean wait = job.waitForCompletion(true);
  System.exit(wait ? 0 : 1);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  conf.set("xmlinput.start", "<page>");
  conf.set("xmlinput.end", "</page>");

  Job job = Job.getInstance(conf);
  job.setJobName("PageWordCount");
  job.setJarByClass(PageWordCount.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  job.setMapperClass(PageWordCountMap.class);
  job.setCombinerClass(PageWordCountReduce.class);
  job.setReducerClass(PageWordCountReduce.class);

  job.setInputFormatClass(XmlInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();

  Job job = Job.getInstance(conf);
  job.setJobName("TF-IDFCount");
  job.setJarByClass(TF_IDF.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(TextArrayWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);

  job.setMapperClass(TF_IDFMap.class);
  job.setReducerClass(TF_IDFReduce.class);

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileInputFormat.addInputPath(job, new Path(args[1]));
  FileOutputFormat.setOutputPath(job, new Path(args[2]));

  boolean wait = job.waitForCompletion(true);
  System.exit(wait ? 0 : 1);
}
public static void runJob(Path input, Path output, String vectorClassName,
    Configuration config)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = config;
  conf.set("vector.implementation.class.name", vectorClassName);

  Job job = new Job(conf, "Input Driver running over input: " + input);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(VectorWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setMapperClass(InputMapper.class);
  job.setNumReduceTasks(0);
  job.setJarByClass(InputDriver.class);

  FileInputFormat.addInputPath(job, input);
  FileOutputFormat.setOutputPath(job, output);

  job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "maxaverage");

  job.setMapperClass(MaximumAverageMapper.class);
  job.setReducerClass(MaximumAverageReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(FloatWritable.class);

  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  if (!job.waitForCompletion(true))
    return;
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "test");

  job.setMapperClass(testMapper.class);
  job.setPartitionerClass(testPartitioner.class);
  job.setReducerClass(testReducer.class);
  job.setNumReduceTasks(10);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  if (!job.waitForCompletion(true))
    return;
}
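// Hypothetical sketch of the testPartitioner referenced above. The real
// class is not shown here; this assumes a plain hash partitioner over the
// map output key (Text/IntWritable, per the driver's output classes),
// which is the most common custom-partitioner shape.
public class testPartitioner extends Partitioner<Text, IntWritable> {
  @Override
  public int getPartition(Text key, IntWritable value, int numPartitions) {
    // Mask the sign bit so the modulo result is always non-negative.
    return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
  }
}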
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "systemuser");

  //job.setMapperClass(SystemUserMapper.class);
  job.setMapperClass(DailyCount.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  if (!job.waitForCompletion(true))
    return;
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "maxtemp");

  job.setMapperClass(MaxTempMapper.class);
  job.setReducerClass(MaxTempReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(FloatWritable.class);

  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  if (!job.waitForCompletion(true))
    return;
}
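// The MaxTempMapper wired in above is not shown in this file. A minimal
// sketch of what such a mapper might look like, assuming tab-separated
// "<station>\t<temperature>" input lines (the record format is an
// assumption, not taken from this codebase):
public class MaxTempMapper
    extends Mapper<LongWritable, Text, Text, FloatWritable> {
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    String[] fields = value.toString().split("\t");
    if (fields.length == 2) {
      // Emit (station, temperature); the reducer keeps the maximum.
      context.write(new Text(fields[0]),
          new FloatWritable(Float.parseFloat(fields[1])));
    }
  }
}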
@Override
protected void configureInputFormat(Job job, String tableName,
    String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {
  if (options.getOdpsTable() != null) {
    Configuration conf = job.getConfiguration();
    setInputFormatClass(OdpsExportInputFormat.class);
    conf.set(OdpsConstants.TABLE_NAME, options.getOdpsTable());
    conf.set(OdpsConstants.ACCESS_ID, options.getOdpsAccessID());
    conf.set(OdpsConstants.ACCESS_KEY, options.getOdpsAccessKey());
    conf.set(OdpsConstants.ENDPOINT, options.getOdpsEndPoint());
    conf.set(OdpsConstants.PROJECT, options.getOdpsProject());
    String partitionSpec = options.getOdpsPartitionSpec();
    if (partitionSpec != null) {
      conf.set(OdpsConstants.PARTITION_SPEC, partitionSpec);
    }
    setMapperClass(OdpsExportMapper.class);
  }
  super.configureInputFormat(job, tableName, tableClassName, splitByCol);
  if (!isHCatJob && options.getOdpsTable() == null) {
    FileInputFormat.addInputPath(job, getInputPath());
  }
}
public Job createJob() throws IOException {
  Configuration conf = getConf();
  conf.setInt(MRJobConfig.NUM_MAPS, 1);

  Job job = Job.getInstance(conf, "test");
  job.setNumReduceTasks(1);
  job.setJarByClass(CredentialsTestJob.class);
  job.setMapperClass(CredentialsTestJob.CredentialsTestMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(CredentialsTestJob.CredentialsTestReducer.class);
  job.setInputFormatClass(SleepJob.SleepInputFormat.class);
  job.setPartitionerClass(SleepJob.SleepJobPartitioner.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("test job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}
public Job createJob(boolean failMappers, boolean failReducers,
    Path inputFile) throws IOException {
  Configuration conf = getConf();
  conf.setBoolean(FAIL_MAP, failMappers);
  conf.setBoolean(FAIL_REDUCE, failReducers);

  Job job = Job.getInstance(conf, "fail");
  job.setJarByClass(FailJob.class);
  job.setMapperClass(FailMapper.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(FailReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Fail job");
  FileInputFormat.addInputPath(job, inputFile);
  return job;
}
/**
 * Creates a simple copy job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Input directories.
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir,
    Path... indirs) throws Exception {
  conf.setInt(MRJobConfig.NUM_MAPS, 3);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("DataMoveJob");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(DataCopyMapper.class);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  theJob.setReducerClass(DataCopyReducer.class);
  theJob.setNumReduceTasks(1);
  return theJob;
}
/**
 * Creates a simple fail job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Input directories.
 * @return Job initialized for a simple fail job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createFailJob(Configuration conf, Path outdir,
    Path... indirs) throws Exception {
  FileSystem fs = outdir.getFileSystem(conf);
  if (fs.exists(outdir)) {
    fs.delete(outdir, true);
  }
  conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("Fail-Job");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(FailMapper.class);
  theJob.setReducerClass(Reducer.class);
  theJob.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  return theJob;
}
public static Job createJob(Configuration conf, Path inDir, Path outDir,
    int numInputFiles, int numReds, String input) throws IOException {
  Job job = Job.getInstance(conf);
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (fs.exists(inDir)) {
    fs.delete(inDir, true);
  }
  fs.mkdirs(inDir);
  for (int i = 0; i < numInputFiles; ++i) {
    DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
    file.writeBytes(input);
    file.close();
  }

  FileInputFormat.setInputPaths(job, inDir);
  FileOutputFormat.setOutputPath(job, outDir);
  job.setNumReduceTasks(numReds);
  return job;
}
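// Illustrative use of the factory above in a test. The inDir/outDir names
// and the identity Mapper/Reducer choice are for demonstration only, not
// taken from this codebase:
Job job = createJob(new Configuration(), inDir, outDir, 2, 1,
    "hello world\n");
job.setMapperClass(Mapper.class);    // identity mapper
job.setReducerClass(Reducer.class);  // identity reducer
boolean succeeded = job.waitForCompletion(true);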
private static void runTestLazyOutput(Configuration conf, Path output,
    int numReducers, boolean createLazily) throws Exception {
  Job job = Job.getInstance(conf, "Test-Lazy-Output");

  FileInputFormat.setInputPaths(job, INPUT);
  FileOutputFormat.setOutputPath(job, output);

  job.setJarByClass(TestMapReduceLazyOutput.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(numReducers);

  job.setMapperClass(TestMapper.class);
  job.setReducerClass(TestReducer.class);

  if (createLazily) {
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
  } else {
    job.setOutputFormatClass(TextOutputFormat.class);
  }
  assertTrue(job.waitForCompletion(true));
}
public Job createJob(int numMapper, int numReducer, long mapSleepTime,
    int mapSleepCount, long reduceSleepTime, int reduceSleepCount)
    throws IOException {
  Configuration conf = getConf();
  conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
  conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
  conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
  conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
  conf.setInt(MRJobConfig.NUM_MAPS, numMapper);

  Job job = Job.getInstance(conf, "sleep");
  job.setNumReduceTasks(numReducer);
  job.setJarByClass(SleepJob.class);
  job.setMapperClass(SleepMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(SleepReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(SleepInputFormat.class);
  job.setPartitionerClass(SleepJobPartitioner.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Sleep job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}
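// Illustrative driver code for the factory above. SleepJob is a Tool, so
// a Configuration must be set before createJob() reads it via getConf();
// the parameter values here are arbitrary examples:
SleepJob sleepJob = new SleepJob();
sleepJob.setConf(new Configuration());
// 4 mappers and 1 reducer, each sleeping 100 ms once.
Job job = sleepJob.createJob(4, 1, 100L, 1, 100L, 1);
boolean succeeded = job.waitForCompletion(true);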
/**
 * Run a test with a misconfigured number of mappers.
 * Expect failure.
 */
@Test
public void testInvalidMultiMapParallelism() throws Exception {
  Job job = Job.getInstance();

  Path inputPath = createMultiMapsInput();
  Path outputPath = getOutputPath();

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);

  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }

  job.setMapperClass(StressMapper.class);
  job.setReducerClass(CountingReducer.class);
  job.setNumReduceTasks(1);
  LocalJobRunner.setLocalMaxRunningMaps(job, -6);
  FileInputFormat.addInputPath(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  boolean success = job.waitForCompletion(true);
  assertFalse("Job succeeded somehow", success);
}
/**
 * @param conf configuration for the job
 * @param dirs the initial list of paths
 * @param recursive whether to traverse the paths recursively
 * @param inputFilter inputFilter to apply to the resulting paths
 * @param newApi whether using the mapred or mapreduce API
 * @throws InterruptedException
 * @throws IOException
 */
public LocatedFileStatusFetcher(Configuration conf, Path[] dirs,
    boolean recursive, PathFilter inputFilter, boolean newApi)
    throws InterruptedException, IOException {
  int numThreads = conf.getInt(FileInputFormat.LIST_STATUS_NUM_THREADS,
      FileInputFormat.DEFAULT_LIST_STATUS_NUM_THREADS);
  rawExec = Executors.newFixedThreadPool(
      numThreads,
      new ThreadFactoryBuilder().setDaemon(true)
          .setNameFormat("GetFileInfo #%d").build());
  exec = MoreExecutors.listeningDecorator(rawExec);
  resultQueue = new LinkedBlockingQueue<List<FileStatus>>();
  this.conf = conf;
  this.inputDirs = dirs;
  this.recursive = recursive;
  this.inputFilter = inputFilter;
  this.newApi = newApi;
}
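// Illustrative use of the constructor above: fetch located statuses for a
// set of input directories with the new (mapreduce) API semantics. The
// input path and the hidden-file filter shown here are assumptions;
// FileInputFormat normally supplies its own filter.
LocatedFileStatusFetcher fetcher = new LocatedFileStatusFetcher(
    conf, new Path[] { new Path("/data/in") }, true,
    path -> !path.getName().startsWith("_")
        && !path.getName().startsWith("."),
    true);
Iterable<FileStatus> statuses = fetcher.getFileStatuses();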
/**
 * Submit this job to mapred. The state becomes RUNNING if submission
 * is successful, FAILED otherwise.
 */
protected synchronized void submit() {
  try {
    Configuration conf = job.getConfiguration();
    if (conf.getBoolean(CREATE_DIR, false)) {
      FileSystem fs = FileSystem.get(conf);
      Path inputPaths[] = FileInputFormat.getInputPaths(job);
      for (int i = 0; i < inputPaths.length; i++) {
        if (!fs.exists(inputPaths[i])) {
          try {
            fs.mkdirs(inputPaths[i]);
          } catch (IOException e) {
            // Best-effort directory creation; submission below will
            // surface any real problem with the input paths.
          }
        }
      }
    }
    job.submit();
    this.state = State.RUNNING;
  } catch (Exception ioe) {
    LOG.info(getJobName() + " got an error while submitting ", ioe);
    this.state = State.FAILED;
    this.message = StringUtils.stringifyException(ioe);
  }
}
@Test
public void testListLocatedStatus() throws Exception {
  Configuration conf = getConfiguration();
  conf.setBoolean("fs.test.impl.disable.cache", false);
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      "test:///a1/a2");
  MockFileSystem mockFs =
      (MockFileSystem) new Path("test:///").getFileSystem(conf);
  Assert.assertEquals("listLocatedStatus already called", 0,
      mockFs.numListLocatedStatusCalls);
  JobConf job = new JobConf(conf);
  TextInputFormat fileInputFormat = new TextInputFormat();
  fileInputFormat.configure(job);
  InputSplit[] splits = fileInputFormat.getSplits(job, 1);
  Assert.assertEquals("Input splits are not correct", 2, splits.length);
  Assert.assertEquals("listLocatedStatus calls", 1,
      mockFs.numListLocatedStatusCalls);
  FileSystem.closeAll();
}
@Test
public void testSplitLocationInfo() throws Exception {
  Configuration conf = getConfiguration();
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      "test:///a1/a2");
  JobConf job = new JobConf(conf);
  TextInputFormat fileInputFormat = new TextInputFormat();
  fileInputFormat.configure(job);
  FileSplit[] splits = (FileSplit[]) fileInputFormat.getSplits(job, 1);
  String[] locations = splits[0].getLocations();
  Assert.assertEquals(2, locations.length);
  SplitLocationInfo[] locationInfo = splits[0].getLocationInfo();
  Assert.assertEquals(2, locationInfo.length);
  SplitLocationInfo localhostInfo = locations[0].equals("localhost") ?
      locationInfo[0] : locationInfo[1];
  SplitLocationInfo otherhostInfo = locations[0].equals("otherhost") ?
      locationInfo[0] : locationInfo[1];
  Assert.assertTrue(localhostInfo.isOnDisk());
  Assert.assertTrue(localhostInfo.isInMemory());
  Assert.assertTrue(otherhostInfo.isOnDisk());
  Assert.assertFalse(otherhostInfo.isInMemory());
}
@Test
public void testListStatusSimple() throws IOException {
  Configuration conf = new Configuration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);

  List<Path> expectedPaths =
      org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
          .configureTestSimple(conf, localFs);

  JobConf jobConf = new JobConf(conf);
  TextInputFormat fif = new TextInputFormat();
  fif.configure(jobConf);
  FileStatus[] statuses = fif.listStatus(jobConf);

  org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .verifyFileStatuses(expectedPaths, Lists.newArrayList(statuses),
          localFs);
}
@Test
public void testListStatusNestedRecursive() throws IOException {
  Configuration conf = new Configuration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);

  List<Path> expectedPaths =
      org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
          .configureTestNestedRecursive(conf, localFs);

  JobConf jobConf = new JobConf(conf);
  TextInputFormat fif = new TextInputFormat();
  fif.configure(jobConf);
  FileStatus[] statuses = fif.listStatus(jobConf);

  org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .verifyFileStatuses(expectedPaths, Lists.newArrayList(statuses),
          localFs);
}
@Test
public void testListStatusNestedNonRecursive() throws IOException {
  Configuration conf = new Configuration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);

  List<Path> expectedPaths =
      org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
          .configureTestNestedNonRecursive(conf, localFs);

  JobConf jobConf = new JobConf(conf);
  TextInputFormat fif = new TextInputFormat();
  fif.configure(jobConf);
  FileStatus[] statuses = fif.listStatus(jobConf);

  org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .verifyFileStatuses(expectedPaths, Lists.newArrayList(statuses),
          localFs);
}
@Test
public void testListStatusErrorOnNonExistantDir() throws IOException {
  Configuration conf = new Configuration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);

  org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .configureTestErrorOnNonExistantDir(conf, localFs);

  JobConf jobConf = new JobConf(conf);
  TextInputFormat fif = new TextInputFormat();
  fif.configure(jobConf);
  try {
    fif.listStatus(jobConf);
    Assert.fail("Expecting an IOException for a missing Input path");
  } catch (IOException e) {
    Path expectedExceptionPath = new Path(TEST_ROOT_DIR, "input2");
    expectedExceptionPath = localFs.makeQualified(expectedExceptionPath);
    Assert.assertTrue(e instanceof InvalidInputException);
    Assert.assertEquals(
        "Input path does not exist: " + expectedExceptionPath.toString(),
        e.getMessage());
  }
}
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: wordstddev <in> <out>");
    return 1;
  }

  Configuration conf = getConf();

  Job job = Job.getInstance(conf, "word stddev");
  job.setJarByClass(WordStandardDeviation.class);
  job.setMapperClass(WordStandardDeviationMapper.class);
  job.setCombinerClass(WordStandardDeviationReducer.class);
  job.setReducerClass(WordStandardDeviationReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  Path outputpath = new Path(args[1]);
  FileOutputFormat.setOutputPath(job, outputpath);
  boolean result = job.waitForCompletion(true);

  // read output and calculate standard deviation
  stddev = readAndCalcStdDev(outputpath, conf);

  return (result ? 0 : 1);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs =
      new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("Usage: wordcount <in> [<in>...] <out>");
    System.exit(2);
  }
  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(WordCount.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  for (int i = 0; i < otherArgs.length - 1; ++i) {
    FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
  }
  FileOutputFormat.setOutputPath(job,
      new Path(otherArgs[otherArgs.length - 1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
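// For completeness: in the stock Hadoop WordCount example, the
// TokenizerMapper and IntSumReducer referenced above are essentially the
// following (reproduced from memory; verify against your Hadoop version):
public static class TokenizerMapper
    extends Mapper<Object, Text, Text, IntWritable> {
  private final static IntWritable one = new IntWritable(1);
  private Text word = new Text();

  public void map(Object key, Text value, Context context)
      throws IOException, InterruptedException {
    // Emit (token, 1) for every whitespace-delimited token in the line.
    StringTokenizer itr = new StringTokenizer(value.toString());
    while (itr.hasMoreTokens()) {
      word.set(itr.nextToken());
      context.write(word, one);
    }
  }
}

public static class IntSumReducer
    extends Reducer<Text, IntWritable, Text, IntWritable> {
  private IntWritable result = new IntWritable();

  public void reduce(Text key, Iterable<IntWritable> values,
      Context context) throws IOException, InterruptedException {
    // Sum the counts for each token; also usable as a combiner.
    int sum = 0;
    for (IntWritable val : values) {
      sum += val.get();
    }
    result.set(sum);
    context.write(key, result);
  }
}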
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: wordmean <in> <out>");
    return 1;
  }

  Configuration conf = getConf();

  Job job = Job.getInstance(conf, "word mean");
  job.setJarByClass(WordMean.class);
  job.setMapperClass(WordMeanMapper.class);
  job.setCombinerClass(WordMeanReducer.class);
  job.setReducerClass(WordMeanReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  Path outputpath = new Path(args[1]);
  FileOutputFormat.setOutputPath(job, outputpath);
  boolean result = job.waitForCompletion(true);

  // read output and calculate mean
  mean = readAndCalcMean(outputpath, conf);

  return (result ? 0 : 1);
}
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());
  if (args.length != 2) {
    usage();
    return 1;
  }
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split
  FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
  job.setInputFormatClass(TeraInputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * Configure the {@link Job} for enabling compression emulation.
 */
static void configure(final Job job)
    throws IOException, InterruptedException, ClassNotFoundException {
  // set the random text mapper
  job.setMapperClass(RandomTextDataMapper.class);
  job.setNumReduceTasks(0);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setInputFormatClass(GenDataFormat.class);
  job.setJarByClass(GenerateData.class);

  // enable output compression
  FileOutputFormat.setCompressOutput(job, true);
  try {
    FileInputFormat.addInputPath(job, new Path("ignored"));
  } catch (IOException e) {
    LOG.error("Error while adding input path ", e);
  }
}
@Override
public Job call() throws IOException, InterruptedException,
    ClassNotFoundException {
  UserGroupInformation ugi = UserGroupInformation.getLoginUser();
  ugi.doAs(new PrivilegedExceptionAction<Job>() {
    public Job run() throws IOException, ClassNotFoundException,
        InterruptedException {
      job.setMapperClass(GenDCDataMapper.class);
      job.setNumReduceTasks(0);
      job.setMapOutputKeyClass(NullWritable.class);
      job.setMapOutputValueClass(BytesWritable.class);
      job.setInputFormatClass(GenDCDataFormat.class);
      job.setOutputFormatClass(NullOutputFormat.class);
      job.setJarByClass(GenerateDistCacheData.class);
      try {
        FileInputFormat.addInputPath(job, new Path("ignored"));
      } catch (IOException e) {
        LOG.error("Error while adding input path ", e);
      }
      job.submit();
      return job;
    }
  });
  return job;
}
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: MaxTemperature <input path> <output path>");
    System.exit(-1);
  }

  Job job = new Job();
  job.setJarByClass(MaxTemperature.class);
  job.setJobName("Max temperature");

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.setMapperClass(MaxTemperatureMapper.class);
  job.setReducerClass(MaxTemperatureReducer.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println(
        "Usage: MaxTemperatureWithCombiner <input path> <output path>");
    System.exit(-1);
  }

  Job job = new Job();
  job.setJarByClass(MaxTemperatureWithCombiner.class);
  job.setJobName("Max Temperature With Combiner");

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.setMapperClass(MaxTemperatureMapper.class);
  job.setCombinerClass(MaxTemperatureReducer.class);
  job.setReducerClass(MaxTemperatureReducer.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
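// Both drivers above rely on MaxTemperatureReducer, which is not shown in
// this file. A minimal sketch consistent with the Text/IntWritable output
// types declared above (the real implementation may differ). Because
// taking a maximum is associative and commutative, the same class can
// safely serve as the combiner, as the second driver does:
public class MaxTemperatureReducer
    extends Reducer<Text, IntWritable, Text, IntWritable> {
  @Override
  protected void reduce(Text key, Iterable<IntWritable> values,
      Context context) throws IOException, InterruptedException {
    int maxValue = Integer.MIN_VALUE;
    for (IntWritable value : values) {
      maxValue = Math.max(maxValue, value.get());
    }
    context.write(key, new IntWritable(maxValue));
  }
}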
public static void runJob(Configuration conf, Path inputPath, Path output)
    throws IOException, ClassNotFoundException, InterruptedException {
  Job job = new Job(conf, "InputDriver running over input: " + inputPath);
  log.info("start running InputDriver");

  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(indexToWordWritable.class);
  job.setOutputKeyClass(twoDimensionIndexWritable.class);
  job.setOutputValueClass(Text.class);

  job.setMapperClass(InputMapper.class);
  job.setReducerClass(InputReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setJarByClass(InputDriver.class);

  FileInputFormat.addInputPath(job, inputPath);
  FileOutputFormat.setOutputPath(job, output);

  boolean succeeded = job.waitForCompletion(true);
  if (!succeeded) {
    throw new IllegalStateException("Job failed!");
  }
}