@Override
public void configureInputFormat(Job job, String tableName,
    String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {
  // Write a line of text into a file so that we can get
  // a record to the map task.
  Path dir = new Path(this.options.getTempDir());
  Path p = new Path(dir, "sqoop-dummy-import-job-file.txt");
  FileSystem fs = FileSystem.getLocal(this.options.getConf());
  if (fs.exists(p)) {
    boolean result = fs.delete(p, false);
    assertTrue("Couldn't delete temp file!", result);
  }

  BufferedWriter w = new BufferedWriter(
      new OutputStreamWriter(fs.create(p)));
  w.append("This is a line!");
  w.close();

  FileInputFormat.addInputPath(job, p);

  // And set the InputFormat itself.
  super.configureInputFormat(job, tableName, tableClassName, splitByCol);
}
public Job call() throws IOException, InterruptedException,
    ClassNotFoundException {
  ugi.doAs(new PrivilegedExceptionAction<Job>() {
    public Job run() throws IOException, ClassNotFoundException,
        InterruptedException {
      job.setMapperClass(LoadMapper.class);
      job.setReducerClass(LoadReducer.class);
      job.setNumReduceTasks(jobdesc.getNumberReduces());
      job.setMapOutputKeyClass(GridmixKey.class);
      job.setMapOutputValueClass(GridmixRecord.class);
      job.setSortComparatorClass(LoadSortComparator.class);
      job.setGroupingComparatorClass(SpecGroupingComparator.class);
      job.setInputFormatClass(LoadInputFormat.class);
      job.setOutputFormatClass(RawBytesOutputFormat.class);
      job.setPartitionerClass(DraftPartitioner.class);
      job.setJarByClass(LoadJob.class);
      job.getConfiguration().setBoolean(Job.USED_GENERIC_PARSER, true);
      FileOutputFormat.setOutputPath(job, outdir);
      job.submit();
      return job;
    }
  });
  return job;
}
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println(
        "Usage: MaxTemperatureWithCombiner <input path> <output path>");
    System.exit(-1);
  }

  Job job = new Job();
  job.setJarByClass(MaxTemperatureWithCombiner.class);
  job.setJobName("Max Temperature With Combiner");

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.setMapperClass(MaxTemperatureMapper.class);
  job.setCombinerClass(MaxTemperatureReducer.class);
  job.setReducerClass(MaxTemperatureReducer.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  job.setJobName("TF-IDFCount");
  job.setJarByClass(TF_IDF.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(TextArrayWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);

  job.setMapperClass(TF_IDFMap.class);
  job.setReducerClass(TF_IDFReduce.class);

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  // Two input paths are added; the third argument is the output directory.
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileInputFormat.addInputPath(job, new Path(args[1]));
  FileOutputFormat.setOutputPath(job, new Path(args[2]));

  boolean wait = job.waitForCompletion(true);
  System.exit(wait ? 0 : 1);
}
/**
 * Tests one of the maps consuming output.
 *
 * @throws Exception
 */
public void testChainMapNoOutput() throws Exception {
  Configuration conf = createJobConf();
  String expectedOutput = "";

  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 0, input);
  job.setJobName("chain");
  ChainMapper.addMapper(job, ConsumeMap.class, IntWritable.class, Text.class,
      LongWritable.class, Text.class, null);
  ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);
  job.waitForCompletion(true);
  assertTrue("Job failed", job.isSuccessful());
  assertEquals("Outputs don't match", expectedOutput,
      MapReduceTestUtil.readOutput(outDir, conf));
}
/**
 * Look for jars we expect to be on the classpath by name.
 */
@Test
public void testAddDependencyJars() throws Exception {
  Job job = new Job();
  TableMapReduceUtil.addDependencyJars(job);
  String tmpjars = job.getConfiguration().get("tmpjars");

  // verify presence of modules
  assertTrue(tmpjars.contains("hbase-common"));
  assertTrue(tmpjars.contains("hbase-protocol"));
  assertTrue(tmpjars.contains("hbase-client"));
  assertTrue(tmpjars.contains("hbase-hadoop-compat"));
  assertTrue(tmpjars.contains("hbase-server"));

  // verify presence of 3rd party dependencies.
  assertTrue(tmpjars.contains("zookeeper"));
  assertTrue(tmpjars.contains("netty"));
  assertTrue(tmpjars.contains("protobuf"));
  assertTrue(tmpjars.contains("guava"));
  assertTrue(tmpjars.contains("htrace"));
}
/**
 * Creates a simple copy job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Input directories (varargs).
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir,
    Path... indirs) throws Exception {
  conf.setInt(MRJobConfig.NUM_MAPS, 3);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("DataMoveJob");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(DataCopyMapper.class);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  theJob.setReducerClass(DataCopyReducer.class);
  theJob.setNumReduceTasks(1);
  return theJob;
}
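// A minimal, hypothetical driver for createCopyJob above (not from the
// original source). It assumes it is declared in the same class as
// createCopyJob, with org.apache.hadoop.conf.Configuration,
// org.apache.hadoop.fs.Path and org.apache.hadoop.mapreduce.Job imported.
// The command-line layout (<outdir> followed by one or more <indir>) is an
// assumption made only for this sketch.
public static void main(String[] args) throws Exception {
  if (args.length < 2) {
    System.err.println("Usage: CopyJobDriver <outdir> <indir> [<indir>...]");
    System.exit(2);
  }
  Path outdir = new Path(args[0]);
  Path[] indirs = new Path[args.length - 1];
  for (int i = 1; i < args.length; i++) {
    indirs[i - 1] = new Path(args[i]);
  }
  // Build the copy job from the helper above and block until it finishes.
  Job copyJob = createCopyJob(new Configuration(), outdir, indirs);
  System.exit(copyJob.waitForCompletion(true) ? 0 : 1);
}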
/**
 * Main entry point.
 *
 * @param args The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("ERROR: Wrong number of parameters: " + args.length);
    System.err.println("Usage: CellCounter ");
    System.err.println(" <tablename> <outputDir> <reportSeparator> [^[regex pattern] or "
        + "[Prefix] for row filter]] --starttime=[starttime] --endtime=[endtime]");
    System.err.println(" Note: -D properties will be applied to the conf used. ");
    System.err.println(" Additionally, the following SCAN properties can be specified");
    System.err.println(" to get fine grained control on what is counted.");
    System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
    System.err.println(" <reportSeparator> parameter can be used to override the default report "
        + "separator string: used to separate the rowId/column family name and qualifier name.");
    System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell "
        + "counter count operation to a limited subset of rows from the table based on regex or "
        + "prefix pattern.");
    System.exit(-1);
  }
  Job job = createSubmittableJob(conf, otherArgs);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "test"); job.setMapperClass(testMapper.class); job.setPartitionerClass(testPartitioner.class); job.setReducerClass(testReducer.class); job.setNumReduceTasks(10); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); if (!job.waitForCompletion(true)) return; }
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "maxtemp"); job.setMapperClass(MaxTempMapper.class); job.setReducerClass(MaxTempReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(FloatWritable.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); if (!job.waitForCompletion(true)) return; }
private void configureAvroMergeJob(Configuration conf, Job job, Path oldPath, Path newPath)
    throws IOException {
  LOG.info("Trying to merge avro files");
  final Schema oldPathSchema = AvroUtil.getAvroSchema(oldPath, conf);
  final Schema newPathSchema = AvroUtil.getAvroSchema(newPath, conf);
  if (oldPathSchema == null || newPathSchema == null
      || !oldPathSchema.equals(newPathSchema)) {
    throw new IOException("Invalid schema for input directories. Schema for old data: ["
        + oldPathSchema + "]. Schema for new data: [" + newPathSchema + "]");
  }
  LOG.debug("Avro Schema: " + oldPathSchema);
  job.setInputFormatClass(AvroInputFormat.class);
  job.setOutputFormatClass(AvroOutputFormat.class);
  job.setMapperClass(MergeAvroMapper.class);
  job.setReducerClass(MergeAvroReducer.class);
  AvroJob.setOutputSchema(job.getConfiguration(), oldPathSchema);
}
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args)
    throws IOException {
  Path inputPath = new Path(args[0]);
  String tableName = args[1];
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(Uploader.class);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapperClass(Uploader.class);
  // No reducers. Just write straight to table. Call initTableReducerJob
  // because it sets up the TableOutputFormat.
  TableMapReduceUtil.initTableReducerJob(tableName, null, job);
  job.setNumReduceTasks(0);
  return job;
}
void testInputFormat(Class<? extends InputFormat> clazz)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
  job.setInputFormatClass(clazz);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);

  LOG.debug("submitting job.");
  assertTrue("job failed!", job.waitForCompletion(true));
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2,
      job.getCounters()
          .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getValue());
  assertEquals("Saw any instances of the filtered out row.", 0,
      job.getCounters()
          .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getValue());
  assertEquals("Saw the wrong number of instances of columnA.", 1,
      job.getCounters()
          .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getValue());
  assertEquals("Saw the wrong number of instances of columnB.", 1,
      job.getCounters()
          .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getValue());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2,
      job.getCounters()
          .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getValue());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0,
      job.getCounters()
          .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getValue());
}
private void runIncrementalPELoad(Configuration conf, HTableDescriptor tableDescriptor,
    RegionLocator regionLocator, Path outDir)
    throws IOException, UnsupportedEncodingException, InterruptedException,
    ClassNotFoundException {
  Job job = new Job(conf, "testLocalMRIncrementalLoad");
  job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      KeyValueSerialization.class.getName());
  setupRandomGeneratorMapper(job);
  HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
  FileOutputFormat.setOutputPath(job, outDir);

  assertFalse(util.getTestFileSystem().exists(outDir));
  assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());
  assertTrue(job.waitForCompletion(true));
}
private void doMapReduce(final Class<? extends Test> cmd)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);

  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
@Override
protected void configureInputFormat(Job job, String tableName, String tableClassName,
    String splitByCol) throws ClassNotFoundException, IOException {
  if (options.getOdpsTable() != null) {
    Configuration conf = job.getConfiguration();
    setInputFormatClass(OdpsExportInputFormat.class);
    conf.set(OdpsConstants.TABLE_NAME, options.getOdpsTable());
    conf.set(OdpsConstants.ACCESS_ID, options.getOdpsAccessID());
    conf.set(OdpsConstants.ACCESS_KEY, options.getOdpsAccessKey());
    conf.set(OdpsConstants.ENDPOINT, options.getOdpsEndPoint());
    conf.set(OdpsConstants.PROJECT, options.getOdpsProject());
    String partitionSpec = options.getOdpsPartitionSpec();
    if (partitionSpec != null) {
      conf.set(OdpsConstants.PARTITION_SPEC, partitionSpec);
    }
    setMapperClass(OdpsExportMapper.class);
  }
  super.configureInputFormat(job, tableName, tableClassName, splitByCol);
  if (!isHCatJob && options.getOdpsTable() == null) {
    FileInputFormat.addInputPath(job, getInputPath());
  }
}
@Test
public void testListAttemptIdsWithInvalidInputs() throws Exception {
  JobID jobId = JobID.forName(jobIdStr);
  Cluster mockCluster = mock(Cluster.class);
  Job job = mock(Job.class);

  CLI cli = spy(new CLI());

  doReturn(mockCluster).when(cli).createCluster();
  when(mockCluster.getJob(jobId)).thenReturn(job);

  int retCode_JOB_SETUP = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "JOB_SETUP", "running" });
  int retCode_JOB_CLEANUP = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "JOB_CLEANUP", "running" });
  int retCode_invalidTaskState = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "REDUCE", "complete" });

  assertEquals("JOB_SETUP is an invalid input, exit code should be -1", -1,
      retCode_JOB_SETUP);
  assertEquals("JOB_CLEANUP is an invalid input, exit code should be -1", -1,
      retCode_JOB_CLEANUP);
  assertEquals("complete is an invalid input, exit code should be -1", -1,
      retCode_invalidTaskState);
}
@Test
public void testNewCounterC() throws Exception {
  final Job job = createJob();
  final Configuration conf = job.getConfiguration();
  conf.setInt(JobContext.IO_SORT_FACTOR, 3);
  createWordsFile(inFiles[3], conf);
  createWordsFile(inFiles[4], conf);
  long inputSize = 0;
  inputSize += getFileSize(inFiles[0]);
  inputSize += getFileSize(inFiles[1]);
  inputSize += getFileSize(inFiles[2]);
  inputSize += getFileSize(inFiles[3]);
  inputSize += getFileSize(inFiles[4]);
  org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(job, IN_DIR);
  org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job,
      new Path(OUT_DIR, "outputN2"));
  assertTrue(job.waitForCompletion(true));
  final Counters c1 = Counters.downgrade(job.getCounters());
  validateCounters(c1, 122880, 25600, 102400);
  validateFileCounters(c1, inputSize, 0, 0, 0);
}
/** Note that the "orderBy" column is called the "splitBy" in this version. * We reuse the same field, but it's not strictly ordering it -- just partitioning * the results. */ public static void setInput(Job job, Class<? extends DBWritable> inputClass, String tableName,String conditions, String splitBy, String... fieldNames) { DBInputFormat.setInput(job, inputClass, tableName, conditions, splitBy, fieldNames); job.setInputFormatClass(DataDrivenDBInputFormat.class); }
public static JobControl createValueAggregatorJobs(String args[],
    Class<? extends ValueAggregatorDescriptor>[] descriptors) throws IOException {
  JobControl theControl = new JobControl("ValueAggregatorJobs");
  ArrayList<ControlledJob> dependingJobs = new ArrayList<ControlledJob>();
  Configuration conf = new Configuration();
  if (descriptors != null) {
    conf = setAggregatorDescriptors(descriptors);
  }
  Job job = createValueAggregatorJob(conf, args);
  ControlledJob cjob = new ControlledJob(job, dependingJobs);
  theControl.addJob(cjob);
  return theControl;
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: secondarysort <in> <out>");
    System.exit(2);
  }
  Job job = Job.getInstance(conf, "secondary sort");
  job.setJarByClass(SecondarySort.class);
  job.setMapperClass(MapClass.class);
  job.setReducerClass(Reduce.class);

  // group and partition by the first int in the pair
  job.setPartitionerClass(FirstPartitioner.class);
  job.setGroupingComparatorClass(FirstGroupingComparator.class);

  // the map output is IntPair, IntWritable
  job.setMapOutputKeyClass(IntPair.class);
  job.setMapOutputValueClass(IntWritable.class);

  // the reduce output is Text, IntWritable
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void total(String name, String in, String out)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  conf.set(QUERIED_NAME, name);
  Job job = Job.getInstance(new Cluster(conf), conf);
  job.setJarByClass(Total.class);

  // in
  if (!in.endsWith("/")) {
    in = in.concat("/");
  }
  in = in.concat("employees");
  SequenceFileInputFormat.addInputPath(job, new Path(in));
  job.setInputFormatClass(SequenceFileInputFormat.class);

  // map
  job.setMapperClass(TotalMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(DoubleWritable.class);

  // reduce
  job.setCombinerClass(TotalReducer.class);
  job.setReducerClass(TotalReducer.class);

  // out
  SequenceFileOutputFormat.setOutputPath(job, new Path(out));
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);

  job.waitForCompletion(true);
}
public static void run(Configuration conf, Path[] inputPath, Path outputPath)
    throws IOException, ClassNotFoundException, InterruptedException {
  String jobName = "init matrix";
  Job job = new Job(conf, jobName);

  job.setMapOutputKeyClass(twoDimensionIndexWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(twoDimensionIndexWritable.class);
  job.setOutputValueClass(Text.class);

  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  job.setMapperClass(InitMapper.class);
  job.setReducerClass(InitReducer.class);
  job.setNumReduceTasks(1);

  for (Path path : inputPath) {
    FileInputFormat.addInputPath(job, path);
  }
  Path output = new Path(outputPath, "initDir");
  FileOutputFormat.setOutputPath(job, output);

  job.setJarByClass(LDADriver.class);
  if (!job.waitForCompletion(true)) {
    throw new InterruptedException("Init failed");
  }
}
@Override
public int run(String[] args) throws Exception {
  String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
  if (otherArgs.length < 2) {
    usage("Wrong number of arguments: " + otherArgs.length);
    System.exit(-1);
  }
  Job job = createSubmittableJob(otherArgs);
  return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * Tests a MR scan using specific start and stop rows.
 *
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
protected void testScan(String start, String stop, String last)
    throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "Scan" + (start != null ? start.toUpperCase() : "Empty")
      + "To" + (stop != null ? stop.toUpperCase() : "Empty");
  LOG.info("Before map/reduce startup - job " + jobName);
  Configuration c = new Configuration(TEST_UTIL.getConfiguration());
  Scan scan = new Scan();
  scan.addFamily(INPUT_FAMILY);
  if (start != null) {
    scan.setStartRow(Bytes.toBytes(start));
  }
  c.set(KEY_STARTROW, start != null ? start : "");
  if (stop != null) {
    scan.setStopRow(Bytes.toBytes(stop));
  }
  c.set(KEY_LASTROW, last != null ? last : "");
  LOG.info("scan before: " + scan);
  Job job = new Job(c, jobName);
  TableMapReduceUtil.initTableMapperJob(Bytes.toString(TABLE_NAME), scan,
      ScanMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
  job.setReducerClass(ScanReducer.class);
  job.setNumReduceTasks(1); // one to get final "first" and "last" key
  FileOutputFormat.setOutputPath(job,
      new Path(TEST_UTIL.getDataTestDir(), job.getJobName()));
  LOG.info("Started " + job.getJobName());
  assertTrue(job.waitForCompletion(true));
  LOG.info("After map/reduce completion - job " + jobName);
}
/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table The table name to read from.
 * @param scan The scan instance with the columns, time range etc.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is
 *          carrying all necessary HBase configuration.
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(TableName table, Scan scan,
    Class<? extends TableMapper> mapper, Class<?> outputKeyClass,
    Class<?> outputValueClass, Job job) throws IOException {
  initTableMapperJob(table.getNameAsString(), scan, mapper, outputKeyClass,
      outputValueClass, job, true);
}
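// A hedged usage sketch for the overload above (not from the original source).
// The table name and column family are placeholders; IdentityTableMapper,
// ImmutableBytesWritable and Result come from the HBase mapreduce API and are
// used here only as a convenient pass-through mapper and its key/value types.
public static Job createScanJob(Configuration conf) throws IOException {
  Job job = Job.getInstance(conf, "scan-sketch");
  Scan scan = new Scan();
  scan.addFamily(Bytes.toBytes("cf"));               // assumed column family
  scan.setCaching(500);                              // larger batches per RPC
  scan.setCacheBlocks(false);                        // don't pollute the block cache
  TableMapReduceUtil.initTableMapperJob(
      TableName.valueOf("my_table"),                 // assumed table name
      scan,
      IdentityTableMapper.class,
      ImmutableBytesWritable.class,
      Result.class,
      job);
  job.setNumReduceTasks(0);                          // map-only scan
  job.setOutputFormatClass(NullOutputFormat.class);  // discard the output
  return job;
}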
/**
 * Initializes the list of scans.
 *
 * @param job
 * @return
 */
private List<Scan> initScans(Job job) {
  Configuration conf = job.getConfiguration();
  // Get the run date: yyyy-MM-dd
  String date = conf.get(GlobalConstants.RUNNING_DATE_PARAMES);
  long startDate = TimeUtil.parseString2Long(date);
  long endDate = startDate + GlobalConstants.DAY_OF_MILLISECONDS;

  Scan scan = new Scan();
  // Define the start and stop rowkeys for the HBase scan
  scan.setStartRow(Bytes.toBytes("" + startDate));
  scan.setStopRow(Bytes.toBytes("" + endDate));

  FilterList filterList = new FilterList();
  // Columns the mapper needs to read
  String[] columns = new String[] {
      EventLogConstants.LOG_COLUMN_NAME_UUID,            // user id
      EventLogConstants.LOG_COLUMN_NAME_SERVER_TIME,     // server time
      EventLogConstants.LOG_COLUMN_NAME_PLATFORM,        // platform name
      EventLogConstants.LOG_COLUMN_NAME_BROWSER_NAME,    // browser name
      EventLogConstants.LOG_COLUMN_NAME_BROWSER_VERSION  // browser version
  };
  filterList.addFilter(this.getColumnFilter(columns));

  scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME,
      Bytes.toBytes(EventLogConstants.HBASE_NAME_EVENT_LOGS));
  scan.setFilter(filterList);
  return Lists.newArrayList(scan);
}
private void setupBinaryTokenFile(Job job) {
  // Credentials in the job will not have delegation tokens
  // because security is disabled. Fetch delegation tokens
  // and store in binary token file.
  createBinaryTokenFile(job.getConfiguration());
  job.getConfiguration().set(MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY,
      binaryTokenFileName.toString());
  // NB: the MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY
  // key now gets deleted from config,
  // so it's not accessible in the job's config. So,
  // we use another key to pass the file name into the job configuration:
  job.getConfiguration().set(KEY_SECURITY_TOKEN_FILE_NAME,
      binaryTokenFileName.toString());
}
@Test
public void testListAttemptIdsWithValidInput() throws Exception {
  JobID jobId = JobID.forName(jobIdStr);
  Cluster mockCluster = mock(Cluster.class);
  Job job = mock(Job.class);

  CLI cli = spy(new CLI());

  doReturn(mockCluster).when(cli).createCluster();
  when(job.getTaskReports(TaskType.MAP)).thenReturn(
      getTaskReports(jobId, TaskType.MAP));
  when(job.getTaskReports(TaskType.REDUCE)).thenReturn(
      getTaskReports(jobId, TaskType.REDUCE));
  when(mockCluster.getJob(jobId)).thenReturn(job);

  int retCode_MAP = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "MAP", "running" });
  // testing case insensitive behavior
  int retCode_map = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "map", "running" });
  int retCode_REDUCE = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "REDUCE", "running" });
  int retCode_completed = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "REDUCE", "completed" });

  assertEquals("MAP is a valid input, exit code should be 0", 0, retCode_MAP);
  assertEquals("map is a valid input, exit code should be 0", 0, retCode_map);
  assertEquals("REDUCE is a valid input, exit code should be 0", 0, retCode_REDUCE);
  assertEquals(
      "REDUCE and completed are valid inputs to -list-attempt-ids, exit code should be 0",
      0, retCode_completed);

  verify(job, times(2)).getTaskReports(TaskType.MAP);
  verify(job, times(2)).getTaskReports(TaskType.REDUCE);
}
public int run(String[] args) throws Exception {
  if (args.length < 2) {
    printUsage();
    return 2;
  }

  Job job = Job.getInstance(getConf());
  job.setJobName("MultiFileWordCount");
  job.setJarByClass(MultiFileWordCount.class);

  // set the InputFormat of the job to our InputFormat
  job.setInputFormatClass(MyInputFormat.class);

  // the keys are words (strings)
  job.setOutputKeyClass(Text.class);
  // the values are counts (ints)
  job.setOutputValueClass(IntWritable.class);

  // use the defined mapper
  job.setMapperClass(MapClass.class);
  // use the WordCount Reducer
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);

  FileInputFormat.addInputPaths(job, args[0]);
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * Run small MR job.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testWritingPEData() throws Exception {
  Configuration conf = util.getConfiguration();
  Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
  FileSystem fs = testDir.getFileSystem(conf);

  // Set down this value or we OOME in eclipse.
  conf.setInt("mapreduce.task.io.sort.mb", 20);
  // Write a few files.
  conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

  Job job = new Job(conf, "testWritingPEData");
  setupRandomGeneratorMapper(job);
  // This partitioner doesn't work well for number keys but using it anyways
  // just to demonstrate how to configure it.
  byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
  byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

  Arrays.fill(startKey, (byte) 0);
  Arrays.fill(endKey, (byte) 0xff);

  job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
  // Set start and end rows for partitioner.
  SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
  SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
  job.setReducerClass(KeyValueSortReducer.class);
  job.setOutputFormatClass(HFileOutputFormat2.class);
  job.setNumReduceTasks(4);
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      KeyValueSerialization.class.getName());

  FileOutputFormat.setOutputPath(job, testDir);
  assertTrue(job.waitForCompletion(false));
  FileStatus[] files = fs.listStatus(testDir);
  assertTrue(files.length > 0);
}
/**
 * Create and submit the mapreduce job.
 * @return The mapreduce job object that has been submitted
 */
public Job createAndSubmitJob() throws Exception {
  assert inputOptions != null;
  assert getConf() != null;
  Job job = null;
  try {
    synchronized (this) {
      // Don't cleanup while we are setting up.
      metaFolder = createMetaFolderPath();
      jobFS = metaFolder.getFileSystem(getConf());
      job = createJob();
    }
    if (inputOptions.shouldUseDiff()) {
      if (!DistCpSync.sync(inputOptions, getConf())) {
        inputOptions.disableUsingDiff();
      }
    }
    createInputFileListing(job);

    job.submit();
    submitted = true;
  } finally {
    if (!submitted) {
      cleanup();
    }
  }

  String jobID = job.getJobID().toString();
  job.getConfiguration().set(DistCpConstants.CONF_LABEL_DISTCP_JOB_ID, jobID);

  LOG.info("DistCp job-id: " + jobID);
  return job;
}
public int run(String[] args) throws Exception {
  if (args.length < 1) {
    System.err.println("FailJob " + " (-failMappers|-failReducers)");
    ToolRunner.printGenericCommandUsage(System.err);
    return 2;
  }
  boolean failMappers = false, failReducers = false;

  for (int i = 0; i < args.length; i++) {
    if (args[i].equals("-failMappers")) {
      failMappers = true;
    } else if (args[i].equals("-failReducers")) {
      failReducers = true;
    }
  }
  if (!(failMappers ^ failReducers)) {
    System.err.println("Exactly one of -failMappers or -failReducers must be specified.");
    return 3;
  }

  // Write a file with one line per mapper.
  final FileSystem fs = FileSystem.get(getConf());
  Path inputDir = new Path(FailJob.class.getSimpleName() + "_in");
  fs.mkdirs(inputDir);
  for (int i = 0; i < getConf().getInt("mapred.map.tasks", 1); ++i) {
    BufferedWriter w = new BufferedWriter(new OutputStreamWriter(
        fs.create(new Path(inputDir, Integer.toString(i)))));
    w.write(Integer.toString(i) + "\n");
    w.close();
  }

  Job job = createJob(failMappers, failReducers, inputDir);
  return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * Use this before submitting a Multi TableMap job. It will appropriately set
 * up the job.
 *
 * @param scans The list of {@link Scan} objects to read from.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is carrying
 *          all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the
 *          configured job classes via the distributed cache (tmpjars).
 * @param initCredentials whether to initialize hbase auth credentials for the job
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(List<Scan> scans,
    Class<? extends TableMapper> mapper, Class<?> outputKeyClass,
    Class<?> outputValueClass, Job job, boolean addDependencyJars,
    boolean initCredentials) throws IOException {
  job.setInputFormatClass(MultiTableInputFormat.class);
  if (outputValueClass != null) {
    job.setMapOutputValueClass(outputValueClass);
  }
  if (outputKeyClass != null) {
    job.setMapOutputKeyClass(outputKeyClass);
  }
  job.setMapperClass(mapper);
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  List<String> scanStrings = new ArrayList<String>();

  for (Scan scan : scans) {
    scanStrings.add(convertScanToString(scan));
  }
  job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
      scanStrings.toArray(new String[scanStrings.size()]));

  if (addDependencyJars) {
    addDependencyJars(job);
  }

  if (initCredentials) {
    initCredentials(job);
  }
}
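// A hedged sketch (not from the original source) of driving the multi-scan
// overload above. The table names are placeholders; each Scan is tagged with
// its source table via Scan.SCAN_ATTRIBUTES_TABLE_NAME so MultiTableInputFormat
// knows which table a scan belongs to. IdentityTableMapper simply passes each
// (row, Result) pair through.
public static Job createMultiTableScanJob(Configuration conf) throws IOException {
  Job job = Job.getInstance(conf, "multi-table-scan-sketch");
  List<Scan> scans = new ArrayList<Scan>();
  for (String table : new String[] { "events_2015", "events_2016" }) {  // assumed tables
    Scan scan = new Scan();
    scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(table));
    scans.add(scan);
  }
  TableMapReduceUtil.initTableMapperJob(scans, IdentityTableMapper.class,
      ImmutableBytesWritable.class, Result.class, job,
      true /* addDependencyJars */, true /* initCredentials */);
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(NullOutputFormat.class);
  return job;
}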
@Override
public Job createSubmittableJob(String[] args) throws IOException {
  Job job = super.createSubmittableJob(args);
  // Call my class instead.
  job.setJarByClass(WALMapperSearcher.class);
  job.setMapperClass(WALMapperSearcher.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  return job;
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  job.setJarByClass(UnitSum.class);
  job.setMapperClass(PassMapper.class);
  job.setReducerClass(SumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.waitForCompletion(true);
}
@Test
public void testInitTableMapperJob1() throws Exception {
  Configuration configuration = new Configuration();
  Job job = new Job(configuration, "tableName");
  // test
  TableMapReduceUtil.initTableMapperJob("Table", new Scan(), Import.Importer.class,
      Text.class, Text.class, job, false, WALInputFormat.class);
  assertEquals(WALInputFormat.class, job.getInputFormatClass());
  assertEquals(Import.Importer.class, job.getMapperClass());
  assertEquals(LongWritable.class, job.getOutputKeyClass());
  assertEquals(Text.class, job.getOutputValueClass());
  assertNull(job.getCombinerClass());
  assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
}
private DatasetDescriptor getDatasetDescriptorFromParquetFile(Job job, FileSystem fs,
    String uri) throws IOException {
  ArrayList<FileStatus> files = new ArrayList<FileStatus>();
  FileStatus[] dirs;
  dirs = fs.globStatus(fs.makeQualified(getInputPath()));
  for (int i = 0; (dirs != null && i < dirs.length); i++) {
    files.addAll(Arrays.asList(fs.listStatus(dirs[i].getPath(), HIDDEN_FILES_PATH_FILTER)));
    // We only check one file, so exit the loop when we have at least
    // one.
    if (files.size() > 0) {
      break;
    }
  }

  ParquetMetadata parquetMetadata;
  try {
    parquetMetadata = ParquetFileReader.readFooter(job.getConfiguration(),
        fs.makeQualified(files.get(0).getPath()));
  } catch (IOException e) {
    LOG.error("Wrong file format. Please check the export file's format.", e);
    throw e;
  }
  MessageType schema = parquetMetadata.getFileMetaData().getSchema();
  Schema avroSchema = new AvroSchemaConverter().convert(schema);
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(avroSchema)
      .format(Formats.PARQUET)
      .compressionType(ParquetJob.getCompressionType(job.getConfiguration()))
      .build();
  return descriptor;
}
@Override
public Job newJob(Configuration conf) throws IOException {
  // Implementing:
  // return new Job(conf);
  Constructor<Job> c;
  try {
    c = Job.class.getConstructor(Configuration.class);
    return c.newInstance(conf);
  } catch (Exception e) {
    throw new IllegalStateException("Failed to instantiate new Job(conf)", e);
  }
}
public static void main(String[] args) {
  if (args.length != 2) {
    System.err.println("Usage: MaxTemperature <input path> <output path>");
    System.exit(-1);
  }

  String temperatureInput = args[0];
  String temperatureOutput = args[1];
  try {
    Configuration configuration = new Configuration();
    Job job = Job.getInstance(configuration);
    job.setJarByClass(MaxTemperatureMapReduce.class);
    job.setJobName("MaxTemperature");

    FileInputFormat.addInputPath(job, new Path(temperatureInput));
    FileOutputFormat.setOutputPath(job, new Path(temperatureOutput));

    job.setMapperClass(MaxTemperatureMapper.class);
    job.setReducerClass(MaxTemperatureReduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.waitForCompletion(true);
  } catch (IOException | InterruptedException | ClassNotFoundException e) {
    e.printStackTrace();
  }
}