@SuppressWarnings("deprecation") @Override public void initializeMemberVariables() { xmlFilename = new String("mapred-default.xml"); configurationClasses = new Class[] { MRJobConfig.class, MRConfig.class, JHAdminConfig.class, ShuffleHandler.class, FileOutputFormat.class, FileInputFormat.class, Job.class, NLineInputFormat.class, JobConf.class, FileOutputCommitter.class }; // Initialize used variables configurationPropsToSkipCompare = new HashSet<String>(); // Set error modes errorIfMissingConfigProps = true; errorIfMissingXmlProps = false; // Ignore deprecated MR1 properties in JobConf configurationPropsToSkipCompare .add(JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY); configurationPropsToSkipCompare .add(JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY); }
public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(
    TaskAttemptContext context) throws IOException {
  // Get the path of the temporary output file
  final Path outputPath = FileOutputFormat.getOutputPath(context);
  final Path outputDir =
      new FileOutputCommitter(outputPath, context).getWorkPath();
  final Configuration conf = context.getConfiguration();
  final FileSystem fs = outputDir.getFileSystem(conf);

  int blockSize = conf.getInt(Constants.HFILE_BLOCKSIZE, 16384);
  // Default to snappy.
  Compression.Algorithm compressionAlgorithm = getAlgorithm(
      conf.get(Constants.HFILE_COMPRESSION));
  final StoreFile.Writer writer =
      new StoreFile.WriterBuilder(conf, new CacheConfig(conf), fs, blockSize)
          .withFilePath(hfilePath(outputPath,
              context.getTaskAttemptID().getTaskID().getId()))
          .withCompression(compressionAlgorithm)
          .build();
  return new HFileRecordWriter(writer);
}
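A rough job-wiring sketch for the HFile writer above; the HFileOutputFormat class name and the "snappy" value string are assumptions, since only the configuration keys appear in the snippet:

// Hypothetical wiring (HFileOutputFormat and the "snappy" value are assumptions).
Configuration conf = new Configuration();
conf.setInt(Constants.HFILE_BLOCKSIZE, 65536);      // override the 16384-byte default above
conf.set(Constants.HFILE_COMPRESSION, "snappy");    // omit to fall back to the snappy default
Job job = Job.getInstance(conf, "hfile-export");
job.setOutputFormatClass(HFileOutputFormat.class);  // the format containing getRecordWriter above
job.setOutputKeyClass(BytesWritable.class);
job.setOutputValueClass(BytesWritable.class);
FileOutputFormat.setOutputPath(job, new Path("/tmp/hfiles"));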
/**
 * Creates a new {@link RecordWriter} to output temporary data.
 * @param <V> value type
 * @param context current context
 * @param name output name
 * @param dataType value type
 * @return the created writer
 * @throws IOException if failed to create a new {@link RecordWriter}
 * @throws InterruptedException if interrupted
 */
public <V> RecordWriter<NullWritable, V> createRecordWriter(
    TaskAttemptContext context,
    String name,
    Class<V> dataType) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(context);
  Path file = new Path(
      committer.getWorkPath(),
      FileOutputFormat.getUniqueFile(context, name, "")); //$NON-NLS-1$
  ModelOutput<V> out = TemporaryStorage.openOutput(conf, dataType, file);
  return new RecordWriter<NullWritable, V>() {
    @Override
    public void write(NullWritable key, V value) throws IOException {
      out.write(value);
    }
    @Override
    public void close(TaskAttemptContext ignored) throws IOException {
      out.close();
    }
    @Override
    public String toString() {
      return String.format("TemporaryOutput(%s)", file); //$NON-NLS-1$
    }
  };
}
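A brief usage sketch for createRecordWriter; the tempOutputFormat instance and the MyModel value class are hypothetical stand-ins, not part of the original API:

// Hypothetical usage inside a task (tempOutputFormat, someModel and MyModel are assumptions).
RecordWriter<NullWritable, MyModel> writer =
    tempOutputFormat.createRecordWriter(context, "temporary", MyModel.class);
try {
  writer.write(NullWritable.get(), someModel);
} finally {
  writer.close(context); // closes the underlying ModelOutput
}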
@Override
public void open(String uId) throws Exception {
  this.hash = uId.hashCode();

  Job job = ((ConfigurableHDFSFileSink<K, V>) getWriteOperation().getSink()).jobInstance();
  FileOutputFormat.setOutputPath(job, new Path(path));

  // Each Writer is responsible for writing one bundle of elements and is represented by one
  // unique Hadoop task based on uId/hash. All tasks share the same job ID. Since Dataflow
  // handles retrying of failed bundles, each task has one attempt only.
  JobID jobId = job.getJobID();
  TaskID taskId = new TaskID(jobId, TaskType.REDUCE, hash);
  configure(job);
  context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID(taskId, 0));

  FileOutputFormat<K, V> outputFormat = formatClass.newInstance();
  recordWriter = outputFormat.getRecordWriter(context);
  outputCommitter = (FileOutputCommitter) outputFormat.getOutputCommitter(context);
}
/**
 * Queries the file system for the URIs of all files in the base output directory that are not
 * directories and whose name isn't {@link FileOutputCommitter#SUCCEEDED_FILE_NAME}.
 *
 * @return a list of all URIs in the form of strings.
 * @throws IOException if unable to query for the files in the base output directory.
 */
protected List<String> getOutputFileURIs() throws IOException {
  // Enumerate over all files in the output path.
  FileStatus[] outputFiles = outputFileSystem.listStatus(outputPath);
  ArrayList<String> sourceUris = new ArrayList<String>(outputFiles.length);

  for (int i = 0; i < outputFiles.length; i++) {
    FileStatus fileStatus = outputFiles[i];
    // Skip the success file and directories as they're not relevant to BigQuery.
    if (!fileStatus.isDir()
        && !fileStatus.getPath().getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
      sourceUris.add(fileStatus.getPath().toString());
    }
  }

  return sourceUris;
}
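The marker filtering could also be pushed into listStatus with a PathFilter, as the word-count test later in this section does; this is an alternative sketch, not the original code, and the directory check is still needed afterwards:

// Alternative sketch: exclude the _SUCCESS marker at listing time;
// directories returned by listStatus still have to be skipped separately.
FileStatus[] outputFiles = outputFileSystem.listStatus(outputPath,
    path -> !path.getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME));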
@Override
public void init() throws IOException {
  super.init();

  Configuration taskConf = new Configuration();
  Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);
  taskConf.set(FileOutputFormat.OUTDIR, stagingResultDir.toString());

  ExecutionBlockId ebId = taskAttemptId.getTaskId().getExecutionBlockId();
  writerContext = new TaskAttemptContextImpl(taskConf,
      new TaskAttemptID(ebId.getQueryId().toString(), ebId.getId(),
          TaskType.MAP, taskAttemptId.getTaskId().getId(), taskAttemptId.getId()));

  HFileOutputFormat2 hFileOutputFormat2 = new HFileOutputFormat2();
  try {
    writer = hFileOutputFormat2.getRecordWriter(writerContext);

    committer = new FileOutputCommitter(FileOutputFormat.getOutputPath(writerContext), writerContext);
    workingFilePath = committer.getWorkPath();
  } catch (InterruptedException e) {
    throw new IOException(e.getMessage(), e);
  }

  LOG.info("Created hbase file writer: " + workingFilePath);
}
private static void populateMRSettingsToRetain() {
  // FileInputFormat
  mrSettingsToRetain.add(FileInputFormat.INPUT_DIR);
  mrSettingsToRetain.add(FileInputFormat.SPLIT_MAXSIZE);
  mrSettingsToRetain.add(FileInputFormat.SPLIT_MINSIZE);
  mrSettingsToRetain.add(FileInputFormat.PATHFILTER_CLASS);
  mrSettingsToRetain.add(FileInputFormat.NUM_INPUT_FILES);
  mrSettingsToRetain.add(FileInputFormat.INPUT_DIR_RECURSIVE);

  // FileOutputFormat
  mrSettingsToRetain.add(MRConfiguration.OUTPUT_BASENAME);
  mrSettingsToRetain.add(FileOutputFormat.COMPRESS);
  mrSettingsToRetain.add(FileOutputFormat.COMPRESS_CODEC);
  mrSettingsToRetain.add(FileOutputFormat.COMPRESS_TYPE);
  mrSettingsToRetain.add(FileOutputFormat.OUTDIR);
  mrSettingsToRetain.add(FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER);
}
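A minimal sketch of how such a retain-list might be applied, assuming the goal is to carry only these keys from one Configuration into a fresh one; the helper class below is not part of the original code:

import java.util.Set;
import org.apache.hadoop.conf.Configuration;

// Hypothetical helper: copy only the retained MR settings into an empty Configuration.
public final class RetainedSettings {
  private RetainedSettings() { }

  public static Configuration copyRetained(Configuration source, Set<String> keysToRetain) {
    Configuration target = new Configuration(false); // false: start without default resources
    for (String key : keysToRetain) {
      String value = source.get(key);
      if (value != null) {
        target.set(key, value);
      }
    }
    return target;
  }
}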
@Test(timeout = 5000)
public void testNewAPI_TextOutputFormat() throws Exception {
  String outputPath = "/tmp/output";
  Configuration conf = new Configuration();
  conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, true);
  DataSinkDescriptor dataSink = MROutput
      .createConfigBuilder(conf, TextOutputFormat.class, outputPath)
      .build();

  OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());

  MROutput output = new MROutput(outputContext, 2);
  output.initialize();

  assertEquals(true, output.isMapperOutput);
  assertEquals(true, output.useNewApi);
  assertEquals(TextOutputFormat.class, output.newOutputFormat.getClass());
  assertNull(output.oldOutputFormat);
  assertNotNull(output.newApiTaskAttemptContext);
  assertNull(output.oldApiTaskAttemptContext);
  assertNotNull(output.newRecordWriter);
  assertNull(output.oldRecordWriter);
  assertEquals(FileOutputCommitter.class, output.committer.getClass());
}
@Test(timeout = 5000)
public void testOldAPI_TextOutputFormat() throws Exception {
  String outputPath = "/tmp/output";
  Configuration conf = new Configuration();
  conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false);
  DataSinkDescriptor dataSink = MROutput
      .createConfigBuilder(conf, org.apache.hadoop.mapred.TextOutputFormat.class, outputPath)
      .build();

  OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());

  MROutput output = new MROutput(outputContext, 2);
  output.initialize();

  assertEquals(false, output.isMapperOutput);
  assertEquals(false, output.useNewApi);
  assertEquals(org.apache.hadoop.mapred.TextOutputFormat.class, output.oldOutputFormat.getClass());
  assertNull(output.newOutputFormat);
  assertNotNull(output.oldApiTaskAttemptContext);
  assertNull(output.newApiTaskAttemptContext);
  assertNotNull(output.oldRecordWriter);
  assertNull(output.newRecordWriter);
  assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass());
}
@Test(timeout = 5000)
public void testNewAPI_SequenceFileOutputFormat() throws Exception {
  String outputPath = "/tmp/output";
  JobConf conf = new JobConf();
  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);
  DataSinkDescriptor dataSink = MROutput
      .createConfigBuilder(conf, SequenceFileOutputFormat.class, outputPath)
      .build();

  OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());

  MROutput output = new MROutput(outputContext, 2);
  output.initialize();

  assertEquals(true, output.useNewApi);
  assertEquals(SequenceFileOutputFormat.class, output.newOutputFormat.getClass());
  assertNull(output.oldOutputFormat);
  assertEquals(NullWritable.class, output.newApiTaskAttemptContext.getOutputKeyClass());
  assertEquals(Text.class, output.newApiTaskAttemptContext.getOutputValueClass());
  assertNull(output.oldApiTaskAttemptContext);
  assertNotNull(output.newRecordWriter);
  assertNull(output.oldRecordWriter);
  assertEquals(FileOutputCommitter.class, output.committer.getClass());
}
@Test(timeout = 5000)
public void testOldAPI_SequenceFileOutputFormat() throws Exception {
  String outputPath = "/tmp/output";
  JobConf conf = new JobConf();
  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);
  DataSinkDescriptor dataSink = MROutput
      .createConfigBuilder(conf, org.apache.hadoop.mapred.SequenceFileOutputFormat.class, outputPath)
      .build();

  OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());

  MROutput output = new MROutput(outputContext, 2);
  output.initialize();

  assertEquals(false, output.useNewApi);
  assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, output.oldOutputFormat.getClass());
  assertNull(output.newOutputFormat);
  assertEquals(NullWritable.class, output.oldApiTaskAttemptContext.getOutputKeyClass());
  assertEquals(Text.class, output.oldApiTaskAttemptContext.getOutputValueClass());
  assertNull(output.newApiTaskAttemptContext);
  assertNotNull(output.oldRecordWriter);
  assertNull(output.newRecordWriter);
  assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass());
}
@Test(timeout = 5000)
public void testNewAPI_WorkOutputPathOutputFormat() throws Exception {
  String outputPath = "/tmp/output";
  Configuration conf = new Configuration();
  conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, true);
  DataSinkDescriptor dataSink = MROutput
      .createConfigBuilder(conf, NewAPI_WorkOutputPathReadingOutputFormat.class, outputPath)
      .build();

  OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());

  MROutput output = new MROutput(outputContext, 2);
  output.initialize();

  assertEquals(true, output.isMapperOutput);
  assertEquals(true, output.useNewApi);
  assertEquals(NewAPI_WorkOutputPathReadingOutputFormat.class, output.newOutputFormat.getClass());
  assertNull(output.oldOutputFormat);
  assertNotNull(output.newApiTaskAttemptContext);
  assertNull(output.oldApiTaskAttemptContext);
  assertNotNull(output.newRecordWriter);
  assertNull(output.oldRecordWriter);
  assertEquals(FileOutputCommitter.class, output.committer.getClass());
}
@Test(timeout = 5000)
public void testOldAPI_WorkOutputPathOutputFormat() throws Exception {
  String outputPath = "/tmp/output";
  Configuration conf = new Configuration();
  conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false);
  DataSinkDescriptor dataSink = MROutput
      .createConfigBuilder(conf, OldAPI_WorkOutputPathReadingOutputFormat.class, outputPath)
      .build();

  OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());

  MROutput output = new MROutput(outputContext, 2);
  output.initialize();

  assertEquals(false, output.isMapperOutput);
  assertEquals(false, output.useNewApi);
  assertEquals(OldAPI_WorkOutputPathReadingOutputFormat.class, output.oldOutputFormat.getClass());
  assertNull(output.newOutputFormat);
  assertNotNull(output.oldApiTaskAttemptContext);
  assertNull(output.newApiTaskAttemptContext);
  assertNotNull(output.oldRecordWriter);
  assertNull(output.newRecordWriter);
  assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass());
}
@Test (timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException,
    ClassNotFoundException {

  LOG.info("\n\n\nStarting testRandomWriter().");
  if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
        + " not found. Not running test.");
    return;
  }

  RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
  mrCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
  mrCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
  Job job = randomWriterJob.createJob(mrCluster.getConfig());
  Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setSpeculativeExecution(false);
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.setJarByClass(RandomTextWriterJob.class);
  job.setMaxMapAttempts(1); // speed up failures
  job.submit();
  String trackingUrl = job.getTrackingURL();
  String jobId = job.getJobID().toString();
  boolean succeeded = job.waitForCompletion(true);
  Assert.assertTrue(succeeded);
  Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
  Assert.assertTrue("Tracking URL was " + trackingUrl
      + " but didn't Match Job ID " + jobId,
      trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));

  // Make sure there are three files in the output-dir
  RemoteIterator<FileStatus> iterator =
      FileContext.getFileContext(mrCluster.getConfig()).listStatus(outputDir);
  int count = 0;
  while (iterator.hasNext()) {
    FileStatus file = iterator.next();
    if (!file.getPath().getName()
        .equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
      count++;
    }
  }
  Assert.assertEquals("Number of part files is wrong!", 3, count);
  verifyRandomWriterCounters(job);

  // TODO later: add explicit "isUber()" checks of some sort
}
@Test
public void testMr() throws InterruptedException, IOException, ClassNotFoundException {
  String dirInput = "/tmp/wordcount/input";
  String dirOutput = "/tmp/wordcount/output";
  String fileInput = new Path(dirInput, "file1.txt").toString();

  BufferedWriter writer = new BufferedWriter(
      new OutputStreamWriter(getDfsServer().getFileSystem().create(getDfsServer().getPath(fileInput))));
  writer.write("a a a a a\n");
  writer.write("b b\n");
  writer.close();

  Job job = Job.getInstance(getMrServer().getConf());
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setMapperClass(MapClass.class);
  job.setCombinerClass(Reduce.class);
  job.setReducerClass(Reduce.class);
  FileInputFormat.setInputPaths(job, getDfsServer().getPathUri(dirInput));
  FileOutputFormat.setOutputPath(job, new Path(getDfsServer().getPathUri(dirOutput)));
  assertTrue(job.waitForCompletion(true));

  Path[] outputFiles = FileUtil.stat2Paths(
      getDfsServer().getFileSystem().listStatus(getDfsServer().getPath(dirOutput),
          path -> !path.getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)));
  assertEquals(1, outputFiles.length);

  InputStream in = getDfsServer().getFileSystem().open(outputFiles[0]);
  BufferedReader reader = new BufferedReader(new InputStreamReader(in));
  assertEquals("a\t5", reader.readLine());
  assertEquals("b\t2", reader.readLine());
  assertNull(reader.readLine());
  reader.close();
}
@Override
public void cleanup(Context context) throws IOException, InterruptedException {
  for (String partition : partitions) {
    FileSystem.get(context.getConfiguration())
        .delete(new Path(context.getConfiguration().get(FileOutputFormat.OUTDIR)
            + Path.SEPARATOR_CHAR + partition, FileOutputCommitter.SUCCEEDED_FILE_NAME), false);
  }
  partitions.clear();
  multipleOutputsAvro.close();
}
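A small variation on the cleanup above, assuming the behavior should stay the same: the FileSystem is resolved once and the marker path is built with Path's parent/child constructor instead of string concatenation.

// Sketch of an equivalent cleanup (behavioral equivalence is an assumption).
@Override
public void cleanup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  FileSystem fs = FileSystem.get(conf);                      // resolved once, not per partition
  Path outDir = new Path(conf.get(FileOutputFormat.OUTDIR)); // base output directory
  for (String partition : partitions) {
    fs.delete(new Path(new Path(outDir, partition),
        FileOutputCommitter.SUCCEEDED_FILE_NAME), false);
  }
  partitions.clear();
  multipleOutputsAvro.close();
}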
@Override
public void initializeMemberVariables() {
  xmlFilename = new String("mapred-default.xml");
  configurationClasses = new Class[] {
      MRJobConfig.class, MRConfig.class, JHAdminConfig.class,
      ShuffleHandler.class, FileOutputFormat.class, FileInputFormat.class,
      Job.class, NLineInputFormat.class, JobConf.class,
      FileOutputCommitter.class };

  // Initialize used variables
  configurationPropsToSkipCompare = new HashSet<String>();
  xmlPropsToSkipCompare = new HashSet<String>();

  // Set error modes
  errorIfMissingConfigProps = true;
  errorIfMissingXmlProps = false;

  // Ignore deprecated MR1 properties in JobConf
  configurationPropsToSkipCompare
      .add(JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY);
  configurationPropsToSkipCompare
      .add(JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY);

  // Obsolete entries listed in MAPREDUCE-6057 were removed from trunk
  // but not removed from branch-2.
  xmlPropsToSkipCompare.add("map.sort.class");
  xmlPropsToSkipCompare.add("mapreduce.local.clientfactory.class.name");
  xmlPropsToSkipCompare.add("mapreduce.jobtracker.system.dir");
  xmlPropsToSkipCompare.add("mapreduce.jobtracker.staging.root.dir");
}
@Override
public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
  FileOutputCommitter committer = (FileOutputCommitter) super.getOutputCommitter(context);
  return new Path(committer.getWorkPath(), getUniqueFile(context, "part", extension));
}
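For context, a minimal sketch of a FileOutputFormat subclass (a hypothetical LineOutputFormat, not taken from the original code) that routes its output through getDefaultWorkFile so the FileOutputCommitter can promote the file on commit:

import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical format: writes each Text value as one line into the task's work file.
public class LineOutputFormat extends FileOutputFormat<NullWritable, Text> {
  @Override
  public RecordWriter<NullWritable, Text> getRecordWriter(TaskAttemptContext context)
      throws IOException, InterruptedException {
    Path file = getDefaultWorkFile(context, ".txt"); // lives under the committer's work path
    FileSystem fs = file.getFileSystem(context.getConfiguration());
    final FSDataOutputStream out = fs.create(file, false);
    return new RecordWriter<NullWritable, Text>() {
      @Override
      public void write(NullWritable key, Text value) throws IOException {
        out.write(value.copyBytes());
        out.write('\n');
      }
      @Override
      public void close(TaskAttemptContext ignored) throws IOException {
        out.close();
      }
    };
  }
}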
@Test (timeout = 5000)
public void testOldAPI_MR() throws Exception {
  String outputPath = "/tmp/output";
  JobConf conf = new JobConf();
  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
  org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));
  // the output is attached to reducer
  conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false);
  UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(conf);
  OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName())
      .setUserPayload(vertexPayload);
  DataSinkDescriptor sink = DataSinkDescriptor.create(od,
      OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);

  OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());

  MROutputLegacy output = new MROutputLegacy(outputContext, 2);
  output.initialize();

  assertEquals(false, output.useNewApi);
  assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, output.oldOutputFormat.getClass());
  assertNull(output.newOutputFormat);
  assertEquals(NullWritable.class, output.oldApiTaskAttemptContext.getOutputKeyClass());
  assertEquals(Text.class, output.oldApiTaskAttemptContext.getOutputValueClass());
  assertNull(output.newApiTaskAttemptContext);
  assertNotNull(output.oldRecordWriter);
  assertNull(output.newRecordWriter);
  assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass());
}
@Test (timeout = 5000)
public void testNewAPI_MR() throws Exception {
  String outputPath = "/tmp/output";
  Job job = Job.getInstance();
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
  job.getConfiguration().setBoolean("mapred.reducer.new-api", true);
  // the output is attached to reducer
  job.getConfiguration().setBoolean(MRConfig.IS_MAP_PROCESSOR, false);
  UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(job.getConfiguration());
  OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName())
      .setUserPayload(vertexPayload);
  DataSinkDescriptor sink = DataSinkDescriptor.create(od,
      OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);

  OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());

  MROutputLegacy output = new MROutputLegacy(outputContext, 2);
  output.initialize();

  assertEquals(true, output.useNewApi);
  assertEquals(SequenceFileOutputFormat.class, output.newOutputFormat.getClass());
  assertNull(output.oldOutputFormat);
  assertEquals(NullWritable.class, output.newApiTaskAttemptContext.getOutputKeyClass());
  assertEquals(Text.class, output.newApiTaskAttemptContext.getOutputValueClass());
  assertNull(output.oldApiTaskAttemptContext);
  assertNotNull(output.newRecordWriter);
  assertNull(output.oldRecordWriter);
  assertEquals(FileOutputCommitter.class, output.committer.getClass());
}
@Test (timeout = 5000)
public void testOldAPI_MapperOnly() throws Exception {
  String outputPath = "/tmp/output";
  JobConf conf = new JobConf();
  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
  org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));
  // the output is attached to mapper
  conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, true);
  UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(conf);
  OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName())
      .setUserPayload(vertexPayload);
  DataSinkDescriptor sink = DataSinkDescriptor.create(od,
      OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);

  OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());

  MROutputLegacy output = new MROutputLegacy(outputContext, 2);
  output.initialize();

  assertEquals(false, output.useNewApi);
  assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, output.oldOutputFormat.getClass());
  assertNull(output.newOutputFormat);
  assertEquals(NullWritable.class, output.oldApiTaskAttemptContext.getOutputKeyClass());
  assertEquals(Text.class, output.oldApiTaskAttemptContext.getOutputValueClass());
  assertNull(output.newApiTaskAttemptContext);
  assertNotNull(output.oldRecordWriter);
  assertNull(output.newRecordWriter);
  assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass());
}
@Test (timeout = 5000)
public void testNewAPI_MapperOnly() throws Exception {
  String outputPath = "/tmp/output";
  Job job = Job.getInstance();
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
  job.getConfiguration().setBoolean("mapred.mapper.new-api", true);
  // the output is attached to mapper
  job.getConfiguration().setBoolean(MRConfig.IS_MAP_PROCESSOR, true);
  UserPayload vertexPayload = TezUtils.createUserPayloadFromConf(job.getConfiguration());
  OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName())
      .setUserPayload(vertexPayload);
  DataSinkDescriptor sink = DataSinkDescriptor.create(od,
      OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);

  OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());

  MROutputLegacy output = new MROutputLegacy(outputContext, 2);
  output.initialize();

  assertEquals(true, output.useNewApi);
  assertEquals(SequenceFileOutputFormat.class, output.newOutputFormat.getClass());
  assertNull(output.oldOutputFormat);
  assertEquals(NullWritable.class, output.newApiTaskAttemptContext.getOutputKeyClass());
  assertEquals(Text.class, output.newApiTaskAttemptContext.getOutputValueClass());
  assertNull(output.oldApiTaskAttemptContext);
  assertNotNull(output.newRecordWriter);
  assertNull(output.oldRecordWriter);
  assertEquals(FileOutputCommitter.class, output.committer.getClass());
}
@Override
protected boolean shouldCopy(Path path, DistCpOptions options) {
  if (path.getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
    return false;
  }
  return regex == null || regex.matcher(path.toString()).find();
}
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  return new FileOutputCommitter(FileOutputFormat.getOutputPath(context), context);
}
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  return new FileOutputCommitter(FileOutputFormat.getOutputPath(context), context);
}
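The two committer factories above construct a new FileOutputCommitter on every call; a sketch of caching it per format instance (the field is an assumption), much as FileOutputFormat's own implementation caches its committer:

// Sketch: cache the committer so repeated calls return the same instance.
private FileOutputCommitter committer;

@Override
public synchronized OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  if (committer == null) {
    committer = new FileOutputCommitter(FileOutputFormat.getOutputPath(context), context);
  }
  return committer;
}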
@Before
public void setup() throws Exception {
  Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.DEBUG);
  Logger.getLogger(QueryLogic.class).setLevel(Level.DEBUG);
  Logger.getLogger(RangeCalculator.class).setLevel(Level.DEBUG);

  conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
  conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
  conf.set(WikipediaConfiguration.TABLE_NAME, TABLE_NAME);
  conf.set(WikipediaConfiguration.NUM_PARTITIONS, "1");
  conf.set(WikipediaConfiguration.NUM_GROUPS, "1");

  MockInstance i = new MockInstance();
  c = i.getConnector("root", new PasswordToken(""));
  WikipediaIngester.createTables(c.tableOperations(), TABLE_NAME, false);
  for (String table : TABLE_NAMES) {
    writerMap.put(new Text(table), c.createBatchWriter(table, 1000L, 1000L, 1));
  }

  TaskAttemptID id = new TaskAttemptID();
  TaskAttemptContext context = new TaskAttemptContext(conf, id);

  RawLocalFileSystem fs = new RawLocalFileSystem();
  fs.setConf(conf);

  URL url = ClassLoader.getSystemResource("enwiki-20110901-001.xml");
  Assert.assertNotNull(url);
  File data = new File(url.toURI());
  Path tmpFile = new Path(data.getAbsolutePath());

  // Setup the Mapper
  WikipediaInputSplit split = new WikipediaInputSplit(
      new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null), 0);
  AggregatingRecordReader rr = new AggregatingRecordReader();
  Path ocPath = new Path(tmpFile, "oc");
  OutputCommitter oc = new FileOutputCommitter(ocPath, context);
  fs.deleteOnExit(ocPath);
  StandaloneStatusReporter sr = new StandaloneStatusReporter();
  rr.initialize(split, context);
  MockAccumuloRecordWriter rw = new MockAccumuloRecordWriter();
  WikipediaMapper mapper = new WikipediaMapper();

  // Load data into Mock Accumulo
  Mapper<LongWritable,Text,Text,Mutation>.Context con = mapper.new Context(conf, id, rr, rw, oc, sr, split);
  mapper.run(con);

  // Flush and close record writers.
  rw.close(context);

  table = new QueryLogic();
  table.setMetadataTableName(METADATA_TABLE_NAME);
  table.setTableName(TABLE_NAME);
  table.setIndexTableName(INDEX_TABLE_NAME);
  table.setReverseIndexTableName(RINDEX_TABLE_NAME);
  table.setUseReadAheadIterator(false);
  table.setUnevaluatedFields(Collections.singletonList("TEXT"));
}