/**
 * Adds a named output for the job.
 * <p/>
 *
 * @param job               job to add the named output
 * @param namedOutput       named output name; it has to be a word, letters
 *                          and numbers only, and cannot be the word 'part' as
 *                          that is reserved for the default output
 * @param outputFormatClass OutputFormat class
 * @param keySchema         Schema for the Key
 * @param valueSchema       Schema for the Value (needed for
 *                          AvroKeyValueOutputFormat; may be null otherwise)
 */
@SuppressWarnings("rawtypes")
public static void addNamedOutput(Job job, String namedOutput,
    Class<? extends OutputFormat> outputFormatClass,
    Schema keySchema, Schema valueSchema) {
  checkNamedOutputName(job, namedOutput, true);
  Configuration conf = job.getConfiguration();
  // Append this name to the space-separated list of registered named outputs.
  conf.set(MULTIPLE_OUTPUTS, conf.get(MULTIPLE_OUTPUTS, "") + " " + namedOutput);
  conf.setClass(MO_PREFIX + namedOutput + FORMAT, outputFormatClass, OutputFormat.class);
  conf.set(MO_PREFIX + namedOutput + ".keyschema", keySchema.toString());
  if (valueSchema != null) {
    conf.set(MO_PREFIX + namedOutput + ".valueschema", valueSchema.toString());
  }
}
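// A minimal driver-side usage sketch for addNamedOutput above. The job name, the
// "stats" output name, and this wrapper method are hypothetical; AvroKeyValueOutputFormat
// and Schema.create(...) are standard Avro classes assumed to be on the classpath.
public static Job configureExampleNamedOutput(Configuration conf) throws IOException {
  Job job = Job.getInstance(conf, "named-output-example");
  AvroMultipleOutputs.addNamedOutput(job, "stats",
      AvroKeyValueOutputFormat.class,
      Schema.create(Schema.Type.STRING),  // key schema
      Schema.create(Schema.Type.LONG));   // value schema; may be null when the format ignores values
  return job;
}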
@Override
public void updateTable(ExportJobContext context)
    throws IOException, ExportException {
  logExportTableDetails(context);
  context.setConnManager(this);

  @SuppressWarnings("rawtypes")
  Class<? extends OutputFormat> oraOopOutputFormatClass;
  try {
    // Referencing the class can fail at link time if its dependencies are missing.
    oraOopOutputFormatClass = OraOopOutputFormatUpdate.class;
  } catch (NoClassDefFoundError ex) {
    explainWhyExportClassCannotBeLoaded(ex, "OraOopOutputFormatUpdate");
    throw ex;
  }

  JdbcUpdateExportJob exportJob =
      new JdbcUpdateExportJob(context, null, null, oraOopOutputFormatClass);
  exportJob.runExport();
}
@Override
protected Class<? extends OutputFormat> getOutputFormatClass()
    throws ClassNotFoundException {
  if (isHCatJob) {
    LOG.debug("Returning HCatOutputFormat for output format");
    return SqoopHCatUtilities.getOutputFormatClass();
  }
  if (options.getFileLayout() == SqoopOptions.FileLayout.TextFile) {
    return RawKeyTextOutputFormat.class;
  } else if (options.getFileLayout() == SqoopOptions.FileLayout.SequenceFile) {
    return SequenceFileOutputFormat.class;
  } else if (options.getFileLayout() == SqoopOptions.FileLayout.AvroDataFile) {
    return AvroOutputFormat.class;
  } else if (options.getFileLayout() == SqoopOptions.FileLayout.ParquetFile) {
    return DatasetKeyOutputFormat.class;
  }
  return null;
}
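// A hedged sketch (hypothetical caller method) of how the class resolved above is
// normally applied to a job; Job.setOutputFormatClass is the standard Hadoop API.
protected void configureOutputFormat(Job job) throws ClassNotFoundException {
  Class<? extends OutputFormat> outputFormatClass = getOutputFormatClass();
  if (outputFormatClass != null) {
    job.setOutputFormatClass(outputFormatClass);
  }
}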
private void writeBadOutput(TaskAttempt attempt, Configuration conf)
    throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf,
      TypeConverter.fromYarn(attempt.getID()));

  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);

  NullWritable nullWritable = NullWritable.get();
  try {
    // Write records (including null and NullWritable keys/values) in an order
    // that differs from writeOutput().
    theRecordWriter.write(key2, val2);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val2);
    theRecordWriter.write(nullWritable, val1);
    theRecordWriter.write(key1, nullWritable);
    theRecordWriter.write(key2, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key1, val1);
  } finally {
    theRecordWriter.close(tContext);
  }

  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}
private void writeOutput(TaskAttempt attempt, Configuration conf)
    throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf,
      TypeConverter.fromYarn(attempt.getID()));

  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);

  NullWritable nullWritable = NullWritable.get();
  try {
    // Write the expected record set, including null and NullWritable keys/values.
    theRecordWriter.write(key1, val1);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val1);
    theRecordWriter.write(nullWritable, val2);
    theRecordWriter.write(key2, nullWritable);
    theRecordWriter.write(key1, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key2, val2);
  } finally {
    theRecordWriter.close(tContext);
  }

  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}
private void testSuccessfulJob(String filename,
    Class<? extends OutputFormat> output, String[] exclude) throws Exception {
  Path outDir = getNewOutputDir();
  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 0);
  job.setOutputFormatClass(output);

  assertTrue("Job failed!", job.waitForCompletion(true));

  Path testFile = new Path(outDir, filename);
  assertTrue("Done file missing for job " + job.getJobID(), fs.exists(testFile));

  // check that the files from the exclude set do not exist
  for (String ex : exclude) {
    Path file = new Path(outDir, ex);
    assertFalse("File " + file + " should not be present for successful job "
        + job.getJobID(), fs.exists(file));
  }
}
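// A hedged example of how the helper above might be driven from a test, assuming the stock
// TextOutputFormat whose FileOutputCommitter writes a "_SUCCESS" marker for successful jobs.
public void testSuccessMarkerWritten() throws Exception {
  testSuccessfulJob(FileOutputCommitter.SUCCEEDED_FILE_NAME,
      TextOutputFormat.class, new String[] {});
}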
private void testFailedJob(String fileName,
    Class<? extends OutputFormat> output, String[] exclude) throws Exception {
  Path outDir = getNewOutputDir();
  Job job = MapReduceTestUtil.createFailJob(conf, outDir, inDir);
  job.setOutputFormatClass(output);

  assertFalse("Job did not fail!", job.waitForCompletion(true));

  if (fileName != null) {
    Path testFile = new Path(outDir, fileName);
    assertTrue("File " + testFile + " missing for failed job " + job.getJobID(),
        fs.exists(testFile));
  }

  // check that the files from the exclude set do not exist
  for (String ex : exclude) {
    Path file = new Path(outDir, ex);
    assertFalse("File " + file + " should not be present for failed job "
        + job.getJobID(), fs.exists(file));
  }
}
private org.apache.hadoop.mapreduce.OutputCommitter createOutputCommitter(
    boolean newApiCommitter, JobID jobId, Configuration conf) throws Exception {
  org.apache.hadoop.mapreduce.OutputCommitter committer = null;

  LOG.info("OutputCommitter set in config "
      + conf.get("mapred.output.committer.class"));

  if (newApiCommitter) {
    // New API: the committer is obtained from the job's OutputFormat.
    org.apache.hadoop.mapreduce.TaskID taskId =
        new org.apache.hadoop.mapreduce.TaskID(jobId, TaskType.MAP, 0);
    org.apache.hadoop.mapreduce.TaskAttemptID taskAttemptID =
        new org.apache.hadoop.mapreduce.TaskAttemptID(taskId, 0);
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext =
        new TaskAttemptContextImpl(conf, taskAttemptID);
    OutputFormat outputFormat =
        ReflectionUtils.newInstance(taskContext.getOutputFormatClass(), conf);
    committer = outputFormat.getOutputCommitter(taskContext);
  } else {
    // Old API: the committer class is read directly from the configuration.
    committer = ReflectionUtils.newInstance(conf.getClass(
        "mapred.output.committer.class", FileOutputCommitter.class,
        org.apache.hadoop.mapred.OutputCommitter.class), conf);
  }
  LOG.info("OutputCommitter is " + committer.getClass().getName());
  return committer;
}
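// A hedged sketch of the commit lifecycle the committer returned above takes part in.
// The method name and the jobContext/taskContext parameters are hypothetical; the calls
// themselves are the standard new-API OutputCommitter methods.
private void runCommitterLifecycle(JobID jobId, Configuration conf,
    org.apache.hadoop.mapreduce.JobContext jobContext,
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext) throws Exception {
  org.apache.hadoop.mapreduce.OutputCommitter committer =
      createOutputCommitter(true, jobId, conf);
  committer.setupJob(jobContext);       // once per job, before tasks run
  committer.setupTask(taskContext);     // once per task attempt
  if (committer.needsTaskCommit(taskContext)) {
    committer.commitTask(taskContext);  // promote the attempt's output
  }
  committer.commitJob(jobContext);      // finalize the job's output
}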
MapRunner() throws IOException, InterruptedException, ClassNotFoundException {
  // instantiate the real mapper that does the work
  mapper = ReflectionUtils.newInstance(mapClass, outer.getConfiguration());
  @SuppressWarnings("unchecked")
  OutputFormat<K2, V2> outputFormat = (OutputFormat<K2, V2>)
      ReflectionUtils.newInstance(outer.getOutputFormatClass(),
          outer.getConfiguration());
  try {
    // outputFormat is not initialized; we rely on everything it needs being
    // obtainable from the AssignmentManager singleton.
    writer = outputFormat.getRecordWriter(outer);
    subcontext = (Context) ReflectionUtil.createMapperContext(
        mapper, outer.getConfiguration(), outer.getTaskAttemptID(),
        new SubMapRecordReader(), writer, outer.getOutputCommitter(),
        new SubMapStatusReporter(), outer.getInputSplit());
  } catch (Exception e) {
    throw new IOException("Error creating mapper context", e);
  }
}
public LocalMapTask(InputFormat<INKEY, INVALUE> inputFormat,
    OutputFormat<OUTKEY, OUTVALUE> outputFormat, Configuration conf, int id,
    InputSplit split, ContentPumpReporter reporter, AtomicInteger pctProgress) {
  this.inputFormat = inputFormat;
  this.outputFormat = outputFormat;
  this.conf = conf;
  this.id = id;
  this.split = split;
  this.pctProgress = pctProgress;
  this.reporter = reporter;
  try {
    mapperClass = job.getMapperClass();
  } catch (ClassNotFoundException e) {
    LOG.error("Mapper class not found", e);
  }
}
@Test(enabled = true)
public void testWriteBufferData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<DurableBuffer<?>> sess =
      new MneDurableOutputSession<DurableBuffer<?>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<DurableBuffer<?>> mdvalue =
      new MneDurableOutputValue<DurableBuffer<?>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<DurableBuffer<?>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  DurableBuffer<?> dbuf = null;
  Checksum cs = new CRC32();
  cs.reset();
  for (int i = 0; i < m_reccnt; ++i) {
    dbuf = genupdDurableBuffer(sess, cs);
    Assert.assertNotNull(dbuf);
    writer.write(nada, mdvalue.of(dbuf));
  }
  m_checksum = cs.getValue();
  writer.close(m_tacontext);
  sess.close();
}
@Test(enabled = true)
public void testWriteChunkData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<DurableChunk<?>> sess =
      new MneDurableOutputSession<DurableChunk<?>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<DurableChunk<?>> mdvalue =
      new MneDurableOutputValue<DurableChunk<?>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<DurableChunk<?>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<DurableChunk<?>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<DurableChunk<?>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  DurableChunk<?> dchunk = null;
  Checksum cs = new CRC32();
  cs.reset();
  for (int i = 0; i < m_reccnt; ++i) {
    dchunk = genupdDurableChunk(sess, cs);
    Assert.assertNotNull(dchunk);
    writer.write(nada, mdvalue.of(dchunk));
  }
  m_checksum = cs.getValue();
  writer.close(m_tacontext);
  sess.close();
}
@Test(enabled = true)
public void testWritePersonData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<Person<Long>> sess =
      new MneDurableOutputSession<Person<Long>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<Person<Long>> mdvalue =
      new MneDurableOutputValue<Person<Long>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<Person<Long>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<Person<Long>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<Person<Long>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  Person<Long> person = null;
  for (int i = 0; i < m_reccnt; ++i) {
    person = sess.newDurableObjectRecord();
    person.setAge((short) m_rand.nextInt(50));
    person.setName(String.format("Name: [%s]", Utils.genRandomString()), true);
    m_sumage += person.getAge();
    writer.write(nada, mdvalue.of(person));
  }
  writer.close(m_tacontext);
  sess.close();
}
@Test(enabled = true)
public void testWriteLongData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<Long> sess =
      new MneDurableOutputSession<Long>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<Long> mdvalue =
      new MneDurableOutputValue<Long>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<Long>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<Long>>();
  RecordWriter<NullWritable, MneDurableOutputValue<Long>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  Long val = null;
  for (int i = 0; i < m_reccnt; ++i) {
    val = m_rand.nextLong();
    m_sum += val;
    writer.write(nada, mdvalue.of(val));
  }
  writer.close(m_tacontext);
  sess.close();
}
@Test
public void testBasicMultiSave() throws Exception {
  JobConf conf = createJobConf();
  conf.set(ConfigurationOptions.ES_RESOURCE, "oldapi/multi-save");

  MultiOutputFormat.addOutputFormat(conf, EsOutputFormat.class);
  MultiOutputFormat.addOutputFormat(conf, PrintStreamOutputFormat.class);
  //MultiOutputFormat.addOutputFormat(conf, TextOutputFormat.class);

  PrintStreamOutputFormat.stream(conf, Stream.OUT);
  //conf.set("mapred.output.dir", "foo/bar");
  //FileOutputFormat.setOutputPath(conf, new Path("foo/bar"));

  conf.setClass("mapred.output.format.class", MultiOutputFormat.class,
      OutputFormat.class);
  runJob(conf);
}
private org.apache.hadoop.mapreduce.OutputCommitter createOutputCommitter(
    boolean newApiCommitter, JobID jobId, Configuration conf) throws Exception {
  org.apache.hadoop.mapreduce.OutputCommitter committer = null;

  LOG.info("OutputCommitter set in config "
      + conf.get("mapred.output.committer.class"));

  if (newApiCommitter) {
    // New API: resolve the committer from the job's OutputFormat
    // (the TaskID constructor's boolean flag marks this as a map task).
    org.apache.hadoop.mapreduce.TaskID taskId =
        new org.apache.hadoop.mapreduce.TaskID(jobId, true, 0);
    org.apache.hadoop.mapreduce.TaskAttemptID taskAttemptID =
        new org.apache.hadoop.mapreduce.TaskAttemptID(taskId, 0);
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext =
        new TaskAttemptContextImpl(conf, taskAttemptID);
    OutputFormat outputFormat =
        ReflectionUtils.newInstance(taskContext.getOutputFormatClass(), conf);
    committer = outputFormat.getOutputCommitter(taskContext);
  } else {
    // Old API: the committer class is read directly from the configuration.
    committer = ReflectionUtils.newInstance(conf.getClass(
        "mapred.output.committer.class", FileOutputCommitter.class,
        org.apache.hadoop.mapred.OutputCommitter.class), conf);
  }
  LOG.info("OutputCommitter is " + committer.getClass().getName());
  return committer;
}
private void testSuccessfulJob(String filename,
    Class<? extends OutputFormat> output, String[] exclude) throws Exception {
  Path outDir = getNewOutputDir();
  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 0);
  job.setOutputFormatClass(output);

  assertTrue("Job failed!", job.waitForCompletion(true));

  Path testFile = new Path(outDir, filename);
  assertTrue("Done file missing for job ", fs.exists(testFile));

  // check that the files from the exclude set do not exist
  for (String ex : exclude) {
    Path file = new Path(outDir, ex);
    assertFalse("File " + file + " should not be present for successful job ",
        fs.exists(file));
  }
}
private void testFailedJob(String fileName,
    Class<? extends OutputFormat> output, String[] exclude) throws Exception {
  Path outDir = getNewOutputDir();
  Job job = MapReduceTestUtil.createFailJob(conf, outDir, inDir);
  job.setOutputFormatClass(output);

  assertFalse("Job did not fail!", job.waitForCompletion(true));

  if (fileName != null) {
    Path testFile = new Path(outDir, fileName);
    assertTrue("File " + testFile + " missing for failed job ", fs.exists(testFile));
  }

  // check that the files from the exclude set do not exist
  for (String ex : exclude) {
    Path file = new Path(outDir, ex);
    assertFalse("File " + file + " should not be present for failed job ",
        fs.exists(file));
  }
}
@SuppressWarnings("deprecation") public static boolean runJob(Configuration conf, Class<? extends InputFormat<?,?>> inputFormatClass, Class<? extends Mapper<?,?,?,?>> mapperClass, Class<? extends OutputFormat<?,?>> outputFormatClass) throws IOException, InterruptedException, ClassNotFoundException { Job job = new Job(conf); job.setInputFormatClass(inputFormatClass); job.setMapperClass(mapperClass); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setOutputFormatClass(outputFormatClass); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); boolean ret = job.waitForCompletion(true); // Hadoop 1.0 (and 0.20) have nasty bug when job committer is not called in // LocalJobRuner if (isHadoop1()) { callOutputCommitter(job, outputFormatClass); } return ret; }
public static TitanGraph generateGraph(final ModifiableHadoopConfiguration titanConf) {
  final Class<? extends OutputFormat> format =
      titanConf.getClass(OUTPUT_FORMAT, OutputFormat.class, OutputFormat.class);
  if (TitanOutputFormat.class.isAssignableFrom(format)) {
    ModifiableConfiguration mc = titanConf.getOutputConf();
    boolean present = mc.has(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE);
    LOGGER.trace("Keyspace in_config=" + present + " value="
        + mc.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE));
    TitanGraph g = TitanFactory.open(mc);
    // final boolean checkTypes = titanConf.get(TitanHadoopConfiguration.OUTPUT_TITAN_TYPE_CHECKING);
    //
    // if (checkTypes) {
    //   FaunusSchemaManager.getTypeManager(null).setSchemaProvider(new SchemaContainer(g));
    // }
    return g;
  } else {
    throw new RuntimeException(
        "The provided graph output format is not a supported TitanOutputFormat: "
            + format.getName());
  }
}
@Test
public void testAddNamedOutputWithValueSchemaSet() throws Exception {
  // given
  String namedOutput = "metaI0";
  Configuration conf = new Configuration();
  doReturn(conf).when(job).getConfiguration();

  // execute
  AvroMultipleOutputs.addNamedOutput(job, namedOutput, outputFormatClass,
      Identifier.SCHEMA$, Identifier.SCHEMA$);

  // assert
  assertEquals(' ' + namedOutput, conf.get(MULTIPLE_OUTPUTS));
  assertTrue(outputFormatClass == conf.getClass(MO_PREFIX + namedOutput + FORMAT,
      OutputFormat.class));
  assertEquals(Identifier.SCHEMA$.toString(),
      conf.get(MO_PREFIX + namedOutput + ".keyschema"));
  assertEquals(Identifier.SCHEMA$.toString(),
      conf.get(MO_PREFIX + namedOutput + ".valueschema"));
}
@Override
protected Class<? extends OutputFormat> getOutputFormatClass()
    throws ClassNotFoundException {
  if (isHCatJob) {
    return SqoopHCatUtilities.getOutputFormatClass();
  } else {
    return RawKeyTextOutputFormat.class;
  }
}
public ImportJobBase(final SqoopOptions opts,
    final Class<? extends Mapper> mapperClass,
    final Class<? extends InputFormat> inputFormatClass,
    final Class<? extends OutputFormat> outputFormatClass,
    final ImportJobContext context) {
  super(opts, mapperClass, inputFormatClass, outputFormatClass);
  this.context = context;
}