public ReduceContextImpl(Configuration conf, TaskAttemptID taskid,
                         RawKeyValueIterator input,
                         Counter inputKeyCounter,
                         Counter inputValueCounter,
                         RecordWriter<KEYOUT,VALUEOUT> output,
                         OutputCommitter committer,
                         StatusReporter reporter,
                         RawComparator<KEYIN> comparator,
                         Class<KEYIN> keyClass,
                         Class<VALUEIN> valueClass
                        ) throws InterruptedException, IOException {
  super(conf, taskid, output, committer, reporter);
  this.input = input;
  this.inputKeyCounter = inputKeyCounter;
  this.inputValueCounter = inputValueCounter;
  this.comparator = comparator;
  this.serializationFactory = new SerializationFactory(conf);
  this.keyDeserializer = serializationFactory.getDeserializer(keyClass);
  this.keyDeserializer.open(buffer);
  this.valueDeserializer = serializationFactory.getDeserializer(valueClass);
  this.valueDeserializer.open(buffer);
  hasMore = input.next();
  this.keyClass = keyClass;
  this.valueClass = valueClass;
  this.conf = conf;
  this.taskid = taskid;
}
/**
 * Defines how each record is written out; the records are whatever the
 * reduce task emits on each call to write().
 */
@Override
public RecordWriter<BaseDimension, BaseStatsValueWritable> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  Connection conn = null;
  IDimensionConverter converter = new DimensionConverterImpl();
  try {
    conn = JdbcManager.getConnection(conf, GlobalConstants.WAREHOUSE_OF_REPORT);
    conn.setAutoCommit(false);
  } catch (SQLException e) {
    logger.error("Failed to obtain a database connection", e);
    throw new IOException("Failed to obtain a database connection", e);
  }
  return new TransformerRecordWriter(conn, conf, converter);
}
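// Because auto-commit is disabled above, the returned writer is responsible
// for committing the rows it writes. A minimal sketch of that contract,
// assuming a hypothetical close() on TransformerRecordWriter (not shown in
// the source):
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
  try {
    // Nothing becomes visible in the warehouse until this commit.
    conn.commit();
  } catch (SQLException e) {
    throw new IOException("Failed to commit report rows", e);
  } finally {
    try {
      conn.close();
    } catch (SQLException e) {
      // Ignore failures while closing the connection.
    }
  }
}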
/** {@inheritDoc} */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  DBConfiguration dbConf = new DBConfiguration(context.getConfiguration());
  String tableName = dbConf.getOutputTableName();
  String[] fieldNames = dbConf.getOutputFieldNames();

  if (fieldNames == null) {
    fieldNames = new String[dbConf.getOutputFieldCount()];
  }

  try {
    Connection connection = dbConf.getConnection();
    PreparedStatement statement = connection.prepareStatement(
        constructQuery(tableName, fieldNames));
    return new com.cloudera.sqoop.mapreduce.db.DBOutputFormat.DBRecordWriter(
        connection, statement);
  } catch (Exception ex) {
    throw new IOException(ex);
  }
}
private void writeBadOutput(TaskAttempt attempt, Configuration conf)
    throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf,
      TypeConverter.fromYarn(attempt.getID()));

  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);

  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key2, val2);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val2);
    theRecordWriter.write(nullWritable, val1);
    theRecordWriter.write(key1, nullWritable);
    theRecordWriter.write(key2, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key1, val1);
  } finally {
    theRecordWriter.close(tContext);
  }

  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}
private void writeOutput(TaskAttempt attempt, Configuration conf)
    throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf,
      TypeConverter.fromYarn(attempt.getID()));

  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);

  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key1, val1);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val1);
    theRecordWriter.write(nullWritable, val2);
    theRecordWriter.write(key2, nullWritable);
    theRecordWriter.write(key1, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key2, val2);
  } finally {
    theRecordWriter.close(tContext);
  }

  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}
@SuppressWarnings("unchecked") private void writeOutput(RecordWriter theRecordWriter, TaskAttemptContext context) throws IOException, InterruptedException { NullWritable nullWritable = NullWritable.get(); try { theRecordWriter.write(key1, val1); theRecordWriter.write(null, nullWritable); theRecordWriter.write(null, val1); theRecordWriter.write(nullWritable, val2); theRecordWriter.write(key2, nullWritable); theRecordWriter.write(key1, null); theRecordWriter.write(null, null); theRecordWriter.write(key2, val2); } finally { theRecordWriter.close(context); } }
/** {@inheritDoc} */
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  DBConfiguration dbConf = new DBConfiguration(context.getConfiguration());
  String tableName = dbConf.getOutputTableName();
  String[] fieldNames = dbConf.getOutputFieldNames();

  if (fieldNames == null) {
    fieldNames = new String[dbConf.getOutputFieldCount()];
  }

  try {
    Connection connection = dbConf.getConnection();
    PreparedStatement statement = connection.prepareStatement(
        constructQuery(tableName, fieldNames));
    return new DBRecordWriter(connection, statement);
  } catch (Exception ex) {
    // Wrap the exception itself rather than just its message, so the
    // original cause and stack trace are preserved.
    throw new IOException(ex);
  }
}
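// Both DB variants above delegate SQL generation to constructQuery(...). For
// reference, a minimal sketch of the parameterized INSERT statement it is
// expected to build, modeled on the stock Hadoop DBOutputFormat; treat the
// exact semantics as an assumption rather than this project's verified code.
public String constructQuery(String table, String[] fieldNames) {
  if (fieldNames == null) {
    throw new IllegalArgumentException("Field names may not be null");
  }
  // Builds: INSERT INTO table (f1,f2,...) VALUES (?,?,...);
  StringBuilder query = new StringBuilder();
  query.append("INSERT INTO ").append(table);
  if (fieldNames.length > 0 && fieldNames[0] != null) {
    query.append(" (");
    for (int i = 0; i < fieldNames.length; i++) {
      query.append(fieldNames[i]);
      if (i != fieldNames.length - 1) {
        query.append(",");
      }
    }
    query.append(")");
  }
  query.append(" VALUES (");
  for (int i = 0; i < fieldNames.length; i++) {
    query.append("?");
    if (i != fieldNames.length - 1) {
      query.append(",");
    }
  }
  query.append(");");
  return query.toString();
}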
/**
 * Add the first mapper, which reads input from the input context and
 * writes to the queue.
 */
@SuppressWarnings("unchecked")
void addMapper(TaskInputOutputContext inputContext,
    ChainBlockingQueue<KeyValuePair<?, ?>> output, int index)
    throws IOException, InterruptedException {
  Configuration conf = getConf(index);
  Class<?> keyOutClass = conf.getClass(MAPPER_OUTPUT_KEY_CLASS, Object.class);
  Class<?> valueOutClass = conf.getClass(MAPPER_OUTPUT_VALUE_CLASS,
      Object.class);

  RecordReader rr = new ChainRecordReader(inputContext);
  RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass, output,
      conf);
  Mapper.Context mapperContext = createMapContext(rr, rw,
      (MapContext) inputContext, getConf(index));
  MapRunner runner = new MapRunner(mappers.get(index), mapperContext, rr, rw);
  threads.add(runner);
}
/**
 * Add a mapper that reads from and writes to the queues.
 */
@SuppressWarnings("unchecked")
void addMapper(ChainBlockingQueue<KeyValuePair<?, ?>> input,
    ChainBlockingQueue<KeyValuePair<?, ?>> output,
    TaskInputOutputContext context, int index)
    throws IOException, InterruptedException {
  Configuration conf = getConf(index);
  Class<?> keyClass = conf.getClass(MAPPER_INPUT_KEY_CLASS, Object.class);
  Class<?> valueClass = conf.getClass(MAPPER_INPUT_VALUE_CLASS, Object.class);
  Class<?> keyOutClass = conf.getClass(MAPPER_OUTPUT_KEY_CLASS, Object.class);
  Class<?> valueOutClass = conf.getClass(MAPPER_OUTPUT_VALUE_CLASS,
      Object.class);

  RecordReader rr = new ChainRecordReader(keyClass, valueClass, input, conf);
  RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass, output,
      conf);
  MapRunner runner = new MapRunner(mappers.get(index),
      createMapContext(rr, rw, context, getConf(index)), rr, rw);
  threads.add(runner);
}
private void writeOutput(RecordWriter theRecordWriter,
    TaskAttemptContext context) throws IOException, InterruptedException {
  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key1, val1);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val1);
    theRecordWriter.write(nullWritable, val2);
    theRecordWriter.write(key2, nullWritable);
    theRecordWriter.write(key1, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key2, val2);
  } finally {
    theRecordWriter.close(context);
  }
}
/**
 * Write random values to the writer assuming a table created using
 * {@link #FAMILIES} as column family descriptors.
 */
private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer,
    TaskAttemptContext context, Set<byte[]> families, int numRows)
    throws IOException, InterruptedException {
  byte[] keyBytes = new byte[Bytes.SIZEOF_INT];
  int valLength = 10;
  byte[] valBytes = new byte[valLength];

  int taskId = context.getTaskAttemptID().getTaskID().getId();
  assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
  final byte[] qualifier = Bytes.toBytes("data");
  Random random = new Random();
  for (int i = 0; i < numRows; i++) {
    Bytes.putInt(keyBytes, 0, i);
    random.nextBytes(valBytes);
    ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
    for (byte[] family : families) {
      Cell kv = new KeyValue(keyBytes, family, qualifier, valBytes);
      writer.write(key, kv);
    }
  }
}

/**
 * Write random values to the writer assuming a table created using
 * {@link #FAMILIES} as column family descriptors.
 */
private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, KeyValue> writer,
    TaskAttemptContext context, Set<byte[]> families, int numRows)
    throws IOException, InterruptedException {
  byte[] keyBytes = new byte[Bytes.SIZEOF_INT];
  int valLength = 10;
  byte[] valBytes = new byte[valLength];

  int taskId = context.getTaskAttemptID().getTaskID().getId();
  assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
  final byte[] qualifier = Bytes.toBytes("data");
  Random random = new Random();
  for (int i = 0; i < numRows; i++) {
    Bytes.putInt(keyBytes, 0, i);
    random.nextBytes(valBytes);
    ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
    for (byte[] family : families) {
      KeyValue kv = new KeyValue(keyBytes, family, qualifier, valBytes);
      writer.write(key, kv);
    }
  }
}
@SuppressWarnings({ "unchecked", "rawtypes" }) @Override public void prepareToWrite(RecordWriter writer) throws IOException { this.writer = writer; Properties props = getUDFProperties(); String s = props.getProperty(ResourceSchema.class.getName()); if (!StringUtils.hasText(s)) { log.warn("No resource schema found; using an empty one...."); this.schema = new ResourceSchema(); } else { this.schema = IOUtils.deserializeFromBase64(s); } this.pigTuple = new PigTuple(schema); }
@Test(enabled = true)
public void testWriteLongData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<Long> sess =
      new MneDurableOutputSession<Long>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<Long> mdvalue =
      new MneDurableOutputValue<Long>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<Long>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<Long>>();
  RecordWriter<NullWritable, MneDurableOutputValue<Long>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  Long val = null;
  for (int i = 0; i < m_reccnt; ++i) {
    val = m_rand.nextLong();
    m_sum += val;
    writer.write(nada, mdvalue.of(val));
  }
  writer.close(m_tacontext);
  sess.close();
}
@Test(enabled = true)
public void testWritePersonData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<Person<Long>> sess =
      new MneDurableOutputSession<Person<Long>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<Person<Long>> mdvalue =
      new MneDurableOutputValue<Person<Long>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<Person<Long>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<Person<Long>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<Person<Long>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  Person<Long> person = null;
  for (int i = 0; i < m_reccnt; ++i) {
    person = sess.newDurableObjectRecord();
    person.setAge((short) m_rand.nextInt(50));
    person.setName(String.format("Name: [%s]", Utils.genRandomString()), true);
    m_sumage += person.getAge();
    writer.write(nada, mdvalue.of(person));
  }
  writer.close(m_tacontext);
  sess.close();
}
@Override @SuppressWarnings("unchecked") public RecordWriter<K, T> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); Class<? extends DataStore<K,T>> dataStoreClass = (Class<? extends DataStore<K,T>>) conf.getClass(DATA_STORE_CLASS, null); Class<K> keyClass = (Class<K>) conf.getClass(OUTPUT_KEY_CLASS, null); Class<T> rowClass = (Class<T>) conf.getClass(OUTPUT_VALUE_CLASS, null); final DataStore<K, T> store = DataStoreFactory.createDataStore(dataStoreClass, keyClass, rowClass, context.getConfiguration()); setOutputPath(store, context); return new GoraRecordWriter(store, context); }
@Override
public RecordWriter<K,VariantContextWritable> getRecordWriter(
    TaskAttemptContext context) throws IOException {
  final Configuration conf = ContextUtil.getConfiguration(context);
  initBaseOF(conf);

  if (baseOF.getHeader() == null) {
    if (conf.get(OUT_PATH_PROP) != null) {
      final Path p = new Path(conf.get(OUT_PATH_PROP));
      baseOF.readHeaderFrom(p, p.getFileSystem(conf));
    }
  }

  if (conf.getBoolean(GaeaVCFOutputFormat.HEADER_MODIFY, false)) {
    final boolean wh = ContextUtil.getConfiguration(context).getBoolean(
        KeyIgnoringVCFOutputFormat.WRITE_HEADER_PROPERTY, true);
    return new GaeaKeyIgnoringVCFRecordWriter<K>(
        getDefaultWorkFile(context, ""), baseOF.getHeader(), wh, context);
  }
  return baseOF.getRecordWriter(context, getDefaultWorkFile(context, ""));
}
@Test(enabled = true)
public void testWriteBufferData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<DurableBuffer<?>> sess =
      new MneDurableOutputSession<DurableBuffer<?>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<DurableBuffer<?>> mdvalue =
      new MneDurableOutputValue<DurableBuffer<?>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<DurableBuffer<?>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  DurableBuffer<?> dbuf = null;
  Checksum cs = new CRC32();
  cs.reset();
  for (int i = 0; i < m_reccnt; ++i) {
    dbuf = genupdDurableBuffer(sess, cs);
    Assert.assertNotNull(dbuf);
    writer.write(nada, mdvalue.of(dbuf));
  }
  m_checksum = cs.getValue();
  writer.close(m_tacontext);
  sess.close();
}
@Override
public RecordWriter<NullWritable,SpreadSheetCellDAO> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  // Check if the MIME type is set; if not, assume the new Excel format (.xlsx).
  Configuration conf = context.getConfiguration();
  String defaultConf = conf.get(HadoopOfficeWriteConfiguration.CONF_MIMETYPE,
      ExcelFileOutputFormat.DEFAULT_MIMETYPE);
  conf.set(HadoopOfficeWriteConfiguration.CONF_MIMETYPE, defaultConf);
  // Add the file suffix matching the MIME type.
  Path file = getDefaultWorkFile(context,
      ExcelFileOutputFormat.getSuffix(conf.get(HadoopOfficeWriteConfiguration.CONF_MIMETYPE)));
  try {
    return new ExcelRecordWriter<>(
        HadoopUtil.getDataOutputStream(conf, file, context,
            getCompressOutput(context),
            getOutputCompressorClass(context,
                ExcelFileOutputFormat.defaultCompressorClass)),
        file.getName(), conf);
  } catch (InvalidWriterConfigurationException | InvalidCellSpecificationException
      | FormatNotUnderstoodException | GeneralSecurityException
      | OfficeWriterException e) {
    LOG.error(e);
  }
  return null;
}
@Override
public RecordWriter<K,GridmixRecord> getRecordWriter(
    TaskAttemptContext job) throws IOException {
  Path file = getDefaultWorkFile(job, "");
  final DataOutputStream fileOut = new DataOutputStream(
      CompressionEmulationUtil.getPossiblyCompressedOutputStream(file,
          job.getConfiguration()));
  return new RecordWriter<K,GridmixRecord>() {
    @Override
    public void write(K ignored, GridmixRecord value) throws IOException {
      // Let the Gridmix record fill itself.
      value.write(fileOut);
    }
    @Override
    public void close(TaskAttemptContext ctxt) throws IOException {
      fileOut.close();
    }
  };
}
@Override
public RecordWriter<NullWritable,IParser> getRecordWriter(
    final TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
  Configuration config = taskAttemptContext.getConfiguration();
  String extension = ".mds";
  Path file = getDefaultWorkFile(taskAttemptContext, extension);
  FileSystem fs = file.getFileSystem(config);
  long dfsBlockSize = Math.max(fs.getDefaultBlockSize(file), 1024 * 1024 * 256);
  OutputStream out = fs.create(file, true, 4096,
      fs.getDefaultReplication(file), dfsBlockSize);
  return new MDSParserRecordWriter(out,
      new jp.co.yahoo.dataplatform.config.Configuration());
}
@Override
public RecordWriter<NullWritable, SAMRecordWritable> getRecordWriter(
    TaskAttemptContext ctx, Path outputPath) throws IOException {
  // The writers require a header in order to create a codec, even if
  // the header isn't being written out.
  setSAMHeader(samheader);
  setWriteHeader(writeHeader);
  return super.getRecordWriter(ctx, outputPath);
}
/** {@inheritDoc} */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  try {
    return new UpdateRecordWriter(context);
  } catch (Exception e) {
    throw new IOException(e);
  }
}

/** {@inheritDoc} */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  try {
    return new OracleExportRecordWriter<K, V>(context);
  } catch (Exception e) {
    throw new IOException(e);
  }
}

/** {@inheritDoc} */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  try {
    return new CubridUpsertRecordWriter(context);
  } catch (Exception e) {
    throw new IOException(e);
  }
}

/** {@inheritDoc} */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  try {
    return new OracleUpsertRecordWriter(context);
  } catch (Exception e) {
    throw new IOException(e);
  }
}

/** {@inheritDoc} */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  try {
    return new DelegatingRecordWriter(context);
  } catch (ClassNotFoundException cnfe) {
    throw new IOException(cnfe);
  }
}

/** {@inheritDoc} */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  try {
    return new ExportBatchRecordWriter<K, V>(context);
  } catch (Exception e) {
    throw new IOException(e);
  }
}

/** {@inheritDoc} */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  try {
    return new ExportCallRecordWriter(context);
  } catch (Exception e) {
    throw new IOException(e);
  }
}

/** {@inheritDoc} */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  try {
    return new SQLServerUpdateRecordWriter(context);
  } catch (Exception e) {
    throw new IOException(e);
  }
}
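// All eight variants above share one shape: construct a concrete RecordWriter
// and wrap any failure in an IOException. A hedged sketch of how such an
// output format is typically wired into a job; the format, key, and value
// classes named here are illustrative, not taken from the source.
Job job = Job.getInstance(conf, "db-export");
job.setOutputFormatClass(ExportOutputFormat.class);
job.setOutputKeyClass(SqoopRecord.class);
job.setOutputValueClass(NullWritable.class);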