/** {@inheritDoc} */
public RecordWriter<K, V> getRecordWriter(FileSystem filesystem,
    JobConf job, String name, Progressable progress) throws IOException {
  org.apache.hadoop.mapreduce.RecordWriter<K, V> w = super.getRecordWriter(
      new TaskAttemptContextImpl(job,
          TaskAttemptID.forName(job.get(MRJobConfig.TASK_ATTEMPT_ID))));
  org.apache.hadoop.mapreduce.lib.db.DBOutputFormat.DBRecordWriter writer =
      (org.apache.hadoop.mapreduce.lib.db.DBOutputFormat.DBRecordWriter) w;
  try {
    return new DBRecordWriter(writer.getConnection(), writer.getStatement());
  } catch (SQLException se) {
    throw new IOException(se);
  }
}
public RecordWriter<WritableComparable<?>, Writable> getRecordWriter(
    final FileSystem fs, JobConf job, String name, final Progressable progress)
    throws IOException {
  final Path segmentDumpFile = new Path(
      FileOutputFormat.getOutputPath(job), name);

  // Get the old copy out of the way
  if (fs.exists(segmentDumpFile))
    fs.delete(segmentDumpFile, true);

  final PrintStream printStream = new PrintStream(
      fs.create(segmentDumpFile));
  return new RecordWriter<WritableComparable<?>, Writable>() {
    public synchronized void write(WritableComparable<?> key, Writable value)
        throws IOException {
      printStream.println(value);
    }

    public synchronized void close(Reporter reporter) throws IOException {
      printStream.close();
    }
  };
}
@Override
public RecordWriter<Text, NutchIndexAction> getRecordWriter(
    FileSystem ignored, JobConf job, String name, Progressable progress)
    throws IOException {
  final IndexWriters writers = new IndexWriters(job);
  writers.open(job, name);

  return new RecordWriter<Text, NutchIndexAction>() {
    public void close(Reporter reporter) throws IOException {
      writers.close();
    }

    public void write(Text key, NutchIndexAction indexAction) throws IOException {
      if (indexAction.action == NutchIndexAction.ADD) {
        writers.write(indexAction.doc);
      } else if (indexAction.action == NutchIndexAction.DELETE) {
        writers.delete(key.toString());
      }
    }
  };
}
@Override
public RecordWriter<NullWritable, SpreadSheetCellDAO> getRecordWriter(FileSystem ignored,
    JobConf conf, String name, Progressable progress) throws IOException {
  // check if mimeType is set. If not assume new Excel format (.xlsx)
  String defaultConf = conf.get(HadoopOfficeWriteConfiguration.CONF_MIMETYPE,
      ExcelFileOutputFormat.DEFAULT_MIMETYPE);
  conf.set(HadoopOfficeWriteConfiguration.CONF_MIMETYPE, defaultConf);
  Path file = getTaskOutputPath(conf, name);
  // add suffix
  file = file.suffix(ExcelFileOutputFormat.getSuffix(
      conf.get(HadoopOfficeWriteConfiguration.CONF_MIMETYPE)));
  try {
    return new ExcelRecordWriter<>(
        HadoopUtil.getDataOutputStream(conf, file, progress, getCompressOutput(conf),
            getOutputCompressorClass(conf, ExcelFileOutputFormat.defaultCompressorClass)),
        file.getName(), conf);
  } catch (InvalidWriterConfigurationException | OfficeWriterException e) {
    LOG.error(e);
  }
  return null;
}
@SuppressWarnings("rawtypes") public void close(final RecordWriter recordWriter, final Reporter reporter) throws IOException { throwCaughtException(); closePool.execute(new Runnable() { @Override public void run() { try { long start = time.getNanoTime(); recordWriter.close(reporter); long duration = time.getTimeSinceMs(start); log.info("Flushed file in " + (duration / 1000.0) + " seconds."); } catch (Throwable e) { log.error("Exeption caught while closing stream. This exception will be thrown later.", e); exception = e; } } }); }
@Override
public RecordWriter<NullWritable, DynamoDBItemWritable> getRecordWriter(FileSystem ignored,
    JobConf job, String name, Progressable progress) throws IOException {
  boolean isCompressed = getCompressOutput(job);
  CompressionCodec codec = null;
  String extension = "";
  DataOutputStream fileOut;

  if (isCompressed) {
    Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
    extension = codec.getDefaultExtension();
  }

  Path file = new Path(FileOutputFormat.getOutputPath(job), name + extension);
  FileSystem fs = file.getFileSystem(job);

  if (!isCompressed) {
    fileOut = fs.create(file, progress);
  } else {
    fileOut = new DataOutputStream(codec.createOutputStream(fs.create(file, progress)));
  }

  return new ExportRecordWriter(fileOut);
}
@Test(enabled = true)
public void testWriteBufferData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<DurableBuffer<?>> sess =
      new MneDurableOutputSession<DurableBuffer<?>>(null, m_conf,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<DurableBuffer<?>> mdvalue =
      new MneDurableOutputValue<DurableBuffer<?>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<DurableBuffer<?>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> writer =
      outputFormat.getRecordWriter(m_fs, m_conf, null, null);
  DurableBuffer<?> dbuf = null;
  Checksum cs = new CRC32();
  cs.reset();
  for (int i = 0; i < m_reccnt; ++i) {
    dbuf = genupdDurableBuffer(sess, cs);
    Assert.assertNotNull(dbuf);
    writer.write(nada, mdvalue.of(dbuf));
  }
  m_checksum = cs.getValue();
  writer.close(null);
  sess.close();
}
@Override @SuppressWarnings("unchecked") public RecordWriter getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException { // expecting exactly one path String tableName = job.get(OUTPUT_TABLE); HTable table = null; try { table = new HTable(HBaseConfiguration.create(job), tableName); } catch(IOException e) { LOG.error(e); throw e; } table.setAutoFlush(false); return new TableRecordWriter(table); }
public RecordWriter<Shard, Text> getRecordWriter(final FileSystem fs, JobConf job,
    String name, final Progressable progress) throws IOException {
  final Path perm = new Path(getWorkOutputPath(job), name);

  return new RecordWriter<Shard, Text>() {
    public void write(Shard key, Text value) throws IOException {
      assert (IndexUpdateReducer.DONE.equals(value));

      String shardName = key.getDirectory();
      shardName = shardName.replace("/", "_");

      Path doneFile = new Path(perm, IndexUpdateReducer.DONE + "_" + shardName);
      if (!fs.exists(doneFile)) {
        fs.createNewFile(doneFile);
      }
    }

    public void close(final Reporter reporter) throws IOException {
    }
  };
}
/** {@inheritDoc} */
public RecordWriter<K, V> getRecordWriter(FileSystem filesystem, JobConf job,
    String name, Progressable progress) throws IOException {
  DBConfiguration dbConf = new DBConfiguration(job);
  String tableName = dbConf.getOutputTableName();
  String[] fieldNames = dbConf.getOutputFieldNames();

  try {
    Connection connection = dbConf.getConnection();
    PreparedStatement statement = null;

    statement = connection.prepareStatement(constructQuery(tableName, fieldNames));
    return new DBRecordWriter(connection, statement);
  } catch (Exception ex) {
    throw new IOException(ex.getMessage());
  }
}
public RecordWriter<Text, LWDocumentWritable> getRecordWriter(FileSystem ignored,
    JobConf job, String name, Progressable progress) throws IOException {
  final LucidWorksWriter writer = new LucidWorksWriter(progress);
  writer.open(job, name);

  return new RecordWriter<Text, LWDocumentWritable>() {
    public void close(Reporter reporter) throws IOException {
      writer.close();
    }

    public void write(Text key, LWDocumentWritable doc) throws IOException {
      writer.write(key, doc);
    }
  };
}
@Override
@Nonnull
public List<Processor> get(int count) {
  return processorList = range(0, count).mapToObj(i -> {
    try {
      String uuid = context.jetInstance().getCluster().getLocalMember().getUuid();
      TaskAttemptID taskAttemptID = new TaskAttemptID("jet-node-" + uuid,
          jobContext.getJobID().getId(), JOB_SETUP, i, 0);
      jobConf.set("mapred.task.id", taskAttemptID.toString());
      jobConf.setInt("mapred.task.partition", i);

      TaskAttemptContextImpl taskAttemptContext = new TaskAttemptContextImpl(jobConf, taskAttemptID);
      @SuppressWarnings("unchecked")
      OutputFormat<K, V> outFormat = jobConf.getOutputFormat();
      RecordWriter<K, V> recordWriter = outFormat.getRecordWriter(
          null, jobConf, uuid + '-' + valueOf(i), Reporter.NULL);
      return new WriteHdfsP<>(
          recordWriter, taskAttemptContext, outputCommitter, extractKeyFn, extractValueFn);
    } catch (IOException e) {
      throw new JetException(e);
    }
  }).collect(toList());
}
public RecordWriter<WritableComparable, Writable> getRecordWriter(
    final FileSystem fs, JobConf job, String name, final Progressable progress)
    throws IOException {
  final Path segmentDumpFile = new Path(FileOutputFormat.getOutputPath(job), name);

  // Get the old copy out of the way
  if (fs.exists(segmentDumpFile))
    fs.delete(segmentDumpFile, true);

  final PrintStream printStream = new PrintStream(fs.create(segmentDumpFile));
  return new RecordWriter<WritableComparable, Writable>() {
    public synchronized void write(WritableComparable key, Writable value)
        throws IOException {
      printStream.println(value);
    }

    public synchronized void close(Reporter reporter) throws IOException {
      printStream.close();
    }
  };
}
@Override @SuppressWarnings("unchecked") public RecordWriter getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException { // expecting exactly one path String tableName = job.get(OUTPUT_TABLE); HTable table = null; try { table = new HTable(HBaseConfiguration.create(job), tableName); } catch(IOException e) { LOG.error(e); throw e; } table.setAutoFlush(false, true); return new TableRecordWriter(table); }
@Override
public RecordWriter getRecordWriter(FileSystem fileSystem, JobConf configuration,
    String s, Progressable progressable) throws IOException {
  String mapName = configuration.get(outputNamedMapProperty);
  Class<CustomSerializer<K>> keySerializerClass =
      (Class<CustomSerializer<K>>) configuration.getClass(outputNamedMapKeySerializerProperty, null);
  Class<CustomSerializer<V>> valueSerializerClass =
      (Class<CustomSerializer<V>>) configuration.getClass(outputNamedMapValueSerializerProperty, null);
  int smOrdinal = configuration.getInt(SERIALIZATION_MODE, SerializationMode.DEFAULT.ordinal());
  int amOrdinal = configuration.getInt(AVAILABILITY_MODE, AvailabilityMode.USE_REPLICAS.ordinal());
  SerializationMode serializationMode = SerializationMode.values()[smOrdinal];
  AvailabilityMode availabilityMode = AvailabilityMode.values()[amOrdinal];

  if (mapName == null || mapName.length() == 0 ||
      keySerializerClass == null || valueSerializerClass == null) {
    throw new IOException("Output format is not configured with a valid NamedMap.");
  }

  CustomSerializer<K> keySerializer = ReflectionUtils.newInstance(keySerializerClass, configuration);
  keySerializer.setObjectClass((Class<K>) configuration.getClass(outputNamedMapKeyProperty, null));
  CustomSerializer<V> valueSerializer = ReflectionUtils.newInstance(valueSerializerClass, configuration);
  valueSerializer.setObjectClass((Class<V>) configuration.getClass(outputNamedMapValueProperty, null));

  NamedMap<K, V> namedMap = NamedMapFactory.getMap(mapName, keySerializer, valueSerializer);
  namedMap.setAvailabilityMode(availabilityMode);
  namedMap.setSerializationMode(serializationMode);

  return new NamedMapRecordWriter<K, V>(namedMap);
}
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name,
    Progressable progress) throws IOException {
  // We assume the name is the task ID.
  String taskId = job.get("mapred.task.id");
  Preconditions.checkArgument(taskId != null, "mapred.task.id must be set");
  LOG.debug("getRecordWriter name={}, mapred.task.id={}", name, taskId);
  TaskAttemptID taskAttemptId = TaskAttemptID.forName(taskId);
  LOG.debug("TaskAttemptId={}", taskAttemptId);
  TaskAttemptContext context =
      ReflectedTaskAttemptContextFactory.getContext(job, taskAttemptId);

  org.apache.hadoop.mapreduce.RecordWriter<K, JsonObject> mapreduceRecordWriter;
  try {
    mapreduceRecordWriter = mapreduceOutputFormat.getRecordWriter(context);
  } catch (InterruptedException ex) {
    throw new IOException(ex);
  }
  return new BigQueryMapredRecordWriter<K, V>(mapreduceRecordWriter, context);
}
@Test
public void testGetRecordWriter() throws IOException, InterruptedException {
  BigQueryMapredOutputFormat<LongWritable, JsonObject> outputFormat =
      new BigQueryMapredOutputFormat<LongWritable, JsonObject>();
  outputFormat.setMapreduceOutputFormat(mockOutputFormat);
  when(mockOutputFormat.getRecordWriter(any(TaskAttemptContext.class)))
      .thenReturn(mockMapreduceRecordWriter);

  JobConf jobConf = new JobConf();
  String taskId = "attempt_201401010000_0000_r_000000_0";
  jobConf.set("mapreduce.job.dir", "/a/path/job_1_2");
  jobConf.set("mapred.task.id", taskId);
  String name = "foo";
  RecordWriter<LongWritable, JsonObject> recordWriter = outputFormat.getRecordWriter(
      mockFileSystem, jobConf, name, mockProgressable);
  assertNotNull(recordWriter);

  verify(mockOutputFormat).getRecordWriter(any(TaskAttemptContext.class));
}
@Test
public void testClose() throws IOException, InterruptedException {
  RecordWriter<LongWritable, JsonObject> recordWriter =
      new BigQueryMapredRecordWriter<LongWritable, JsonObject>(
          mockRecordWriter, mockTaskAttemptContext);
  Reporter reporter = null;  // unused by code under test

  recordWriter.close(reporter);
  verify(mockRecordWriter).close(any(TaskAttemptContext.class));

  doThrow(new IOException("test"))
      .when(mockRecordWriter).close(any(TaskAttemptContext.class));
  expectedException.expect(IOException.class);
  try {
    recordWriter.close(reporter);
  } finally {
    verify(mockRecordWriter, times(2)).close(any(TaskAttemptContext.class));
  }
}
@Test
public void testWrite() throws IOException, InterruptedException {
  RecordWriter<LongWritable, JsonObject> recordWriter =
      new BigQueryMapredRecordWriter<LongWritable, JsonObject>(
          mockRecordWriter, mockTaskAttemptContext);
  LongWritable key = new LongWritable(123);
  JsonObject value = new JsonObject();

  recordWriter.write(key, value);
  verify(mockRecordWriter).write(any(LongWritable.class), any(JsonObject.class));

  recordWriter.write(key, null);
  verify(mockRecordWriter, times(2)).write(any(LongWritable.class), any(JsonObject.class));

  doThrow(new IOException("test"))
      .when(mockRecordWriter).write(any(LongWritable.class), any(JsonObject.class));
  expectedException.expect(IOException.class);
  try {
    recordWriter.write(key, value);
  } finally {
    verify(mockRecordWriter, times(3)).write(any(LongWritable.class), any(JsonObject.class));
  }
}
public ParquetRecordWriterWrapper(
    final OutputFormat<Void, ArrayWritable> realOutputFormat,
    final JobConf jobConf,
    final String name,
    final Progressable progress) throws IOException {
  try {
    // create a TaskInputOutputContext
    TaskAttemptID taskAttemptID = TaskAttemptID.forName(jobConf.get("mapred.task.id"));
    if (taskAttemptID == null) {
      taskAttemptID = new TaskAttemptID();
    }
    taskContext = ContextUtil.newTaskAttemptContext(jobConf, taskAttemptID);

    LOG.info("creating real writer to write at {}", name);
    realWriter = (org.apache.hadoop.mapreduce.RecordWriter<Void, ArrayWritable>)
        ((ParquetOutputFormat) realOutputFormat).getRecordWriter(taskContext, new Path(name));
    LOG.info("real writer: {}", realWriter);
  } catch (final InterruptedException e) {
    throw new IOException(e);
  }
}
@Override
public RecordWriter<NullWritable, OrcSerdeRow> getRecordWriter(FileSystem fileSystem,
    JobConf conf, String name, Progressable reporter) throws IOException {
  ReaderWriterProfiler.setProfilerOptions(conf);

  // To be compatible with older file formats like Sequence and RC
  // Only works if mapred.work.output.dir is set in the conf
  Path workOutputPath = FileOutputFormat.getWorkOutputPath(conf);
  Path outputPath = workOutputPath == null ? new Path(name) : new Path(workOutputPath, name);

  if (fileSystem == null && workOutputPath != null) {
    fileSystem = workOutputPath.getFileSystem(conf);
  }

  return new OrcRecordWriter(fileSystem, outputPath, conf,
      OrcConf.ConfVars.HIVE_ORC_STRIPE_SIZE.defaultLongVal,
      OrcConf.ConfVars.HIVE_ORC_COMPRESSION.defaultVal,
      OrcConf.ConfVars.HIVE_ORC_COMPRESSION_BLOCK_SIZE.defaultIntVal,
      OrcConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE.defaultIntVal);
}
public RecordWriter<ExLinkKey, DbLinkLocationList> getRecordWriter(FileSystem ignored,
    JobConf job, String name, Progressable progress) throws IOException {
  String nameOut = name.replace("part", Output.pageLinkOut.name());
  String nameIn = name.replace("part", Output.pageLinkIn.name());

  Path fileOut = FileOutputFormat.getTaskOutputPath(job, nameOut);
  FileSystem fsOut = fileOut.getFileSystem(job);
  FSDataOutputStream streamOut = fsOut.create(fileOut, progress);

  Path fileIn = FileOutputFormat.getTaskOutputPath(job, nameIn);
  FileSystem fsIn = fileIn.getFileSystem(job);
  FSDataOutputStream streamIn = fsIn.create(fileIn, progress);

  return new LinkSummaryRecordWriter(streamOut, streamIn);
}