@Test
/**
 * Create an IFile.Writer using GzipCodec since this code does not
 * have a compressor when run via the tests (ie no native libraries).
 */
public void testIFileWriterWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem) localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");
  DefaultCodec codec = new GzipCodec();
  codec.setConf(conf);
  IFile.Writer<Text, Text> writer =
      new IFile.Writer<Text, Text>(conf, rfs.create(path), Text.class, Text.class, codec, null);
  writer.close();
}
@Test
/** Same as above but create a reader. */
public void testIFileReaderWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem) localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");
  DefaultCodec codec = new GzipCodec();
  codec.setConf(conf);
  FSDataOutputStream out = rfs.create(path);
  IFile.Writer<Text, Text> writer =
      new IFile.Writer<Text, Text>(conf, out, Text.class, Text.class, codec, null);
  writer.close();
  FSDataInputStream in = rfs.open(path);
  IFile.Reader<Text, Text> reader =
      new IFile.Reader<Text, Text>(conf, in, rfs.getFileStatus(path).getLen(), codec, null);
  // Test the checksum before closing the reader (reading from a closed
  // stream is undefined).
  byte[] buf = new byte[100];
  int bytesRead = reader.checksumIn.readWithChecksum(buf, 0, buf.length);
  assertEquals(bytesRead, reader.checksumIn.getChecksum().length);
  reader.close();
}
/**
 * For running a few tests of methods herein.
 * @param args
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
  int count = 1024;
  int size = 10240;
  for (String arg : args) {
    if (arg.startsWith(COUNT)) {
      count = Integer.parseInt(arg.replace(COUNT, ""));
    } else if (arg.startsWith(SIZE)) {
      size = Integer.parseInt(arg.replace(SIZE, ""));
    } else {
      usage(1);
    }
  }
  IPCUtil util = new IPCUtil(HBaseConfiguration.create());
  ((Log4JLogger) IPCUtil.LOG).getLogger().setLevel(Level.ALL);
  timerTests(util, count, size, new KeyValueCodec(), null);
  timerTests(util, count, size, new KeyValueCodec(), new DefaultCodec());
  timerTests(util, count, size, new KeyValueCodec(), new GzipCodec());
}
protected SequenceFile.Writer getSequenceWriter(TaskAttemptContext context,
    Class<?> keyClass, Class<?> valueClass) throws IOException {
  Configuration conf = context.getConfiguration();
  CompressionCodec codec = null;
  CompressionType compressionType = CompressionType.NONE;
  if (getCompressOutput(context)) {
    // find the kind of compression to do
    compressionType = getOutputCompressionType(context);
    // find the right codec
    Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
    codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
  }
  // get the path of the temporary output file
  Path file = getDefaultWorkFile(context, "");
  FileSystem fs = file.getFileSystem(conf);
  return SequenceFile.createWriter(fs, conf, file, keyClass, valueClass,
      compressionType, codec, context);
}
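A minimal driver-side sketch, not part of the original snippet, showing the job configuration that getSequenceWriter above reads back: setCompressOutput enables compression, and the codec and compression type otherwise default to DefaultCodec and NONE. The job name and output path are assumptions for illustration.

// Hypothetical driver code: configure SequenceFile output compression so that
// getSequenceWriter picks up BLOCK compression with DefaultCodec.
Job job = Job.getInstance(new Configuration(), "example"); // hypothetical job
job.setOutputFormatClass(SequenceFileOutputFormat.class);
SequenceFileOutputFormat.setOutputPath(job, new Path("/tmp/seq-out")); // hypothetical path
SequenceFileOutputFormat.setCompressOutput(job, true);
SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);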
protected DataOutputStream getDataOutputStream(final TaskAttemptContext job)
    throws IOException, InterruptedException {
  final Configuration conf = job.getConfiguration();
  boolean isCompressed = getCompressOutput(job);
  CompressionCodec codec = null;
  String extension = "";
  if (isCompressed) {
    final Class<? extends CompressionCodec> codecClass =
        getOutputCompressorClass(job, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, conf);
    extension = codec.getDefaultExtension();
  }
  final Path file = super.getDefaultWorkFile(job, extension);
  final FileSystem fs = file.getFileSystem(conf);
  if (!isCompressed) {
    return new DataOutputStream(fs.create(file, false));
  } else {
    return new DataOutputStream(codec.createOutputStream(fs.create(file, false)));
  }
}
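A hedged read-back sketch, assuming a file produced by the method above: output written through codec.createOutputStream can be decompressed with the matching codec.createInputStream. The file name and codec class here are assumptions, not part of the original source.

// Hypothetical read-back: decompress an output file produced with
// codec.createOutputStream. The path and codec class are assumptions.
Configuration conf = new Configuration();
CompressionCodec codec = ReflectionUtils.newInstance(DefaultCodec.class, conf);
Path file = new Path("output/part-r-00000" + codec.getDefaultExtension());
FileSystem fs = file.getFileSystem(conf);
try (BufferedReader reader = new BufferedReader(
    new InputStreamReader(codec.createInputStream(fs.open(file)), StandardCharsets.UTF_8))) {
  String line;
  while ((line = reader.readLine()) != null) {
    System.out.println(line);
  }
}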
public MapOutputCopier(JobConf job, Reporter reporter, SecretKey jobTokenSecret) {
  setName("MapOutputCopier " + reduceTask.getTaskID() + "." + id);
  LOG.debug(getName() + " created");
  this.reporter = reporter;
  this.jobTokenSecret = jobTokenSecret;
  shuffleConnectionTimeout =
      job.getInt("mapreduce.reduce.shuffle.connect.timeout", STALLED_COPY_TIMEOUT);
  shuffleReadTimeout =
      job.getInt("mapreduce.reduce.shuffle.read.timeout", DEFAULT_READ_TIMEOUT);
  if (job.getCompressMapOutput()) {
    Class<? extends CompressionCodec> codecClass =
        job.getMapOutputCompressorClass(DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
    decompressor = CodecPool.getDecompressor(codec);
  }
}
@Test
/**
 * Create an IFile.Writer using GzipCodec since this codec does not
 * have a compressor when run via the tests (ie no native libraries).
 */
public void testIFileWriterWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem) localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");
  DefaultCodec codec = new GzipCodec();
  codec.setConf(conf);
  IFile.Writer<Text, Text> writer =
      new IFile.Writer<Text, Text>(conf, rfs, path, Text.class, Text.class, codec, null);
  writer.close();
}
@Test
/** Same as above but create a reader. */
public void testIFileReaderWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem) localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");
  DefaultCodec codec = new GzipCodec();
  codec.setConf(conf);
  IFile.Writer<Text, Text> writer =
      new IFile.Writer<Text, Text>(conf, rfs, path, Text.class, Text.class, codec, null);
  writer.close();
  IFile.Reader<Text, Text> reader =
      new IFile.Reader<Text, Text>(conf, rfs, path, codec, null);
  reader.close();
}
public MapOutputCopier(JobConf job, Reporter reporter) {
  setName("MapOutputCopier " + reduceTask.getTaskID() + "." + id);
  LOG.debug(getName() + " created");
  this.reporter = reporter;
  shuffleConnectionTimeout =
      job.getInt("mapreduce.reduce.shuffle.connect.timeout", STALLED_COPY_TIMEOUT);
  shuffleReadTimeout =
      job.getInt("mapreduce.reduce.shuffle.read.timeout", DEFAULT_READ_TIMEOUT);
  if (job.getCompressMapOutput()) {
    Class<? extends CompressionCodec> codecClass =
        job.getMapOutputCompressorClass(DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
    decompressor = CodecPool.getDecompressor(codec);
  }
  setDaemon(true);
}
@Override
public void setStoreLocation(String location, Job job) throws IOException {
  job.setOutputKeyClass(keyClass);
  job.setOutputValueClass(valueClass);
  if (compressionType != null && compressionCodecClass != null) {
    Class<? extends CompressionCodec> codecClass =
        FileOutputFormat.getOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressorClass(job, codecClass);
    SequenceFileOutputFormat.setOutputCompressionType(job,
        SequenceFile.CompressionType.valueOf(compressionType));
  }
  FileOutputFormat.setOutputPath(job, new Path(location));
}
/**
 * See {@link StateStore#put(String, String, T)}.
 *
 * <p>
 * This implementation does not support putting the state object into an existing store,
 * as append is yet to be supported by the Hadoop SequenceFile (HADOOP-7139).
 * </p>
 */
@Override
public void put(String storeName, String tableName, T state) throws IOException {
  Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
  if (!this.fs.exists(tablePath) && !create(storeName, tableName)) {
    throw new IOException("Failed to create a state file for table " + tableName);
  }
  Closer closer = Closer.create();
  try {
    SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf,
        tablePath, Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK,
        new DefaultCodec()));
    writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }
}
/**
 * See {@link StateStore#putAll(String, String, Collection)}.
 *
 * <p>
 * This implementation does not support putting the state objects into an existing store,
 * as append is yet to be supported by the Hadoop SequenceFile (HADOOP-7139).
 * </p>
 */
@Override
public void putAll(String storeName, String tableName, Collection<T> states) throws IOException {
  Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
  if (!this.fs.exists(tablePath) && !create(storeName, tableName)) {
    throw new IOException("Failed to create a state file for table " + tableName);
  }
  Closer closer = Closer.create();
  try {
    SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf,
        tablePath, Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK,
        new DefaultCodec()));
    for (T state : states) {
      writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
    }
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }
}
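A hedged read-side sketch for the two state-store methods above: iterate the (id, state) records they write with a SequenceFile.Reader, mirroring the writer arguments. This assumes T is a Writable (as required for SequenceFile values) and reuses the snippet's fs, conf, stateClass, and tablePath names; it is not part of the original source.

// Hypothetical read side: iterate the (id, state) pairs written by put/putAll.
SequenceFile.Reader reader = new SequenceFile.Reader(fs, tablePath, conf);
try {
  Text key = new Text();
  T state = ReflectionUtils.newInstance(stateClass, conf);
  while (reader.next(key, state)) {
    // process one state object per record
  }
} finally {
  reader.close();
}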
/**
 * @return the compression codec in use for this file.
 */
public Class<? extends CompressionCodec> getCompressionCodecClass() {
  if (!isReader) {
    return DefaultCodec.class;
  }
  Reader reader = null;
  try {
    reader = createReader();
    if (reader.getCompressionCodec() == null) {
      return null;
    }
    return reader.getCompressionCodec().getClass();
  } catch (final Exception e) {
    throw new RuntimeException(e);
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (final IOException e1) {
        // ignore failures on close
      }
    }
  }
}
@Test
/**
 * Create an IFile.Writer using GzipCodec since this code does not
 * have a compressor when run via the tests (ie no native libraries).
 */
public void testIFileWriterWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem) localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");
  DefaultCodec codec = new GzipCodec();
  codec.setConf(conf);
  IFile.Writer<Text, Text> writer =
      new IFile.Writer<Text, Text>(conf, rfs, path, Text.class, Text.class, codec, null);
  writer.close();
}
@Test
/** Same as above but create a reader. */
public void testIFileReaderWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem) localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");
  DefaultCodec codec = new GzipCodec();
  codec.setConf(conf);
  IFile.Writer<Text, Text> writer =
      new IFile.Writer<Text, Text>(conf, rfs, path, Text.class, Text.class, codec, null);
  writer.close();
  IFile.Reader<Text, Text> reader =
      new IFile.Reader<Text, Text>(conf, rfs, path, codec, null);
  // Test the checksum before closing the reader (reading from a closed
  // stream is undefined).
  byte[] buf = new byte[100];
  int bytesRead = reader.checksumIn.readWithChecksum(buf, 0, buf.length);
  assertEquals(bytesRead, reader.checksumIn.getChecksum().length);
  reader.close();
}
@SuppressWarnings("unchecked") private Writer createWriter(Map<String, String> metadata) throws IOException { final Metadata md = new Metadata(); for (final Entry<String, String> e : metadata.entrySet()) { md.set(new Text(e.getKey()), new Text(e.getValue())); } final Class<K> keyClass = (Class<K>) ((ParameterizedType) getClass().getGenericSuperclass()) .getActualTypeArguments()[0]; final Class<V> valueClass = (Class<V>) ((ParameterizedType) getClass().getGenericSuperclass()) .getActualTypeArguments()[1]; return SequenceFile.createWriter(fileSystem, config, sequenceFilePath, keyClass, valueClass, compressionType, new DefaultCodec(), null, md); }
/**
 * Get the codec string associated with this <code>configuration</code>.
 *
 * @param configuration the {@link Configuration}
 * @return the codec {@link String}
 */
public static String getCodecString(Configuration configuration) {
  boolean compress = configuration.getBoolean(FileOutputFormat.COMPRESS, false);
  String codecType = configuration.get(FileOutputFormat.COMPRESS_TYPE, null);
  if (compress && (codecType == null || !codecType.equals(CompressionType.NONE.toString()))) {
    Class<?> codecClass =
        configuration.getClass(FileOutputFormat.COMPRESS_CODEC, DefaultCodec.class);
    if (codecClass == null) {
      return CODEC_NONE;
    }
    try {
      // Strip the leading dot from the codec's default extension, e.g. ".deflate" -> "deflate".
      return ((CompressionCodec) codecClass.newInstance()).getDefaultExtension().replace(".", "");
    } catch (Exception exception) {
      throw new RuntimeException("Could not determine codec", exception);
    }
  }
  return CODEC_NONE;
}
@Test
public void testGetCodecString() {
  Configuration configuration = dfsServer.getConf();
  assertEquals(MrUtil.CODEC_NONE, MrUtil.getCodecString(configuration));
  configuration.setBoolean(FileOutputFormat.COMPRESS, false);
  assertEquals(MrUtil.CODEC_NONE, MrUtil.getCodecString(configuration));
  configuration.setBoolean(FileOutputFormat.COMPRESS, true);
  assertEquals(new DefaultCodec().getDefaultExtension().substring(1),
      MrUtil.getCodecString(configuration));
  configuration.set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());
  assertEquals(new SnappyCodec().getDefaultExtension().substring(1),
      MrUtil.getCodecString(configuration));
  configuration.set(FileOutputFormat.COMPRESS_TYPE, CompressionType.BLOCK.toString());
  assertEquals(new SnappyCodec().getDefaultExtension().substring(1),
      MrUtil.getCodecString(configuration));
  configuration.set(FileOutputFormat.COMPRESS_TYPE, CompressionType.NONE.toString());
  assertEquals(MrUtil.CODEC_NONE, MrUtil.getCodecString(configuration));
  configuration.set(FileOutputFormat.COMPRESS_TYPE, CompressionType.BLOCK.toString());
  configuration.setBoolean(FileOutputFormat.COMPRESS, false);
  assertEquals(MrUtil.CODEC_NONE, MrUtil.getCodecString(configuration));
}
public DataOutputStream getDataOutputStream(final TaskAttemptContext job)
    throws IOException, InterruptedException {
  org.apache.hadoop.conf.Configuration hadoopConf = DEFAULT_COMPAT.getContextConfiguration(job);
  this.faunusConf = ModifiableHadoopConfiguration.of(hadoopConf);
  boolean isCompressed = getCompressOutput(job);
  CompressionCodec codec = null;
  String extension = "";
  if (isCompressed) {
    final Class<? extends CompressionCodec> codecClass =
        getOutputCompressorClass(job, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, hadoopConf);
    extension = codec.getDefaultExtension();
  }
  final Path file = super.getDefaultWorkFile(job, extension);
  final FileSystem fs = file.getFileSystem(hadoopConf);
  if (!isCompressed) {
    return new DataOutputStream(fs.create(file, false));
  } else {
    return new DataOutputStream(codec.createOutputStream(fs.create(file, false)));
  }
}
public static Job createJob(Path[] inputPaths, Path outputPath, Map<String, String> metadata,
    Configuration config) throws IOException {
  final Job job = new Job(config);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(BytesWritable.class);
  job.setOutputFormatClass(MetadataSequenceFileOutputFormat.class);
  SequenceFileInputFormat.setInputPaths(job, inputPaths);
  SequenceFileOutputFormat.setOutputPath(job, outputPath);
  SequenceFileOutputFormat.setCompressOutput(job, true);
  SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
  SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
  if (metadata != null) {
    MetadataConfiguration.setMetadata(metadata, job.getConfiguration());
  }
  return job;
}
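A short usage sketch for createJob above, with assumed input/output paths and metadata; only the waitForCompletion call is standard Hadoop API, the rest is illustrative.

// Hypothetical driver: build and run the job. Paths and metadata are assumptions.
Configuration conf = new Configuration();
Map<String, String> metadata = new HashMap<String, String>();
metadata.put("source", "example"); // hypothetical metadata entry
Job job = createJob(new Path[] { new Path("/data/in") }, new Path("/data/out"), metadata, conf);
boolean success = job.waitForCompletion(true); // blocks until the job finishes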
/** Unit tests for SequenceFile. */
@Test
public void testZlibSequenceFile() throws Exception {
  LOG.info("Testing SequenceFile with DefaultCodec");
  compressedSeqFileTest(new DefaultCodec());
  LOG.info("Successfully tested SequenceFile with DefaultCodec");
}
public void testValueIteratorWithCompression() throws Exception {
  Path tmpDir = new Path("build/test/test.reduce.task.compression");
  Configuration conf = new Configuration();
  DefaultCodec codec = new DefaultCodec();
  codec.setConf(conf);
  for (Pair[] testCase : testCases) {
    runValueIterator(tmpDir, testCase, conf, codec);
  }
}