private InputStream openFile(Path path) throws IOException {
    CompressionCodec codec = new CompressionCodecFactory(miniCluster.getConfig()).getCodec(path);
    FSDataInputStream fileIn = dfsCluster.getFileSystem().open(path);
    // check if compressed
    if (codec == null) { // uncompressed
        return fileIn;
    } else { // compressed
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        this.openDecompressors.add(decompressor); // to be returned later using close
        if (codec instanceof SplittableCompressionCodec) {
            long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                .createInputStream(fileIn, decompressor, 0, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            return cIn;
        } else {
            return codec.createInputStream(fileIn, decompressor);
        }
    }
}
public InputStream openFile(Path path) throws IOException {
    CompressionCodec codec = compressionCodecs.getCodec(path);
    FSDataInputStream fileIn = fs.open(path);
    // check if compressed
    if (codec == null) { // uncompressed
        LOG.debug("Reading from an uncompressed file \"" + path + "\"");
        return fileIn;
    } else { // compressed
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        this.openDecompressors.add(decompressor); // to be returned later using close
        if (codec instanceof SplittableCompressionCodec) {
            LOG.debug("Reading from a compressed file \"" + path + "\" with splittable compression codec");
            long end = fs.getFileStatus(path).getLen();
            return ((SplittableCompressionCodec) codec).createInputStream(fileIn, decompressor, 0, end,
                SplittableCompressionCodec.READ_MODE.CONTINUOUS);
        } else {
            LOG.debug("Reading from a compressed file \"" + path + "\" with non-splittable compression codec");
            return codec.createInputStream(fileIn, decompressor);
        }
    }
}
private InputStream openFile(Path path) throws IOException {
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    FSDataInputStream fileIn = dfsCluster.getFileSystem().open(path);
    // check if compressed
    if (codec == null) { // uncompressed
        return fileIn;
    } else { // compressed
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        this.openDecompressors.add(decompressor); // to be returned later using close
        if (codec instanceof SplittableCompressionCodec) {
            long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                .createInputStream(fileIn, decompressor, 0, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            return cIn;
        } else {
            return codec.createInputStream(fileIn, decompressor);
        }
    }
}
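The openFile variants above stash every borrowed Decompressor in openDecompressors so it can be handed back to the pool later. A minimal sketch of the matching cleanup, assuming openDecompressors is a List<Decompressor> field maintained by openFile:

public void close() throws IOException {
    // return all decompressors borrowed from CodecPool in openFile
    for (Decompressor decompressor : this.openDecompressors) {
        if (decompressor != null) {
            CodecPool.returnDecompressor(decompressor);
        }
    }
    this.openDecompressors.clear();
}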
@Override
protected boolean isSplitable(JobContext context, Path file) {
    CompressionCodec codec;
    Configuration job = context.getConfiguration();
    legionObjective = ObjectiveDeserializer.deserialize(job.get("legion_objective"));
    if (legionObjective.getCodecOverride() != null) {
        codec = new CompressionCodecFactory(context.getConfiguration())
            .getCodecByClassName(legionObjective.getCodecOverride());
    } else {
        codec = new CompressionCodecFactory(context.getConfiguration())
            .getCodec(file);
    }
    if (null == codec) {
        return true;
    }
    return codec instanceof SplittableCompressionCodec;
}
private InputStream getInputStream(JobConf jobConf, FileSplit split) throws IOException, ClassNotFoundException {
    // open the file and seek to the start of the split
    long splitStart = split.getStart();
    long splitEnd = splitStart + split.getLength();
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(jobConf);
    FSDataInputStream fsin = fs.open(file);
    fsin.seek(splitStart);
    Configuration conf = new Configuration();
    CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec = compressionCodecFactory.getCodec(file);
    if (codec == null) { // uncompressed: return the raw stream, already positioned at the split start
        return fsin;
    }
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    if (codec instanceof SplittableCompressionCodec) {
        // BYBLOCK aligns the stream to compressed block boundaries within the split
        return ((SplittableCompressionCodec) codec).createInputStream(fsin, decompressor,
            splitStart, splitEnd, SplittableCompressionCodec.READ_MODE.BYBLOCK);
    } else {
        return codec.createInputStream(fsin, decompressor);
    }
}
protected boolean isSplitable(Configuration conf, Path file) {
    final CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file);
    if (null == codec) {
        return true;
    }
    return codec instanceof SplittableCompressionCodec;
}
@Override
protected boolean isSplitable(JobContext context, Path file) {
    final CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    if (null == codec) {
        return true;
    }
    return codec instanceof SplittableCompressionCodec;
}
protected boolean isSplitable(FileSystem fs, Path file) {
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (null == codec) {
        return true;
    }
    return codec instanceof SplittableCompressionCodec;
}
protected boolean isSplitable(FileSystem fs, Path file) {
    final CompressionCodec codec = new CompressionCodecFactory(fs.getConf()).getCodec(file);
    if (null == codec) {
        return true;
    }
    return codec instanceof SplittableCompressionCodec;
}
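The isSplitable overrides above all apply the same rule: no codec means plain (splittable) input, and a compressed file may only be split if its codec implements SplittableCompressionCodec. A quick illustrative check, assuming a stock Hadoop setup where CompressionCodecFactory resolves codecs by file suffix:

Configuration conf = new Configuration();
CompressionCodecFactory factory = new CompressionCodecFactory(conf);
// BZip2Codec implements SplittableCompressionCodec, so .bz2 files stay splittable;
// GzipCodec does not, so a .gz file is processed as a single split.
CompressionCodec bzip2 = factory.getCodec(new Path("blocks.bz2"));
CompressionCodec gzip = factory.getCodec(new Path("blocks.gz"));
boolean bzip2Splittable = bzip2 instanceof SplittableCompressionCodec; // true
boolean gzipSplittable = gzip instanceof SplittableCompressionCodec;   // false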
public void initialize(InputSplit genericSplit, TaskAttemptContext context) {
    try {
        FileSplit split = (FileSplit) genericSplit;
        Configuration job = context.getConfiguration();
        this.maxLineLength = job.getInt("mapreduce.input.linerecordreader.line.maxlength", Integer.MAX_VALUE);
        this.start = split.getStart();
        this.end = this.start + split.getLength();
        Path file = split.getPath();
        FileSystem fs = file.getFileSystem(job);
        this.fileIn = fs.open(file);
        CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
        if (null != codec) {
            this.isCompressedInput = true;
            this.decompressor = CodecPool.getDecompressor(codec);
            if (codec instanceof SplittableCompressionCodec) {
                SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    this.fileIn, this.decompressor, this.start, this.end,
                    SplittableCompressionCodec.READ_MODE.BYBLOCK);
                this.in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
                this.start = cIn.getAdjustedStart();
                this.end = cIn.getAdjustedEnd();
                this.filePosition = cIn;
            } else {
                this.in = new SplitLineReader(codec.createInputStream(this.fileIn, this.decompressor),
                    job, this.recordDelimiterBytes);
                this.filePosition = this.fileIn;
            }
        } else {
            this.fileIn.seek(this.start);
            this.in = new SplitLineReader(this.fileIn, job, this.recordDelimiterBytes);
            this.filePosition = this.fileIn;
        }
        // If this is not the first split, skip the first (possibly partial) line;
        // the reader of the previous split consumes it.
        if (this.start != 0L) {
            this.start += (long) this.in.readLine(new Text(), 0, this.maxBytesToConsume(this.start));
        }
        this.pos = this.start;
    } catch (Exception ex) {
        LOG.warn("Exception occurred during initialization {}", ex, ex);
    }
}
/**
 * Initializes the reader.
 *
 * @param split split to be used (assumed to be a file split)
 * @param context context of the job
 * @throws java.io.IOException in case of errors reading from the filestream provided by Hadoop
 * @throws java.lang.InterruptedException in case of thread interruption
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fSplit = (FileSplit) split;
    // Initialize start and end of split
    start = fSplit.getStart();
    end = start + fSplit.getLength();
    final Path file = fSplit.getPath();
    codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    final FileSystem fs = file.getFileSystem(context.getConfiguration());
    FSDataInputStream fileIn = fs.open(file); // open stream
    if (isCompressedInput()) { // decompress
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                .createInputStream(fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            ebr = new EthereumBlockReader(cIn, this.maxSizeEthereumBlock, this.bufferSize, this.useDirectBuffer);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            ebr = new EthereumBlockReader(codec.createInputStream(fileIn, decompressor),
                this.maxSizeEthereumBlock, this.bufferSize, this.useDirectBuffer);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        ebr = new EthereumBlockReader(fileIn, this.maxSizeEthereumBlock, this.bufferSize, this.useDirectBuffer);
        filePosition = fileIn;
    }
}
/**
 * This method is experimental and derived from TextInputFormat. It is neither necessary nor
 * recommended to compress the blockchain files. Instead, it is recommended to extract the
 * relevant data from the blockchain files once and store it in a format suitable for analytics
 * (including compression), such as ORC or Parquet.
 */
@Override
protected boolean isSplitable(FileSystem fs, Path file) {
    if (!this.isSplitable) {
        return false;
    }
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (null == codec) {
        return true;
    }
    return codec instanceof SplittableCompressionCodec;
}
/**
 * Initializes the reader.
 *
 * @param split split to use (assumed to be a file split)
 * @param context context of the job
 * @throws java.io.IOException in case of errors reading from the filestream provided by Hadoop
 * @throws java.lang.InterruptedException in case of thread interruption
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fSplit = (FileSplit) split;
    // Initialize start and end of split
    start = fSplit.getStart();
    end = start + fSplit.getLength();
    final Path file = fSplit.getPath();
    codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    final FileSystem fs = file.getFileSystem(context.getConfiguration());
    FSDataInputStream fileIn = fs.open(file); // open stream
    if (isCompressedInput()) { // decompress
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                .createInputStream(fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            bbr = new BitcoinBlockReader(cIn, this.maxSizeBitcoinBlock, this.bufferSize,
                this.specificMagicByteArray, this.useDirectBuffer, this.readAuxPOW);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            bbr = new BitcoinBlockReader(codec.createInputStream(fileIn, decompressor),
                this.maxSizeBitcoinBlock, this.bufferSize, this.specificMagicByteArray,
                this.useDirectBuffer, readAuxPOW);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        bbr = new BitcoinBlockReader(fileIn, this.maxSizeBitcoinBlock, this.bufferSize,
            this.specificMagicByteArray, this.useDirectBuffer, readAuxPOW);
        filePosition = fileIn;
    }
    // seek to block start (for the case a block overlaps a split)
    try {
        bbr.seekBlockStart();
    } catch (BitcoinBlockReadException bbre) {
        LOG.error("Error reading Bitcoin blockchain data", bbre);
    }
}
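The initialize methods above borrow a Decompressor from CodecPool, so the record reader should return it when it is done. A minimal sketch of the matching close, assuming the decompressor field set above and that BitcoinBlockReader exposes a close method:

@Override
public synchronized void close() throws IOException {
    try {
        if (bbr != null) {
            bbr.close(); // also closes the wrapped input stream
        }
    } finally {
        if (decompressor != null) {
            CodecPool.returnDecompressor(decompressor); // hand back to the pool for reuse
            decompressor = null;
        }
    }
}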
@Override
protected boolean isSplitable(FileSystem fs, Path file) {
    final CompressionCodec codec = new CompressionCodecFactory(fs.getConf()).getCodec(file);
    if (null == codec) {
        return true;
    }
    return codec instanceof SplittableCompressionCodec;
}
@Override
protected boolean isSplitable(JobContext context, Path file) {
    CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    if (null == codec) {
        return true;
    }
    return codec instanceof SplittableCompressionCodec;
}