Java class org.apache.hadoop.io.compress.SplitCompressionInputStream code examples

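Every snippet on this page follows the same basic pattern: look up a CompressionCodec for the file via CompressionCodecFactory, and if the codec implements SplittableCompressionCodec, open a SplitCompressionInputStream for the byte range of the split and continue with the codec-adjusted start and end offsets; otherwise fall back to a plain compressed (or uncompressed) stream. Below is a minimal sketch of that pattern, assuming the configuration, the open FSDataInputStream and the split offsets are already available; the class and method names (SplitReadSketch, openSplit) are illustrative, everything else is the Hadoop API used in the snippets.

import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.SplitCompressionInputStream;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;

public class SplitReadSketch {
    public static InputStream openSplit(Configuration conf, FSDataInputStream fileIn,
            Path file, long start, long end) throws IOException {
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file);
        if (codec == null) {
            // uncompressed: simply position the raw stream at the split start
            fileIn.seek(start);
            return fileIn;
        }
        // the decompressor should be returned to the CodecPool once the stream is closed
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            // split-aware stream; callers typically re-base their offsets via
            // cIn.getAdjustedStart() and cIn.getAdjustedEnd()
            SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                .createInputStream(fileIn, decompressor, start, end,
                    SplittableCompressionCodec.READ_MODE.BYBLOCK);
            return cIn;
        }
        // non-splittable codec: the whole file has to be read as a single stream
        return codec.createInputStream(fileIn, decompressor);
    }
}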
Project: hadoopoffice    File: MapReduceExcelOutputIntegrationTest.java
private InputStream openFile(Path path) throws IOException {
    CompressionCodec codec = new CompressionCodecFactory(miniCluster.getConfig()).getCodec(path);
    FSDataInputStream fileIn = dfsCluster.getFileSystem().open(path);
    // check if compressed
    if (codec == null) { // uncompressed
        return fileIn;
    } else { // compressed
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        this.openDecompressors.add(decompressor); // to be returned later using close
        if (codec instanceof SplittableCompressionCodec) {
            long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                .createInputStream(fileIn, decompressor, 0, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            return cIn;
        } else {
            return codec.createInputStream(fileIn, decompressor);
        }
    }
}
Project: hadoopoffice    File: MapReduceExcelInputIntegrationTest.java
private InputStream openFile(Path path) throws IOException {
    CompressionCodec codec = new CompressionCodecFactory(miniCluster.getConfig()).getCodec(path);
    FSDataInputStream fileIn = dfsCluster.getFileSystem().open(path);
    // check if compressed
    if (codec == null) { // uncompressed
        return fileIn;
    } else { // compressed
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        this.openDecompressors.add(decompressor); // to be returned later using close
        if (codec instanceof SplittableCompressionCodec) {
            long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                .createInputStream(fileIn, decompressor, 0, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            return cIn;
        } else {
            return codec.createInputStream(fileIn, decompressor);
        }
    }
}
Project: hadoopcryptoledger    File: SparkBitcoinBlockCounterSparkMasterIntegrationTest.java
private InputStream openFile(Path path) throws IOException {
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    FSDataInputStream fileIn = dfsCluster.getFileSystem().open(path);
    // check if compressed
    if (codec == null) { // uncompressed
        return fileIn;
    } else { // compressed
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        this.openDecompressors.add(decompressor); // to be returned later using close
        if (codec instanceof SplittableCompressionCodec) {
            long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                .createInputStream(fileIn, decompressor, 0, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            return cIn;
        } else {
            return codec.createInputStream(fileIn, decompressor);
        }
    }
}
Project: hadoopcryptoledger    File: MapReduceBitcoinTransactionIntegrationTest.java
private InputStream openFile(Path path) throws IOException {
    CompressionCodec codec = new CompressionCodecFactory(miniCluster.getConfig()).getCodec(path);
    FSDataInputStream fileIn = dfsCluster.getFileSystem().open(path);
    // check if compressed
    if (codec == null) { // uncompressed
        return fileIn;
    } else { // compressed
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        this.openDecompressors.add(decompressor); // to be returned later using close
        if (codec instanceof SplittableCompressionCodec) {
            long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                .createInputStream(fileIn, decompressor, 0, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            return cIn;
        } else {
            return codec.createInputStream(fileIn, decompressor);
        }
    }
}
Project: hadoopcryptoledger    File: MapReduceEthereumBlockIntegrationTest.java
private InputStream openFile(Path path) throws IOException {
    CompressionCodec codec = new CompressionCodecFactory(miniCluster.getConfig()).getCodec(path);
    FSDataInputStream fileIn = dfsCluster.getFileSystem().open(path);
    // check if compressed
    if (codec == null) { // uncompressed
        return fileIn;
    } else { // compressed
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        this.openDecompressors.add(decompressor); // to be returned later using close
        if (codec instanceof SplittableCompressionCodec) {
            long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                .createInputStream(fileIn, decompressor, 0, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            return cIn;
        } else {
            return codec.createInputStream(fileIn, decompressor);
        }
    }
}
Project: hadoopcryptoledger    File: Spark2BitcoinBlockCounterSparkMasterIntegrationTest.java
private InputStream openFile(Path path) throws IOException {
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    FSDataInputStream fileIn = dfsCluster.getFileSystem().open(path);
    // check if compressed
    if (codec == null) { // uncompressed
        return fileIn;
    } else { // compressed
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        this.openDecompressors.add(decompressor); // to be returned later using close
        if (codec instanceof SplittableCompressionCodec) {
            long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                .createInputStream(fileIn, decompressor, 0, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            return cIn;
        } else {
            return codec.createInputStream(fileIn, decompressor);
        }
    }
}
Project: hadoopcryptoledger    File: MapReduceBitcoinBlockIntegrationTest.java
private InputStream openFile(Path path) throws IOException {
    CompressionCodec codec = new CompressionCodecFactory(miniCluster.getConfig()).getCodec(path);
    FSDataInputStream fileIn = dfsCluster.getFileSystem().open(path);
    // check if compressed
    if (codec == null) { // uncompressed
        return fileIn;
    } else { // compressed
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        this.openDecompressors.add(decompressor); // to be returned later using close
        if (codec instanceof SplittableCompressionCodec) {
            long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                .createInputStream(fileIn, decompressor, 0, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            return cIn;
        } else {
            return codec.createInputStream(fileIn, decompressor);
        }
    }
}
Project: hadoop    File: CompressedSplitLineReader.java
public CompressedSplitLineReader(SplitCompressionInputStream in,
                                 Configuration conf,
                                 byte[] recordDelimiterBytes)
                                     throws IOException {
  super(in, conf, recordDelimiterBytes);
  scin = in;
  usingCRLF = (recordDelimiterBytes == null);
}
Project: spark-util    File: ErrorHandlingLineRecordReader.java
public void initialize(InputSplit genericSplit, TaskAttemptContext context)  {
    try {
        FileSplit split = (FileSplit)genericSplit;
        Configuration job = context.getConfiguration();
        this.maxLineLength = job.getInt("mapreduce.input.linerecordreader.line.maxlength", 2147483647);
        this.start = split.getStart();
        this.end = this.start + split.getLength();
        Path file = split.getPath();
        FileSystem fs = file.getFileSystem(job);
        this.fileIn = fs.open(file);
        CompressionCodec codec = (new CompressionCodecFactory(job)).getCodec(file);
        if(null != codec) {
            this.isCompressedInput = true;
            this.decompressor = CodecPool.getDecompressor(codec);
            if(codec instanceof SplittableCompressionCodec) {
                SplitCompressionInputStream cIn = ((SplittableCompressionCodec)codec).createInputStream(this.fileIn, this.decompressor, this.start, this.end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
                this.in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
                this.start = cIn.getAdjustedStart();
                this.end = cIn.getAdjustedEnd();
                this.filePosition = cIn;
            } else {
                this.in = new SplitLineReader(codec.createInputStream(this.fileIn, this.decompressor), job, this.recordDelimiterBytes);
                this.filePosition = this.fileIn;
            }
        } else {
            this.fileIn.seek(this.start);
            this.in = new SplitLineReader(this.fileIn, job, this.recordDelimiterBytes);
            this.filePosition = this.fileIn;
        }

        if(this.start != 0L) {
            this.start += (long)this.in.readLine(new Text(), 0, this.maxBytesToConsume(this.start));
        }

        this.pos = this.start;
    }catch(Exception ex){
        LOG.warn("Exception occurred during initialization {}", ex, ex);
    }

}
Project: aliyun-oss-hadoop-fs    File: CompressedSplitLineReader.java
public CompressedSplitLineReader(SplitCompressionInputStream in,
                                 Configuration conf,
                                 byte[] recordDelimiterBytes)
                                     throws IOException {
  super(in, conf, recordDelimiterBytes);
  scin = in;
  usingCRLF = (recordDelimiterBytes == null);
}
Project: big-c    File: CompressedSplitLineReader.java
public CompressedSplitLineReader(SplitCompressionInputStream in,
                                 Configuration conf,
                                 byte[] recordDelimiterBytes)
                                     throws IOException {
  super(in, conf, recordDelimiterBytes);
  scin = in;
  usingCRLF = (recordDelimiterBytes == null);
}
Project: hadoopcryptoledger    File: AbstractEthereumRecordReader.java
/***
 * Initializes readers
 *
 * @param split Split to be used (assumed to be a file split)
 * @param context context of the job
 * @throws java.io.IOException in case of errors reading from the filestream provided by Hadoop
 * @throws java.lang.InterruptedException in case of thread interruption
 *
 */

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fSplit = (FileSplit) split;
    // Initialize start and end of split
    start = fSplit.getStart();
    end = start + fSplit.getLength();
    final Path file = fSplit.getPath();
    codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    final FileSystem fs = file.getFileSystem(context.getConfiguration());
    FSDataInputStream fileIn = fs.open(file);
    // open stream
    if (isCompressedInput()) { // decompress
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                .createInputStream(fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            ebr = new EthereumBlockReader(cIn, this.maxSizeEthereumBlock, this.bufferSize, this.useDirectBuffer);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            ebr = new EthereumBlockReader(codec.createInputStream(fileIn, decompressor), this.maxSizeEthereumBlock, this.bufferSize, this.useDirectBuffer);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        ebr = new EthereumBlockReader(fileIn, this.maxSizeEthereumBlock, this.bufferSize, this.useDirectBuffer);
        filePosition = fileIn;
    }
}
Project: hadoopcryptoledger    File: AbstractBitcoinRecordReader.java
/**
* Initializes reader
* @param split Split to use (assumed to be a file split)
* @param context context of the job
*
*
* @throws java.io.IOException in case of errors reading from the filestream provided by Hadoop
* @throws java.lang.InterruptedException in case of thread interruption
*
*/
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fSplit = (FileSplit) split;
    // Initialize start and end of split
    start = fSplit.getStart();
    end = start + fSplit.getLength();
    final Path file = fSplit.getPath();
    codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    final FileSystem fs = file.getFileSystem(context.getConfiguration());
    FSDataInputStream fileIn = fs.open(file);
    // open stream
    if (isCompressedInput()) { // decompress
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                .createInputStream(fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            bbr = new BitcoinBlockReader(cIn, this.maxSizeBitcoinBlock, this.bufferSize, this.specificMagicByteArray, this.useDirectBuffer, this.readAuxPOW);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            bbr = new BitcoinBlockReader(codec.createInputStream(fileIn, decompressor), this.maxSizeBitcoinBlock, this.bufferSize, this.specificMagicByteArray, this.useDirectBuffer, readAuxPOW);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        bbr = new BitcoinBlockReader(fileIn, this.maxSizeBitcoinBlock, this.bufferSize, this.specificMagicByteArray, this.useDirectBuffer, readAuxPOW);
        filePosition = fileIn;
    }
    // seek to block start (for the case a block overlaps a split)
    try {
        bbr.seekBlockStart();
    } catch (BitcoinBlockReadException bbre) {
        LOG.error("Error reading Bitcoin blockchain data");
        LOG.error(bbre);
    }
}
Project: hadoop-2.6.0-cdh5.4.3    File: CompressedSplitLineReader.java
public CompressedSplitLineReader(SplitCompressionInputStream in,
                                 Configuration conf,
                                 byte[] recordDelimiterBytes)
                                     throws IOException {
  super(in, conf, recordDelimiterBytes);
  scin = in;
  usingCRLF = (recordDelimiterBytes == null);
}
Project: FlexMap    File: CompressedSplitLineReader.java
public CompressedSplitLineReader(SplitCompressionInputStream in,
                                 Configuration conf,
                                 byte[] recordDelimiterBytes)
                                     throws IOException {
  super(in, conf, recordDelimiterBytes);
  scin = in;
  usingCRLF = (recordDelimiterBytes == null);
}
Project: hops    File: CompressedSplitLineReader.java
public CompressedSplitLineReader(SplitCompressionInputStream in,
                                 Configuration conf,
                                 byte[] recordDelimiterBytes)
                                     throws IOException {
  super(in, conf, recordDelimiterBytes);
  scin = in;
  usingCRLF = (recordDelimiterBytes == null);
}
Project: Hadoop-BAM    File: BGZFCodec.java
@Override
public SplitCompressionInputStream createInputStream(InputStream seekableIn,
    Decompressor decompressor, long start, long end, READ_MODE readMode) throws IOException {
  BGZFSplitGuesser splitGuesser = new BGZFSplitGuesser(seekableIn);
  long adjustedStart = splitGuesser.guessNextBGZFBlockStart(start, end);
  ((Seekable)seekableIn).seek(adjustedStart);
  return new BGZFSplitCompressionInputStream(seekableIn, adjustedStart, end);
}
Project: Hadoop-BAM    File: BGZFEnhancedGzipCodec.java
@Override
public SplitCompressionInputStream createInputStream(InputStream seekableIn, Decompressor decompressor, long start, long end, READ_MODE readMode) throws IOException {
  if (!(seekableIn instanceof Seekable)) {
    throw new IOException("seekableIn must be an instance of " +
        Seekable.class.getName());
  }
  if (!BlockCompressedInputStream.isValidFile(new BufferedInputStream(seekableIn))) {
    // data is regular gzip, not BGZF
    ((Seekable)seekableIn).seek(0);
    final CompressionInputStream compressionInputStream = createInputStream(seekableIn,
        decompressor);
    return new SplitCompressionInputStream(compressionInputStream, start, end) {
      @Override
      public int read(byte[] b, int off, int len) throws IOException {
        return compressionInputStream.read(b, off, len);
      }
      @Override
      public void resetState() throws IOException {
        compressionInputStream.resetState();
      }
      @Override
      public int read() throws IOException {
        return compressionInputStream.read();
      }
    };
  }
  BGZFSplitGuesser splitGuesser = new BGZFSplitGuesser(seekableIn);
  long adjustedStart = splitGuesser.guessNextBGZFBlockStart(start, end);
  ((Seekable)seekableIn).seek(adjustedStart);
  return new BGZFSplitCompressionInputStream(seekableIn, adjustedStart, end);
}
Project: incubator-tajo    File: CompressedSplitLineReader.java
public CompressedSplitLineReader(SplitCompressionInputStream in,
                                 Configuration conf,
                                 byte[] recordDelimiterBytes)
    throws IOException {
  super(in, conf, recordDelimiterBytes);
  scin = in;
  usingCRLF = (recordDelimiterBytes == null);
}
Project: tajo-cdh    File: CompressedSplitLineReader.java
public CompressedSplitLineReader(SplitCompressionInputStream in,
                                 Configuration conf,
                                 byte[] recordDelimiterBytes)
    throws IOException {
  super(in, conf, recordDelimiterBytes);
  scin = in;
  usingCRLF = (recordDelimiterBytes == null);
}
Project: hadoop-on-lustre2    File: CompressedSplitLineReader.java
public CompressedSplitLineReader(SplitCompressionInputStream in,
                                 Configuration conf,
                                 byte[] recordDelimiterBytes)
                                     throws IOException {
  super(in, conf, recordDelimiterBytes);
  scin = in;
  usingCRLF = (recordDelimiterBytes == null);
}
Project: spatedb    File: SpatialRecordReader.java
/**
 * Initialize from a path and range
 * @param job the job configuration
 * @param s start offset of the range in the file
 * @param l length of the range
 * @param p path to the file
 * @throws IOException
 */
public SpatialRecordReader(Configuration job, long s, long l, Path p) throws IOException {
  this.start = s;
  this.end = s + l;
  this.path = p;
  this.fs = this.path.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(this.path);
  this.blockSize = fs.getFileStatus(this.path).getBlockSize();
  this.cellMbr = new Prism();

  LOG.info("Open a SpatialRecordReader to file: "+this.path);

  codec = new CompressionCodecFactory(job).getCodec(this.path);

  if (isCompressedInput()) {
    decompressor = CodecPool.getDecompressor(codec);
    if (codec instanceof SplittableCompressionCodec) {
      final SplitCompressionInputStream cIn =
          ((SplittableCompressionCodec)codec).createInputStream(
            fileIn, decompressor, start, end,
            SplittableCompressionCodec.READ_MODE.BYBLOCK);
      in = cIn;
      start = cIn.getAdjustedStart();
      end = cIn.getAdjustedEnd();
      filePosition = cIn; // take pos from compressed stream
    } else {
      in = codec.createInputStream(fileIn, decompressor);
      filePosition = fileIn;
    }
  } else {
    fileIn.seek(start);
    in = fileIn;
    filePosition = fileIn;
  }
  this.pos = start;
  this.maxShapesInOneRead = job.getInt(SpatialSite.MaxShapesInOneRead, 1000000);
  this.maxBytesInOneRead = job.getInt(SpatialSite.MaxBytesInOneRead, 32*1024*1024);

  initializeReader();
}
Project: hadoop    File: LineRecordReader.java
public void initialize(InputSplit genericSplit,
                       TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();

  // open the file and seek to the start of the split
  final FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(file);

  CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
  if (null!=codec) {
    isCompressedInput = true;   
    decompressor = CodecPool.getDecompressor(codec);
    if (codec instanceof SplittableCompressionCodec) {
      final SplitCompressionInputStream cIn =
        ((SplittableCompressionCodec)codec).createInputStream(
          fileIn, decompressor, start, end,
          SplittableCompressionCodec.READ_MODE.BYBLOCK);
      in = new CompressedSplitLineReader(cIn, job,
          this.recordDelimiterBytes);
      start = cIn.getAdjustedStart();
      end = cIn.getAdjustedEnd();
      filePosition = cIn;
    } else {
      in = new SplitLineReader(codec.createInputStream(fileIn,
          decompressor), job, this.recordDelimiterBytes);
      filePosition = fileIn;
    }
  } else {
    fileIn.seek(start);
    in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
    filePosition = fileIn;
  }
  // If this is not the first split, we always throw away first record
  // because we always (except the last split) read one extra line in
  // next() method.
  if (start != 0) {
    start += in.readLine(new Text(), 0, maxBytesToConsume(start));
  }
  this.pos = start;
}
Project: aliyun-oss-hadoop-fs    File: LineRecordReader.java
public void initialize(InputSplit genericSplit,
                       TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();

  // open the file and seek to the start of the split
  final FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(file);

  CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
  if (null!=codec) {
    isCompressedInput = true;
    decompressor = CodecPool.getDecompressor(codec);
    if (codec instanceof SplittableCompressionCodec) {
      final SplitCompressionInputStream cIn =
        ((SplittableCompressionCodec)codec).createInputStream(
          fileIn, decompressor, start, end,
          SplittableCompressionCodec.READ_MODE.BYBLOCK);
      in = new CompressedSplitLineReader(cIn, job,
          this.recordDelimiterBytes);
      start = cIn.getAdjustedStart();
      end = cIn.getAdjustedEnd();
      filePosition = cIn;
    } else {
      if (start != 0) {
        // So we have a split that is only part of a file stored using
        // a Compression codec that cannot be split.
        throw new IOException("Cannot seek in " +
            codec.getClass().getSimpleName() + " compressed stream");
      }

      in = new SplitLineReader(codec.createInputStream(fileIn,
          decompressor), job, this.recordDelimiterBytes);
      filePosition = fileIn;
    }
  } else {
    fileIn.seek(start);
    in = new UncompressedSplitLineReader(
        fileIn, job, this.recordDelimiterBytes, split.getLength());
    filePosition = fileIn;
  }
  // If this is not the first split, we always throw away first record
  // because we always (except the last split) read one extra line in
  // next() method.
  if (start != 0) {
    start += in.readLine(new Text(), 0, maxBytesToConsume(start));
  }
  this.pos = start;
}
Project: big-c    File: LineRecordReader.java
public void initialize(InputSplit genericSplit,
                       TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();

  // open the file and seek to the start of the split
  final FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(file);

  CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
  if (null!=codec) {
    isCompressedInput = true;   
    decompressor = CodecPool.getDecompressor(codec);
    if (codec instanceof SplittableCompressionCodec) {
      final SplitCompressionInputStream cIn =
        ((SplittableCompressionCodec)codec).createInputStream(
          fileIn, decompressor, start, end,
          SplittableCompressionCodec.READ_MODE.BYBLOCK);
      in = new CompressedSplitLineReader(cIn, job,
          this.recordDelimiterBytes);
      start = cIn.getAdjustedStart();
      end = cIn.getAdjustedEnd();
      filePosition = cIn;
    } else {
      in = new SplitLineReader(codec.createInputStream(fileIn,
          decompressor), job, this.recordDelimiterBytes);
      filePosition = fileIn;
    }
  } else {
    fileIn.seek(start);
    in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
    filePosition = fileIn;
  }
  // If this is not the first split, we always throw away first record
  // because we always (except the last split) read one extra line in
  // next() method.
  if (start != 0) {
    start += in.readLine(new Text(), 0, maxBytesToConsume(start));
  }
  this.pos = start;
}
Project: hadoopoffice    File: AbstractSpreadSheetDocumentRecordReader.java
/**
* Creates an Abstract Record Reader for tables from various document formats
* @param split Split to use (assumed to be a file split)
* @param job Configuration:
* hadoopoffice.read.mimeType: Mimetype of the document
* hadoopoffice.read.locale: Locale of the document (e.g. needed for interpreting spreadsheets) in the BCP47 format (cf. https://tools.ietf.org/html/bcp47). If not specified then default system locale will be used.
* hadoopoffice.read.sheets: A ":" separated list of sheets to be read. If not specified then all sheets will be read one after the other
* hadoopoffice.read.linkedworkbooks: true if linkedworkbooks should be fetched. They must be in the same folder as the main workbook. Linked Workbooks will be processed together with the main workbook on one node and thus it should be avoided to have a lot of linked workbooks. It does only read the linked workbooks that are directly linked to the main workbook. Default: false
* hadoopoffice.read.ignoremissinglinkedworkbooks: true if missing linked workbooks should be ignored. Default: false
* hadoopoffice.read.security.crypt.password: if set then hadoopoffice will try to decrypt the file
* hadoopoffice.read.security.crypt.linkedworkbooks.*: if set then hadoopoffice will try to decrypt all the linked workbooks where a password has been specified. If no password is specified then it is assumed that the linked workbook is not encrypted. Example: Property key for file "linkedworkbook1.xlsx" is  "hadoopoffice.read.security.crypt.linkedworkbooks.linkedworkbook1.xslx". Value is the password. You must not include path or protocol information in the filename 
* hadoopoffice.read.filter.metadata: filters documents according to metadata. For example, hadoopoffice.read.filter.metadata.author will filter by author and the filter defined as value. Filtering is done by the parser and it is recommended that it supports regular expression for filtering, but this is up to the parser!
* @param reporter Reporter
*
* @throws java.io.IOException in case of errors reading from the filestream provided by Hadoop
* @throws org.zuinnote.hadoop.office.format.common.parser.FormatNotUnderstoodException in case the document has an invalid format
*
*/
public AbstractSpreadSheetDocumentRecordReader(FileSplit split, JobConf job, Reporter reporter) throws IOException, FormatNotUnderstoodException, GeneralSecurityException {
    // parse configuration
    this.conf = job;
    this.reporter = reporter;
    this.reporter.setStatus("Initialize Configuration");
    this.hocr = new HadoopOfficeReadConfiguration(this.conf);
    // Initialize start and end of split
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    this.hocr.setFileName(file.getName());
    this.readKeyStore(job);
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);
    FSDataInputStream fileIn = file.getFileSystem(job).open(file);
    // open stream
    if (isCompressedInput()) { // decompress
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            LOG.debug("Reading from a compressed file \"" + file + "\" with splittable compression codec");
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                .createInputStream(fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            officeReader = new OfficeReader(cIn, this.hocr);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            LOG.debug("Reading from a compressed file \"" + file + "\" with non-splittable compression codec");
            officeReader = new OfficeReader(codec.createInputStream(fileIn, decompressor), this.hocr);
            filePosition = fileIn;
        }
    } else {
        LOG.debug("Reading from an uncompressed file \"" + file + "\"");
        fileIn.seek(start);
        officeReader = new OfficeReader(fileIn, this.hocr);
        filePosition = fileIn;
    }
    this.reporter.setStatus("Parsing document");
    // initialize reader
    this.officeReader.parse();
    // read linked workbooks
    this.reporter.setStatus("Reading linked documents");
    if (this.hocr.getReadLinkedWorkbooks()) {
        // get current path
        Path currentPath = split.getPath();
        Path parentPath = currentPath.getParent();
        // read linked workbook filenames
        List<String> linkedWorkbookList = this.officeReader.getCurrentParser().getLinkedWorkbooks();
        this.currentHFR = new HadoopFileReader(job);
        for (String listItem : linkedWorkbookList) {
            LOG.info("Adding linked workbook \"" + listItem + "\"");
            String sanitizedListItem = new Path(listItem).getName();
            // read file from hadoop file
            Path currentFile = new Path(parentPath, sanitizedListItem);
            InputStream currentIn = this.currentHFR.openFile(currentFile);
            this.officeReader.getCurrentParser().addLinkedWorkbook(listItem, currentIn, this.hocr.getLinkedWBCredentialMap().get(sanitizedListItem));
        }
    }
}
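The configuration keys described in the Javadoc above are ordinary Hadoop configuration properties and can be set on the JobConf before the reader is constructed. A minimal, hypothetical sketch follows; the key names are taken from the Javadoc, while all values (MIME type string, locale, sheet names) are illustrative placeholders rather than project defaults.

import org.apache.hadoop.mapred.JobConf;

public class HadoopOfficeConfSketch {
    public static JobConf configure(JobConf job) {
        // Mimetype of the document (value shown is the standard MIME type for .xlsx; illustrative)
        job.set("hadoopoffice.read.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
        // Locale in BCP47 format used to interpret the spreadsheet
        job.set("hadoopoffice.read.locale", "en-US");
        // ":"-separated list of sheets to read; omit to read all sheets
        job.set("hadoopoffice.read.sheets", "Sheet1:Sheet2");
        // Whether linked workbooks should be fetched from the same folder as the main workbook
        job.setBoolean("hadoopoffice.read.linkedworkbooks", false);
        job.setBoolean("hadoopoffice.read.ignoremissinglinkedworkbooks", false);
        return job;
    }
}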
Project: hadoopoffice    File: AbstractSpreadSheetDocumentRecordReader.java
/**
* Initializes reader
* @param split Split to use (assumed to be a file split)
* @param context context of the job
*
*
* @throws java.io.IOException in case of errors reading from the filestream provided by Hadoop
* @throws java.lang.InterruptedException in case of thread interruption
*
*/
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    try {
        FileSplit fSplit = (FileSplit) split;
        // Initialize start and end of split
        start = fSplit.getStart();
        end = start + fSplit.getLength();
        final Path file = fSplit.getPath();
        codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
        this.hocr.setFileName(file.getName());
        this.readKeyStore(context.getConfiguration());
        FSDataInputStream fileIn = file.getFileSystem(conf).open(file);
        // open stream
        if (isCompressedInput()) { // decompress
            decompressor = CodecPool.getDecompressor(codec);
            if (codec instanceof SplittableCompressionCodec) {
                LOG.debug("Reading from a compressed file \"" + file + "\" with splittable compression codec");
                final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                    .createInputStream(fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
                officeReader = new OfficeReader(cIn, this.hocr);
                start = cIn.getAdjustedStart();
                end = cIn.getAdjustedEnd();
                filePosition = cIn; // take pos from compressed stream
            } else {
                LOG.debug("Reading from a compressed file \"" + file + "\" with non-splittable compression codec");
                officeReader = new OfficeReader(codec.createInputStream(fileIn, decompressor), this.hocr);
                filePosition = fileIn;
            }
        } else {
            LOG.debug("Reading from an uncompressed file \"" + file + "\"");
            fileIn.seek(start);
            officeReader = new OfficeReader(fileIn, this.hocr);
            filePosition = fileIn;
        }
        // initialize reader
        this.officeReader.parse();
        // read linked workbooks
        if (this.hocr.getReadLinkedWorkbooks()) {
            // get current path
            Path currentPath = fSplit.getPath();
            Path parentPath = currentPath.getParent();
            // read linked workbook filenames
            List<String> linkedWorkbookList = this.officeReader.getCurrentParser().getLinkedWorkbooks();
            LOG.debug(linkedWorkbookList.size());
            this.currentHFR = new HadoopFileReader(context.getConfiguration());
            for (String listItem : linkedWorkbookList) {
                LOG.info("Adding linked workbook \"" + listItem + "\"");
                String sanitizedListItem = new Path(listItem).getName();
                // read file from hadoop file
                Path currentFile = new Path(parentPath, sanitizedListItem);
                InputStream currentIn = this.currentHFR.openFile(currentFile);
                this.officeReader.getCurrentParser().addLinkedWorkbook(listItem, currentIn, this.hocr.getLinkedWBCredentialMap().get(sanitizedListItem));
            }
        }
    } catch (FormatNotUnderstoodException fnue) {
        LOG.error(fnue);
        this.close();
        throw new InterruptedException();
    }
}
Project: hadoopcryptoledger    File: AbstractEthereumRecordReader.java
/**
* Creates an Abstract Record Reader for Ethereum blocks
* @param split Split to use (assumed to be a file split)
* @param job Configuration:
 * io.file.buffer.size: Size of the in-memory buffer specified in the given Configuration. If io.file.buffer.size is not specified, the default buffer size will be used. Furthermore, one may specify hadoopcryptoledger.ethereumblockinputformat.maxblocksize, which defines the maximum size an Ethereum block may have (by default 1M). If you want to experiment with performance using DirectByteBuffer instead of HeapByteBuffer you can use "hadoopcryptoledeger.ethereumblockinputformat.usedirectbuffer" (default: false). Note that it might have some unwanted consequences such as circumventing Yarn memory management. The option is experimental and might be removed in future versions.
* @param reporter Reporter
*
*
* @throws java.io.IOException in case of errors reading from the filestream provided by Hadoop
*
*/
public AbstractEthereumRecordReader(FileSplit split, JobConf job, Reporter reporter) throws IOException {
    LOG.debug("Reading configuration");
    // parse configuration
    this.reporter = reporter;
    this.conf = job;
    this.maxSizeEthereumBlock = conf.getInt(AbstractEthereumRecordReader.CONF_MAXBLOCKSIZE, AbstractEthereumRecordReader.DEFAULT_MAXSIZE_ETHEREUMBLOCK);
    this.bufferSize = conf.getInt(AbstractEthereumRecordReader.CONF_BUFFERSIZE, AbstractEthereumRecordReader.DEFAULT_BUFFERSIZE);
    this.useDirectBuffer = conf.getBoolean(AbstractEthereumRecordReader.CONF_USEDIRECTBUFFER, AbstractEthereumRecordReader.DEFAULT_USEDIRECTBUFFER);
    // Initialize start and end of split
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    codec = new CompressionCodecFactory(job).getCodec(file);
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    // open stream
    if (isCompressedInput()) { // decompress
        LOG.debug("Decompressing file");
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            LOG.debug("SplittableCompressionCodec");
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                .createInputStream(fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            ebr = new EthereumBlockReader(cIn, this.maxSizeEthereumBlock, this.bufferSize, this.useDirectBuffer);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            LOG.debug("Not-splittable compression codec");
            ebr = new EthereumBlockReader(codec.createInputStream(fileIn, decompressor), this.maxSizeEthereumBlock, this.bufferSize, this.useDirectBuffer);
            filePosition = fileIn;
        }
    } else {
        LOG.debug("Processing file without compression");
        fileIn.seek(start);
        ebr = new EthereumBlockReader(fileIn, this.maxSizeEthereumBlock, this.bufferSize, this.useDirectBuffer);
        filePosition = fileIn;
    }
    // initialize reader
    this.reporter.setStatus("Ready to read");
}
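The Javadoc above names two HadoopCryptoLedger-specific keys besides io.file.buffer.size. A brief sketch of setting them on the JobConf before constructing the record reader; the key strings are copied from the Javadoc, the values are illustrative only.

import org.apache.hadoop.mapred.JobConf;

public class EthereumReaderConfSketch {
    public static JobConf configure(JobConf job) {
        // In-memory buffer size used when reading the block stream
        job.setInt("io.file.buffer.size", 64 * 1024);
        // Maximum size an Ethereum block may have (Javadoc default: 1M)
        job.setInt("hadoopcryptoledger.ethereumblockinputformat.maxblocksize", 1024 * 1024);
        // Experimental: use DirectByteBuffer instead of HeapByteBuffer (key spelled exactly as in the Javadoc)
        job.setBoolean("hadoopcryptoledeger.ethereumblockinputformat.usedirectbuffer", false);
        return job;
    }
}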
Project: hadoopcryptoledger    File: AbstractBitcoinRecordReader.java
/**
* Creates an Abstract Record Reader for Bitcoin blocks
* @param split Split to use (assumed to be a file split)
* @param job Configuration:
 * io.file.buffer.size: Size of the in-memory buffer specified in the given Configuration. If io.file.buffer.size is not specified, the default buffer size (maximum size of a bitcoin block) will be used. The configuration hadoopcryptoledger.bitcoinblockinputformat.filter.magic allows specifying the magic identifier of the block. The magic is a comma-separated list of Hex-values (e.g. F9BEB4D9,FABFB5DA,0B110907,0B110907). The default magic is always F9BEB4D9. One needs to specify at least one magic, otherwise it will be difficult to find blocks in splits. Furthermore, one may specify hadoopcryptoledger.bitcoinblockinputformat.maxblocksize, which defines the maximum size a bitcoin block may have (by default 8M). If you want to experiment with performance using DirectByteBuffer instead of HeapByteBuffer you can use "hadoopcryptoledeger.bitcoinblockinputformat.usedirectbuffer" (default: false). Note that it might have some unwanted consequences such as circumventing Yarn memory management. The option is experimental and might be removed in future versions.
* @param reporter Reporter
*
*
* @throws java.io.IOException in case of errors reading from the filestream provided by Hadoop
* @throws org.zuinnote.hadoop.bitcoin.format.exception.HadoopCryptoLedgerConfigurationException in case of an invalid HadoopCryptoLedger-specific configuration of the inputformat
* @throws org.zuinnote.hadoop.bitcoin.format.exception.BitcoinBlockReadException in case the Bitcoin data contains invalid blocks (e.g. magic might be different)
*
*/
public AbstractBitcoinRecordReader(FileSplit split, JobConf job, Reporter reporter) throws IOException, HadoopCryptoLedgerConfigurationException, BitcoinBlockReadException {
    LOG.debug("Reading configuration");
    // parse configuration
    this.reporter = reporter;
    this.conf = job;
    this.maxSizeBitcoinBlock = conf.getInt(AbstractBitcoinRecordReader.CONF_MAXBLOCKSIZE, AbstractBitcoinRecordReader.DEFAULT_MAXSIZE_BITCOINBLOCK);
    this.bufferSize = conf.getInt(AbstractBitcoinRecordReader.CONF_BUFFERSIZE, AbstractBitcoinRecordReader.DEFAULT_BUFFERSIZE);
    this.specificMagic = conf.get(AbstractBitcoinRecordReader.CONF_FILTERMAGIC);
    // we need to provide at least one magic
    if ((this.specificMagic == null) || (this.specificMagic.length() == 0)) {
        this.specificMagic = AbstractBitcoinRecordReader.DEFAULT_MAGIC;
    }
    if ((this.specificMagic != null) && (this.specificMagic.length() > 0)) {
        this.specificMagicStringArray = specificMagic.split(",");
        specificMagicByteArray = new byte[specificMagicStringArray.length][4]; // each magic is always 4 bytes
        for (int i = 0; i < specificMagicStringArray.length; i++) {
            byte[] currentMagicNo = BitcoinUtil.convertHexStringToByteArray(specificMagicStringArray[i]);
            if (currentMagicNo.length != 4) {
                throw new HadoopCryptoLedgerConfigurationException("Error: Configuration. Magic number has not a length of 4 bytes. Index: " + i);
            }
            specificMagicByteArray[i] = currentMagicNo;
        }
    }
    this.useDirectBuffer = conf.getBoolean(AbstractBitcoinRecordReader.CONF_USEDIRECTBUFFER, AbstractBitcoinRecordReader.DEFAULT_USEDIRECTBUFFER);
    this.readAuxPOW = conf.getBoolean(AbstractBitcoinRecordReader.CONF_READAUXPOW, AbstractBitcoinRecordReader.DEFAULT_READAUXPOW);
    // Initialize start and end of split
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    codec = new CompressionCodecFactory(job).getCodec(file);
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    // open stream
    if (isCompressedInput()) { // decompress
        LOG.debug("Decompressing file");
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            LOG.debug("SplittableCompressionCodec");
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
                .createInputStream(fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
            bbr = new BitcoinBlockReader(cIn, this.maxSizeBitcoinBlock, this.bufferSize, this.specificMagicByteArray, this.useDirectBuffer, this.readAuxPOW);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            LOG.debug("Not-splittable compression codec");
            bbr = new BitcoinBlockReader(codec.createInputStream(fileIn, decompressor), this.maxSizeBitcoinBlock, this.bufferSize, this.specificMagicByteArray, this.useDirectBuffer, this.readAuxPOW);
            filePosition = fileIn;
        }
    } else {
        LOG.debug("Processing file without compression");
        fileIn.seek(start);
        bbr = new BitcoinBlockReader(fileIn, this.maxSizeBitcoinBlock, this.bufferSize, this.specificMagicByteArray, this.useDirectBuffer, this.readAuxPOW);
        filePosition = fileIn;
    }
    // initialize reader
    // seek to block start (for the case a block overlaps a split)
    LOG.debug("Seeking to block start");
    this.reporter.setStatus("Seeking Block start");
    bbr.seekBlockStart();
    this.reporter.setStatus("Ready to read");
}
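Analogously, the Bitcoin reader is driven by the configuration keys named in its Javadoc. A short, hypothetical sketch of setting them on the JobConf; key names come from the Javadoc, values are illustrative.

import org.apache.hadoop.mapred.JobConf;

public class BitcoinReaderConfSketch {
    public static JobConf configure(JobConf job) {
        // Comma-separated list of hex magic values identifying blocks (default F9BEB4D9)
        job.set("hadoopcryptoledger.bitcoinblockinputformat.filter.magic", "F9BEB4D9,FABFB5DA");
        // Maximum size a Bitcoin block may have (Javadoc default: 8M)
        job.setInt("hadoopcryptoledger.bitcoinblockinputformat.maxblocksize", 8 * 1024 * 1024);
        // In-memory buffer size for reading from the file stream
        job.setInt("io.file.buffer.size", 64 * 1024);
        return job;
    }
}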
Project: hadoop-2.6.0-cdh5.4.3    File: LineRecordReader.java
public void initialize(InputSplit genericSplit,
                       TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();

  // open the file and seek to the start of the split
  final FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(file);

  CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
  if (null!=codec) {
    isCompressedInput = true;   
    decompressor = CodecPool.getDecompressor(codec);
    if (codec instanceof SplittableCompressionCodec) {
      final SplitCompressionInputStream cIn =
        ((SplittableCompressionCodec)codec).createInputStream(
          fileIn, decompressor, start, end,
          SplittableCompressionCodec.READ_MODE.BYBLOCK);
      in = new CompressedSplitLineReader(cIn, job,
          this.recordDelimiterBytes);
      start = cIn.getAdjustedStart();
      end = cIn.getAdjustedEnd();
      filePosition = cIn;
    } else {
      in = new SplitLineReader(codec.createInputStream(fileIn,
          decompressor), job, this.recordDelimiterBytes);
      filePosition = fileIn;
    }
  } else {
    fileIn.seek(start);
    in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
    filePosition = fileIn;
  }
  // If this is not the first split, we always throw away first record
  // because we always (except the last split) read one extra line in
  // next() method.
  if (start != 0) {
    start += in.readLine(new Text(), 0, maxBytesToConsume(start));
  }
  this.pos = start;
}
Project: hadoop-2.6.0-cdh5.4.3    File: LineRecordReader.java
public void initialize(InputSplit genericSplit,
                       TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength",
                                  Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();
  compressionCodecs = new CompressionCodecFactory(job);
  codec = compressionCodecs.getCodec(file);

  // open the file and seek to the start of the split
  FileSystem fs = file.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(split.getPath());

  if (isCompressedInput()) {
    decompressor = CodecPool.getDecompressor(codec);
    if (codec instanceof SplittableCompressionCodec) {
      final SplitCompressionInputStream cIn =
        ((SplittableCompressionCodec)codec).createInputStream(
          fileIn, decompressor, start, end,
          SplittableCompressionCodec.READ_MODE.BYBLOCK);
      in = new CompressedSplitLineReader(cIn, job,
          this.recordDelimiterBytes);
      start = cIn.getAdjustedStart();
      end = cIn.getAdjustedEnd();
      filePosition = cIn;
    } else {
      in = new SplitLineReader(codec.createInputStream(fileIn,
          decompressor), job, this.recordDelimiterBytes);
      filePosition = fileIn;
    }
  } else {
    fileIn.seek(start);
    in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
    filePosition = fileIn;
  }
  // If this is not the first split, we always throw away first record
  // because we always (except the last split) read one extra line in
  // next() method.
  if (start != 0) {
    start += in.readLine(new Text(), 0, maxBytesToConsume(start));
  }
  this.pos = start;
}
Project: hadoop-plus    File: LineRecordReader.java
public LineRecordReader(Configuration job, FileSplit split,
    byte[] recordDelimiter) throws IOException {
  this.maxLineLength = job.getInt(org.apache.hadoop.mapreduce.lib.input.
    LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();
  compressionCodecs = new CompressionCodecFactory(job);
  codec = compressionCodecs.getCodec(file);

  // open the file and seek to the start of the split
  final FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(file);
  if (isCompressedInput()) {
    decompressor = CodecPool.getDecompressor(codec);
    if (codec instanceof SplittableCompressionCodec) {
      final SplitCompressionInputStream cIn =
        ((SplittableCompressionCodec)codec).createInputStream(
          fileIn, decompressor, start, end,
          SplittableCompressionCodec.READ_MODE.BYBLOCK);
      in = new LineReader(cIn, job, recordDelimiter);
      start = cIn.getAdjustedStart();
      end = cIn.getAdjustedEnd();
      filePosition = cIn; // take pos from compressed stream
    } else {
      in = new LineReader(codec.createInputStream(fileIn, decompressor), job, recordDelimiter);
      filePosition = fileIn;
    }
  } else {
    fileIn.seek(start);
    in = new LineReader(fileIn, job, recordDelimiter);
    filePosition = fileIn;
  }
  // If this is not the first split, we always throw away first record
  // because we always (except the last split) read one extra line in
  // next() method.
  if (start != 0) {
    start += in.readLine(new Text(), 0, maxBytesToConsume(start));
  }
  this.pos = start;
}
Project: hadoop-xz    File: XZCodec.java
@Override
public SplitCompressionInputStream createInputStream(InputStream seekableIn, Decompressor decompressor, long start, long end, READ_MODE readMode) throws IOException {
    return new XZSplitCompressionInputStream(seekableIn, start, end, readMode);
}
Project: FlexMap    File: LineRecordReader.java
public void initialize(InputSplit genericSplit,
                       TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();

  // open the file and seek to the start of the split
  final FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(file);

  CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
  if (null!=codec) {
    isCompressedInput = true;   
    decompressor = CodecPool.getDecompressor(codec);
    if (codec instanceof SplittableCompressionCodec) {
      final SplitCompressionInputStream cIn =
        ((SplittableCompressionCodec)codec).createInputStream(
          fileIn, decompressor, start, end,
          SplittableCompressionCodec.READ_MODE.BYBLOCK);
      in = new CompressedSplitLineReader(cIn, job,
          this.recordDelimiterBytes);
      start = cIn.getAdjustedStart();
      end = cIn.getAdjustedEnd();
      filePosition = cIn;
    } else {
      in = new SplitLineReader(codec.createInputStream(fileIn,
          decompressor), job, this.recordDelimiterBytes);
      filePosition = fileIn;
    }
  } else {
    fileIn.seek(start);
    in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
    filePosition = fileIn;
  }
  // If this is not the first split, we always throw away first record
  // because we always (except the last split) read one extra line in
  // next() method.
  if (start != 0) {
    start += in.readLine(new Text(), 0, maxBytesToConsume(start));
  }
  this.pos = start;
}
Project: hops    File: LineRecordReader.java
public void initialize(InputSplit genericSplit,
                       TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();

  // open the file and seek to the start of the split
  final FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(file);

  CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
  if (null!=codec) {
    isCompressedInput = true;
    decompressor = CodecPool.getDecompressor(codec);
    if (codec instanceof SplittableCompressionCodec) {
      final SplitCompressionInputStream cIn =
        ((SplittableCompressionCodec)codec).createInputStream(
          fileIn, decompressor, start, end,
          SplittableCompressionCodec.READ_MODE.BYBLOCK);
      in = new CompressedSplitLineReader(cIn, job,
          this.recordDelimiterBytes);
      start = cIn.getAdjustedStart();
      end = cIn.getAdjustedEnd();
      filePosition = cIn;
    } else {
      if (start != 0) {
        // So we have a split that is only part of a file stored using
        // a Compression codec that cannot be split.
        throw new IOException("Cannot seek in " +
            codec.getClass().getSimpleName() + " compressed stream");
      }

      in = new SplitLineReader(codec.createInputStream(fileIn,
          decompressor), job, this.recordDelimiterBytes);
      filePosition = fileIn;
    }
  } else {
    fileIn.seek(start);
    in = new UncompressedSplitLineReader(
        fileIn, job, this.recordDelimiterBytes, split.getLength());
    filePosition = fileIn;
  }
  // If this is not the first split, we always throw away first record
  // because we always (except the last split) read one extra line in
  // next() method.
  if (start != 0) {
    start += in.readLine(new Text(), 0, maxBytesToConsume(start));
  }
  this.pos = start;
}
Project: hadoop-TCP    File: LineRecordReader.java
public LineRecordReader(Configuration job, FileSplit split,
    byte[] recordDelimiter) throws IOException {
  this.maxLineLength = job.getInt(org.apache.hadoop.mapreduce.lib.input.
    LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();
  compressionCodecs = new CompressionCodecFactory(job);
  codec = compressionCodecs.getCodec(file);

  // open the file and seek to the start of the split
  final FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(file);
  if (isCompressedInput()) {
    decompressor = CodecPool.getDecompressor(codec);
    if (codec instanceof SplittableCompressionCodec) {
      final SplitCompressionInputStream cIn =
        ((SplittableCompressionCodec)codec).createInputStream(
          fileIn, decompressor, start, end,
          SplittableCompressionCodec.READ_MODE.BYBLOCK);
      in = new LineReader(cIn, job, recordDelimiter);
      start = cIn.getAdjustedStart();
      end = cIn.getAdjustedEnd();
      filePosition = cIn; // take pos from compressed stream
    } else {
      in = new LineReader(codec.createInputStream(fileIn, decompressor), job, recordDelimiter);
      filePosition = fileIn;
    }
  } else {
    fileIn.seek(start);
    in = new LineReader(fileIn, job, recordDelimiter);
    filePosition = fileIn;
  }
  // If this is not the first split, we always throw away first record
  // because we always (except the last split) read one extra line in
  // next() method.
  if (start != 0) {
    start += in.readLine(new Text(), 0, maxBytesToConsume(start));
  }
  this.pos = start;
}
Project: hadoop-TCP    File: LineRecordReader.java
public void initialize(InputSplit genericSplit,
                       TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();

  // open the file and seek to the start of the split
  final FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(file);

  CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
  if (null!=codec) {
    isCompressedInput = true;   
    decompressor = CodecPool.getDecompressor(codec);
    if (codec instanceof SplittableCompressionCodec) {
      final SplitCompressionInputStream cIn =
        ((SplittableCompressionCodec)codec).createInputStream(
          fileIn, decompressor, start, end,
          SplittableCompressionCodec.READ_MODE.BYBLOCK);
      if (null == this.recordDelimiterBytes){
        in = new LineReader(cIn, job);
      } else {
        in = new LineReader(cIn, job, this.recordDelimiterBytes);
      }

      start = cIn.getAdjustedStart();
      end = cIn.getAdjustedEnd();
      filePosition = cIn;
    } else {
      if (null == this.recordDelimiterBytes) {
        in = new LineReader(codec.createInputStream(fileIn, decompressor),
            job);
      } else {
        in = new LineReader(codec.createInputStream(fileIn,
            decompressor), job, this.recordDelimiterBytes);
      }
      filePosition = fileIn;
    }
  } else {
    fileIn.seek(start);
    if (null == this.recordDelimiterBytes){
      in = new LineReader(fileIn, job);
    } else {
      in = new LineReader(fileIn, job, this.recordDelimiterBytes);
    }

    filePosition = fileIn;
  }
  // If this is not the first split, we always throw away first record
  // because we always (except the last split) read one extra line in
  // next() method.
  if (start != 0) {
    start += in.readLine(new Text(), 0, maxBytesToConsume(start));
  }
  this.pos = start;
}
Project: hardfs    File: LineRecordReader.java
public LineRecordReader(Configuration job, FileSplit split,
    byte[] recordDelimiter) throws IOException {
  this.maxLineLength = job.getInt(org.apache.hadoop.mapreduce.lib.input.
    LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();
  compressionCodecs = new CompressionCodecFactory(job);
  codec = compressionCodecs.getCodec(file);

  // open the file and seek to the start of the split
  final FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(file);
  if (isCompressedInput()) {
    decompressor = CodecPool.getDecompressor(codec);
    if (codec instanceof SplittableCompressionCodec) {
      final SplitCompressionInputStream cIn =
        ((SplittableCompressionCodec)codec).createInputStream(
          fileIn, decompressor, start, end,
          SplittableCompressionCodec.READ_MODE.BYBLOCK);
      in = new LineReader(cIn, job, recordDelimiter);
      start = cIn.getAdjustedStart();
      end = cIn.getAdjustedEnd();
      filePosition = cIn; // take pos from compressed stream
    } else {
      in = new LineReader(codec.createInputStream(fileIn, decompressor), job, recordDelimiter);
      filePosition = fileIn;
    }
  } else {
    fileIn.seek(start);
    in = new LineReader(fileIn, job, recordDelimiter);
    filePosition = fileIn;
  }
  // If this is not the first split, we always throw away first record
  // because we always (except the last split) read one extra line in
  // next() method.
  if (start != 0) {
    start += in.readLine(new Text(), 0, maxBytesToConsume(start));
  }
  this.pos = start;
}
Project: hardfs    File: LineRecordReader.java
public void initialize(InputSplit genericSplit,
                       TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();

  // open the file and seek to the start of the split
  final FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(file);

  CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
  if (null!=codec) {
    isCompressedInput = true;   
    decompressor = CodecPool.getDecompressor(codec);
    if (codec instanceof SplittableCompressionCodec) {
      final SplitCompressionInputStream cIn =
        ((SplittableCompressionCodec)codec).createInputStream(
          fileIn, decompressor, start, end,
          SplittableCompressionCodec.READ_MODE.BYBLOCK);
      if (null == this.recordDelimiterBytes){
        in = new LineReader(cIn, job);
      } else {
        in = new LineReader(cIn, job, this.recordDelimiterBytes);
      }

      start = cIn.getAdjustedStart();
      end = cIn.getAdjustedEnd();
      filePosition = cIn;
    } else {
      if (null == this.recordDelimiterBytes) {
        in = new LineReader(codec.createInputStream(fileIn, decompressor),
            job);
      } else {
        in = new LineReader(codec.createInputStream(fileIn,
            decompressor), job, this.recordDelimiterBytes);
      }
      filePosition = fileIn;
    }
  } else {
    fileIn.seek(start);
    if (null == this.recordDelimiterBytes){
      in = new LineReader(fileIn, job);
    } else {
      in = new LineReader(fileIn, job, this.recordDelimiterBytes);
    }

    filePosition = fileIn;
  }
  // If this is not the first split, we always throw away first record
  // because we always (except the last split) read one extra line in
  // next() method.
  if (start != 0) {
    start += in.readLine(new Text(), 0, maxBytesToConsume(start));
  }
  this.pos = start;
}