Example Source Code for the Java Class org.apache.hadoop.io.compress.BZip2Codec
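The examples below show how the Java class org.apache.hadoop.io.compress.BZip2Codec is used in a number of open-source projects, mostly either to read bzip2-compressed input splits or to select bzip2 compression for job output. Before the project snippets, here is a minimal, self-contained sketch of the codec's basic stream API; it is not taken from any of the projects below, and the class and variable names are illustrative:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;

public class BZip2CodecRoundTrip {
  public static void main(String[] args) throws IOException {
    BZip2Codec codec = new BZip2Codec();
    codec.setConf(new Configuration()); // BZip2Codec is Configurable

    // Compress a few bytes into an in-memory buffer.
    ByteArrayOutputStream compressed = new ByteArrayOutputStream();
    try (CompressionOutputStream out = codec.createOutputStream(compressed)) {
      out.write("hello bzip2".getBytes("UTF-8"));
    }

    // Decompress the buffer and print the original text.
    ByteArrayOutputStream restored = new ByteArrayOutputStream();
    try (CompressionInputStream in = codec.createInputStream(
        new ByteArrayInputStream(compressed.toByteArray()))) {
      byte[] buf = new byte[4096];
      int n;
      while ((n = in.read(buf)) > 0) {
        restored.write(buf, 0, n);
      }
    }
    System.out.println(restored.toString("UTF-8")); // prints: hello bzip2
  }
}

Several snippets below configure the codec via ReflectionUtils.setConf(bzip2, conf) or codec.setConf(conf); as a Configurable, the codec should be given a Configuration before use.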

Project: hadoopcryptoledger    File: BitcoinFormatHadoopTest.java
@Test
public void readBitcoinRawBlockInputFormatBzip2Compressed() throws IOException {
  JobConf job = new JobConf(defaultConf);
  CompressionCodec bzip2 = new BZip2Codec();
  ReflectionUtils.setConf(bzip2, job);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version4comp.blk.bz2";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for compressed block");
  RecordReader<BytesWritable, BytesWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");
  BytesWritable key = new BytesWritable();
  BytesWritable block = new BytesWritable();
  assertTrue(reader.next(key, block), "Input split for block version contains at least one block");
  assertEquals(998039, block.getLength(), "Compressed block must have a size of 998,039 bytes");
  BytesWritable emptyKey = new BytesWritable();
  BytesWritable emptyBlock = new BytesWritable();
  assertFalse(reader.next(emptyKey, emptyBlock), "No further blocks in compressed block");
  reader.close();
}
Project: hadoopcryptoledger    File: BitcoinFormatHadoopTest.java
@Test
public void readBitcoinTransactionInputFormatBzip2Compressed() throws IOException {
  JobConf job = new JobConf(defaultConf);
  CompressionCodec bzip2 = new BZip2Codec();
  ReflectionUtils.setConf(bzip2, job);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version4comp.blk.bz2";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  BitcoinTransactionFileInputFormat format = new BitcoinTransactionFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for compressed block");
  RecordReader<BytesWritable, BitcoinTransaction> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");
  BytesWritable key = new BytesWritable();
  BitcoinTransaction transaction = new BitcoinTransaction();
  int transactCount = 0;
  while (reader.next(key, transaction)) {
    transactCount++;
  }
  assertEquals(936, transactCount, "Compressed block must contain exactly 936 transactions");
  reader.close();
}
Project: hadoopcryptoledger    File: BitcoinFormatHadoopTest.java
@Test
public void readBitcoinRawBlockInputFormatBzip2Compressed() throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  Job job = Job.getInstance(conf);
  CompressionCodec bzip2 = new BZip2Codec();
  ReflectionUtils.setConf(bzip2, conf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version4comp.blk.bz2";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for compressed block");
  RecordReader<BytesWritable, BytesWritable> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  assertTrue(reader.nextKeyValue(), "Input split for block version contains at least one block");
  BytesWritable block = reader.getCurrentValue();
  assertEquals(998039, block.getLength(), "Compressed block must have a size of 998,039 bytes");
  assertFalse(reader.nextKeyValue(), "No further blocks in compressed block");
  reader.close();
}
Project: hadoopcryptoledger    File: BitcoinFormatHadoopTest.java
@Test
public void readBitcoinTransactionInputFormatBzip2Compressed() throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  Job job = Job.getInstance(conf);
  CompressionCodec bzip2 = new BZip2Codec();
  ReflectionUtils.setConf(bzip2, conf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version4comp.blk.bz2";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  BitcoinTransactionFileInputFormat format = new BitcoinTransactionFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for compressed block");
  RecordReader<BytesWritable, BitcoinTransaction> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  int transactCount = 0;
  while (reader.nextKeyValue()) {
    transactCount++;
  }
  assertEquals(936, transactCount, "Compressed block must contain exactly 936 transactions");
  reader.close();
}
Project: pregelix    File: DataBalancer.java
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(DataBalancer.class);

    job.setJobName(DataBalancer.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(Integer.parseInt(args[2]));

    if (args.length > 3) {
        if (args[3].startsWith("bzip"))
            FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
        if (args[3].startsWith("gz"))
            FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    }
    JobClient.runJob(job);
}
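A typical invocation of DataBalancer (illustrative; the jar name is an assumption) would be: hadoop jar pregelix-examples.jar DataBalancer <inputPath> <outputPath> <numReduceTasks> [bzip...|gz...], where the optional fourth argument selects BZip2Codec or GzipCodec for the job output.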
Project: hadoop-in-action    File: SortDataPreprocessor.java
@Override
public int run(String[] args) throws Exception {
    Job job = JobBuilder.parseInputAndOutput(this, super.getConf(), args);
    if (job == null) {
        return -1;
    }

    job.setMapperClass(CleanerMapper.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job,
        CompressionType.BLOCK);

    return job.waitForCompletion(true) ? 0 : 1;
}
Project: hadoop-in-action    File: SortByTemperatureToMapFile.java
@Override
public int run(String[] args) throws Exception {
    Job job = JobBuilder.parseInputAndOutput(this, getConf(), args);
    if (job == null) {
        return -1;
    }

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    return job.waitForCompletion(true) ? 0 : 1;
}
Project: hadoop    File: TestLineRecordReader.java
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader().
      getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
      testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
      (String[])null);

  LineRecordReader reader = new LineRecordReader(conf, split);
  LongWritable key = new LongWritable();
  Text value = new Text();
  //noinspection StatementWithEmptyBody
  while (reader.next(key, value)) ;
  reader.close();
  reader.close();

  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
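  // If the double close() above had returned the reader's decompressor to the
  // CodecPool twice, two of the ten calls below would hand out the same
  // instance and the set would end up smaller than 10.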
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
Project: hadoop    File: TestLineRecordReader.java
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader().
      getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
      testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

  // read all records in the file; the reader is then closed twice
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
  LineRecordReader reader = new LineRecordReader();
  reader.initialize(split, context);

  //noinspection StatementWithEmptyBody
  while (reader.nextKeyValue()) ;
  reader.close();
  reader.close();

  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
Project: aliyun-oss-hadoop-fs    File: TestLineRecordReader.java
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader().
      getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
      testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
      (String[])null);

  LineRecordReader reader = new LineRecordReader(conf, split);
  LongWritable key = new LongWritable();
  Text value = new Text();
  //noinspection StatementWithEmptyBody
  while (reader.next(key, value)) ;
  reader.close();
  reader.close();

  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
Project: aliyun-oss-hadoop-fs    File: TestLineRecordReader.java
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader().
      getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
      testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

  // read all records in the file; the reader is then closed twice
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
  LineRecordReader reader = new LineRecordReader();
  reader.initialize(split, context);

  //noinspection StatementWithEmptyBody
  while (reader.nextKeyValue()) ;
  reader.close();
  reader.close();

  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
Project: big-c    File: TestLineRecordReader.java
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader().
      getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
      testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
      (String[])null);

  LineRecordReader reader = new LineRecordReader(conf, split);
  LongWritable key = new LongWritable();
  Text value = new Text();
  //noinspection StatementWithEmptyBody
  while (reader.next(key, value)) ;
  reader.close();
  reader.close();

  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
Project: big-c    File: TestLineRecordReader.java
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader().
      getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
      testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

  // read all records in the file; the reader is then closed twice
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
  LineRecordReader reader = new LineRecordReader();
  reader.initialize(split, context);

  //noinspection StatementWithEmptyBody
  while (reader.nextKeyValue()) ;
  reader.close();
  reader.close();

  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
Project: hadoopcryptoledger    File: BitcoinFormatHadoopTest.java
@Test
public void readBitcoinBlockInputFormatBzip2Compressed() throws IOException {
  JobConf job = new JobConf(defaultConf);
  CompressionCodec bzip2 = new BZip2Codec();
  ReflectionUtils.setConf(bzip2, job);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version4comp.blk.bz2";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  BitcoinBlockFileInputFormat format = new BitcoinBlockFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for compressed block");
  RecordReader<BytesWritable, BitcoinBlock> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");
  BytesWritable key = new BytesWritable();
  BitcoinBlock block = new BitcoinBlock();
  assertTrue(reader.next(key, block), "Input split for block version contains at least one block");
  assertEquals(936, block.getTransactions().size(), "Compressed block must contain exactly 936 transactions");
  assertEquals(4, block.getTransactions().get(0).getListOfInputs().get(0).getTxInScript().length, "First transaction must have one input with script length 4");
  assertEquals(2, block.getTransactions().get(0).getListOfOutputs().size(), "First transaction must have two outputs");
  assertEquals(25, block.getTransactions().get(0).getListOfOutputs().get(0).getTxOutScript().length, "First output of the first transaction must have script length 25");
  BytesWritable emptyKey = new BytesWritable();
  BitcoinBlock emptyBlock = new BitcoinBlock();
  assertFalse(reader.next(emptyKey, emptyBlock), "No further blocks in compressed block");
  reader.close();
}
Project: hadoopcryptoledger    File: BitcoinFormatHadoopTest.java
@Test
public void readBitcoinBlockInputFormatBzip2Compressed() throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  Job job = Job.getInstance(conf);
  CompressionCodec bzip2 = new BZip2Codec();
  ReflectionUtils.setConf(bzip2, conf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version4comp.blk.bz2";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  BitcoinBlockFileInputFormat format = new BitcoinBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for compressed block");
  RecordReader<BytesWritable, BitcoinBlock> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  assertTrue(reader.nextKeyValue(), "Input split for block version contains at least one block");
  BitcoinBlock block = reader.getCurrentValue();
  assertEquals(936, block.getTransactions().size(), "Compressed block must contain exactly 936 transactions");
  assertEquals(4, block.getTransactions().get(0).getListOfInputs().get(0).getTxInScript().length, "First transaction must have one input with script length 4");
  assertEquals(2, block.getTransactions().get(0).getListOfOutputs().size(), "First transaction must have two outputs");
  assertEquals(25, block.getTransactions().get(0).getListOfOutputs().get(0).getTxOutScript().length, "First output of the first transaction must have script length 25");
  assertFalse(reader.nextKeyValue(), "No further blocks in compressed block");
  reader.close();
}
Project: sqoop-on-spark    File: TestPartitioner.java
@DataProvider(name="test-hdfs-partitioner")
public static Object[][] data() {
  List<Object[]> parameters = new ArrayList<Object[]>();
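  // Yields the cross product {no codec, DefaultCodec, BZip2Codec} x {TEXT_FILE, SEQUENCE_FILE}, i.e. six parameter sets.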
  for (Class<?> compressionClass : new Class<?>[]{null, DefaultCodec.class, BZip2Codec.class}) {
    for (Object outputFileType : new Object[]{TEXT_FILE, SEQUENCE_FILE}) {
      parameters.add(new Object[]{outputFileType, compressionClass});
    }
  }
  return parameters.toArray(new Object[0][]);
}
Project: sqoop-on-spark    File: TestExtractor.java
@DataProvider(name="test-hdfs-extractor")
public static Object[][] data() {
  List<Object[]> parameters = new ArrayList<Object[]>();
  for (Class<?> compressionClass : new Class<?>[]{null, DefaultCodec.class, BZip2Codec.class}) {
    for (Object outputFileType : new Object[]{TEXT_FILE, SEQUENCE_FILE}) {
      parameters.add(new Object[]{outputFileType, compressionClass});
    }
  }
  return parameters.toArray(new Object[0][]);
}
Project: hops    File: TestLineRecordReader.java
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader().
      getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
      testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
      (String[])null);

  LineRecordReader reader = new LineRecordReader(conf, split);
  LongWritable key = new LongWritable();
  Text value = new Text();
  //noinspection StatementWithEmptyBody
  while (reader.next(key, value)) ;
  reader.close();
  reader.close();

  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
Project: hops    File: TestLineRecordReader.java
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader().
      getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
      testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

  // read all records in the file; the reader is then closed twice
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
  LineRecordReader reader = new LineRecordReader();
  reader.initialize(split, context);

  //noinspection StatementWithEmptyBody
  while (reader.nextKeyValue()) ;
  reader.close();
  reader.close();

  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
Project: spork-streaming    File: PigStorage.java
private void setCompression(Path path, Job job) {
    String location = path.getName();
    if (location.endsWith(".bz2") || location.endsWith(".bz")) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
    } else if (location.endsWith(".gz")) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    } else {
        FileOutputFormat.setCompressOutput(job, false);
    }
}
Project: spork-streaming    File: MultiStorage.java
@Override
public void setStoreLocation(String location, Job job) throws IOException {
  job.getConfiguration().set("mapred.textoutputformat.separator", "");
  FileOutputFormat.setOutputPath(job, new Path(location));
  if (comp == Compression.bz2 || comp == Compression.bz) {
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
  } else if (comp == Compression.gz) {
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  }
}
Project: spork    File: PigStorage.java
private void setCompression(Path path, Job job) {
    String location = path.getName();
    if (location.endsWith(".bz2") || location.endsWith(".bz")) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
    } else if (location.endsWith(".gz")) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    } else {
        FileOutputFormat.setCompressOutput(job, false);
    }
}
Project: spork    File: MultiStorage.java
@Override
public void setStoreLocation(String location, Job job) throws IOException {
  job.getConfiguration().set(MRConfiguration.TEXTOUTPUTFORMAT_SEPARATOR, "");
  FileOutputFormat.setOutputPath(job, new Path(location));
  if (comp == Compression.bz2 || comp == Compression.bz) {
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
  } else if (comp == Compression.gz) {
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  }
}
Project: PonIC    File: PigStorage.java
private void setCompression(Path path, Job job) {
    String location = path.getName();
    if (location.endsWith(".bz2") || location.endsWith(".bz")) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
    } else if (location.endsWith(".gz")) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    } else {
        FileOutputFormat.setCompressOutput(job, false);
    }
}
Project: sedge    File: PigStorage.java
private void setCompression(Path path, Job job) {
    String location = path.getName();
    if (location.endsWith(".bz2") || location.endsWith(".bz")) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
    } else if (location.endsWith(".gz")) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    } else {
        FileOutputFormat.setCompressOutput(job, false);
    }
}
Project: sedge    File: MultiStorage.java
@Override
public void setStoreLocation(String location, Job job) throws IOException {
  job.getConfiguration().set("mapred.textoutputformat.separator", "");
  FileOutputFormat.setOutputPath(job, new Path(location));
  if (comp == Compression.bz2 || comp == Compression.bz) {
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
  } else if (comp == Compression.gz) {
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  }
}
Project: aliyun-maxcompute-data-collectors    File: TestCompression.java
public void testBzip2TextCompression() throws IOException {
  runTextCompressionTest(new BZip2Codec(), 4);
}
Project: aliyun-maxcompute-data-collectors    File: TestCompression.java
public void testBzip2SequenceFileCompression() throws Exception {
  runSequenceFileCompressionTest(new BZip2Codec(), 4);
}
Project: aliyun-oss-hadoop-fs    File: TestConcatenatedCompressedInput.java
/**
 * Test using the bzip2 codec for reading
 */
@Test
public void testBzip2() throws IOException {
  JobConf jobConf = new JobConf(defaultConf);

  CompressionCodec bzip2 = new BZip2Codec();
  ReflectionUtils.setConf(bzip2, jobConf);
  localFs.delete(workDir, true);

  System.out.println(COLOR_BR_CYAN +
    "testBzip2() using non-native CBZip2InputStream (presumably)" +
    COLOR_NORMAL);

  // copy prebuilt (correct!) version of concat.bz2 to HDFS
  final String fn = "concat" + bzip2.getDefaultExtension();
  Path fnLocal = new Path(System.getProperty("test.concat.data", "/tmp"), fn);
  Path fnHDFS  = new Path(workDir, fn);
  localFs.copyFromLocalFile(fnLocal, fnHDFS);

  writeFile(localFs, new Path(workDir, "part2.txt.bz2"), bzip2,
            "this is a test\nof bzip2\n");
  FileInputFormat.setInputPaths(jobConf, workDir);
  TextInputFormat format = new TextInputFormat();  // extends FileInputFormat
  format.configure(jobConf);
  format.setMinSplitSize(256);  // work around 2-byte splits issue
  // [135 splits for a 208-byte file and a 62-byte file(!)]

  InputSplit[] splits = format.getSplits(jobConf, 100);
  assertEquals("compressed splits == 2", 2, splits.length);
  FileSplit tmp = (FileSplit) splits[0];
  if (tmp.getPath().getName().equals("part2.txt.bz2")) {
    splits[0] = splits[1];
    splits[1] = tmp;
  }

  List<Text> results = readSplit(format, splits[0], jobConf);
  assertEquals("splits[0] num lines", 6, results.size());
  assertEquals("splits[0][5]", "member #3",
               results.get(5).toString());

  results = readSplit(format, splits[1], jobConf);
  assertEquals("splits[1] num lines", 2, results.size());
  assertEquals("splits[1][0]", "this is a test",
               results.get(0).toString());
  assertEquals("splits[1][1]", "of bzip2",
               results.get(1).toString());
}
Project: aliyun-oss-hadoop-fs    File: TestConcatenatedCompressedInput.java
/**
 * Extended bzip2 test, similar to the BuiltInGzipDecompressor test above.
 */
@Test
public void testMoreBzip2() throws IOException {
    JobConf jobConf = new JobConf(defaultConf);

    CompressionCodec bzip2 = new BZip2Codec();
    ReflectionUtils.setConf(bzip2, jobConf);
    localFs.delete(workDir, true);

    System.out.println(COLOR_BR_MAGENTA +
      "testMoreBzip2() using non-native CBZip2InputStream (presumably)" +
      COLOR_NORMAL);

    // copy single-member test file to HDFS
    String fn1 = "testConcatThenCompress.txt" + bzip2.getDefaultExtension();
    Path fnLocal1 = new Path(System.getProperty("test.concat.data","/tmp"),fn1);
    Path fnHDFS1  = new Path(workDir, fn1);
    localFs.copyFromLocalFile(fnLocal1, fnHDFS1);

    // copy multiple-member test file to HDFS
    String fn2 = "testCompressThenConcat.txt" + bzip2.getDefaultExtension();
    Path fnLocal2 = new Path(System.getProperty("test.concat.data","/tmp"),fn2);
    Path fnHDFS2  = new Path(workDir, fn2);
    localFs.copyFromLocalFile(fnLocal2, fnHDFS2);

    FileInputFormat.setInputPaths(jobConf, workDir);

    // here's first pair of BlockDecompressorStreams:
    final FileInputStream in1 = new FileInputStream(fnLocal1.toString());
    final FileInputStream in2 = new FileInputStream(fnLocal2.toString());
    assertEquals("concat bytes available", 2567, in1.available());
    assertEquals("concat bytes available", 3056, in2.available());

/*
    // FIXME
    // The while-loop below dies at the beginning of the 2nd concatenated
    // member (after 17 lines successfully read) with:
    //
    //   java.io.IOException: bad block header
    //   at org.apache.hadoop.io.compress.bzip2.CBZip2InputStream.initBlock(
    //   CBZip2InputStream.java:527)
    //
    // It is not critical to concatenated-gzip support, HADOOP-6835, so it's
    // simply commented out for now (and HADOOP-6852 filed).  If and when the
    // latter issue is resolved--perhaps by fixing an error here--this code
    // should be reenabled.  Note that the doMultipleBzip2BufferSizes() test
    // below uses the same testCompressThenConcat.txt.bz2 file but works fine.

    CompressionInputStream cin2 = bzip2.createInputStream(in2);
    LineReader in = new LineReader(cin2);
    Text out = new Text();

    int numBytes, totalBytes=0, lineNum=0;
    while ((numBytes = in.readLine(out)) > 0) {
      ++lineNum;
      totalBytes += numBytes;
    }
    in.close();
    assertEquals("total uncompressed bytes in concatenated test file",
                 5346, totalBytes);
    assertEquals("total uncompressed lines in concatenated test file",
                 84, lineNum);
 */

    // test CBZip2InputStream with lots of different input-buffer sizes
    doMultipleBzip2BufferSizes(jobConf);
}
Project: hadoopoffice    File: OfficeFormatHadoopExcelTest.java
@Test
public void readExcelInputFormatBzip2CompressedExcel2013MultiSheetAll() throws IOException {
  JobConf job = new JobConf(defaultConf);
  CompressionCodec bzip2 = new BZip2Codec();
  ReflectionUtils.setConf(bzip2, job);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "excel2013testmultisheet.xlsx.bz2";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  FileInputFormat.setInputPaths(job, file);
  // set locale to the one of the test data
  job.set("hadoopoffice.read.locale.bcp47", "de");
  ExcelFileInputFormat format = new ExcelFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");
  Text spreadSheetKey = new Text();
  ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
  assertTrue(reader.next(spreadSheetKey, spreadSheetValue), "Input split for Excel file contains row 1 (first sheet)");
  assertEquals("[excel2013testmultisheet.xlsx.bz2]Sheet1!A1", spreadSheetKey.toString(), "Input split for Excel file has keyname == \"[excel2013testmultisheet.xlsx.bz2]Sheet1!A1\"");
  assertEquals(4, spreadSheetValue.get().length, "Input split for Excel file contains row 1 with 4 columns");
  assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input split for Excel file contains row 1 with cell 1 == \"test1\"");
  assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(), "Input split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\"");
  assertEquals("A1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(), "Input split for Excel file contains row 1 with cell 1 address == \"A1\"");
  assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input split for Excel file contains row 1 with cell 2 == \"test2\"");
  assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input split for Excel file contains row 1 with cell 3 == \"test3\"");
  assertEquals("test4", ((SpreadSheetCellDAO) spreadSheetValue.get()[3]).getFormattedValue(), "Input split for Excel file contains row 1 with cell 4 == \"test4\"");
  assertTrue(reader.next(spreadSheetKey, spreadSheetValue), "Input split for Excel file contains row 2 (first sheet)");
  assertEquals(1, spreadSheetValue.get().length, "Input split for Excel file contains row 2 with 1 column");
  assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input split for Excel file contains row 2 with cell 1 == \"4\"");
  assertTrue(reader.next(spreadSheetKey, spreadSheetValue), "Input split for Excel file contains row 3 (first sheet)");
  assertEquals(5, spreadSheetValue.get().length, "Input split for Excel file contains row 3 with 5 columns");
  assertEquals("31/12/99", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input split for Excel file contains row 3 with cell 1 == \"31/12/99\"");
  assertEquals("5", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input split for Excel file contains row 3 with cell 2 == \"5\"");
  assertNull(spreadSheetValue.get()[2], "Input split for Excel file contains row 3 with cell 3 == null");
  assertNull(spreadSheetValue.get()[3], "Input split for Excel file contains row 3 with cell 4 == null");
  assertEquals("null", ((SpreadSheetCellDAO) spreadSheetValue.get()[4]).getFormattedValue(), "Input split for Excel file contains row 3 with cell 5 == \"null\"");
  assertTrue(reader.next(spreadSheetKey, spreadSheetValue), "Input split for Excel file contains row 4 (first sheet)");
  assertEquals(1, spreadSheetValue.get().length, "Input split for Excel file contains row 4 with 1 column");
  assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input split for Excel file contains row 4 with cell 1 == \"1\"");
  assertTrue(reader.next(spreadSheetKey, spreadSheetValue), "Input split for Excel file contains row 5 (first sheet)");
  assertEquals(3, spreadSheetValue.get().length, "Input split for Excel file contains row 5 with 3 columns");
  assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input split for Excel file contains row 5 with cell 1 == \"2\"");
  assertEquals("6", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input split for Excel file contains row 5 with cell 2 == \"6\"");
  assertEquals("10", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input split for Excel file contains row 5 with cell 3 == \"10\"");
  assertTrue(reader.next(spreadSheetKey, spreadSheetValue), "Input split for Excel file contains row 6 (first sheet)");
  assertEquals(3, spreadSheetValue.get().length, "Input split for Excel file contains row 6 with 3 columns");
  assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input split for Excel file contains row 6 with cell 1 == \"3\"");
  assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input split for Excel file contains row 6 with cell 2 == \"4\"");
  assertEquals("15", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input split for Excel file contains row 6 with cell 3 == \"15\"");
  assertTrue(reader.next(spreadSheetKey, spreadSheetValue), "Input split for Excel file contains row 7 (second sheet)");
  assertEquals(2, spreadSheetValue.get().length, "Input split for Excel file contains row 7 with 2 columns");
  assertEquals("8", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input split for Excel file contains row 7 with cell 1 == \"8\"");
  assertEquals("99", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input split for Excel file contains row 7 with cell 2 == \"99\"");
  assertTrue(reader.next(spreadSheetKey, spreadSheetValue), "Input split for Excel file contains row 8 (second sheet)");
  assertEquals(1, spreadSheetValue.get().length, "Input split for Excel file contains row 8 with 1 column");
  assertEquals("test", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input split for Excel file contains row 8 with cell 1 == \"test\"");
  assertTrue(reader.next(spreadSheetKey, spreadSheetValue), "Input split for Excel file contains row 9 (second sheet)");
  assertEquals(3, spreadSheetValue.get().length, "Input split for Excel file contains row 9 with 3 columns");
  assertNull(spreadSheetValue.get()[0], "Input split for Excel file contains row 9 with cell 1 == null");
  assertNull(spreadSheetValue.get()[1], "Input split for Excel file contains row 9 with cell 2 == null");
  assertEquals("seven", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input split for Excel file contains row 9 with cell 3 == \"seven\"");
  reader.close();
}
Project: pregelix    File: DataGenerator.java
public static void main(String[] args) throws IOException {

        JobConf job = new JobConf(DataGenerator.class);
        FileSystem dfs = FileSystem.get(job);
        String maxFile = "/maxtemp";
        dfs.delete(new Path(maxFile), true);

        job.setJobName(DataGenerator.class.getSimpleName() + "max ID");
        job.setMapperClass(MapMaxId.class);
        job.setCombinerClass(CombineMaxId.class);
        job.setReducerClass(ReduceMaxId.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(VLongWritable.class);

        job.setInputFormat(TextInputFormat.class);
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(maxFile));
        job.setNumReduceTasks(1);
        JobClient.runJob(job);

        job = new JobConf(DataGenerator.class);
        job.set("hyracks.maxid.file", maxFile);
        job.setInt("hyracks.x", Integer.parseInt(args[2]));
        dfs.delete(new Path(args[1]), true);

        job.setJobName(DataGenerator.class.getSimpleName());
        job.setMapperClass(MapRecordGen.class);
        job.setReducerClass(ReduceRecordGen.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setInputFormat(TextInputFormat.class);
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.setNumReduceTasks(Integer.parseInt(args[3]));

        if (args.length > 4) {
            if (args[4].startsWith("bzip"))
                FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
            if (args[4].startsWith("gz"))
                FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        }
        JobClient.runJob(job);
    }
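Reading the argument handling above: args[0] is the input path, args[1] the output path, args[2] the value for hyracks.x, args[3] the number of reduce tasks, and the optional args[4] selects bzip2 or gzip output compression, e.g. (jar name assumed): hadoop jar pregelix-examples.jar DataGenerator <input> <output> <x> <numReducers> bzip.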
Project: zSqoop    File: TestCompression.java
public void testBzip2TextCompression() throws IOException {
  runTextCompressionTest(new BZip2Codec(), 4);
}
Project: zSqoop    File: TestCompression.java
public void testBzip2SequenceFileCompression() throws Exception {
  runSequenceFileCompressionTest(new BZip2Codec(), 4);
}
Project: sqoop    File: TestCompression.java
public void testBzip2TextCompression() throws IOException {
  runTextCompressionTest(new BZip2Codec(), 4);
}
Project: sqoop    File: TestCompression.java
public void testBzip2SequenceFileCompression() throws Exception {
  runSequenceFileCompressionTest(new BZip2Codec(), 4);
}
Project: MRSmallFileCombiner    File: ManyTxtToFewSeqJob.java
public static void main(String[] args) throws Exception {
    // the job reads four arguments (args[3] is the codec), so require all four
    if (args.length < 4) {
        System.out
                .println("ManyTxtToFewSeqJob <inputPath> <outputPath> <# mappers> <compressionCodec>");
        System.out.println();
        System.out
                .println("Example: ManyTxtToFewSeqJob ./input ./output 20 snappy");
        return;
    }

    // Get values from args
    String inputPath = args[0];
    String outputPath = args[1];
    String numberOfMappers = args[2];
    String compressionCodec = args[3];

    // Create job
    Job job = new Job();
    job.setJobName("ManyTxtToFewSeqJob");


    job.setJarByClass(ManyTxtToFewSeqJob.class);
    // Define input format and path
    job.setInputFormatClass(ConfigurableInputFormat.class);
    ConfigurableInputFormat.setInputPath(job, inputPath);
    ConfigurableInputFormat.setMapperNumber(job, Integer.parseInt(numberOfMappers));

    // Define output format and path
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
    if (compressionCodec.toLowerCase().equals("gzip")) {
        SequenceFileOutputFormat.setOutputCompressorClass(job,
                GzipCodec.class);
    } else if (compressionCodec.toLowerCase().equals("bzip2")) {
        SequenceFileOutputFormat.setOutputCompressorClass(job,
                BZip2Codec.class);
    } else {
        SequenceFileOutputFormat.setOutputCompressorClass(job,
                SnappyCodec.class);
    }

    // Define the mapper and reducer
    job.setMapperClass(ConsalidatorMapper.class);
    // job.setReducerClass(Reducer.class);

    // Define the key and value format
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    job.setNumReduceTasks(0);

    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(config);

    hdfs.delete(new Path(outputPath), true);

    // Exit
    job.waitForCompletion(true);
}
Project: eoulsan    File: HadoopCompressionCodecs.java
/**
 * Create a bzip2 input stream.
 * @param is input stream
 * @return an uncompressed input stream
 * @throws IOException if an error occurs while creating the input stream
 */
public static InputStream createBZip2InputStream(final InputStream is)
    throws IOException {

  return new BZip2Codec().createInputStream(is);
}
Project: eoulsan    File: HadoopCompressionCodecs.java
/**
 * Create a bzip2 output stream.
 * @param os the output stream to compress
 * @return a compressed output stream
 * @throws IOException if an error occurs while creating the output stream
 */
public static OutputStream createBZip2OutputStream(final OutputStream os)
    throws IOException {

  return new BZip2Codec().createOutputStream(os);
}