Java 类org.apache.hadoop.mapred.lib.CombineTextInputFormat 实例源码

项目：hadoop 文件：TestCombineTextInputFormat.java

/**
 * Test using the gzip codec for reading
 */
@Test(timeout=10000)
public void testGzip() throws IOException {
  JobConf job = new JobConf(defaultConf);
  CompressionCodec gzip = new GzipCodec();
  ReflectionUtils.setConf(gzip, job);
  localFs.delete(workDir, true);
  writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip,
            "the quick\nbrown\nfox jumped\nover\n the lazy\n dog\n");
  writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
            "this is a test\nof gzip\n");
  FileInputFormat.setInputPaths(job, workDir);
  CombineTextInputFormat format = new CombineTextInputFormat();
  InputSplit[] splits = format.getSplits(job, 100);
  assertEquals("compressed splits == 1", 1, splits.length);
  List<Text> results = readSplit(format, splits[0], job);
  assertEquals("splits[0] length", 8, results.size());

  final String[] firstList =
    {"the quick", "brown", "fox jumped", "over", " the lazy", " dog"};
  final String[] secondList = {"this is a test", "of gzip"};
  String first = results.get(0).toString();
  if (first.equals(firstList[0])) {
    testResults(results, firstList, secondList);
  } else if (first.equals(secondList[0])) {
    testResults(results, secondList, firstList);
  } else {
    fail("unexpected first token!");
  }
}

项目：aliyun-oss-hadoop-fs 文件：TestCombineTextInputFormat.java

/**
 * Test using the gzip codec for reading
 */
@Test(timeout=10000)
public void testGzip() throws IOException {
  JobConf job = new JobConf(defaultConf);
  CompressionCodec gzip = new GzipCodec();
  ReflectionUtils.setConf(gzip, job);
  localFs.delete(workDir, true);
  writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip,
            "the quick\nbrown\nfox jumped\nover\n the lazy\n dog\n");
  writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
            "this is a test\nof gzip\n");
  FileInputFormat.setInputPaths(job, workDir);
  CombineTextInputFormat format = new CombineTextInputFormat();
  InputSplit[] splits = format.getSplits(job, 100);
  assertEquals("compressed splits == 1", 1, splits.length);
  List<Text> results = readSplit(format, splits[0], job);
  assertEquals("splits[0] length", 8, results.size());

  final String[] firstList =
    {"the quick", "brown", "fox jumped", "over", " the lazy", " dog"};
  final String[] secondList = {"this is a test", "of gzip"};
  String first = results.get(0).toString();
  if (first.equals(firstList[0])) {
    testResults(results, firstList, secondList);
  } else if (first.equals(secondList[0])) {
    testResults(results, secondList, firstList);
  } else {
    fail("unexpected first token!");
  }
}

项目：big-c 文件：TestCombineTextInputFormat.java

/**
 * Test using the gzip codec for reading
 */
@Test(timeout=10000)
public void testGzip() throws IOException {
  JobConf job = new JobConf(defaultConf);
  CompressionCodec gzip = new GzipCodec();
  ReflectionUtils.setConf(gzip, job);
  localFs.delete(workDir, true);
  writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip,
            "the quick\nbrown\nfox jumped\nover\n the lazy\n dog\n");
  writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
            "this is a test\nof gzip\n");
  FileInputFormat.setInputPaths(job, workDir);
  CombineTextInputFormat format = new CombineTextInputFormat();
  InputSplit[] splits = format.getSplits(job, 100);
  assertEquals("compressed splits == 1", 1, splits.length);
  List<Text> results = readSplit(format, splits[0], job);
  assertEquals("splits[0] length", 8, results.size());

  final String[] firstList =
    {"the quick", "brown", "fox jumped", "over", " the lazy", " dog"};
  final String[] secondList = {"this is a test", "of gzip"};
  String first = results.get(0).toString();
  if (first.equals(firstList[0])) {
    testResults(results, firstList, secondList);
  } else if (first.equals(secondList[0])) {
    testResults(results, secondList, firstList);
  } else {
    fail("unexpected first token!");
  }
}

项目：hadoop-2.6.0-cdh5.4.3 文件：TestCombineTextInputFormat.java

/**
 * Test using the gzip codec for reading
 */
@Test(timeout=10000)
public void testGzip() throws IOException {
  JobConf job = new JobConf(defaultConf);
  CompressionCodec gzip = new GzipCodec();
  ReflectionUtils.setConf(gzip, job);
  localFs.delete(workDir, true);
  writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip,
            "the quick\nbrown\nfox jumped\nover\n the lazy\n dog\n");
  writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
            "this is a test\nof gzip\n");
  FileInputFormat.setInputPaths(job, workDir);
  CombineTextInputFormat format = new CombineTextInputFormat();
  InputSplit[] splits = format.getSplits(job, 100);
  assertEquals("compressed splits == 1", 1, splits.length);
  List<Text> results = readSplit(format, splits[0], job);
  assertEquals("splits[0] length", 8, results.size());

  final String[] firstList =
    {"the quick", "brown", "fox jumped", "over", " the lazy", " dog"};
  final String[] secondList = {"this is a test", "of gzip"};
  String first = results.get(0).toString();
  if (first.equals(firstList[0])) {
    testResults(results, firstList, secondList);
  } else if (first.equals(secondList[0])) {
    testResults(results, secondList, firstList);
  } else {
    fail("unexpected first token!");
  }
}

项目：hadoop-plus 文件：TestCombineTextInputFormat.java

/**
 * Test using the gzip codec for reading
 */
@Test(timeout=10000)
public void testGzip() throws IOException {
  JobConf job = new JobConf(defaultConf);
  CompressionCodec gzip = new GzipCodec();
  ReflectionUtils.setConf(gzip, job);
  localFs.delete(workDir, true);
  writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip,
            "the quick\nbrown\nfox jumped\nover\n the lazy\n dog\n");
  writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
            "this is a test\nof gzip\n");
  FileInputFormat.setInputPaths(job, workDir);
  CombineTextInputFormat format = new CombineTextInputFormat();
  InputSplit[] splits = format.getSplits(job, 100);
  assertEquals("compressed splits == 1", 1, splits.length);
  List<Text> results = readSplit(format, splits[0], job);
  assertEquals("splits[0] length", 8, results.size());

  final String[] firstList =
    {"the quick", "brown", "fox jumped", "over", " the lazy", " dog"};
  final String[] secondList = {"this is a test", "of gzip"};
  String first = results.get(0).toString();
  if (first.equals(firstList[0])) {
    testResults(results, firstList, secondList);
  } else if (first.equals(secondList[0])) {
    testResults(results, secondList, firstList);
  } else {
    fail("unexpected first token!");
  }
}

项目：FlexMap 文件：TestCombineTextInputFormat.java

/**
 * Test using the gzip codec for reading
 */
@Test(timeout=10000)
public void testGzip() throws IOException {
  JobConf job = new JobConf(defaultConf);
  CompressionCodec gzip = new GzipCodec();
  ReflectionUtils.setConf(gzip, job);
  localFs.delete(workDir, true);
  writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip,
            "the quick\nbrown\nfox jumped\nover\n the lazy\n dog\n");
  writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
            "this is a test\nof gzip\n");
  FileInputFormat.setInputPaths(job, workDir);
  CombineTextInputFormat format = new CombineTextInputFormat();
  InputSplit[] splits = format.getSplits(job, 100);
  assertEquals("compressed splits == 1", 1, splits.length);
  List<Text> results = readSplit(format, splits[0], job);
  assertEquals("splits[0] length", 8, results.size());

  final String[] firstList =
    {"the quick", "brown", "fox jumped", "over", " the lazy", " dog"};
  final String[] secondList = {"this is a test", "of gzip"};
  String first = results.get(0).toString();
  if (first.equals(firstList[0])) {
    testResults(results, firstList, secondList);
  } else if (first.equals(secondList[0])) {
    testResults(results, secondList, firstList);
  } else {
    fail("unexpected first token!");
  }
}

项目：hops 文件：TestCombineTextInputFormat.java

/**
 * Test using the gzip codec for reading
 */
@Test(timeout=10000)
public void testGzip() throws IOException {
  JobConf job = new JobConf(defaultConf);
  CompressionCodec gzip = new GzipCodec();
  ReflectionUtils.setConf(gzip, job);
  localFs.delete(workDir, true);
  writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip,
            "the quick\nbrown\nfox jumped\nover\n the lazy\n dog\n");
  writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
            "this is a test\nof gzip\n");
  FileInputFormat.setInputPaths(job, workDir);
  CombineTextInputFormat format = new CombineTextInputFormat();
  InputSplit[] splits = format.getSplits(job, 100);
  assertEquals("compressed splits == 1", 1, splits.length);
  List<Text> results = readSplit(format, splits[0], job);
  assertEquals("splits[0] length", 8, results.size());

  final String[] firstList =
    {"the quick", "brown", "fox jumped", "over", " the lazy", " dog"};
  final String[] secondList = {"this is a test", "of gzip"};
  String first = results.get(0).toString();
  if (first.equals(firstList[0])) {
    testResults(results, firstList, secondList);
  } else if (first.equals(secondList[0])) {
    testResults(results, secondList, firstList);
  } else {
    fail("unexpected first token!");
  }
}

项目：hadoop-TCP 文件：TestCombineTextInputFormat.java

/**
 * Test using the gzip codec for reading
 */
@Test(timeout=10000)
public void testGzip() throws IOException {
  JobConf job = new JobConf(defaultConf);
  CompressionCodec gzip = new GzipCodec();
  ReflectionUtils.setConf(gzip, job);
  localFs.delete(workDir, true);
  writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip,
            "the quick\nbrown\nfox jumped\nover\n the lazy\n dog\n");
  writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
            "this is a test\nof gzip\n");
  FileInputFormat.setInputPaths(job, workDir);
  CombineTextInputFormat format = new CombineTextInputFormat();
  InputSplit[] splits = format.getSplits(job, 100);
  assertEquals("compressed splits == 1", 1, splits.length);
  List<Text> results = readSplit(format, splits[0], job);
  assertEquals("splits[0] length", 8, results.size());

  final String[] firstList =
    {"the quick", "brown", "fox jumped", "over", " the lazy", " dog"};
  final String[] secondList = {"this is a test", "of gzip"};
  String first = results.get(0).toString();
  if (first.equals(firstList[0])) {
    testResults(results, firstList, secondList);
  } else if (first.equals(secondList[0])) {
    testResults(results, secondList, firstList);
  } else {
    fail("unexpected first token!");
  }
}

项目：hardfs 文件：TestCombineTextInputFormat.java

/**
 * Test using the gzip codec for reading
 */
@Test(timeout=10000)
public void testGzip() throws IOException {
  JobConf job = new JobConf(defaultConf);
  CompressionCodec gzip = new GzipCodec();
  ReflectionUtils.setConf(gzip, job);
  localFs.delete(workDir, true);
  writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip,
            "the quick\nbrown\nfox jumped\nover\n the lazy\n dog\n");
  writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
            "this is a test\nof gzip\n");
  FileInputFormat.setInputPaths(job, workDir);
  CombineTextInputFormat format = new CombineTextInputFormat();
  InputSplit[] splits = format.getSplits(job, 100);
  assertEquals("compressed splits == 1", 1, splits.length);
  List<Text> results = readSplit(format, splits[0], job);
  assertEquals("splits[0] length", 8, results.size());

  final String[] firstList =
    {"the quick", "brown", "fox jumped", "over", " the lazy", " dog"};
  final String[] secondList = {"this is a test", "of gzip"};
  String first = results.get(0).toString();
  if (first.equals(firstList[0])) {
    testResults(results, firstList, secondList);
  } else if (first.equals(secondList[0])) {
    testResults(results, secondList, firstList);
  } else {
    fail("unexpected first token!");
  }
}

项目：hadoop-on-lustre2 文件：TestCombineTextInputFormat.java

/**
 * Test using the gzip codec for reading
 */
@Test(timeout=10000)
public void testGzip() throws IOException {
  JobConf job = new JobConf(defaultConf);
  CompressionCodec gzip = new GzipCodec();
  ReflectionUtils.setConf(gzip, job);
  localFs.delete(workDir, true);
  writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip,
            "the quick\nbrown\nfox jumped\nover\n the lazy\n dog\n");
  writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
            "this is a test\nof gzip\n");
  FileInputFormat.setInputPaths(job, workDir);
  CombineTextInputFormat format = new CombineTextInputFormat();
  InputSplit[] splits = format.getSplits(job, 100);
  assertEquals("compressed splits == 1", 1, splits.length);
  List<Text> results = readSplit(format, splits[0], job);
  assertEquals("splits[0] length", 8, results.size());

  final String[] firstList =
    {"the quick", "brown", "fox jumped", "over", " the lazy", " dog"};
  final String[] secondList = {"this is a test", "of gzip"};
  String first = results.get(0).toString();
  if (first.equals(firstList[0])) {
    testResults(results, firstList, secondList);
  } else if (first.equals(secondList[0])) {
    testResults(results, secondList, firstList);
  } else {
    fail("unexpected first token!");
  }
}