/**
 * Verifies TotalOrderPartitioner works with a non-Writable key type
 * (java.lang.String) when JavaSerialization is registered ahead of
 * WritableSerialization and ordering comes from JavaSerializationComparator.
 */
public void testTotalOrderWithCustomSerialization() throws Exception {
  TotalOrderPartitioner<String, NullWritable> partitioner =
      new TotalOrderPartitioner<String, NullWritable>();
  Configuration conf = new Configuration();
  // Register JavaSerialization first so plain String keys are serializable.
  conf.setStrings(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY,
      JavaSerialization.class.getName(),
      WritableSerialization.class.getName());
  conf.setClass(MRJobConfig.KEY_COMPARATOR,
      JavaSerializationComparator.class,
      Comparator.class);
  // Write the split points the partitioner will load via setConf().
  Path p = TestTotalOrderPartitioner.<String>writePartitionFile(
      "totalordercustomserialization", conf, splitJavaStrings);
  conf.setClass(MRJobConfig.MAP_OUTPUT_KEY_CLASS, String.class, Object.class);
  try {
    partitioner.setConf(conf);
    NullWritable nw = NullWritable.get();
    // Each Check pairs a sample key with its expected partition index;
    // number of reduces is one more than the number of split points.
    for (Check<String> chk : testJavaStrings) {
      assertEquals(chk.data.toString(), chk.part,
          partitioner.getPartition(chk.data, nw, splitJavaStrings.length + 1));
    }
  } finally {
    // Always remove the generated partition file, even on failure.
    p.getFileSystem(conf).delete(p, true);
  }
}
/**
 * HADOOP-4466:
 * This test verifies the JavaSerialization implementation can write to
 * SequenceFiles — i.e. that SequenceFileOutputFormat is not coupled to
 * Writable types; if it were, the job would fail.
 */
public void testWriteToSequencefile() throws Exception {
  JobConf conf = new JobConf(TestJavaSerialization.class);
  conf.setJobName("JavaSerialization");
  FileSystem fs = FileSystem.get(conf);
  // Reset the input directory and (re)create the input file.
  cleanAndCreateInput(fs);
  // Enable Java serialization ahead of the default Writable serialization.
  conf.set("io.serializations",
      "org.apache.hadoop.io.serializer.JavaSerialization,"
      + "org.apache.hadoop.io.serializer.WritableSerialization");
  conf.setInputFormat(TextInputFormat.class);
  // test we can write to sequence files
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  // Non-Writable key/value types; ordering supplied by the comparator below.
  conf.setOutputKeyClass(String.class);
  conf.setOutputValueClass(Long.class);
  conf.setOutputKeyComparatorClass(JavaSerializationComparator.class);
  conf.setMapperClass(WordCountMapper.class);
  conf.setReducerClass(SumReducer.class);
  // Run with the local framework so no cluster is required.
  conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
  FileInputFormat.setInputPaths(conf, INPUT_DIR);
  FileOutputFormat.setOutputPath(conf, OUTPUT_DIR);
  JobClient.runJob(conf);
  // Success criterion: the job produced exactly one (sequence) output file.
  Path[] outputFiles = FileUtil.stat2Paths(
      fs.listStatus(OUTPUT_DIR,
          new Utils.OutputFileUtils.OutputFilesFilter()));
  assertEquals(1, outputFiles.length);
}
/**
 * Runs a word-count MapReduce job whose intermediate and output key/value
 * types (String/Long) rely on JavaSerialization rather than Writables,
 * then verifies the sorted reduce output line by line.
 */
public void testMapReduceJob() throws Exception {
  // Write the two-word input file. Close the writer in a finally block so
  // the stream is released even if write() throws (was leaked on failure).
  OutputStream os = getFileSystem().create(new Path(getInputDir(), "text.txt"));
  Writer wr = new OutputStreamWriter(os);
  try {
    wr.write("b a\n");
  } finally {
    wr.close();
  }
  JobConf conf = createJobConf();
  conf.setJobName("JavaSerialization");
  // Enable Java serialization ahead of the default Writable serialization.
  conf.set("io.serializations",
      "org.apache.hadoop.io.serializer.JavaSerialization,"
      + "org.apache.hadoop.io.serializer.WritableSerialization");
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(String.class);
  conf.setOutputValueClass(Long.class);
  conf.setOutputKeyComparatorClass(JavaSerializationComparator.class);
  conf.setMapperClass(WordCountMapper.class);
  conf.setReducerClass(SumReducer.class);
  FileInputFormat.setInputPaths(conf, getInputDir());
  FileOutputFormat.setOutputPath(conf, getOutputDir());
  JobClient.runJob(conf);
  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(getOutputDir(),
          new Utils.OutputFileUtils.OutputFilesFilter()));
  assertEquals(1, outputFiles.length);
  // Verify the sorted word counts. Close the reader in a finally block so
  // it is released even when an assertion fails (was leaked on failure).
  InputStream is = getFileSystem().open(outputFiles[0]);
  BufferedReader reader = new BufferedReader(new InputStreamReader(is));
  try {
    assertEquals("a\t1", reader.readLine());
    assertEquals("b\t1", reader.readLine());
    assertNull(reader.readLine());
  } finally {
    reader.close();
  }
}
/** * HADOOP-4466: * This test verifies the JavSerialization impl can write to SequenceFiles. by virtue other * SequenceFileOutputFormat is not coupled to Writable types, if so, the job will fail. * */ public void testWriteToSequencefile() throws Exception { OutputStream os = getFileSystem().create(new Path(getInputDir(), "text.txt")); Writer wr = new OutputStreamWriter(os); wr.write("b a\n"); wr.close(); JobConf conf = createJobConf(); conf.setJobName("JavaSerialization"); conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," + "org.apache.hadoop.io.serializer.WritableSerialization"); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); // test we can write to sequence files conf.setOutputKeyClass(String.class); conf.setOutputValueClass(Long.class); conf.setOutputKeyComparatorClass(JavaSerializationComparator.class); conf.setMapperClass(WordCountMapper.class); conf.setReducerClass(SumReducer.class); FileInputFormat.setInputPaths(conf, getInputDir()); FileOutputFormat.setOutputPath(conf, getOutputDir()); JobClient.runJob(conf); Path[] outputFiles = FileUtil.stat2Paths( getFileSystem().listStatus(getOutputDir(), new Utils.OutputFileUtils.OutputFilesFilter())); assertEquals(1, outputFiles.length); }
/**
 * Runs a word-count job under the local framework with String/Long
 * key/value types handled by JavaSerialization, then verifies the sorted
 * reduce output line by line.
 */
public void testMapReduceJob() throws Exception {
  JobConf conf = new JobConf(TestJavaSerialization.class);
  conf.setJobName("JavaSerialization");
  FileSystem fs = FileSystem.get(conf);
  // Reset the input directory and (re)create the input file.
  cleanAndCreateInput(fs);
  // Enable Java serialization ahead of the default Writable serialization.
  conf.set("io.serializations",
      "org.apache.hadoop.io.serializer.JavaSerialization,"
      + "org.apache.hadoop.io.serializer.WritableSerialization");
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(String.class);
  conf.setOutputValueClass(Long.class);
  conf.setOutputKeyComparatorClass(JavaSerializationComparator.class);
  conf.setMapperClass(WordCountMapper.class);
  conf.setReducerClass(SumReducer.class);
  // Run with the local framework so no cluster is required.
  conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
  FileInputFormat.setInputPaths(conf, INPUT_DIR);
  FileOutputFormat.setOutputPath(conf, OUTPUT_DIR);
  JobClient.runJob(conf);
  Path[] outputFiles = FileUtil.stat2Paths(
      fs.listStatus(OUTPUT_DIR,
          new Utils.OutputFileUtils.OutputFilesFilter()));
  assertEquals(1, outputFiles.length);
  // Verify the sorted word counts. Close the reader in a finally block so
  // it is released even when an assertion fails (was leaked on failure).
  InputStream is = fs.open(outputFiles[0]);
  BufferedReader reader = new BufferedReader(new InputStreamReader(is));
  try {
    assertEquals("a\t1", reader.readLine());
    assertEquals("b\t1", reader.readLine());
    assertNull(reader.readLine());
  } finally {
    reader.close();
  }
}
/**
 * HADOOP-4466:
 * This test verifies the JavaSerialization implementation can write to
 * SequenceFiles — i.e. that SequenceFileOutputFormat is not coupled to
 * Writable types; if it were, the job would fail.
 */
@Test
public void testWriteToSequencefile() throws Exception {
  JobConf conf = new JobConf(TestJavaSerialization.class);
  conf.setJobName("JavaSerialization");
  FileSystem fs = FileSystem.get(conf);
  // Reset the input directory and (re)create the input file.
  cleanAndCreateInput(fs);
  // Enable Java serialization ahead of the default Writable serialization.
  conf.set("io.serializations",
      "org.apache.hadoop.io.serializer.JavaSerialization,"
      + "org.apache.hadoop.io.serializer.WritableSerialization");
  conf.setInputFormat(TextInputFormat.class);
  // test we can write to sequence files
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  // Non-Writable key/value types; ordering supplied by the comparator below.
  conf.setOutputKeyClass(String.class);
  conf.setOutputValueClass(Long.class);
  conf.setOutputKeyComparatorClass(JavaSerializationComparator.class);
  conf.setMapperClass(WordCountMapper.class);
  conf.setReducerClass(SumReducer.class);
  // Run with the local framework so no cluster is required.
  conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
  FileInputFormat.setInputPaths(conf, INPUT_DIR);
  FileOutputFormat.setOutputPath(conf, OUTPUT_DIR);
  JobClient.runJob(conf);
  // Success criterion: the job produced exactly one (sequence) output file.
  Path[] outputFiles = FileUtil.stat2Paths(
      fs.listStatus(OUTPUT_DIR,
          new Utils.OutputFileUtils.OutputFilesFilter()));
  assertEquals(1, outputFiles.length);
}
@Test(timeout = 30000) public void testAppendSort() throws Exception { GenericTestUtils.assumeInNativeProfile(); Path file = new Path(ROOT_PATH, "testseqappendSort.seq"); fs.delete(file, true); Path sortedFile = new Path(ROOT_PATH, "testseqappendSort.seq.sort"); fs.delete(sortedFile, true); SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, new JavaSerializationComparator<Long>(), Long.class, String.class, conf); Option compressOption = Writer.compression(CompressionType.BLOCK, new GzipCodec()); Writer writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(file), SequenceFile.Writer.keyClass(Long.class), SequenceFile.Writer.valueClass(String.class), compressOption); writer.append(2L, "two"); writer.append(1L, "one"); writer.close(); writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(file), SequenceFile.Writer.keyClass(Long.class), SequenceFile.Writer.valueClass(String.class), SequenceFile.Writer.appendIfExists(true), compressOption); writer.append(4L, "four"); writer.append(3L, "three"); writer.close(); // Sort file after append sorter.sort(file, sortedFile); verifyAll4Values(sortedFile); fs.deleteOnExit(file); fs.deleteOnExit(sortedFile); }
/**
 * Runs a word-count MapReduce job whose key/value types (String/Long)
 * rely on JavaSerialization rather than Writables, then verifies the
 * sorted reduce output line by line.
 */
public void testMapReduceJob() throws Exception {
  // Write the two-word input file. Close the writer in a finally block so
  // the stream is released even if write() throws (was leaked on failure).
  OutputStream os = getFileSystem().create(new Path(getInputDir(), "text.txt"));
  Writer wr = new OutputStreamWriter(os);
  try {
    wr.write("b a\n");
  } finally {
    wr.close();
  }
  JobConf conf = createJobConf();
  conf.setJobName("JavaSerialization");
  // Enable Java serialization ahead of the default Writable serialization.
  conf.set("io.serializations",
      "org.apache.hadoop.io.serializer.JavaSerialization,"
      + "org.apache.hadoop.io.serializer.WritableSerialization");
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(String.class);
  conf.setOutputValueClass(Long.class);
  conf.setOutputKeyComparatorClass(JavaSerializationComparator.class);
  conf.setMapperClass(WordCountMapper.class);
  conf.setReducerClass(SumReducer.class);
  FileInputFormat.setInputPaths(conf, getInputDir());
  FileOutputFormat.setOutputPath(conf, getOutputDir());
  JobClient.runJob(conf);
  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(getOutputDir(), new OutputLogFilter()));
  assertEquals(1, outputFiles.length);
  // Verify the sorted word counts. Close the reader in a finally block so
  // it is released even when an assertion fails (was leaked on failure).
  InputStream is = getFileSystem().open(outputFiles[0]);
  BufferedReader reader = new BufferedReader(new InputStreamReader(is));
  try {
    assertEquals("a\t1", reader.readLine());
    assertEquals("b\t1", reader.readLine());
    assertNull(reader.readLine());
  } finally {
    reader.close();
  }
}
/** * HADOOP-4466: * This test verifies the JavSerialization impl can write to SequenceFiles. by virtue other * SequenceFileOutputFormat is not coupled to Writable types, if so, the job will fail. * */ public void testWriteToSequencefile() throws Exception { OutputStream os = getFileSystem().create(new Path(getInputDir(), "text.txt")); Writer wr = new OutputStreamWriter(os); wr.write("b a\n"); wr.close(); JobConf conf = createJobConf(); conf.setJobName("JavaSerialization"); conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," + "org.apache.hadoop.io.serializer.WritableSerialization"); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); // test we can write to sequence files conf.setOutputKeyClass(String.class); conf.setOutputValueClass(Long.class); conf.setOutputKeyComparatorClass(JavaSerializationComparator.class); conf.setMapperClass(WordCountMapper.class); conf.setReducerClass(SumReducer.class); FileInputFormat.setInputPaths(conf, getInputDir()); FileOutputFormat.setOutputPath(conf, getOutputDir()); JobClient.runJob(conf); Path[] outputFiles = FileUtil.stat2Paths( getFileSystem().listStatus(getOutputDir(), new OutputLogFilter())); assertEquals(1, outputFiles.length); }
/**
 * Runs a word-count job with String/Long key/value types handled by
 * JavaSerialization, then verifies the sorted reduce output line by line.
 */
public void testMapReduceJob() throws Exception {
  JobConf conf = new JobConf(TestJavaSerialization.class);
  conf.setJobName("JavaSerialization");
  FileSystem fs = FileSystem.get(conf);
  // Reset the input directory and (re)create the input file.
  cleanAndCreateInput(fs);
  // Enable Java serialization ahead of the default Writable serialization.
  conf.set("io.serializations",
      "org.apache.hadoop.io.serializer.JavaSerialization,"
      + "org.apache.hadoop.io.serializer.WritableSerialization");
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(String.class);
  conf.setOutputValueClass(Long.class);
  conf.setOutputKeyComparatorClass(JavaSerializationComparator.class);
  conf.setMapperClass(WordCountMapper.class);
  conf.setReducerClass(SumReducer.class);
  FileInputFormat.setInputPaths(conf, INPUT_DIR);
  FileOutputFormat.setOutputPath(conf, OUTPUT_DIR);
  JobClient.runJob(conf);
  Path[] outputFiles = FileUtil.stat2Paths(
      fs.listStatus(OUTPUT_DIR,
          new Utils.OutputFileUtils.OutputFilesFilter()));
  assertEquals(1, outputFiles.length);
  // Verify the sorted word counts. Close the reader in a finally block so
  // it is released even when an assertion fails (was leaked on failure).
  InputStream is = fs.open(outputFiles[0]);
  BufferedReader reader = new BufferedReader(new InputStreamReader(is));
  try {
    assertEquals("a\t1", reader.readLine());
    assertEquals("b\t1", reader.readLine());
    assertNull(reader.readLine());
  } finally {
    reader.close();
  }
}
/**
 * HADOOP-4466:
 * This test verifies the JavaSerialization implementation can write to
 * SequenceFiles — i.e. that SequenceFileOutputFormat is not coupled to
 * Writable types; if it were, the job would fail.
 */
public void testWriteToSequencefile() throws Exception {
  JobConf conf = new JobConf(TestJavaSerialization.class);
  conf.setJobName("JavaSerialization");
  FileSystem fs = FileSystem.get(conf);
  // Reset the input directory and (re)create the input file.
  cleanAndCreateInput(fs);
  // Enable Java serialization ahead of the default Writable serialization.
  conf.set("io.serializations",
      "org.apache.hadoop.io.serializer.JavaSerialization,"
      + "org.apache.hadoop.io.serializer.WritableSerialization");
  conf.setInputFormat(TextInputFormat.class);
  // test we can write to sequence files
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  // Non-Writable key/value types; ordering supplied by the comparator below.
  conf.setOutputKeyClass(String.class);
  conf.setOutputValueClass(Long.class);
  conf.setOutputKeyComparatorClass(JavaSerializationComparator.class);
  conf.setMapperClass(WordCountMapper.class);
  conf.setReducerClass(SumReducer.class);
  FileInputFormat.setInputPaths(conf, INPUT_DIR);
  FileOutputFormat.setOutputPath(conf, OUTPUT_DIR);
  JobClient.runJob(conf);
  // Success criterion: the job produced exactly one (sequence) output file.
  Path[] outputFiles = FileUtil.stat2Paths(
      fs.listStatus(OUTPUT_DIR,
          new Utils.OutputFileUtils.OutputFilesFilter()));
  assertEquals(1, outputFiles.length);
}
/**
 * Runs a word-count job under the local framework with String/Long
 * key/value types handled by JavaSerialization. Pre-checks the input file
 * contents, then verifies the reduce output has exactly the two expected
 * lines and nothing more.
 */
public void testMapReduceJob() throws Exception {
  JobConf conf = new JobConf(TestJavaSerialization.class);
  conf.setJobName("JavaSerialization");
  FileSystem fs = FileSystem.get(conf);
  // Reset the input directory and (re)create the input file.
  cleanAndCreateInput(fs);
  // Enable Java serialization ahead of the default Writable serialization.
  conf.set("io.serializations",
      "org.apache.hadoop.io.serializer.JavaSerialization,"
      + "org.apache.hadoop.io.serializer.WritableSerialization");
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(String.class);
  conf.setOutputValueClass(Long.class);
  conf.setOutputKeyComparatorClass(JavaSerializationComparator.class);
  conf.setMapperClass(WordCountMapper.class);
  conf.setReducerClass(SumReducer.class);
  // Run with the local framework so no cluster is required.
  conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
  FileInputFormat.setInputPaths(conf, INPUT_DIR);
  FileOutputFormat.setOutputPath(conf, OUTPUT_DIR);
  // Sanity-check the fixture before running the job.
  String inputFileContents =
      FileUtils.readFileToString(new File(INPUT_FILE.toUri().getPath()));
  assertTrue("Input file contents not as expected; contents are '"
      + inputFileContents + "', expected \"b a\n\" ",
      inputFileContents.equals("b a\n"));
  JobClient.runJob(conf);
  Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(OUTPUT_DIR,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  assertEquals(1, outputFiles.length);
  // Read the whole reduce output. Close the stream in a finally block so
  // it is released even when an assertion fails (was leaked on failure).
  InputStream is = fs.open(outputFiles[0]);
  String reduceOutput;
  try {
    reduceOutput = org.apache.commons.io.IOUtils.toString(is);
  } finally {
    is.close();
  }
  String[] lines = reduceOutput.split(System.getProperty("line.separator"));
  assertEquals("Unexpected output; received output '" + reduceOutput + "'",
      "a\t1", lines[0]);
  assertEquals("Unexpected output; received output '" + reduceOutput + "'",
      "b\t1", lines[1]);
  assertEquals("Reduce output has extra lines; output is '" + reduceOutput
      + "'", 2, lines.length);
}
/**
 * Shared driver for the MultipleOutputs + JavaSerialization tests: runs a
 * 2-map/1-reduce job over eight single-letter lines, writing both a named
 * "text" output and one value-based output per distinct letter, then
 * checks the produced files and (optionally) the MultipleOutputs counters.
 *
 * @param withCounters whether MultipleOutputs counters are enabled and
 *                     should be asserted after the job completes
 */
protected void _testMOWithJavaSerialization(boolean withCounters)
    throws Exception {
  String input = "a\nb\nc\nd\ne\nc\nd\ne";

  Configuration conf = createJobConf();
  // Enable Java serialization ahead of the default Writable serialization.
  conf.set("io.serializations",
      "org.apache.hadoop.io.serializer.JavaSerialization,"
      + "org.apache.hadoop.io.serializer.WritableSerialization");
  Job job = MapReduceTestUtil.createJob(conf, IN_DIR, OUT_DIR, 2, 1, input);

  job.setJobName("mo");
  // Named output "text" uses non-Writable Long/String types.
  MultipleOutputs.addNamedOutput(job, TEXT, TextOutputFormat.class,
      Long.class, String.class);

  MultipleOutputs.setCountersEnabled(job, withCounters);

  job.setSortComparatorClass(JavaSerializationComparator.class);

  job.setMapOutputKeyClass(Long.class);
  job.setMapOutputValueClass(String.class);

  job.setOutputKeyClass(Long.class);
  job.setOutputValueClass(String.class);

  job.setMapperClass(MOJavaSerDeMap.class);
  job.setReducerClass(MOJavaSerDeReduce.class);

  job.waitForCompletion(true);

  // assert number of named output part files
  int namedOutputCount = 0;
  int valueBasedOutputCount = 0;
  FileSystem fs = OUT_DIR.getFileSystem(conf);
  FileStatus[] statuses = fs.listStatus(OUT_DIR);
  for (FileStatus status : statuses) {
    String fileName = status.getPath().getName();
    // "text-*" files come from the named output (one per map, one from
    // the reduce); the single-letter files come from value-based naming.
    if (fileName.equals("text-m-00000")
        || fileName.equals("text-m-00001")
        || fileName.equals("text-r-00000")) {
      namedOutputCount++;
    } else if (fileName.equals("a-r-00000")
        || fileName.equals("b-r-00000")
        || fileName.equals("c-r-00000")
        || fileName.equals("d-r-00000")
        || fileName.equals("e-r-00000")) {
      valueBasedOutputCount++;
    }
  }
  assertEquals(3, namedOutputCount);
  assertEquals(5, valueBasedOutputCount);

  // assert TextOutputFormat files correctness
  BufferedReader reader = new BufferedReader(
      new InputStreamReader(fs.open(
          new Path(FileOutputFormat.getOutputPath(job), "text-r-00000"))));
  int count = 0;
  String line = reader.readLine();
  while (line != null) {
    // Every value written to the named output ends with the TEXT marker.
    assertTrue(line.endsWith(TEXT));
    line = reader.readLine();
    count++;
  }
  reader.close();
  assertFalse(count == 0);

  if (withCounters) {
    // One counter per output name: TEXT plus the five letters.
    CounterGroup counters =
        job.getCounters().getGroup(MultipleOutputs.class.getName());
    assertEquals(6, counters.size());
    assertEquals(4, counters.findCounter(TEXT).getValue());
    assertEquals(2, counters.findCounter("a").getValue());
    assertEquals(2, counters.findCounter("b").getValue());
    assertEquals(4, counters.findCounter("c").getValue());
    assertEquals(4, counters.findCounter("d").getValue());
    assertEquals(4, counters.findCounter("e").getValue());
  }
}