private void writeBadOutput(TaskAttempt attempt, Configuration conf)
    throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf,
      TypeConverter.fromYarn(attempt.getID()));

  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat
      .getRecordWriter(tContext);

  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key2, val2);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val2);
    theRecordWriter.write(nullWritable, val1);
    theRecordWriter.write(key1, nullWritable);
    theRecordWriter.write(key2, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key1, val1);
  } finally {
    theRecordWriter.close(tContext);
  }

  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}
private void writeOutput(TaskAttempt attempt, Configuration conf)
    throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf,
      TypeConverter.fromYarn(attempt.getID()));

  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat
      .getRecordWriter(tContext);

  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key1, val1);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val1);
    theRecordWriter.write(nullWritable, val2);
    theRecordWriter.write(key2, nullWritable);
    theRecordWriter.write(key1, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key2, val2);
  } finally {
    theRecordWriter.close(tContext);
  }

  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}
public void testEmptyOutput() throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // Do not write any output

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  FileUtil.fullyDelete(new File(outDir.toString()));
}
/**
 * Tests that the class loader set by
 * {@link Configuration#setClassLoader(ClassLoader)}
 * is inherited by any {@link WrappedRecordReader}s created by
 * {@link CompositeRecordReader}.
 */
public void testClassLoader() throws Exception {
  Configuration conf = new Configuration();
  Fake_ClassLoader classLoader = new Fake_ClassLoader();
  conf.setClassLoader(classLoader);
  assertTrue(conf.getClassLoader() instanceof Fake_ClassLoader);

  FileSystem fs = FileSystem.get(conf);
  Path testdir = new Path(System.getProperty("test.build.data", "/tmp"))
      .makeQualified(fs);
  Path base = new Path(testdir, "/empty");
  Path[] src = { new Path(base, "i0"), new Path("i1"), new Path("i2") };
  conf.set(CompositeInputFormat.JOIN_EXPR,
      CompositeInputFormat.compose("outer", IF_ClassLoaderChecker.class, src));

  CompositeInputFormat<NullWritable> inputFormat =
      new CompositeInputFormat<NullWritable>();
  // create dummy TaskAttemptID
  TaskAttemptID tid = new TaskAttemptID("jt", 1, TaskType.MAP, 0, 0);
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, tid.toString());
  inputFormat.createRecordReader(
      inputFormat.getSplits(Job.getInstance(conf)).get(0),
      new TaskAttemptContextImpl(conf, tid));
}
@Test
public void testReinit() throws Exception {
  // Test that a split containing multiple files works correctly,
  // with the child RecordReader getting its initialize() method
  // called a second time.
  TaskAttemptID taskId = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
  Configuration conf = new Configuration();
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskId);

  // This will create a CombineFileRecordReader that itself contains a
  // DummyRecordReader.
  InputFormat inputFormat = new ChildRRInputFormat();

  Path[] files = { new Path("file1"), new Path("file2") };
  long[] lengths = { 1, 1 };

  CombineFileSplit split = new CombineFileSplit(files, lengths);
  RecordReader rr = inputFormat.createRecordReader(split, context);
  assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);

  // first initialize() call comes from MapTask. We'll do it here.
  rr.initialize(split, context);

  // First value is first filename.
  assertTrue(rr.nextKeyValue());
  assertEquals("file1", rr.getCurrentValue().toString());

  // The inner RR will return false, because it only emits one (k, v) pair.
  // But there's another sub-split to process. This returns true to us.
  assertTrue(rr.nextKeyValue());

  // And the 2nd rr will have its initialize method called correctly.
  assertEquals("file2", rr.getCurrentValue().toString());

  // But after both child RR's have returned their singleton (k, v), this
  // should also return false.
  assertFalse(rr.nextKeyValue());
}
private org.apache.hadoop.mapreduce.OutputCommitter createOutputCommitter(
    boolean newApiCommitter, JobID jobId, Configuration conf)
    throws Exception {
  org.apache.hadoop.mapreduce.OutputCommitter committer = null;

  LOG.info("OutputCommitter set in config "
      + conf.get("mapred.output.committer.class"));

  if (newApiCommitter) {
    org.apache.hadoop.mapreduce.TaskID taskId =
        new org.apache.hadoop.mapreduce.TaskID(jobId, TaskType.MAP, 0);
    org.apache.hadoop.mapreduce.TaskAttemptID taskAttemptID =
        new org.apache.hadoop.mapreduce.TaskAttemptID(taskId, 0);
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext =
        new TaskAttemptContextImpl(conf, taskAttemptID);
    OutputFormat outputFormat =
        ReflectionUtils.newInstance(taskContext.getOutputFormatClass(), conf);
    committer = outputFormat.getOutputCommitter(taskContext);
  } else {
    committer = ReflectionUtils.newInstance(conf.getClass(
        "mapred.output.committer.class", FileOutputCommitter.class,
        org.apache.hadoop.mapred.OutputCommitter.class), conf);
  }
  LOG.info("OutputCommitter is " + committer.getClass().getName());
  return committer;
}
/** {@inheritDoc} */
public RecordWriter<K, V> getRecordWriter(FileSystem filesystem,
    JobConf job, String name, Progressable progress) throws IOException {
  org.apache.hadoop.mapreduce.RecordWriter<K, V> w = super.getRecordWriter(
      new TaskAttemptContextImpl(job,
          TaskAttemptID.forName(job.get(MRJobConfig.TASK_ATTEMPT_ID))));
  org.apache.hadoop.mapreduce.lib.db.DBOutputFormat.DBRecordWriter writer =
      (org.apache.hadoop.mapreduce.lib.db.DBOutputFormat.DBRecordWriter) w;
  try {
    return new DBRecordWriter(writer.getConnection(), writer.getStatement());
  } catch (SQLException se) {
    throw new IOException(se);
  }
}
public void testInvalidVersionNumber() throws IOException {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 3);
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  try {
    new FileOutputCommitter(outDir, tContext);
    fail("should've thrown an exception!");
  } catch (IOException e) {
    // test passed
  }
}
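For contrast, a hedged companion sketch (not part of the original test class): FileOutputCommitter accepts algorithm versions 1 and 2, so the same construction succeeds with a valid value. It assumes the same fixtures (outDir, attempt, taskID) the surrounding tests use.

public void testValidVersionNumber() throws IOException {
  // Companion sketch to testInvalidVersionNumber above; assumes the same
  // test fixtures (outDir, attempt, taskID). Versions 1 and 2 are the
  // accepted values, so constructing the committer must not throw.
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 2);
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  new FileOutputCommitter(outDir, tContext); // must not throw
}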
private static SortedSet<byte[]> readFileToSearch(final Configuration conf,
    final FileSystem fs, final LocatedFileStatus keyFileStatus)
    throws IOException, InterruptedException {
  SortedSet<byte[]> result = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
  // Return entries that are flagged Counts.UNDEFINED in the value. Return the
  // row. This is what is missing.
  TaskAttemptContext context =
      new TaskAttemptContextImpl(conf, new TaskAttemptID());
  try (SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader rr =
      new SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader()) {
    InputSplit is = new FileSplit(keyFileStatus.getPath(), 0,
        keyFileStatus.getLen(), new String[] {});
    rr.initialize(is, context);
    while (rr.nextKeyValue()) {
      rr.getCurrentKey();
      BytesWritable bw = rr.getCurrentValue();
      if (Verify.VerifyReducer.whichType(bw.getBytes()) == Verify.Counts.UNDEFINED) {
        byte[] key = new byte[rr.getCurrentKey().getLength()];
        System.arraycopy(rr.getCurrentKey().getBytes(), 0, key, 0,
            rr.getCurrentKey().getLength());
        result.add(key);
      }
    }
  }
  return result;
}
@Test
public void readBitcoinTransactionInputFormatMultiBlock()
    throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "multiblock.blk";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  BitcoinTransactionFileInputFormat format = new BitcoinTransactionFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for multi block");
  RecordReader<BytesWritable, BitcoinTransaction> reader =
      format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  int transactCount = 0;
  while (reader.nextKeyValue()) {
    transactCount++;
  }
  assertEquals(346, transactCount, "Multiblock must contain exactly 1+2+343=346 transactions");
  reader.close();
}
@Test
public void readEthereumBlockInputFormatBlock1()
    throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "eth1.bin";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for block 1");
  RecordReader<BytesWritable, EthereumBlock> reader =
      format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable key = new BytesWritable();
  EthereumBlock block = new EthereumBlock();
  assertTrue(reader.nextKeyValue(), "Input Split for block 1 contains at least one block");
  key = reader.getCurrentKey();
  block = reader.getCurrentValue();
  assertEquals(0, block.getEthereumTransactions().size(), "Block 1 must have 0 transactions");
  assertFalse(reader.nextKeyValue(), "No further blocks in block 1");
  reader.close();
}
@Test
public void readEthereumBlockInputFormatBlock1346406Bzip2Compressed()
    throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "eth1346406.bin.bz2";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for block 1346406");
  RecordReader<BytesWritable, EthereumBlock> reader =
      format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable key = new BytesWritable();
  EthereumBlock block = new EthereumBlock();
  assertTrue(reader.nextKeyValue(), "Input Split for block 1346406 contains at least one block");
  key = reader.getCurrentKey();
  block = reader.getCurrentValue();
  assertEquals(6, block.getEthereumTransactions().size(), "Block 1346406 must have 6 transactions");
  assertFalse(reader.nextKeyValue(), "No further blocks in block 1346406");
  reader.close();
}
@Test
public void readBitcoinRawBlockInputFormatBlockVersion2()
    throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version2.blk";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for block version 2");
  RecordReader<BytesWritable, BytesWritable> reader =
      format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable key = new BytesWritable();
  BytesWritable block = new BytesWritable();
  assertTrue(reader.nextKeyValue(), "Input Split for block version contains at least one block");
  block = reader.getCurrentValue();
  assertEquals(191198, block.getLength(), "Random block version 2 must have a size of 191,198 bytes");
  assertFalse(reader.nextKeyValue(), "No further blocks in block version 2");
  reader.close();
}
public HadoopElementIterator(final HadoopGraph graph) {
  try {
    this.graph = graph;
    final Configuration configuration =
        ConfUtil.makeHadoopConfiguration(this.graph.configuration());
    final InputFormat<NullWritable, VertexWritable> inputFormat =
        ConfUtil.getReaderAsInputFormat(configuration);
    if (inputFormat instanceof FileInputFormat) {
      final Storage storage = FileSystemStorage.open(configuration);
      if (!this.graph.configuration().containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
        return; // there is no input location and thus, no data (empty graph)
      if (!Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).isPresent())
        return; // there is no data at the input location (empty graph)
      configuration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR,
          Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).get());
    }
    final List<InputSplit> splits = inputFormat.getSplits(
        new JobContextImpl(configuration, new JobID(UUID.randomUUID().toString(), 1)));
    for (final InputSplit split : splits) {
      this.readers.add(inputFormat.createRecordReader(split,
          new TaskAttemptContextImpl(configuration, new TaskAttemptID())));
    }
  } catch (final Exception e) {
    throw new IllegalStateException(e.getMessage(), e);
  }
}
@Test
public void readBitcoinTransactionInputFormatBzip2Compressed()
    throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  Job job = Job.getInstance(conf);
  CompressionCodec bzip2 = new BZip2Codec();
  ReflectionUtils.setConf(bzip2, conf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version4comp.blk.bz2";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  BitcoinTransactionFileInputFormat format = new BitcoinTransactionFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for compressed block");
  RecordReader<BytesWritable, BitcoinTransaction> reader =
      format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  int transactCount = 0;
  while (reader.nextKeyValue()) {
    transactCount++;
  }
  // the assertion is an exact equality, so the message says "exactly"
  assertEquals(936, transactCount, "Compressed block must contain exactly 936 transactions");
  reader.close();
}
@Override
public void open(HadoopInputSplit split) throws IOException {
  // enforce sequential open() calls
  synchronized (OPEN_MUTEX) {
    TaskAttemptContext context =
        new TaskAttemptContextImpl(configuration, new TaskAttemptID());
    try {
      this.recordReader = this.mapreduceInputFormat
          .createRecordReader(split.getHadoopInputSplit(), context);
      this.recordReader.initialize(split.getHadoopInputSplit(), context);
    } catch (InterruptedException e) {
      throw new IOException("Could not create RecordReader.", e);
    } finally {
      this.fetched = false;
    }
  }
}
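Nearly every snippet in this collection fabricates a TaskAttemptContext outside of a running task, purely to satisfy an InputFormat/OutputFormat signature that requires one. Below is a minimal, self-contained sketch of that recurring pattern; the identifiers "local", job 1, and task 0 are arbitrary placeholders (not values Hadoop requires), and the helper class name is hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public final class DummyContexts {

  private DummyContexts() {}

  /**
   * Builds a context from a synthetic attempt id, as the snippets above do
   * either via the no-arg new TaskAttemptID() or via explicit placeholder
   * arguments such as new TaskAttemptID("jt", 1, TaskType.MAP, 0, 0).
   */
  public static TaskAttemptContext newDummyContext(Configuration conf) {
    TaskAttemptID tid = new TaskAttemptID("local", 1, TaskType.MAP, 0, 0);
    return new TaskAttemptContextImpl(conf, tid);
  }
}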
@Test
public void readBitcoinTransactionInputFormatBlockVersion3()
    throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version3.blk";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  BitcoinTransactionFileInputFormat format = new BitcoinTransactionFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for block version 3");
  RecordReader<BytesWritable, BitcoinTransaction> reader =
      format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  int transactCount = 0;
  while (reader.nextKeyValue()) {
    transactCount++;
  }
  assertEquals(1645, transactCount, "Block version 3 must contain exactly 1645 transactions");
  reader.close();
}
@Test
public void readBitcoinBlockInputFormatBlockVersion4()
    throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version4.blk";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  BitcoinBlockFileInputFormat format = new BitcoinBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for block version 4");
  RecordReader<BytesWritable, BitcoinBlock> reader =
      format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable key = new BytesWritable();
  BitcoinBlock block = new BitcoinBlock();
  assertTrue(reader.nextKeyValue(), "Input Split for block version contains at least one block");
  block = reader.getCurrentValue();
  assertEquals(936, block.getTransactions().size(), "Random block version 4 must contain exactly 936 transactions");
  assertFalse(reader.nextKeyValue(), "No further blocks in block version 4");
  reader.close();
}
@Test
public void readBitcoinTransactionInputFormatBlockVersion4()
    throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version4.blk";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  BitcoinTransactionFileInputFormat format = new BitcoinTransactionFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for block version 4");
  RecordReader<BytesWritable, BitcoinTransaction> reader =
      format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  int transactCount = 0;
  while (reader.nextKeyValue()) {
    transactCount++;
  }
  assertEquals(936, transactCount, "Block version 4 must contain exactly 936 transactions");
  reader.close();
}
@Test
public void readBitcoinRawBlockInputFormatGenesisBlock()
    throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "genesis.blk";
  String fileNameGenesis = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameGenesis);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for genesis block");
  RecordReader<BytesWritable, BytesWritable> reader =
      format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable genesisKey = new BytesWritable();
  BytesWritable genesisBlock = new BytesWritable();
  assertTrue(reader.nextKeyValue(), "Input Split for genesis block contains at least one block");
  genesisKey = reader.getCurrentKey();
  genesisBlock = reader.getCurrentValue();
  assertEquals(293, genesisBlock.getLength(), "Genesis block must have a size of 293 bytes");
  assertFalse(reader.nextKeyValue(), "No further blocks in genesis block");
  reader.close();
}
@SuppressWarnings({"rawtypes", "unchecked"}) @Test (timeout=10000) public void testLoadMapper() throws Exception { Configuration conf = new Configuration(); conf.setInt(JobContext.NUM_REDUCES, 2); CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true); conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true); TaskAttemptID taskId = new TaskAttemptID(); RecordReader<NullWritable, GridmixRecord> reader = new FakeRecordReader(); LoadRecordGkGrWriter writer = new LoadRecordGkGrWriter(); OutputCommitter committer = new CustomOutputCommitter(); StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter(); LoadSplit split = getLoadSplit(); MapContext<NullWritable, GridmixRecord, GridmixKey, GridmixRecord> mapContext = new MapContextImpl<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>( conf, taskId, reader, writer, committer, reporter, split); // context Context ctx = new WrappedMapper<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>() .getMapContext(mapContext); reader.initialize(split, ctx); ctx.getConfiguration().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true); CompressionEmulationUtil.setCompressionEmulationEnabled( ctx.getConfiguration(), true); LoadJob.LoadMapper mapper = new LoadJob.LoadMapper(); // setup, map, clean mapper.run(ctx); Map<GridmixKey, GridmixRecord> data = writer.getData(); // check result assertEquals(2, data.size()); }
@Test
public void readBitcoinTransactionInputFormatGenesisBlock()
    throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "genesis.blk";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  BitcoinTransactionFileInputFormat format = new BitcoinTransactionFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for genesis block");
  RecordReader<BytesWritable, BitcoinTransaction> reader =
      format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  int transactCount = 0;
  while (reader.nextKeyValue()) {
    transactCount++;
  }
  assertEquals(1, transactCount, "Genesis block must contain exactly one transaction");
  reader.close();
}
@Test
public void readExcelInputFormatExcel2013SingleSheetEncryptedNegative()
    throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "excel2013encrypt.xlsx";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  // set locale to the one of the test data
  conf.set("hadoopoffice.read.locale.bcp47", "de");
  // for decryption simply set the password
  conf.set("hadoopoffice.read.security.crypt.password", "test2");
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  ExcelFileInputFormat format = new ExcelFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  assertEquals(1, splits.size(), "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
  InterruptedException ex = assertThrows(InterruptedException.class,
      () -> reader.initialize(splits.get(0), context),
      "Exception is thrown in case of wrong password");
}
@Test
public void readExcelInputFormatExcel2003SingleSheetEncryptedNegative()
    throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "excel2003encrypt.xls";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  // set locale to the one of the test data
  conf.set("hadoopoffice.read.locale.bcp47", "de");
  // for decryption simply set the password
  conf.set("hadoopoffice.read.security.crypt.password", "test2");
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  ExcelFileInputFormat format = new ExcelFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  assertEquals(1, splits.size(), "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
  InterruptedException ex = assertThrows(InterruptedException.class,
      () -> reader.initialize(splits.get(0), context),
      "Exception is thrown in case of wrong password");
}
@Test
public void readBitcoinBlockInputFormatGenesisBlock()
    throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "genesis.blk";
  String fileNameGenesis = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameGenesis);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  BitcoinBlockFileInputFormat format = new BitcoinBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for genesis block");
  RecordReader<BytesWritable, BitcoinBlock> reader =
      format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable genesisKey = new BytesWritable();
  BitcoinBlock genesisBlock = new BitcoinBlock();
  assertTrue(reader.nextKeyValue(), "Input Split for genesis block contains at least one block");
  genesisBlock = reader.getCurrentValue();
  assertEquals(1, genesisBlock.getTransactions().size(), "Genesis Block must contain exactly one transaction");
  assertFalse(reader.nextKeyValue(), "No further blocks in genesis Block");
  reader.close();
}
@Test
public void readBitcoinRawBlockInputFormatGzipCompressed()
    throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  Job job = Job.getInstance(conf);
  CompressionCodec gzip = new GzipCodec();
  ReflectionUtils.setConf(gzip, conf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version4comp.blk.gz";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for compressed block");
  RecordReader<BytesWritable, BytesWritable> reader =
      format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable key = new BytesWritable();
  BytesWritable block = new BytesWritable();
  assertTrue(reader.nextKeyValue(), "Input Split for block version contains at least one block");
  block = reader.getCurrentValue();
  assertEquals(998039, block.getLength(), "Compressed block must have a size of 998,039 bytes");
  assertFalse(reader.nextKeyValue(), "No further blocks in compressed block");
  reader.close();
}
@Test
public void readBitcoinRawBlockInputFormatBzip2Compressed()
    throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  Job job = Job.getInstance(conf);
  CompressionCodec bzip2 = new BZip2Codec();
  ReflectionUtils.setConf(bzip2, conf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version4comp.blk.bz2";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for compressed block");
  RecordReader<BytesWritable, BytesWritable> reader =
      format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable key = new BytesWritable();
  BytesWritable block = new BytesWritable();
  assertTrue(reader.nextKeyValue(), "Input Split for block version contains at least one block");
  block = reader.getCurrentValue();
  assertEquals(998039, block.getLength(), "Compressed block must have a size of 998,039 bytes");
  assertFalse(reader.nextKeyValue(), "No further blocks in compressed block");
  reader.close();
}
@Test
public void readExcelInputFormatExcel2003SingleSheetEncryptedPositiveLowFootprint()
    throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "excel2003encrypt.xls";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  // set locale to the one of the test data
  conf.set("hadoopoffice.read.locale.bcp47", "de");
  // low footprint
  conf.set("hadoopoffice.read.lowFootprint", "true");
  // for decryption simply set the password
  conf.set("hadoopoffice.read.security.crypt.password", "test");
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  ExcelFileInputFormat format = new ExcelFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  assertEquals(1, splits.size(), "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  Text spreadSheetKey = new Text();
  ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
  assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1");
  spreadSheetKey = reader.getCurrentKey();
  spreadSheetValue = reader.getCurrentValue();
  assertEquals("[excel2003encrypt.xls]Sheet1!A1", spreadSheetKey.toString(),
      "Input Split for Excel file has keyname == \"[excel2003encrypt.xls]Sheet1!A1\"");
  assertEquals(3, spreadSheetValue.get().length,
      "Input Split for Excel file contains row 1 with 3 columns");
  assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
      "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
  assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(),
      "Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\"");
  assertEquals("A1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(),
      "Input Split for Excel file contains row 1 with cell 1 address == \"A1\"");
  assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
      "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
  assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
      "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
}
@Test
public void readExcelInputFormatExcel2003SingleSheetEncryptedNegativeLowFootprint()
    throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "excel2003encrypt.xls";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  // set locale to the one of the test data
  conf.set("hadoopoffice.read.locale.bcp47", "de");
  // low footprint
  conf.set("hadoopoffice.read.lowFootprint", "true");
  // for decryption simply set the password
  conf.set("hadoopoffice.read.security.crypt.password", "test2");
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  ExcelFileInputFormat format = new ExcelFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  assertEquals(1, splits.size(), "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
  InterruptedException ex = assertThrows(InterruptedException.class,
      () -> reader.initialize(splits.get(0), context),
      "Exception is thrown in case of wrong password");
}
@Test
public void readExcelInputFormatExcel2013SingleSheetEncryptedNegativeLowFootprint()
    throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "excel2013encrypt.xlsx";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  // set locale to the one of the test data
  conf.set("hadoopoffice.read.locale.bcp47", "de");
  // low footprint
  conf.set("hadoopoffice.read.lowFootprint", "true");
  // for decryption simply set the password
  conf.set("hadoopoffice.read.security.crypt.password", "test2");
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  ExcelFileInputFormat format = new ExcelFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  assertEquals(1, splits.size(), "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
  InterruptedException ex = assertThrows(InterruptedException.class,
      () -> reader.initialize(splits.get(0), context),
      "Exception is thrown in case of wrong password");
}
public void checkOutputSpecs(JobContext job)
    throws FileAlreadyExistsException, IOException {
  super.checkOutputSpecs(job);
  // creating dummy TaskAttemptID
  TaskAttemptID tid = new TaskAttemptID("jt", 1, TaskType.JOB_SETUP, 0, 0);
  getOutputCommitter(new TaskAttemptContextImpl(job.getConfiguration(), tid))
      .setupJob(job);
}
@SuppressWarnings("unchecked") public void testCommitter() throws Exception { Job job = Job.getInstance(); FileOutputFormat.setOutputPath(job, outDir); Configuration conf = job.getConfiguration(); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext); // setup committer.setupJob(jContext); committer.setupTask(tContext); // write output TextOutputFormat theOutputFormat = new TextOutputFormat(); RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext); writeOutput(theRecordWriter, tContext); // do commit committer.commitTask(tContext); committer.commitJob(jContext); // validate output File expectedFile = new File(new Path(outDir, partFile).toString()); StringBuffer expectedOutput = new StringBuffer(); expectedOutput.append(key1).append('\t').append(val1).append("\n"); expectedOutput.append(val1).append("\n"); expectedOutput.append(val2).append("\n"); expectedOutput.append(key2).append("\n"); expectedOutput.append(key1).append("\n"); expectedOutput.append(key2).append('\t').append(val2).append("\n"); String output = UtilsForTests.slurp(expectedFile); assertEquals(output, expectedOutput.toString()); FileUtil.fullyDelete(new File(outDir.toString())); }
@SuppressWarnings("unchecked") public void testAbort() throws IOException, InterruptedException { Job job = Job.getInstance(); FileOutputFormat.setOutputPath(job, outDir); Configuration conf = job.getConfiguration(); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext); // do setup committer.setupJob(jContext); committer.setupTask(tContext); // write output TextOutputFormat theOutputFormat = new TextOutputFormat(); RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext); writeOutput(theRecordWriter, tContext); // do abort committer.abortTask(tContext); File expectedFile = new File(new Path(committer.getWorkPath(), partFile) .toString()); assertFalse("task temp dir still exists", expectedFile.exists()); committer.abortJob(jContext, JobStatus.State.FAILED); expectedFile = new File(new Path(outDir, FileOutputCommitter.PENDING_DIR_NAME) .toString()); assertFalse("job temp dir still exists", expectedFile.exists()); assertEquals("Output directory not empty", 0, new File(outDir.toString()) .listFiles().length); FileUtil.fullyDelete(new File(outDir.toString())); }
@Test
public void testRecordReaderInit() throws InterruptedException, IOException {
  // Test that we properly initialize the child recordreader when
  // CombineFileInputFormat and CombineFileRecordReader are used.
  TaskAttemptID taskId = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
  Configuration conf1 = new Configuration();
  conf1.set(DUMMY_KEY, "STATE1");
  TaskAttemptContext context1 = new TaskAttemptContextImpl(conf1, taskId);

  // This will create a CombineFileRecordReader that itself contains a
  // DummyRecordReader.
  InputFormat inputFormat = new ChildRRInputFormat();

  Path[] files = { new Path("file1") };
  long[] lengths = { 1 };

  CombineFileSplit split = new CombineFileSplit(files, lengths);
  RecordReader rr = inputFormat.createRecordReader(split, context1);
  assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);

  // Verify that the initial configuration is the one being used.
  // Right after construction the dummy key should have value "STATE1"
  assertEquals("Invalid initial dummy key value", "STATE1",
      rr.getCurrentKey().toString());

  // Switch the active context for the RecordReader...
  Configuration conf2 = new Configuration();
  conf2.set(DUMMY_KEY, "STATE2");
  TaskAttemptContext context2 = new TaskAttemptContextImpl(conf2, taskId);
  rr.initialize(split, context2);

  // And verify that the new context is updated into the child record reader.
  assertEquals("Invalid secondary dummy key value", "STATE2",
      rr.getCurrentKey().toString());
}