public void testEmptyOutput() throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // Do not write any output

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  FileUtil.fullyDelete(new File(outDir.toString()));
}
@Test
public void testNoCommitAction() {
  TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
  JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
      taskAttemptContext.getTaskAttemptID().getJobID());
  try {
    OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
    committer.commitJob(jobContext);
    Assert.assertEquals(taskAttemptContext.getStatus(), "Commit Successful");

    // Test for idempotent commit
    committer.commitJob(jobContext);
    Assert.assertEquals(taskAttemptContext.getStatus(), "Commit Successful");
  } catch (IOException e) {
    LOG.error("Exception encountered ", e);
    Assert.fail("Commit failed");
  }
}
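The getTaskAttemptContext(config) helper used above is not part of the snippet. A minimal sketch of what such a helper could look like, assuming a TaskAttemptContextImpl built around a fixed, arbitrary TaskAttemptID; the identifier values are placeholders, not from the original source:

// Sketch (assumption): wrap the configuration in a TaskAttemptContextImpl
// with an arbitrary placeholder attempt id.
private static TaskAttemptContext getTaskAttemptContext(Configuration conf) {
  return new TaskAttemptContextImpl(conf,
      new TaskAttemptID("200707121733", 1, TaskType.MAP, 1, 1));
}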
/**
 * Returns the table block info for the given segment.
 *
 * @param job job context
 * @param absoluteTableIdentifier absolute table identifier
 * @param segmentId segment id
 * @return list of table block info
 * @throws IOException
 */
private List<TableBlockInfo> getTableBlockInfo(JobContext job,
    AbsoluteTableIdentifier absoluteTableIdentifier, String segmentId) throws IOException {
  // List<FileStatus> fileStatusList = new LinkedList<FileStatus>();
  List<TableBlockInfo> tableBlockInfoList = new ArrayList<TableBlockInfo>();
  // getFileStatusOfSegments(job, new int[]{ segmentId }, fileStatusList);

  // get file location of all files of given segment
  JobContext newJob =
      new JobContextImpl(new Configuration(job.getConfiguration()), job.getJobID());
  newJob.getConfiguration().set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, segmentId + "");

  // identify table blocks
  for (InputSplit inputSplit : getSplitsInternal(newJob)) {
    CarbonInputSplit carbonInputSplit = (CarbonInputSplit) inputSplit;
    BlockletInfos blockletInfos = new BlockletInfos(carbonInputSplit.getNumberOfBlocklets(), 0,
        carbonInputSplit.getNumberOfBlocklets());
    tableBlockInfoList.add(
        new TableBlockInfo(carbonInputSplit.getPath().toString(), carbonInputSplit.getStart(),
            segmentId, carbonInputSplit.getLocations(), carbonInputSplit.getLength(),
            blockletInfos));
  }
  return tableBlockInfoList;
}
public HadoopElementIterator(final HadoopGraph graph) {
  try {
    this.graph = graph;
    final Configuration configuration = ConfUtil.makeHadoopConfiguration(this.graph.configuration());
    final InputFormat<NullWritable, VertexWritable> inputFormat =
        ConfUtil.getReaderAsInputFormat(configuration);
    if (inputFormat instanceof FileInputFormat) {
      final Storage storage = FileSystemStorage.open(configuration);
      if (!this.graph.configuration().containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
        return; // there is no input location and thus, no data (empty graph)
      if (!Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).isPresent())
        return; // there is no data at the input location (empty graph)
      configuration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR,
          Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).get());
    }
    final List<InputSplit> splits = inputFormat.getSplits(
        new JobContextImpl(configuration, new JobID(UUID.randomUUID().toString(), 1)));
    for (final InputSplit split : splits) {
      this.readers.add(inputFormat.createRecordReader(split,
          new TaskAttemptContextImpl(configuration, new TaskAttemptID())));
    }
  } catch (final Exception e) {
    throw new IllegalStateException(e.getMessage(), e);
  }
}
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
  configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

  JobContext jobContext = new JobContextImpl(configuration, new JobID());

  jobContext.getCredentials().addAll(this.credentials);
  Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
  if (currentUserCreds != null) {
    jobContext.getCredentials().addAll(currentUserCreds);
  }

  List<org.apache.hadoop.mapreduce.InputSplit> splits;
  try {
    splits = this.mapreduceInputFormat.getSplits(jobContext);
  } catch (InterruptedException e) {
    throw new IOException("Could not get Splits.", e);
  }

  HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];
  for (int i = 0; i < hadoopInputSplits.length; i++) {
    hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
  }
  return hadoopInputSplits;
}
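The getCredentialsFromUGI(...) call above is a helper that is not shown. A minimal sketch, assuming it simply exposes the credentials already held by the UserGroupInformation instance:

// Sketch (assumption): delegate to the UGI's own credential store.
private static Credentials getCredentialsFromUGI(UserGroupInformation ugi) {
  return ugi.getCredentials();
}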
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
  configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

  JobContext jobContext = new JobContextImpl(configuration, new JobID());

  List<InputSplit> splits;
  try {
    splits = this.hCatInputFormat.getSplits(jobContext);
  } catch (InterruptedException e) {
    throw new IOException("Could not get Splits.", e);
  }

  HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];
  for (int i = 0; i < hadoopInputSplits.length; i++) {
    hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
  }
  return hadoopInputSplits;
}
public void testCommitter() throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  validateContent(outDir);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
public void testMapFileOutputCommitter() throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  MapFileOutputFormat theOutputFormat = new MapFileOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeMapFileOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  validateMapFileOutputContent(FileSystem.get(job.getConfiguration()), outDir);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
@Before
public void setup() throws Exception {
  Configuration conf = new Configuration();
  input = ClassLoader.getSystemClassLoader().getResource("test.cram").getFile();
  reference = ClassLoader.getSystemClassLoader().getResource("auxf.fa").toURI().toString();
  String referenceIndex = ClassLoader.getSystemClassLoader().getResource("auxf.fa.fai")
      .toURI().toString();
  conf.set("mapred.input.dir", "file://" + input);

  URI hdfsRef = clusterUri.resolve("/tmp/auxf.fa");
  URI hdfsRefIndex = clusterUri.resolve("/tmp/auxf.fa.fai");
  Files.copy(Paths.get(URI.create(reference)), Paths.get(hdfsRef));
  Files.copy(Paths.get(URI.create(referenceIndex)), Paths.get(hdfsRefIndex));

  conf.set(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY, hdfsRef.toString());

  taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
  jobContext = new JobContextImpl(conf, taskAttemptContext.getJobID());
}
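A hypothetical follow-up test (not from the original source) showing how the jobContext prepared in setup() could be consumed, assuming Hadoop-BAM's CRAMInputFormat is on the classpath:

// Hypothetical test (assumption): exercise split calculation against the
// jobContext and CRAM file configured in setup() above.
@Test
public void testSplitsAreProduced() throws Exception {
  CRAMInputFormat inputFormat = new CRAMInputFormat();
  List<InputSplit> splits = inputFormat.getSplits(jobContext);
  assertFalse("expected at least one split for test.cram", splits.isEmpty());
}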
@Test
public void getSplits() throws Exception {
  S3MapReduceCpOptions options = getOptions();
  Configuration configuration = new Configuration();
  configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
  CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
      new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"),
      options);

  JobContext jobContext = new JobContextImpl(configuration, new JobID());
  DynamicInputFormat<Text, CopyListingFileStatus> inputFormat = new DynamicInputFormat<>();
  List<InputSplit> splits = inputFormat.getSplits(jobContext);

  int nFiles = 0;
  int taskId = 0;

  for (InputSplit split : splits) {
    RecordReader<Text, CopyListingFileStatus> recordReader =
        inputFormat.createRecordReader(split, null);
    StubContext stubContext = new StubContext(jobContext.getConfiguration(), recordReader, taskId);
    final TaskAttemptContext taskAttemptContext = stubContext.getContext();

    recordReader.initialize(splits.get(0), taskAttemptContext);
    float previousProgressValue = 0f;
    while (recordReader.nextKeyValue()) {
      CopyListingFileStatus fileStatus = recordReader.getCurrentValue();
      String source = fileStatus.getPath().toString();
      assertTrue(expectedFilePaths.contains(source));
      final float progress = recordReader.getProgress();
      assertTrue(progress >= previousProgressValue);
      assertTrue(progress >= 0.0f);
      assertTrue(progress <= 1.0f);
      previousProgressValue = progress;
      ++nFiles;
    }
    assertTrue(recordReader.getProgress() == 1.0f);

    ++taskId;
  }

  Assert.assertEquals(expectedFilePaths.size(), nFiles);
}
@SuppressWarnings("unchecked") public void testCommitter() throws Exception { Job job = Job.getInstance(); FileOutputFormat.setOutputPath(job, outDir); Configuration conf = job.getConfiguration(); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext); // setup committer.setupJob(jContext); committer.setupTask(tContext); // write output TextOutputFormat theOutputFormat = new TextOutputFormat(); RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext); writeOutput(theRecordWriter, tContext); // do commit committer.commitTask(tContext); committer.commitJob(jContext); // validate output File expectedFile = new File(new Path(outDir, partFile).toString()); StringBuffer expectedOutput = new StringBuffer(); expectedOutput.append(key1).append('\t').append(val1).append("\n"); expectedOutput.append(val1).append("\n"); expectedOutput.append(val2).append("\n"); expectedOutput.append(key2).append("\n"); expectedOutput.append(key1).append("\n"); expectedOutput.append(key2).append('\t').append(val2).append("\n"); String output = UtilsForTests.slurp(expectedFile); assertEquals(output, expectedOutput.toString()); FileUtil.fullyDelete(new File(outDir.toString())); }
@SuppressWarnings("unchecked") public void testAbort() throws IOException, InterruptedException { Job job = Job.getInstance(); FileOutputFormat.setOutputPath(job, outDir); Configuration conf = job.getConfiguration(); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext); // do setup committer.setupJob(jContext); committer.setupTask(tContext); // write output TextOutputFormat theOutputFormat = new TextOutputFormat(); RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext); writeOutput(theRecordWriter, tContext); // do abort committer.abortTask(tContext); File expectedFile = new File(new Path(committer.getWorkPath(), partFile) .toString()); assertFalse("task temp dir still exists", expectedFile.exists()); committer.abortJob(jContext, JobStatus.State.FAILED); expectedFile = new File(new Path(outDir, FileOutputCommitter.PENDING_DIR_NAME) .toString()); assertFalse("job temp dir still exists", expectedFile.exists()); assertEquals("Output directory not empty", 0, new File(outDir.toString()) .listFiles().length); FileUtil.fullyDelete(new File(outDir.toString())); }
private void testCommitterInternal(int version) throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  validateContent(outDir);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
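Since this internal method is parameterised on the FileOutputCommitter algorithm version, the enclosing test class presumably drives it once per supported version. A minimal sketch of such wrappers, assuming versions 1 and 2:

// Sketch (assumption): thin per-version wrappers around the parameterised test.
@Test
public void testCommitterV1() throws Exception {
  testCommitterInternal(1);
}

@Test
public void testCommitterV2() throws Exception {
  testCommitterInternal(2);
}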