/**
 * Test that the run() and execute() methods of the DistCp class perform a simple file copy.
 * @throws Exception
 */
@Test
public void testCleanup() throws Exception {
  Configuration conf = getConf();
  Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(conf), conf);
  stagingDir.getFileSystem(conf).mkdirs(stagingDir);

  Path source = createFile("tmp.txt");
  Path target = createFile("target.txt");

  DistCp distcp = new DistCp(conf, null);
  String[] arg = { source.toString(), target.toString() };

  distcp.run(arg);
  Assert.assertTrue(fs.exists(target));
}
/**
 * Test the main() method of DistCp. The method is expected to call System.exit().
 */
@Test
public void testCleanupTestViaToolRunner() throws IOException, InterruptedException {
  Configuration conf = getConf();
  Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(conf), conf);
  stagingDir.getFileSystem(conf).mkdirs(stagingDir);

  Path source = createFile("tmp.txt");
  Path target = createFile("target.txt");
  try {
    String[] arg = { target.toString(), source.toString() };
    DistCp.main(arg);
    Assert.fail();
  } catch (ExitException t) {
    Assert.assertTrue(fs.exists(target));
    Assert.assertEquals(0, t.status);
    Assert.assertEquals(0,
        stagingDir.getFileSystem(conf).listStatus(stagingDir).length);
  }
}
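// Hedged sketch (assumption, not taken from the original test sources): DistCp.main() ends with
// a System.exit() call, so a test such as testCleanupTestViaToolRunner typically installs a
// SecurityManager that turns exit calls into a throwable. ExitException and
// NoExitSecurityManager below only illustrate how such a harness might look.
class ExitException extends SecurityException {
  final int status;

  ExitException(int status) {
    super("System.exit(" + status + ") intercepted");
    this.status = status;
  }
}

class NoExitSecurityManager extends SecurityManager {
  @Override
  public void checkPermission(java.security.Permission perm) {
    // allow everything except exiting the JVM
  }

  @Override
  public void checkExit(int status) {
    throw new ExitException(status);
  }
}

// Typically installed in a @Before method and restored to the previous manager in @After:
//   System.setSecurityManager(new NoExitSecurityManager());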
@Test(timeout = 100000)
public void testCleanup() {
  try {
    Path sourcePath = new Path("noscheme:///file");
    List<Path> sources = new ArrayList<Path>();
    sources.add(sourcePath);
    DistCpOptions options = new DistCpOptions(sources, target);

    Configuration conf = getConf();
    Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(conf), conf);
    stagingDir.getFileSystem(conf).mkdirs(stagingDir);

    try {
      new DistCp(conf, options).execute();
    } catch (Throwable t) {
      // the source has no usable scheme, so the job must fail and clean up its staging files
      Assert.assertEquals(0,
          stagingDir.getFileSystem(conf).listStatus(stagingDir).length);
    }
  } catch (Exception e) {
    LOG.error("Exception encountered ", e);
    Assert.fail("testCleanup failed " + e.getMessage());
  }
}
private void uploadJobFiles(JobID id, InputSplit[] splits, Path jobSubmitDir,
    UserGroupInformation ugi, final JobConf conf) throws Exception {
  final Path confLocation = JobSubmissionFiles.getJobConfPath(jobSubmitDir);
  FileSystem fs = ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
    public FileSystem run() throws IOException {
      return confLocation.getFileSystem(conf);
    }
  });
  JobSplitWriter.createSplitFiles(jobSubmitDir, conf, fs, splits);

  FsPermission perm = new FsPermission((short) 0700);

  // localize conf
  DataOutputStream confOut = FileSystem.create(fs, confLocation, perm);
  conf.writeXml(confOut);
  confOut.close();
}
/**
 * Generate new-api mapreduce InputFormat splits.
 * @param jobContext JobContext required by the InputFormat
 * @param inputSplitDir Directory in which to generate the split information
 *
 * @return InputSplitInfo containing the split files' information and the
 *         location hints for each generated split, used to determine the
 *         parallelism of the map stage.
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static InputSplitInfoDisk writeNewSplits(JobContext jobContext,
    Path inputSplitDir) throws IOException, InterruptedException,
    ClassNotFoundException {
  org.apache.hadoop.mapreduce.InputSplit[] splits =
      generateNewSplits(jobContext, null, 0);

  Configuration conf = jobContext.getConfiguration();

  JobSplitWriter.createSplitFiles(inputSplitDir, conf,
      inputSplitDir.getFileSystem(conf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        new TaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null));
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobContext.getCredentials());
}
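// Hedged sketch (assumption): generateNewSplits() is not shown above; new-api split generation
// usually instantiates the job's InputFormat reflectively (org.apache.hadoop.util.ReflectionUtils)
// and asks it for splits, roughly the way Hadoop's JobSubmitter does. computeNewSplits is a
// hypothetical name for that step, not the actual helper.
private static org.apache.hadoop.mapreduce.InputSplit[] computeNewSplits(JobContext jobContext)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = jobContext.getConfiguration();
  // instantiate the InputFormat configured for the job and let it compute the splits
  org.apache.hadoop.mapreduce.InputFormat<?, ?> input =
      ReflectionUtils.newInstance(jobContext.getInputFormatClass(), conf);
  List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(jobContext);
  return splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);
}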
/**
 * Generate old-api mapred InputFormat splits.
 * @param jobConf JobConf required by the InputFormat class
 * @param inputSplitDir Directory in which to generate the split information
 *
 * @return InputSplitInfo containing the split files' information and the
 *         number of splits generated, used to determine the parallelism of
 *         the map stage.
 *
 * @throws IOException
 */
private static InputSplitInfoDisk writeOldSplits(JobConf jobConf,
    Path inputSplitDir) throws IOException {
  org.apache.hadoop.mapred.InputSplit[] splits =
      generateOldSplits(jobConf, null, 0);

  JobSplitWriter.createSplitFiles(inputSplitDir, jobConf,
      inputSplitDir.getFileSystem(jobConf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        new TaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null));
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobConf.getCredentials());
}
public static void hackHadoopStagingOnWin() {
  // do the assignment only on Windows systems
  if (System.getProperty("os.name").toLowerCase().startsWith("win")) {
    // 0650 = -rw-r-x---
    JobSubmissionFiles.JOB_DIR_PERMISSION.fromShort((short) 0650);
    JobSubmissionFiles.JOB_FILE_PERMISSION.fromShort((short) 0650);

    if (trackerDistributedCacheManagerClass != null) {
      // handle jar permissions as well
      Field field = findField(trackerDistributedCacheManagerClass, "PUBLIC_CACHE_OBJECT_PERM");
      makeAccessible(field);
      try {
        FsPermission perm = (FsPermission) field.get(null);
        perm.fromShort((short) 0650);
      } catch (IllegalAccessException e) {
        throw new RuntimeException("Error while trying to set permission on field: " + field, e);
      }
    }
  }
}
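// Hedged usage sketch (assumption): the permission hack above is meant to run once, before any
// job is submitted from a Windows client, for example from a JUnit @BeforeClass hook.
// HadoopSetupUtils is a hypothetical holder class for hackHadoopStagingOnWin().
@BeforeClass
public static void relaxStagingPermissionsOnWindows() {
  // no-op on non-Windows hosts; on Windows it rewrites the shared permission constants in place
  HadoopSetupUtils.hackHadoopStagingOnWin();
}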
/**
 * Generate old-api mapred InputFormat splits.
 * @param jobConf JobConf required by the InputFormat class
 * @param inputSplitDir Directory in which to generate the split information
 *
 * @return InputSplitInfo containing the split files' information and the
 *         number of splits generated, used to determine the parallelism of
 *         the map stage.
 *
 * @throws IOException
 */
private static InputSplitInfoDisk writeOldSplits(JobConf jobConf,
    Path inputSplitDir) throws IOException {
  org.apache.hadoop.mapred.InputSplit[] splits =
      generateOldSplits(jobConf, false, true, 0);

  JobSplitWriter.createSplitFiles(inputSplitDir, jobConf,
      inputSplitDir.getFileSystem(jobConf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        TaskLocationHint.createTaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null));
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobConf.getCredentials());
}
private void writeConf(Configuration conf, Path jobFile) throws IOException {
  // Write job file to JobTracker's fs
  FSDataOutputStream out =
      FileSystem.create(jtFs, jobFile,
          new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION));
  try {
    conf.writeXml(out);
  } finally {
    out.close();
  }
}
/**
 * Create a default working folder for the job, under the job staging directory.
 *
 * @return the meta folder path
 * @throws Exception if the staging directory cannot be determined
 */
private Path createMetaFolderPath() throws Exception {
  Configuration configuration = getConf();
  Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(configuration), configuration);
  Path metaFolderPath = new Path(stagingDir, PREFIX + String.valueOf(rand.nextInt()));
  LOG.debug("Meta folder location: {}", metaFolderPath);
  configuration.set(S3MapReduceCpConstants.CONF_LABEL_META_FOLDER, metaFolderPath.toString());
  return metaFolderPath;
}
public static JobSplit.TaskSplitMetaInfo[] readSplitMetaInfo(
    JobID jobId, FileSystem fs, Configuration conf, Path jobSubmitDir)
    throws IOException {
  long maxMetaInfoSize = conf.getLong(MRJobConfig.SPLIT_METAINFO_MAXSIZE,
      MRJobConfig.DEFAULT_SPLIT_METAINFO_MAXSIZE);
  Path metaSplitFile = JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir);
  String jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDir).toString();
  FileStatus fStatus = fs.getFileStatus(metaSplitFile);
  if (maxMetaInfoSize > 0 && fStatus.getLen() > maxMetaInfoSize) {
    throw new IOException("Split metadata size exceeded " +
        maxMetaInfoSize + ". Aborting job " + jobId);
  }
  FSDataInputStream in = fs.open(metaSplitFile);
  byte[] header = new byte[JobSplit.META_SPLIT_FILE_HEADER.length];
  in.readFully(header);
  if (!Arrays.equals(JobSplit.META_SPLIT_FILE_HEADER, header)) {
    throw new IOException("Invalid header on split file");
  }
  int vers = WritableUtils.readVInt(in);
  if (vers != JobSplit.META_SPLIT_VERSION) {
    in.close();
    throw new IOException("Unsupported split version " + vers);
  }
  int numSplits = WritableUtils.readVInt(in); // TODO: check for insane values
  JobSplit.TaskSplitMetaInfo[] allSplitMetaInfo =
      new JobSplit.TaskSplitMetaInfo[numSplits];
  for (int i = 0; i < numSplits; i++) {
    JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
    splitMetaInfo.readFields(in);
    JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(
        jobSplitFile, splitMetaInfo.getStartOffset());
    allSplitMetaInfo[i] = new JobSplit.TaskSplitMetaInfo(splitIndex,
        splitMetaInfo.getLocations(), splitMetaInfo.getInputDataLength());
  }
  in.close();
  return allSplitMetaInfo;
}
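// Hedged usage sketch: reading the split metadata back, for example on the application-master
// side, to find out how many map tasks to create. This assumes the method above is
// SplitMetaInfoReader#readSplitMetaInfo and that jobId, fs, conf and jobSubmitDir are the same
// values used when the split files were written.
JobSplit.TaskSplitMetaInfo[] taskSplits =
    SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf, jobSubmitDir);
int numMapTasks = taskSplits.length; // one map task per recorded split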
public static <T extends InputSplit> void createSplitFiles(Path jobSubmitDir,
    Configuration conf, FileSystem fs, T[] splits)
    throws IOException, InterruptedException {
  FSDataOutputStream out = createFile(fs,
      JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
  SplitMetaInfo[] info = writeNewSplits(conf, splits, out);
  out.close();
  writeJobSplitMetaInfo(fs, JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir),
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion, info);
}
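// Hedged usage sketch: writing splits through JobSplitWriter and then locating the resulting
// files via JobSubmissionFiles. jobSubmitDir, conf, fs and splits are assumed to be prepared by
// the caller (for example as in uploadJobFiles above).
JobSplitWriter.createSplitFiles(jobSubmitDir, conf, fs, splits);
Path splitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDir);         // .../job.split
Path splitMetaFile = JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir); // .../job.splitmetainfo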