/**
 * Run the test
 *
 * @throws IOException on error
 */
public static void runTests() throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);

  JobConf job = new JobConf(config, NNBench.class);

  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);

  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);

  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);

  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);

  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
  final JobConf job = MapreduceTestingShim.getJobConf(mrCluster);
  job.setInputFormat(clazz);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);
  LOG.debug("submitting job.");
  final RunningJob run = JobClient.runJob(job);
  assertTrue("job failed!", run.isSuccessful());
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2,
      run.getCounters()
          .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
  assertEquals("Saw any instances of the filtered out row.", 0,
      run.getCounters()
          .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
  assertEquals("Saw the wrong number of instances of columnA.", 1,
      run.getCounters()
          .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
  assertEquals("Saw the wrong number of instances of columnB.", 1,
      run.getCounters()
          .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2,
      run.getCounters()
          .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0,
      run.getCounters()
          .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
}
private void runIOTest(
    Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass,
    Path outputDir) throws IOException {
  JobConf job = new JobConf(config, TestDFSIO.class);

  FileInputFormat.setInputPaths(job, getControlDir(config));
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(mapperClass);
  job.setReducerClass(AccumulatingReducer.class);

  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/" + jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapreduce.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));
  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
public void testEmptyJoin() throws Exception {
  JobConf job = new JobConf();
  Path base = cluster.getFileSystem().makeQualified(new Path("/empty"));
  Path[] src = { new Path(base, "i0"), new Path("i1"), new Path("i2") };
  job.set("mapreduce.join.expr", CompositeInputFormat.compose("outer",
      Fake_IF.class, src));
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));
  job.setMapperClass(IdentityMapper.class);
  job.setReducerClass(IdentityReducer.class);
  job.setOutputKeyClass(IncomparableKey.class);
  job.setOutputValueClass(NullWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
/**
 * When no input dir is specified, generate random data.
 */
protected static void confRandom(Job job) throws IOException {
  // from RandomWriter
  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomMapOutput.class);

  Configuration conf = job.getConfiguration();
  final ClusterStatus cluster = new JobClient(conf).getClusterStatus();
  int numMapsPerHost = conf.getInt(RandomTextWriter.MAPS_PER_HOST, 10);
  long numBytesToWritePerMap =
      conf.getLong(RandomTextWriter.BYTES_PER_MAP, 1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    throw new IOException(
        "Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0");
  }
  long totalBytesToWrite = conf.getLong(RandomTextWriter.TOTAL_BYTES,
      numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(RandomTextWriter.BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
}
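For illustration, a rough walk-through of the sizing logic above; the cluster figures are assumed purely as an example, not taken from the original source.

// Hypothetical sizing: with the defaults of 10 maps per host and 1 GB per map
// on a cluster reporting 4 task trackers, totalBytesToWrite defaults to
// 10 * 1 GB * 4 = 40 GB, giving numMaps = 40 GB / 1 GB = 40 map tasks.
// If the configured total were smaller than one map's quota (numMaps == 0),
// a single map would be used and BYTES_PER_MAP lowered to match the total.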
private void mrRun() throws Exception {
  FileSystem fs = FileSystem.get(getJobConf());
  Path inputDir = new Path("input");
  fs.mkdirs(inputDir);
  Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
  writer.write("hello");
  writer.close();

  Path outputDir = new Path("output", "output");

  JobConf jobConf = new JobConf(getJobConf());
  jobConf.setInt("mapred.map.tasks", 1);
  jobConf.setInt("mapred.map.max.attempts", 1);
  jobConf.setInt("mapred.reduce.max.attempts", 1);
  jobConf.set("mapred.input.dir", inputDir.toString());
  jobConf.set("mapred.output.dir", outputDir.toString());
  JobClient jobClient = new JobClient(jobConf);
  RunningJob runJob = jobClient.submitJob(jobConf);
  runJob.waitForCompletion();
  assertTrue(runJob.isComplete());
  assertTrue(runJob.isSuccessful());
}
/**
 * Increase the replication factor of _distcp_src_files to
 * sqrt(min(maxMapsOnCluster, numMaps)). This reduces the chance of distcp
 * failing because a replica of _distcp_src_files is not available for
 * reading for some maps.
 */
private static void setReplication(Configuration conf, JobConf jobConf,
    Path srcfilelist, int numMaps) throws IOException {
  int numMaxMaps = new JobClient(jobConf).getClusterStatus().getMaxMapTasks();
  short replication = (short) Math.ceil(
      Math.sqrt(Math.min(numMaxMaps, numMaps)));
  FileSystem fs = srcfilelist.getFileSystem(conf);
  FileStatus srcStatus = fs.getFileStatus(srcfilelist);

  if (srcStatus.getReplication() < replication) {
    if (!fs.setReplication(srcfilelist, replication)) {
      throw new IOException("Unable to increase the replication of file "
          + srcfilelist);
    }
  }
}
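As a sanity check on the formula above, a small worked example; all numbers are assumed for illustration.

// Hypothetical example: if the cluster reports 100 max map slots and the job
// will run 64 maps, the target replication is
// (short) Math.ceil(Math.sqrt(Math.min(100, 64))) = ceil(sqrt(64)) = 8.
// The replication of _distcp_src_files is only ever raised to this value,
// never lowered.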
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobClient client =
      new JobClient(new JobConf(jobCtxt.getConfiguration()));
  ClusterStatus stat = client.getClusterStatus(true);
  final long toGen =
      jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
  if (toGen < 0) {
    throw new IOException("Invalid/missing generation bytes: " + toGen);
  }
  final int nTrackers = stat.getTaskTrackers();
  final long bytesPerTracker = toGen / nTrackers;
  final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
  final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
  final Matcher m = trackerPattern.matcher("");
  for (String tracker : stat.getActiveTrackerNames()) {
    m.reset(tracker);
    if (!m.find()) {
      System.err.println("Skipping node: " + tracker);
      continue;
    }
    final String name = m.group(1);
    splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
  }
  return splits;
}
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobConf jobConf = new JobConf(jobCtxt.getConfiguration());
  final JobClient client = new JobClient(jobConf);
  ClusterStatus stat = client.getClusterStatus(true);
  int numTrackers = stat.getTaskTrackers();
  final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1);

  // Total size of distributed cache files to be generated
  final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1);

  // Get the path of the special file
  String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST);
  if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) {
    throw new RuntimeException("Invalid metadata: #files (" + fileCount
        + "), total_size (" + totalSize + "), filelisturi ("
        + distCacheFileList + ")");
  }

  Path sequenceFile = new Path(distCacheFileList);
  FileSystem fs = sequenceFile.getFileSystem(jobConf);
  FileStatus srcst = fs.getFileStatus(sequenceFile);
  // Consider the number of TTs * mapSlotsPerTracker as number of mappers.
  int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2);
  int numSplits = numTrackers * numMapSlotsPerTracker;

  List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
  LongWritable key = new LongWritable();
  BytesWritable value = new BytesWritable();

  // Average size of data to be generated by each map task
  final long targetSize = Math.max(totalSize / numSplits,
      DistributedCacheEmulator.AVG_BYTES_PER_MAP);
  long splitStartPosition = 0L;
  long splitEndPosition = 0L;
  long acc = 0L;
  long bytesRemaining = srcst.getLen();
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(fs, sequenceFile, jobConf);
    while (reader.next(key, value)) {
      // If adding this file would put this split past the target size,
      // cut the last split and put this file in the next split.
      if (acc + key.get() > targetSize && acc != 0) {
        long splitSize = splitEndPosition - splitStartPosition;
        splits.add(new FileSplit(
            sequenceFile, splitStartPosition, splitSize, (String[]) null));
        bytesRemaining -= splitSize;
        splitStartPosition = splitEndPosition;
        acc = 0L;
      }
      acc += key.get();
      splitEndPosition = reader.getPosition();
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
  if (bytesRemaining != 0) {
    splits.add(new FileSplit(
        sequenceFile, splitStartPosition, bytesRemaining, (String[]) null));
  }
  return splits;
}
/**
 * Runs a GridMix data-generation job.
 */
private static void runDataGenJob(Configuration conf, Path tempDir)
    throws IOException, ClassNotFoundException, InterruptedException {
  JobClient client = new JobClient(conf);

  // get the local job runner
  conf.setInt(MRJobConfig.NUM_MAPS, 1);

  Job job = Job.getInstance(conf);

  CompressionEmulationUtil.configure(job);
  job.setInputFormatClass(CustomInputFormat.class);

  // set the output path
  FileOutputFormat.setOutputPath(job, tempDir);

  // submit and wait for completion
  job.submit();
  int ret = job.waitForCompletion(true) ? 0 : 1;

  assertEquals("Job Failed", 0, ret);
}
/**
 * Submit/run a map/reduce job.
 *
 * @param job the job configuration to submit
 * @return true for success
 * @throws IOException on error
 */
public static boolean runJob(JobConf job) throws IOException {
  JobClient jc = new JobClient(job);
  boolean success = true;
  RunningJob running = null;
  try {
    running = jc.submitJob(job);
    JobID jobId = running.getID();
    System.out.println("Job " + jobId + " is submitted");
    while (!running.isComplete()) {
      System.out.println("Job " + jobId + " is still running.");
      try {
        Thread.sleep(60000);
      } catch (InterruptedException e) {
        // ignore and keep polling
      }
      running = jc.getJob(jobId);
    }
    success = running.isSuccessful();
  } finally {
    if (!success && (running != null)) {
      running.killJob();
    }
    jc.close();
  }
  return success;
}
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceEvaluation() throws Exception {
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  try {
    jobConf.setJobName("process row task");
    jobConf.setNumReduceTasks(1);
    TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
        ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
        jobConf);
    TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
        ClassificatorRowReduce.class, jobConf);
    RunningJob job = JobClient.runJob(jobConf);
    assertTrue(job.isSuccessful());
  } finally {
    if (jobConf != null) {
      FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
    }
  }
}
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceWithPartitionerEvaluation() throws IOException {
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  try {
    jobConf.setJobName("process row task");
    jobConf.setNumReduceTasks(2);
    TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
        ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
        jobConf);
    TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
        ClassificatorRowReduce.class, jobConf, HRegionPartitioner.class);
    RunningJob job = JobClient.runJob(jobConf);
    assertTrue(job.isSuccessful());
  } finally {
    if (jobConf != null) {
      FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
    }
  }
}
@Override
protected void runJob(String jobName, Configuration c, List<Scan> scans)
    throws IOException, InterruptedException, ClassNotFoundException {
  JobConf job = new JobConf(TEST_UTIL.getConfiguration());

  job.setJobName(jobName);
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);

  TableMapReduceUtil.initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans),
      Mapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class,
      job, true, restoreDir);

  TableMapReduceUtil.addDependencyJars(job);

  job.setReducerClass(Reducer.class);
  job.setNumReduceTasks(1); // one to get final "first" and "last" key
  FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));

  LOG.info("Started " + job.getJobName());

  RunningJob runningJob = JobClient.runJob(job);
  runningJob.waitForCompletion();
  assertTrue(runningJob.isSuccessful());
  LOG.info("After map/reduce completion - job " + jobName);
}
public ReadIopsCalculator(JobClient jobClient, DynamoDBClient dynamoDBClient,
    String tableName, int totalSegments, int localSegments) {
  this.jobConf = (JobConf) jobClient.getConf();
  this.jobClient = jobClient;
  this.dynamoDBClient = dynamoDBClient;
  this.tableName = tableName;
  this.totalSegments = totalSegments;
  this.localSegments = localSegments;

  this.throughputPercent = Double.parseDouble(jobConf.get(DynamoDBConstants
      .THROUGHPUT_READ_PERCENT, DynamoDBConstants.DEFAULT_THROUGHPUT_PERCENTAGE));

  log.info("Table name: " + tableName);
  log.info("Throughput percent: " + throughputPercent);
}
@Override
public void run(String[] args) throws Exception {
  Flags flags = new Flags();
  flags.addWithDefaultValue(
      "tag_subject_data",
      "/media/work/datasets(secret)/douban/raw/tag_subject.dat", "");
  flags.addWithDefaultValue(
      "subject_data",
      "/media/work/datasets(secret)/douban/raw/subject.dat", "");
  flags.add("output");
  flags.parseAndCheck(args);

  JobConf job = new JobConf(this.getClass());
  job.setJobName("convert-douban-raw-to-posts");
  MapReduceHelper.setAllOutputTypes(job, Text.class);
  MapReduceHelper.setMR(job, DoubanRawMapper.class, DoubanToPostReducer.class);
  job.setInputFormat(TextInputFormat.class);
  TextInputFormat.addInputPath(
      job, new Path(flags.getString("tag_subject_data")));
  TextInputFormat.addInputPath(
      job, new Path(flags.getString("subject_data")));
  job.setOutputFormat(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(
      job, new Path(flags.getString("output")));
  JobClient.runJob(job);
}
public void merge(Path output, Path[] dbs, boolean normalize, boolean filter)
    throws Exception {
  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("LinkDb merge: starting at " + sdf.format(start));

  JobConf job = createMergeJob(getConf(), output, normalize, filter);
  for (int i = 0; i < dbs.length; i++) {
    FileInputFormat.addInputPath(job, new Path(dbs[i], LinkDb.CURRENT_NAME));
  }
  JobClient.runJob(job);
  FileSystem fs = FileSystem.get(getConf());
  fs.mkdirs(output);
  fs.rename(FileOutputFormat.getOutputPath(job),
      new Path(output, LinkDb.CURRENT_NAME));

  long end = System.currentTimeMillis();
  LOG.info("LinkDb merge: finished at " + sdf.format(end)
      + ", elapsed: " + TimingUtil.elapsedTime(start, end));
}
public static void main(String[] args) throws Exception {
  JobConf conf = new JobConf(WordCountOldAPI.class);
  conf.setJobName("old wordcount");

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);

  conf.setMapperClass(Map.class);
  conf.setCombinerClass(Reduce.class);
  conf.setReducerClass(Reduce.class);

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  FileInputFormat.setInputPaths(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));

  JobClient.runJob(conf);
}
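The driver above only wires up the job; the Map and Reduce classes it references are not shown in this snippet. A minimal sketch of what they could look like with the old org.apache.hadoop.mapred API follows; the field names and bodies are assumptions for illustration, not the original classes, and they rely on the usual org.apache.hadoop.io, org.apache.hadoop.mapred, java.util, and java.io imports of the surrounding file.

// Hypothetical inner classes of WordCountOldAPI, sketched for illustration.
public static class Map extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, IntWritable> {
  private static final IntWritable ONE = new IntWritable(1);
  private final Text word = new Text();

  public void map(LongWritable key, Text value,
      OutputCollector<Text, IntWritable> output, Reporter reporter)
      throws IOException {
    // Tokenize each input line and emit (word, 1) pairs.
    StringTokenizer itr = new StringTokenizer(value.toString());
    while (itr.hasMoreTokens()) {
      word.set(itr.nextToken());
      output.collect(word, ONE);
    }
  }
}

public static class Reduce extends MapReduceBase
    implements Reducer<Text, IntWritable, Text, IntWritable> {
  public void reduce(Text key, Iterator<IntWritable> values,
      OutputCollector<Text, IntWritable> output, Reporter reporter)
      throws IOException {
    // Sum the counts emitted for each word.
    int sum = 0;
    while (values.hasNext()) {
      sum += values.next().get();
    }
    output.collect(key, new IntWritable(sum));
  }
}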
public int run(String[] argv) throws IOException {
  if (argv.length < 2) {
    System.out.println("ExternalMapReduce <input> <output>");
    return -1;
  }
  Path outDir = new Path(argv[1]);
  Path input = new Path(argv[0]);
  JobConf testConf = new JobConf(getConf(), ExternalMapReduce.class);

  // try to load a class from libjar
  try {
    testConf.getClassByName("testjar.ClassWordCount");
  } catch (ClassNotFoundException e) {
    System.out.println("Could not find class from libjar");
    return -1;
  }

  testConf.setJobName("external job");
  FileInputFormat.setInputPaths(testConf, input);
  FileOutputFormat.setOutputPath(testConf, outDir);
  testConf.setMapperClass(MapClass.class);
  testConf.setReducerClass(Reduce.class);
  testConf.setNumReduceTasks(1);
  JobClient.runJob(testConf);
  return 0;
}
public void configure(String keySpec, int expect) throws Exception {
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = getFileSystem();
  fs.delete(testdir, true);
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);

  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(1);

  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
  conf.setKeyFieldComparatorOptions(keySpec);
  conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
  conf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
  conf.setMapperClass(InverseMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }

  // set up input data in 2 files
  Path inFile = new Path(inDir, "part0");
  FileOutputStream fos = new FileOutputStream(inFile.toString());
  fos.write((line1 + "\n").getBytes());
  fos.write((line2 + "\n").getBytes());
  fos.close();

  JobClient jc = new JobClient(conf);
  RunningJob r_job = jc.submitJob(conf);
  while (!r_job.isComplete()) {
    Thread.sleep(1000);
  }

  if (!r_job.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(outDir,
          new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    // make sure we get what we expect as the first line, and also
    // that we have two lines
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}
private void encryptedShuffleWithCerts(boolean useClientCerts) throws Exception {
  try {
    Configuration conf = new Configuration();
    String keystoresDir = new File(BASEDIR).getAbsolutePath();
    String sslConfsDir =
        KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
    KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf,
        useClientCerts);
    conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
    startCluster(conf);
    FileSystem fs = FileSystem.get(getJobConf());
    Path inputDir = new Path("input");
    fs.mkdirs(inputDir);
    Writer writer =
        new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    writer.write("hello");
    writer.close();

    Path outputDir = new Path("output", "output");

    JobConf jobConf = new JobConf(getJobConf());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);
    jobConf.set("mapred.input.dir", inputDir.toString());
    jobConf.set("mapred.output.dir", outputDir.toString());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    runJob.waitForCompletion();
    Assert.assertTrue(runJob.isComplete());
    Assert.assertTrue(runJob.isSuccessful());
  } finally {
    stopCluster();
  }
}
static boolean runJob(JobConf conf, Path inDir, Path outDir, int numMaps,
    int numReds) throws IOException, InterruptedException {

  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (!fs.exists(inDir)) {
    fs.mkdirs(inDir);
  }
  String input = "The quick brown fox\n"
      + "has many silly\n"
      + "red fox sox\n";
  for (int i = 0; i < numMaps; ++i) {
    DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
    file.writeBytes(input);
    file.close();
  }

  DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf, fs);
  conf.setOutputCommitter(CustomOutputCommitter.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  JobClient jobClient = new JobClient(conf);
  RunningJob job = jobClient.submitJob(conf);
  return jobClient.monitorAndPrintJob(conf, job);
}
private void closeClient(JobClient client) {
  try {
    if (client != null) {
      client.close();
    }
  } catch (Exception ignored) {
    // nothing we can do
    ignored.printStackTrace();
  }
}
/**
 * @return the job client of this job
 */
public JobClient getJobClient() {
  try {
    return new JobClient(super.getJob().getConfiguration());
  } catch (IOException ioe) {
    return null;
  }
}
/**
 * Driver to copy srcPath to destPath depending on required protocol.
 * @param conf configuration
 * @param args arguments
 */
static void copy(final Configuration conf, final Arguments args)
    throws IOException {
  LOG.info("srcPaths=" + args.srcs);
  if (!args.dryrun || args.flags.contains(Options.UPDATE)) {
    LOG.info("destPath=" + args.dst);
  }

  JobConf job = createJobConf(conf);

  checkSrcPath(job, args.srcs);
  if (args.preservedAttributes != null) {
    job.set(PRESERVE_STATUS_LABEL, args.preservedAttributes);
  }
  if (args.mapredSslConf != null) {
    job.set("dfs.https.client.keystore.resource", args.mapredSslConf);
  }

  // Initialize the mapper
  try {
    if (setup(conf, job, args)) {
      JobClient.runJob(job);
    }
    if (!args.dryrun) {
      finalize(conf, job, args.dst, args.preservedAttributes);
    }
  } finally {
    if (!args.dryrun) {
      // delete tmp
      fullyDelete(job.get(TMP_DIR_LABEL), job);
    }
    // delete jobDirectory
    fullyDelete(job.get(JOB_DIR_LABEL), job);
  }
}
/**
 * Calculate how many maps to run.
 * The number of maps is at most the cumulative size of the copy divided by
 * the bytes per map (distcp.bytes.per.map, default BYTES_PER_MAP, or -m on
 * the command line), capped at distcp.max.map.tasks (default
 * MAX_MAPS_PER_NODE * nodes in the cluster), and always at least 1.
 * @param totalBytes Count of total bytes for job
 * @param job The job to configure
 * @return Count of maps to run.
 */
private static int setMapCount(long totalBytes, JobConf job)
    throws IOException {
  int numMaps =
      (int) (totalBytes / job.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP));
  numMaps = Math.min(numMaps,
      job.getInt(MAX_MAPS_LABEL, MAX_MAPS_PER_NODE
          * new JobClient(job).getClusterStatus().getTaskTrackers()));
  numMaps = Math.max(numMaps, 1);
  job.setNumMapTasks(numMaps);
  return numMaps;
}
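A short worked example of the bounds computed above; the 256 MB per-map and 20 maps-per-node figures are the usual legacy DistCp defaults and are assumed here for illustration.

// Hypothetical sizing: copying 10 GB with 256 MB per map suggests
// 10 * 1024 / 256 = 40 maps; on a 2-tracker cluster the cap is 20 * 2 = 40,
// so setMapCount would configure min(40, 40) = 40 map tasks. In the
// degenerate case of a very small copy it still configures at least 1 map.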
public Statistics(final Configuration conf, int pollingInterval,
    CountDownLatch startFlag) throws IOException, InterruptedException {
  UserGroupInformation ugi = UserGroupInformation.getLoginUser();
  this.cluster = ugi.doAs(new PrivilegedExceptionAction<JobClient>() {
    public JobClient run() throws IOException {
      return new JobClient(new JobConf(conf));
    }
  });

  this.jtPollingInterval = pollingInterval;
  maxJobCompletedInInterval = conf.getInt(
      MAX_JOBS_COMPLETED_IN_POLL_INTERVAL_KEY, 1);
  this.startFlag = startFlag;
}
/**
 * Test {@link ClusterSummarizer}.
 */
@Test (timeout=20000)
public void testClusterSummarizer() throws IOException {
  ClusterSummarizer cs = new ClusterSummarizer();
  Configuration conf = new Configuration();

  String jt = "test-jt:1234";
  String nn = "test-nn:5678";
  conf.set(JTConfig.JT_IPC_ADDRESS, jt);
  conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, nn);

  cs.start(conf);
  assertEquals("JT name mismatch", jt, cs.getJobTrackerInfo());
  assertEquals("NN name mismatch", nn, cs.getNamenodeInfo());

  ClusterStats cStats = ClusterStats.getClusterStats();
  conf.set(JTConfig.JT_IPC_ADDRESS, "local");
  conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "local");
  JobClient jc = new JobClient(conf);
  cStats.setClusterMetric(jc.getClusterStatus());

  cs.update(cStats);

  // test
  assertEquals("Cluster summary test failed!", 1, cs.getMaxMapTasks());
  assertEquals("Cluster summary test failed!", 1, cs.getMaxReduceTasks());
  assertEquals("Cluster summary test failed!", 1, cs.getNumActiveTrackers());
  assertEquals("Cluster summary test failed!", 0,
      cs.getNumBlacklistedTrackers());
}
public int run(final String[] args) throws Exception {
  // Make sure there are at least 3 parameters
  if (args.length < 3) {
    System.err.println("ERROR: Wrong number of parameters: " + args.length);
    return printUsage();
  }
  JobClient.runJob(createSubmittableJob(args));
  return 0;
}