@Before @SuppressWarnings("unchecked") // mocked generics public void setup() { LOG.info(">>>> " + name.getMethodName()); job = new JobConf(); job.setBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, false); jobWithRetry = new JobConf(); jobWithRetry.setBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, true); id = TaskAttemptID.forName("attempt_0_1_r_1_1"); ss = mock(ShuffleSchedulerImpl.class); mm = mock(MergeManagerImpl.class); r = mock(Reporter.class); metrics = mock(ShuffleClientMetrics.class); except = mock(ExceptionReporter.class); key = JobTokenSecretManager.createSecretKey(new byte[]{0,0,0,0}); connection = mock(HttpURLConnection.class); allErrs = mock(Counters.Counter.class); when(r.getCounter(anyString(), anyString())).thenReturn(allErrs); ArrayList<TaskAttemptID> maps = new ArrayList<TaskAttemptID>(1); maps.add(map1ID); maps.add(map2ID); when(ss.getMapsForHost(host)).thenReturn(maps); }
void testInputFormat(Class<? extends InputFormat> clazz) throws IOException { final JobConf job = MapreduceTestingShim.getJobConf(mrCluster); job.setInputFormat(clazz); job.setOutputFormat(NullOutputFormat.class); job.setMapperClass(ExampleVerifier.class); job.setNumReduceTasks(0); LOG.debug("submitting job."); final RunningJob run = JobClient.runJob(job); assertTrue("job failed!", run.isSuccessful()); assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters() .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter()); assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters() .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter()); assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters() .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter()); assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters() .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter()); assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters() .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter()); assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters() .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter()); }
@Test @SuppressWarnings({ "deprecation" }) public void shouldCreateAndRunSubmittableJob() throws Exception { RowCounter rCounter = new RowCounter(); rCounter.setConf(HBaseConfiguration.create()); String[] args = new String[] { "\temp", "tableA", "column1", "column2", "column3" }; JobConf jobConfig = rCounter.createSubmittableJob(args); assertNotNull(jobConfig); assertEquals(0, jobConfig.getNumReduceTasks()); assertEquals("rowcounter", jobConfig.getJobName()); assertEquals(jobConfig.getMapOutputValueClass(), Result.class); assertEquals(jobConfig.getMapperClass(), RowCounterMapper.class); assertEquals(jobConfig.get(TableInputFormat.COLUMN_LIST), Joiner.on(' ') .join("column1", "column2", "column3")); assertEquals(jobConfig.getMapOutputKeyClass(), ImmutableBytesWritable.class); }
/** Mapper configuration. * Extracts source and destination file system, as well as * top-level paths on source and destination directories. * Gets the named file systems, to be used later in map. */ public void configure(JobConf job) { destPath = new Path(job.get(DST_DIR_LABEL, "/")); try { destFileSys = destPath.getFileSystem(job); } catch (IOException ex) { throw new RuntimeException("Unable to get the named file system.", ex); } sizeBuf = job.getInt("copy.buf.size", 128 * 1024); buffer = new byte[sizeBuf]; ignoreReadFailures = job.getBoolean(Options.IGNORE_READ_FAILURES.propertyname, false); preserve_status = job.getBoolean(Options.PRESERVE_STATUS.propertyname, false); if (preserve_status) { preseved = FileAttribute.parse(job.get(PRESERVE_STATUS_LABEL)); } update = job.getBoolean(Options.UPDATE.propertyname, false); overwrite = !update && job.getBoolean(Options.OVERWRITE.propertyname, false); skipCRCCheck = job.getBoolean(Options.SKIPCRC.propertyname, false); this.job = job; }
/** * Check that the given number of reduce tasks for the given job configuration * does not exceed the number of regions for the given table. */ @Test public void shouldNumberOfReduceTaskNotExceedNumberOfRegionsForGivenTable() throws IOException { Assert.assertNotNull(presidentsTable); Configuration cfg = UTIL.getConfiguration(); JobConf jobConf = new JobConf(cfg); TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, jobConf); TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf); TableMapReduceUtil.setScannerCaching(jobConf, 100); assertEquals(1, jobConf.getNumReduceTasks()); assertEquals(100, jobConf.getInt("hbase.client.scanner.caching", 0)); jobConf.setNumReduceTasks(10); TableMapReduceUtil.setNumMapTasks(TABLE_NAME, jobConf); TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf); assertEquals(1, jobConf.getNumReduceTasks()); }
/** * Writes the user's password to a tmp file with 0600 permissions. * @return the filename used. */ public static String writePasswordFile(Configuration conf) throws IOException { // Create the temp file to hold the user's password. String tmpDir = conf.get( ConfigurationConstants.PROP_JOB_LOCAL_DIRECTORY, "/tmp/"); File tempFile = File.createTempFile("mysql-cnf", ".cnf", new File(tmpDir)); // Make the password file readable only by its owner. DirectImportUtils.setFilePermissions(tempFile, "0600"); // If we're here, the password file is believed to be ours alone. The // inability to set chmod 0600 inside Java is troublesome. We have to // trust that the external 'chmod' program in the path does the right // thing, and returns the correct exit status. But given our inability to // re-read the permissions associated with a file, we'll have to make do // with this. String password = DBConfiguration.getPassword((JobConf) conf); BufferedWriter w = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(tempFile))); w.write("[client]\n"); w.write("password=" + password + "\n"); w.close(); return tempFile.toString(); }
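A hedged usage sketch, not taken from the surrounding code: one way a mysqldump-style caller might consume the file returned above. The MySQLUtils class name, the --defaults-file flag, and the host value are assumptions for illustration.

private List<String> buildDumpArgs(Configuration conf) throws IOException {
  // The helper above returns the path of a 0600-permission .cnf file that
  // already contains the "[client]" and "password=..." lines.
  String passwordFile = MySQLUtils.writePasswordFile(conf);
  List<String> args = new ArrayList<String>();
  args.add("mysqldump");
  args.add("--defaults-file=" + passwordFile); // keeps the password off argv
  args.add("--host=localhost");                // placeholder host
  return args;
}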
/** * Use this before submitting a TableReduce job. It will * appropriately set up the JobConf. * * @param table The output table. * @param reducer The reducer class to use. * @param job The current job configuration to adjust. * @param partitioner Partitioner to use. Pass <code>null</code> to use * default partitioner. * @param addDependencyJars upload HBase jars and jars for any of the configured * job classes via the distributed cache (tmpjars). * @throws IOException When determining the region count fails. */ public static void initTableReduceJob(String table, Class<? extends TableReduce> reducer, JobConf job, Class partitioner, boolean addDependencyJars) throws IOException { job.setOutputFormat(TableOutputFormat.class); job.setReducerClass(reducer); job.set(TableOutputFormat.OUTPUT_TABLE, table); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(Put.class); job.setStrings("io.serializations", job.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName()); if (partitioner == HRegionPartitioner.class) { job.setPartitionerClass(HRegionPartitioner.class); int regions = MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table)); if (job.getNumReduceTasks() > regions) { job.setNumReduceTasks(regions); } } else if (partitioner != null) { job.setPartitionerClass(partitioner); } if (addDependencyJars) { addDependencyJars(job); } initCredentials(job); }
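A minimal driver sketch showing how the helper above might be invoked; the reducer, table name, and driver class names are hypothetical, and the imports follow the old org.apache.hadoop.hbase.mapred / org.apache.hadoop.mapred APIs.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.HRegionPartitioner;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapred.TableReduce;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class ExampleTableReduceDriver {

  // Pass-through reducer used only to make the sketch self-contained.
  static class MyTableReducer extends MapReduceBase
      implements TableReduce<ImmutableBytesWritable, Put> {
    public void reduce(ImmutableBytesWritable key, Iterator<Put> values,
        OutputCollector<ImmutableBytesWritable, Put> output, Reporter reporter)
        throws IOException {
      while (values.hasNext()) {
        output.collect(key, values.next());
      }
    }
  }

  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(HBaseConfiguration.create(), ExampleTableReduceDriver.class);
    job.setJobName("example-table-reduce");
    // Sets the output format and table, key/value classes, serializations,
    // and caps the reduce count at the region count for HRegionPartitioner.
    TableMapReduceUtil.initTableReduceJob("outputTable", MyTableReducer.class, job,
        HRegionPartitioner.class, true);
    JobClient.runJob(job);
  }
}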
/** * Verify SplitSampler contract in mapred.lib.InputSampler, which is added * back for binary compatibility of M/R 1.x */ @Test (timeout = 30000) @SuppressWarnings("unchecked") // IntWritable comparator not typesafe public void testMapredSplitSampler() throws Exception { final int TOT_SPLITS = 15; final int NUM_SPLITS = 5; final int STEP_SAMPLE = 5; final int NUM_SAMPLES = NUM_SPLITS * STEP_SAMPLE; org.apache.hadoop.mapred.lib.InputSampler.Sampler<IntWritable,NullWritable> sampler = new org.apache.hadoop.mapred.lib.InputSampler.SplitSampler <IntWritable,NullWritable>(NUM_SAMPLES, NUM_SPLITS); int inits[] = new int[TOT_SPLITS]; for (int i = 0; i < TOT_SPLITS; ++i) { inits[i] = i * STEP_SAMPLE; } Object[] samples = sampler.getSample( new TestMapredInputSamplerIF(100000, TOT_SPLITS, inits), new JobConf()); assertEquals(NUM_SAMPLES, samples.length); Arrays.sort(samples, new IntWritable.Comparator()); for (int i = 0; i < NUM_SAMPLES; ++i) { // mapred.lib.InputSampler.SplitSampler has a sampling step assertEquals(i % STEP_SAMPLE + TOT_SPLITS * (i / STEP_SAMPLE), ((IntWritable)samples[i]).get()); } }
/** * Submit/run a map/reduce job. * * @param job the job configuration to submit * @return true for success * @throws IOException */ public static boolean runJob(JobConf job) throws IOException { JobClient jc = new JobClient(job); boolean success = true; RunningJob running = null; try { running = jc.submitJob(job); JobID jobId = running.getID(); System.out.println("Job " + jobId + " is submitted"); while (!running.isComplete()) { System.out.println("Job " + jobId + " is still running."); try { Thread.sleep(60000); } catch (InterruptedException e) { // ignore and poll again } running = jc.getJob(jobId); } success = running.isSuccessful(); } finally { if (!success && (running != null)) { running.killJob(); } jc.close(); } return success; }
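A hedged sketch of driving the polling helper above from a main method; JobRunner is a stand-in name for whichever class hosts runJob(JobConf), and the paths and formats are placeholders.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class RunJobExample {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(RunJobExample.class);
    job.setJobName("runjob-example");
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path("/tmp/runjob/in"));   // placeholder
    FileOutputFormat.setOutputPath(job, new Path("/tmp/runjob/out")); // placeholder
    // The helper above polls once a minute and kills the job if it did not succeed.
    boolean success = JobRunner.runJob(job);
    System.exit(success ? 0 : 1);
  }
}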
public LocalFetcher(JobConf job, TaskAttemptID reduceId, ShuffleSchedulerImpl<K, V> scheduler, MergeManager<K,V> merger, Reporter reporter, ShuffleClientMetrics metrics, ExceptionReporter exceptionReporter, SecretKey shuffleKey, Map<TaskAttemptID, MapOutputFile> localMapFiles) { super(job, reduceId, scheduler, merger, reporter, metrics, exceptionReporter, shuffleKey); this.job = job; this.localMapFiles = localMapFiles; setName("localfetcher#" + id); setDaemon(true); }
public StreamXmlRecordReader(FSDataInputStream in, FileSplit split, Reporter reporter, JobConf job, FileSystem fs) throws IOException { super(in, split, reporter, job, fs); beginMark_ = checkJobGet(CONF_NS + "begin"); endMark_ = checkJobGet(CONF_NS + "end"); maxRecSize_ = job_.getInt(CONF_NS + "maxrec", 50 * 1000); lookAhead_ = job_.getInt(CONF_NS + "lookahead", 2 * maxRecSize_); synched_ = false; slowMatch_ = job_.getBoolean(CONF_NS + "slowmatch", false); if (slowMatch_) { beginPat_ = makePatternCDataOrMark(beginMark_); endPat_ = makePatternCDataOrMark(endMark_); } init(); }
@Override public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException { final JobConf jobConf = new JobConf(jobCtxt.getConfiguration()); final JobClient client = new JobClient(jobConf); ClusterStatus stat = client.getClusterStatus(true); int numTrackers = stat.getTaskTrackers(); final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1); // Total size of distributed cache files to be generated final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1); // Get the path of the special file String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST); if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) { throw new RuntimeException("Invalid metadata: #files (" + fileCount + "), total_size (" + totalSize + "), filelisturi (" + distCacheFileList + ")"); } Path sequenceFile = new Path(distCacheFileList); FileSystem fs = sequenceFile.getFileSystem(jobConf); FileStatus srcst = fs.getFileStatus(sequenceFile); // Consider the number of TTs * mapSlotsPerTracker as number of mappers. int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2); int numSplits = numTrackers * numMapSlotsPerTracker; List<InputSplit> splits = new ArrayList<InputSplit>(numSplits); LongWritable key = new LongWritable(); BytesWritable value = new BytesWritable(); // Average size of data to be generated by each map task final long targetSize = Math.max(totalSize / numSplits, DistributedCacheEmulator.AVG_BYTES_PER_MAP); long splitStartPosition = 0L; long splitEndPosition = 0L; long acc = 0L; long bytesRemaining = srcst.getLen(); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, sequenceFile, jobConf); while (reader.next(key, value)) { // If adding this file would put this split past the target size, // cut the last split and put this file in the next split. if (acc + key.get() > targetSize && acc != 0) { long splitSize = splitEndPosition - splitStartPosition; splits.add(new FileSplit( sequenceFile, splitStartPosition, splitSize, (String[])null)); bytesRemaining -= splitSize; splitStartPosition = splitEndPosition; acc = 0L; } acc += key.get(); splitEndPosition = reader.getPosition(); } } finally { if (reader != null) { reader.close(); } } if (bytesRemaining != 0) { splits.add(new FileSplit( sequenceFile, splitStartPosition, bytesRemaining, (String[])null)); } return splits; }
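As a worked illustration of the greedy packing above, with assumed numbers: 5 task trackers with 2 map slots each give numSplits = 10, so a 10 GB total distributed-cache size yields a per-split target of roughly 1 GB (assuming that exceeds DistributedCacheEmulator.AVG_BYTES_PER_MAP). With 400 MB files, the third file would push the accumulator past the target, so the split is cut after two files (about 800 MB) and the accumulator restarts, giving roughly two files per split.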
@Override public void configure(JobConf job) { try { HTable exampleTable = new HTable(HBaseConfiguration.create(job), Bytes.toBytes("exampleDeprecatedTable")); // mandatory setHTable(exampleTable); byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }; // mandatory setInputColumns(inputColumns); Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*")); // optional setRowFilter(exampleFilter); } catch (IOException exception) { throw new RuntimeException("Failed to configure for job.", exception); } }
public void setConf(Configuration conf) { if (conf instanceof JobConf) { this.conf = (JobConf) conf; } else { this.conf = new JobConf(conf); } //Initialize the specification for *comparison* String sortColumns = this.conf.get(SORT_COLUMNS, null); if (sortColumns != null) { sortSpec = sortColumns.split(","); } //Column-separator columnSeparator = this.conf.get(COLUMN_SEPARATOR, ""); }
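A hedged illustration of the configuration contract implied by setConf above. SORT_COLUMNS and COLUMN_SEPARATOR refer to the same constants the method reads (qualified by the comparator class in real code); the column indices and the tab separator are placeholder values.

JobConf job = new JobConf();
job.set(SORT_COLUMNS, "2,0,1");   // compare column 2 first, then 0, then 1
job.set(COLUMN_SEPARATOR, "\t");  // records are tab-delimited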
@Override public void obtainAuthTokenForJob(JobConf job) throws IOException, InterruptedException { try { Class<?> c = Class.forName( "org.apache.hadoop.hbase.security.token.TokenUtil"); Methods.call(c, null, "obtainTokenForJob", new Class[]{JobConf.class, UserGroupInformation.class}, new Object[]{job, ugi}); } catch (ClassNotFoundException cnfe) { throw new RuntimeException("Failure loading TokenUtil class, " +"is secure RPC available?", cnfe); } catch (IOException ioe) { throw ioe; } catch (InterruptedException ie) { throw ie; } catch (RuntimeException re) { throw re; } catch (Exception e) { throw new UndeclaredThrowableException(e, "Unexpected error calling TokenUtil.obtainTokenForJob()"); } }
public FileSplitParquetRecordReader( final OperatorContext oContext, final ParquetReaderFactory readerFactory, final List<SchemaPath> columnsToRead, final List<SchemaPath> groupScanColumns, final List<FilterCondition> conditions, final FileSplit fileSplit, final ParquetMetadata footer, final JobConf jobConf, final boolean vectorize, final boolean enableDetailedTracing ) { this.oContext = oContext; this.columnsToRead = columnsToRead; this.groupScanColumns = groupScanColumns; this.conditions = conditions; this.fileSplit = fileSplit; this.footer = footer; this.jobConf = jobConf; this.readerFactory = readerFactory; this.vectorize = vectorize; this.enableDetailedTracing = enableDetailedTracing; }
@Test (timeout=30000) public void testCompareGridmixJob() throws Exception { Configuration conf = new Configuration(); Path outRoot = new Path("target"); JobStory jobDesc = mock(JobStory.class); when(jobDesc.getName()).thenReturn("JobName"); when(jobDesc.getJobConf()).thenReturn(new JobConf(conf)); UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); GridmixJob j1 = new LoadJob(conf, 1000L, jobDesc, outRoot, ugi, 0); GridmixJob j2 = new LoadJob(conf, 1000L, jobDesc, outRoot, ugi, 0); GridmixJob j3 = new LoadJob(conf, 1000L, jobDesc, outRoot, ugi, 1); GridmixJob j4 = new LoadJob(conf, 1000L, jobDesc, outRoot, ugi, 1); assertTrue(j1.equals(j2)); assertEquals(0, j1.compareTo(j2)); // Only one parameter matters assertFalse(j1.equals(j3)); // compare id and submissionMillis assertEquals(-1, j1.compareTo(j3)); assertEquals(-1, j1.compareTo(j4)); }
@Test public void testInitTableSnapshotMapperJobConfig() throws Exception { setupCluster(); TableName tableName = TableName.valueOf("testInitTableSnapshotMapperJobConfig"); String snapshotName = "foo"; try { createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1); JobConf job = new JobConf(UTIL.getConfiguration()); Path tmpTableDir = UTIL.getRandomDir(); TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class, NullWritable.class, job, false, tmpTableDir); // TODO: would be better to examine directly the cache instance that results from this // config. Currently this is not possible because BlockCache initialization is static. Assert.assertEquals( "Snapshot job should be configured for default LruBlockCache.", HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT, job.getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, -1), 0.01); Assert.assertEquals( "Snapshot job should not use BucketCache.", 0, job.getFloat("hbase.bucketcache.size", -1), 0.01); } finally { UTIL.getHBaseAdmin().deleteSnapshot(snapshotName); UTIL.deleteTable(tableName); tearDownCluster(); } }
@Test public void testJobSubmission() throws Exception { JobConf conf = new JobConf(); Job job = new Job(conf); job.setInputFormatClass(TestInputFormat.class); job.setMapperClass(TestMapper.class); job.setOutputFormatClass(TestOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); job.waitForCompletion(true); assertTrue(job.isSuccessful()); }
private static ValueAggregatorDescriptor getValueAggregatorDescriptor( String spec, JobConf job) { if (spec == null) return null; String[] segments = spec.split(",", -1); String type = segments[0]; if (type.compareToIgnoreCase("UserDefined") == 0) { String className = segments[1]; return new UserDefinedValueAggregatorDescriptor(className, job); } return null; }
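A hedged note on the spec format the parser above handles, assuming same-class access since the helper is private; the descriptor class name is hypothetical.

// "UserDefined,<class>" is the only spec shape this helper resolves; anything
// else (or a null spec) returns null.
JobConf job = new JobConf();
ValueAggregatorDescriptor descriptor =
    getValueAggregatorDescriptor("UserDefined,com.example.MyAggregatorDescriptor", job);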
private void encryptedShuffleWithCerts(boolean useClientCerts) throws Exception { try { Configuration conf = new Configuration(); String keystoresDir = new File(BASEDIR).getAbsolutePath(); String sslConfsDir = KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class); KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf, useClientCerts); conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true); startCluster(conf); FileSystem fs = FileSystem.get(getJobConf()); Path inputDir = new Path("input"); fs.mkdirs(inputDir); Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt"))); writer.write("hello"); writer.close(); Path outputDir = new Path("output", "output"); JobConf jobConf = new JobConf(getJobConf()); jobConf.setInt("mapred.map.tasks", 1); jobConf.setInt("mapred.map.max.attempts", 1); jobConf.setInt("mapred.reduce.max.attempts", 1); jobConf.set("mapred.input.dir", inputDir.toString()); jobConf.set("mapred.output.dir", outputDir.toString()); JobClient jobClient = new JobClient(jobConf); RunningJob runJob = jobClient.submitJob(jobConf); runJob.waitForCompletion(); Assert.assertTrue(runJob.isComplete()); Assert.assertTrue(runJob.isSuccessful()); } finally { stopCluster(); } }
@Override public String getQueueName() { QueueName queue = job.getQueue(); return (queue == null || queue.getValue() == null) ? JobConf.DEFAULT_QUEUE_NAME : queue.getValue(); }
public LazyRecordWriter(JobConf job, OutputFormat of, String name, Progressable progress) throws IOException { this.of = of; this.job = job; this.name = name; this.progress = progress; }
@Before public void setUp() { conf = new JobConf(); mockFTPClient = mock(FTPClient.class); when(mockFTPClient.getReplyString()).thenReturn(""); MainframeFTPClientUtils.setMockFTPClient(mockFTPClient); }
public PGBulkloadManagerManualTest() { JobConf conf = new JobConf(getConf()); DBConfiguration.configureDB(conf, "org.postgresql.Driver", getConnectString(), getUserName(), (String) null, (Integer) null); dbConf = new DBConfiguration(conf); }
public void configure(JobConf job) { String userTypeName = job.get(USER_TYPE_NAME_KEY); if (null == userTypeName) { throw new RuntimeException("Unconfigured parameter: " + USER_TYPE_NAME_KEY); } setCol = job.get(SET_COL_KEY); setVal = job.get(SET_VAL_KEY); LOG.info("User type name set to " + userTypeName); LOG.info("Will try to set col " + setCol + " to " + setVal); this.userRecord = null; try { Configuration conf = new Configuration(); Class userClass = Class.forName(userTypeName, true, Thread.currentThread().getContextClassLoader()); this.userRecord = (SqoopRecord) ReflectionUtils.newInstance(userClass, conf); } catch (ClassNotFoundException cnfe) { // handled by the next block. LOG.error("ClassNotFound exception: " + cnfe.toString()); } catch (Exception e) { LOG.error("Got an exception reflecting user class: " + e.toString()); } if (null == this.userRecord) { LOG.error("Could not instantiate user record of type " + userTypeName); throw new RuntimeException("Could not instantiate user record of type " + userTypeName); } }
String getPipeCommand(JobConf job) { String str = job.get("stream.reduce.streamprocessor"); if (str == null) { return str; } try { return URLDecoder.decode(str, "UTF-8"); } catch (UnsupportedEncodingException e) { System.err.println("stream.reduce.streamprocessor in jobconf not found"); return null; } }
private void writePasswordToLocalFile(String localPasswordFile, byte[] password, JobConf conf) throws IOException { FileSystem localFs = FileSystem.getLocal(conf); Path localPath = new Path(localPasswordFile); FSDataOutputStream out = FileSystem.create(localFs, localPath, new FsPermission("400")); out.write(password); out.close(); }
String getPipeCommand(JobConf job) { String str = job.get("stream.map.streamprocessor"); if (str == null) { return str; } try { return URLDecoder.decode(str, "UTF-8"); } catch (UnsupportedEncodingException e) { System.err.println("stream.map.streamprocessor in jobconf not found"); return null; } }
/** * Parse a list of comma-separated nodes. */ public void parse(List<Token> args, JobConf job) throws IOException { ListIterator<Token> i = args.listIterator(); while (i.hasNext()) { Token t = i.next(); t.getNode().setID(i.previousIndex() >> 1); kids.add(t.getNode()); if (i.hasNext() && !TType.COMMA.equals(i.next().getType())) { throw new IOException("Expected ','"); } } }
public void configure(JobConf jconf) { conf = jconf; try { // read the cached files (unzipped, unjarred and text) // and put it into a single file TEST_ROOT_DIR/test.txt String TEST_ROOT_DIR = jconf.get("test.build.data","/tmp"); Path file = new Path("file:///", TEST_ROOT_DIR); FileSystem fs = FileSystem.getLocal(conf); if (!fs.mkdirs(file)) { throw new IOException("Mkdirs failed to create " + file.toString()); } Path fileOut = new Path(file, "test.txt"); fs.delete(fileOut, true); DataOutputStream out = fs.create(fileOut); String[] symlinks = new String[6]; symlinks[0] = "."; symlinks[1] = "testjar"; symlinks[2] = "testzip"; symlinks[3] = "testtgz"; symlinks[4] = "testtargz"; symlinks[5] = "testtar"; for (int i = 0; i < symlinks.length; i++) { // read out the files from these archives File f = new File(symlinks[i]); File txt = new File(f, "test.txt"); FileInputStream fin = new FileInputStream(txt); BufferedReader reader = new BufferedReader(new InputStreamReader(fin)); String str = reader.readLine(); reader.close(); out.writeBytes(str); out.writeBytes("\n"); } out.close(); } catch (IOException ie) { System.out.println(StringUtils.stringifyException(ie)); } }
public static Properties addProperties(JobConf jobConf, Properties output, List<Prop> props){ for(Prop p : props){ output.setProperty(p.getKey(), p.getValue()); jobConf.set(p.getKey(), p.getValue()); } return output; }
/** * Provide the required splits from the specified configuration. By default this * method queries (via function execution) the region with the `_meta' suffix, * so make sure that the region name is passed accordingly. * * @param conf the job configuration * @param numSplits the required number of splits * @return the required splits to read/write the data * @throws IOException if the table does not exist. */ public static InputSplit[] getSplits(final JobConf conf, final int numSplits) throws IOException { final Path[] tablePaths = FileInputFormat.getInputPaths(conf); // initialize cache if not done yet final AmpoolClient aClient = MonarchUtils.getConnectionFromConf(conf); String tableName = conf.get(MonarchUtils.REGION); boolean isFTable = MonarchUtils.isFTable(conf); Table table = null; if (isFTable) { table = aClient.getFTable(tableName); } else { table = aClient.getMTable(tableName); } if (table == null) { throw new IOException("Table " + tableName + " does not exist."); } int totalnumberOfSplits = table.getTableDescriptor().getTotalNumOfSplits(); Map<Integer, Set<ServerLocation>> bucketMap = new HashMap<>(numSplits); final AtomicLong start = new AtomicLong(0L); MonarchSplit[] splits = MTableUtils .getSplitsWithSize(tableName, numSplits, totalnumberOfSplits, bucketMap) .stream().map(e -> { MonarchSplit ms = convertToSplit(tablePaths, start.get(), e, bucketMap); start.addAndGet(e.getSize()); return ms; }).toArray(MonarchSplit[]::new); logger.info("numSplits= {}; MonarchSplits= {}", numSplits, Arrays.toString(splits)); return splits; }
@SuppressWarnings("unchecked") public static InputSplit[] getSplits(final JobConf conf, final int numSplits, int dummy) { final Path[] tablePaths = FileInputFormat.getInputPaths(conf); long splitSize = NumberUtils.toLong(conf.get(MonarchUtils.SPLIT_SIZE_KEY), DEFAULT_SPLIT_SIZE); final String regionName = conf.get(MonarchUtils.REGION) + MonarchUtils.META_TABLE_SFX; MPredicateHolder ph = new MPredicateHolder(-1, BasicTypes.STRING, CompareOp.REGEX, ".*"+MonarchUtils.KEY_BLOCKS_SFX); MonarchGetAllFunction func = new MonarchGetAllFunction(); final AmpoolClient aClient = MonarchUtils.getConnectionFromConf(conf); Execution exec = FunctionService.onServer(((GemFireCacheImpl)(aClient.getGeodeCache())).getDefaultPool()) .withArgs(new Object[]{regionName, ph}); ResultCollector rc = exec.execute(func); /** TODO: refactor below code.. change below required in case the function is changed to return in some way **/ List<String[]> output = (List<String[]>)((List) rc.getResult()).get(0); if (output.isEmpty()) { logger.error("No entries found in region= {} with key_prefix= %-{}", regionName, MonarchUtils.KEY_BLOCKS_SFX); return new MonarchSplit[0]; } List<MonarchSplit> list = new ArrayList<>(output.size()); String prefix; long numberOfBlocks; for (final String[] arr : output) { prefix = arr[0].substring(0, arr[0].length() - 6); numberOfBlocks = Long.valueOf(arr[1]); if (numberOfBlocks > splitSize) { Collections.addAll(list, MonarchSplit.getInputSplits(tablePaths[0], prefix, splitSize, numberOfBlocks)); } else { list.add(new MonarchSplit(tablePaths[0], 0, numberOfBlocks, null, prefix)); } } return list.toArray(new MonarchSplit[list.size()]); }
public StreamBaseRecordReader(FSDataInputStream in, FileSplit split, Reporter reporter, JobConf job, FileSystem fs) throws IOException { in_ = in; split_ = split; start_ = split_.getStart(); length_ = split_.getLength(); end_ = start_ + length_; splitName_ = split_.getPath().getName(); reporter_ = reporter; job_ = job; fs_ = fs; statusMaxRecordChars_ = job_.getInt(CONF_NS + "statuschars", 200); }
/** * Get input splits for the specified split-size. * * @param regionName the region name * @param splitSize the split-size * @return an array of splits to be read */ private InputSplit[] getSplits(final String regionName, final int splitSize) throws IOException{ JobConf jobConf = new JobConf(); jobConf.set(MonarchUtils.REGION, regionName); jobConf.set("mapred.input.dir", "/home/mgalande"); jobConf.set(MonarchUtils.SPLIT_SIZE_KEY, String.valueOf(splitSize)); jobConf.set(MonarchUtils.MONARCH_TABLE_TYPE, "unordered"); return MonarchSplit.getSplits(jobConf, 1); }
/** * Obtain an authentication token on behalf of the given user and add it to * the credentials for the given map reduce job. * @param user The user for whom to obtain the token * @param job The job configuration in which the token should be stored * @throws IOException If making a remote call to the authentication service fails * @throws InterruptedException If executing as the given user is interrupted * @deprecated Replaced by {@link #obtainTokenForJob(Connection,JobConf,User)} */ @Deprecated public static void obtainTokenForJob(final JobConf job, UserGroupInformation user) throws IOException, InterruptedException { Connection conn = ConnectionFactory.createConnection(job); try { UserProvider userProvider = UserProvider.instantiate(job); obtainTokenForJob(conn, job, userProvider.create(user)); } finally { conn.close(); } }
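A hedged sketch of the deprecated call path the method above preserves; it assumes the method lives in TokenUtil, as its Javadoc implies, and omits the newer Connection-based overload.

private static void addHBaseDelegationToken(JobConf job)
    throws IOException, InterruptedException {
  UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
  // Deprecated path kept for older drivers; newer code should pass an explicit
  // Connection and User to obtainTokenForJob(Connection, JobConf, User).
  TokenUtil.obtainTokenForJob(job, ugi);
}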
public MapTaskImpl(JobId jobId, int partition, EventHandler eventHandler, Path remoteJobConfFile, JobConf conf, TaskSplitMetaInfo taskSplitMetaInfo, TaskAttemptListener taskAttemptListener, Token<JobTokenIdentifier> jobToken, Credentials credentials, Clock clock, int appAttemptId, MRAppMetrics metrics, AppContext appContext) { super(jobId, TaskType.MAP, partition, eventHandler, remoteJobConfFile, conf, taskAttemptListener, jobToken, credentials, clock, appAttemptId, metrics, appContext); this.taskSplitMetaInfo = taskSplitMetaInfo; }
public ReduceTaskImpl(JobId jobId, int partition, EventHandler eventHandler, Path jobFile, JobConf conf, int numMapTasks, TaskAttemptListener taskAttemptListener, Token<JobTokenIdentifier> jobToken, Credentials credentials, Clock clock, int appAttemptId, MRAppMetrics metrics, AppContext appContext) { super(jobId, TaskType.REDUCE, partition, eventHandler, jobFile, conf, taskAttemptListener, jobToken, credentials, clock, appAttemptId, metrics, appContext); this.numMapTasks = numMapTasks; }
@Override public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException { if (baseOut == null) { getBaseOutputFormat(job); } super.checkOutputSpecs(ignored, job); }