@Override
protected Class<? extends Mapper> getMapperClass() {
  if (options.getHCatTableName() != null) {
    return SqoopHCatUtilities.getImportMapperClass();
  }
  if (options.getFileLayout() == SqoopOptions.FileLayout.TextFile) {
    return TextImportMapper.class;
  } else if (options.getFileLayout() == SqoopOptions.FileLayout.SequenceFile) {
    return SequenceFileImportMapper.class;
  } else if (options.getFileLayout() == SqoopOptions.FileLayout.AvroDataFile) {
    return AvroImportMapper.class;
  } else if (options.getFileLayout() == SqoopOptions.FileLayout.ParquetFile) {
    return ParquetImportMapper.class;
  }
  return null;
}
@Override
protected Class<? extends Mapper> getMapperClass() {
  if (isHCatJob) {
    return SqoopHCatUtilities.getExportMapperClass();
  }
  if (options.getOdpsTable() != null) {
    return OdpsExportMapper.class;
  }
  switch (fileType) {
    case SEQUENCE_FILE:
      return SequenceFileExportMapper.class;
    case AVRO_DATA_FILE:
      return AvroExportMapper.class;
    case PARQUET_FILE:
      return ParquetExportMapper.class;
    case UNKNOWN:
    default:
      return TextExportMapper.class;
  }
}
@Override
protected Class<? extends Mapper> getMapperClass() {
  if (isHCatJob) {
    return SqoopHCatUtilities.getExportOdpsMapperClass();
  }
  switch (fileType) {
    case SEQUENCE_FILE:
      return SequenceFileExportMapper.class;
    case AVRO_DATA_FILE:
      return AvroExportMapper.class;
    case PARQUET_FILE:
      return ParquetExportMapper.class;
    case UNKNOWN:
    default:
      return TextExportMapper.class;
  }
}
@Test
public void testGetMainframeDatasetImportMapperClass()
    throws SecurityException, NoSuchMethodException, IllegalArgumentException,
    IllegalAccessException, InvocationTargetException {
  String jarFile = "dummyJarFile";
  String tableName = "dummyTableName";
  Path path = new Path("dummyPath");
  ImportJobContext context = new ImportJobContext(tableName, jarFile, options, path);
  mfImportJob = new MainframeImportJob(options, context);
  // To access the protected method by means of reflection
  Class[] types = {};
  Method m_getMapperClass =
      MainframeImportJob.class.getDeclaredMethod("getMapperClass", types);
  m_getMapperClass.setAccessible(true);
  Class<? extends Mapper> mapper =
      (Class<? extends Mapper>) m_getMapperClass.invoke(mfImportJob);
  assertEquals(mapper,
      org.apache.sqoop.mapreduce.mainframe.MainframeDatasetImportMapper.class);
}
@Test
public void testSuperMapperClass()
    throws SecurityException, NoSuchMethodException, IllegalArgumentException,
    IllegalAccessException, InvocationTargetException {
  String jarFile = "dummyJarFile";
  String tableName = "dummyTableName";
  Path path = new Path("dummyPath");
  options.setFileLayout(SqoopOptions.FileLayout.AvroDataFile);
  ImportJobContext context = new ImportJobContext(tableName, jarFile, options, path);
  avroImportJob = new MainframeImportJob(options, context);
  // To access the protected method by means of reflection
  Class[] types = {};
  Method m_getMapperClass =
      MainframeImportJob.class.getDeclaredMethod("getMapperClass", types);
  m_getMapperClass.setAccessible(true);
  Class<? extends Mapper> mapper =
      (Class<? extends Mapper>) m_getMapperClass.invoke(avroImportJob);
  assertEquals(mapper, org.apache.sqoop.mapreduce.AvroImportMapper.class);
}
/**
 * Tests one of the mappers throwing an exception.
 *
 * @throws Exception
 */
public void testChainFail() throws Exception {
  Configuration conf = createJobConf();
  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 0, input);
  job.setJobName("chain");
  ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);
  ChainMapper.addMapper(job, FailMap.class, LongWritable.class, Text.class,
      IntWritable.class, Text.class, null);
  ChainMapper.addMapper(job, Mapper.class, IntWritable.class, Text.class,
      LongWritable.class, Text.class, null);
  job.waitForCompletion(true);
  assertTrue("Job has not failed", !job.isSuccessful());
}
/**
 * Tests the reducer throwing an exception.
 *
 * @throws Exception
 */
public void testReducerFail() throws Exception {
  Configuration conf = createJobConf();
  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 1, input);
  job.setJobName("chain");
  ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);
  ChainReducer.setReducer(job, FailReduce.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);
  ChainReducer.addMapper(job, Mapper.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);
  job.waitForCompletion(true);
  assertTrue("Job has not failed", !job.isSuccessful());
}
/**
 * Tests one of the mappers consuming its output.
 *
 * @throws Exception
 */
public void testChainMapNoOutput() throws Exception {
  Configuration conf = createJobConf();
  String expectedOutput = "";
  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 0, input);
  job.setJobName("chain");
  ChainMapper.addMapper(job, ConsumeMap.class, IntWritable.class, Text.class,
      LongWritable.class, Text.class, null);
  ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);
  job.waitForCompletion(true);
  assertTrue("Job failed", job.isSuccessful());
  assertEquals("Output doesn't match", expectedOutput,
      MapReduceTestUtil.readOutput(outDir, conf));
}
/**
 * Tests the reducer consuming its output.
 *
 * @throws Exception
 */
public void testChainReduceNoOutput() throws Exception {
  Configuration conf = createJobConf();
  String expectedOutput = "";
  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 1, input);
  job.setJobName("chain");
  ChainMapper.addMapper(job, Mapper.class, IntWritable.class, Text.class,
      LongWritable.class, Text.class, null);
  ChainReducer.setReducer(job, ConsumeReduce.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);
  ChainReducer.addMapper(job, Mapper.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);
  job.waitForCompletion(true);
  assertTrue("Job failed", job.isSuccessful());
  assertEquals("Output doesn't match", expectedOutput,
      MapReduceTestUtil.readOutput(outDir, conf));
}
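The four chain tests above reference helper classes that are not shown here. Below is a minimal sketch of what FailMap and ConsumeMap might look like, assuming they are static inner classes of the test (the reduce-side FailReduce and ConsumeReduce would follow the same pattern); the exact signatures in the real test class may differ.

public static class FailMap
    extends Mapper<LongWritable, Text, IntWritable, Text> {
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // Fail on every record so the chained job is expected to fail.
    throw new IOException("failing map");
  }
}

public static class ConsumeMap
    extends Mapper<LongWritable, Text, LongWritable, Text> {
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // Intentionally write nothing, so the chained job produces empty output.
  }
}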
@SuppressWarnings("unchecked") public void testAddInputPathWithMapper() throws IOException { final Job conf = Job.getInstance(); MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class, MapClass.class); MultipleInputs.addInputPath(conf, new Path("/bar"), KeyValueTextInputFormat.class, KeyValueMapClass.class); final Map<Path, InputFormat> inputs = MultipleInputs .getInputFormatMap(conf); final Map<Path, Class<? extends Mapper>> maps = MultipleInputs .getMapperTypeMap(conf); assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass()); assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar")) .getClass()); assertEquals(MapClass.class, maps.get(new Path("/foo"))); assertEquals(KeyValueMapClass.class, maps.get(new Path("/bar"))); }
/**
 * Adds the first mapper, which reads input from the input context and
 * writes to the queue.
 */
@SuppressWarnings("unchecked")
void addMapper(TaskInputOutputContext inputContext,
    ChainBlockingQueue<KeyValuePair<?, ?>> output, int index)
    throws IOException, InterruptedException {
  Configuration conf = getConf(index);
  Class<?> keyOutClass = conf.getClass(MAPPER_OUTPUT_KEY_CLASS, Object.class);
  Class<?> valueOutClass = conf.getClass(MAPPER_OUTPUT_VALUE_CLASS, Object.class);
  RecordReader rr = new ChainRecordReader(inputContext);
  RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass, output, conf);
  Mapper.Context mapperContext =
      createMapContext(rr, rw, (MapContext) inputContext, getConf(index));
  MapRunner runner = new MapRunner(mappers.get(index), mapperContext, rr, rw);
  threads.add(runner);
}
private Path getTmpFile(Path target, Mapper.Context context) {
  Path targetWorkPath = new Path(context.getConfiguration()
      .get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
  Path root = target.equals(targetWorkPath)
      ? targetWorkPath.getParent() : targetWorkPath;
  Path tmpFile =
      new Path(root, ".distcp.tmp." + context.getTaskAttemptID().toString());
  LOG.info("Creating temp file: " + tmpFile);
  return tmpFile;
}
private void testCopyingExistingFiles(FileSystem fs, CopyMapper copyMapper,
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context context) {
  try {
    for (Path path : pathList) {
      copyMapper.map(
          new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
          new CopyListingFileStatus(fs.getFileStatus(path)), context);
    }
    Assert.assertEquals(nFiles,
        context.getCounter(CopyMapper.Counter.SKIP).getValue());
  } catch (Exception exception) {
    Assert.assertTrue("Caught unexpected exception:" + exception.getMessage(),
        false);
  }
}
public int runRandomInputGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMultiplier) throws Exception {
  LOG.info("Running RandomInputGenerator with numMappers=" + numMappers
      + ", numNodes=" + numNodes);
  Job job = Job.getInstance(getConf());
  job.setJobName("Random Input Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());
  job.setInputFormatClass(GeneratorInputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(NullWritable.class);
  setJobConf(job, numMappers, numNodes, width, wrapMultiplier);
  job.setMapperClass(Mapper.class); // identity mapper
  FileOutputFormat.setOutputPath(job, tmpOutput);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  boolean success = jobCompletion(job);
  return success ? 0 : 1;
}
private void enumDirectories(FileSystem fs, URI rootUri, Path directory,
    boolean recursive, Mapper.Context context)
    throws IOException, InterruptedException {
  try {
    for (FileStatus status : fs.listStatus(directory, hiddenFileFilter)) {
      if (status.isDirectory()) {
        if (recursive) {
          if (directoryBlackList == null
              || !status.getPath().getName().matches(directoryBlackList)) {
            enumDirectories(fs, rootUri, status.getPath(), recursive, context);
          }
        }
      } else {
        context.write(
            new Text(rootUri.relativize(directory.toUri()).getPath()),
            new FileStatus(status));
      }
    }
    context.progress();
  } catch (FileNotFoundException e) {
    return;
  }
}
private long copyToFile(Path targetPath, FileSystem targetFS,
    FileStatus sourceFileStatus, long sourceOffset, Mapper.Context context,
    EnumSet<FileAttribute> fileAttributes, final FileChecksum sourceChecksum)
    throws IOException {
  FsPermission permission = FsPermission.getFileDefault()
      .applyUMask(FsPermission.getUMask(targetFS.getConf()));
  final OutputStream outStream;
  if (action == FileAction.OVERWRITE) {
    final short repl = getReplicationFactor(fileAttributes, sourceFileStatus,
        targetFS, targetPath);
    final long blockSize = getBlockSize(fileAttributes, sourceFileStatus,
        targetFS, targetPath);
    FSDataOutputStream out = targetFS.create(targetPath, permission,
        EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE), BUFFER_SIZE, repl,
        blockSize, context, getChecksumOpt(fileAttributes, sourceChecksum));
    outStream = new BufferedOutputStream(out);
  } else {
    outStream = new BufferedOutputStream(targetFS.append(targetPath, BUFFER_SIZE));
  }
  return copyBytes(sourceFileStatus, sourceOffset, outStream, BUFFER_SIZE, context);
}
@Override
protected void map(LongWritable key, Text value,
    Mapper<LongWritable, Text, Text, NullWritable>.Context context)
    throws IOException, InterruptedException {
  // Each input line has the form "word<TAB>file1:tf1,file2:tf2,...".
  String line = value.toString();
  String[] splited = line.split("\t");
  String word = splited[0];
  String[] fileTFs = splited[1].split(",");
  int hasFilesCount = fileTFs.length;
  // Inverse document frequency: log((totalDocs + 1) / docsContainingWord).
  double idf = Math.log((docCount + 1.0) / hasFilesCount);
  for (String fileTF : fileTFs) {
    String[] fileTFArray = fileTF.split(":");
    String fileName = fileTFArray[0];
    double tf = Double.parseDouble(fileTFArray[1]);
    double tfidf = idf * tf;
    k2.set(fileName + "\t" + word + "\t" + tfidf);
    context.write(k2, NullWritable.get());
  }
}
@Override
protected void map(Object key, Text value,
    Mapper<Object, Text, Text, IntWritable>.Context context)
    throws IOException, InterruptedException {
  /*
   * Use a StringTokenizer to extract words from the given input line,
   * e.g. "1 This is Hello World" (here 1 is the key and the statement is
   * the value), which tokenizes into This, is, Hello, World.
   * Only tokens equal to "Java" are emitted, each with a count of 1.
   */
  StringTokenizer stringTokenizer = new StringTokenizer(value.toString());
  while (stringTokenizer.hasMoreElements()) {
    String wordTemp = stringTokenizer.nextToken();
    if (wordTemp.equals("Java")) {
      word.set(wordTemp);
      context.write(word, one);
    }
  }
}
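A minimal driver sketch showing how a mapper like the one above could be wired into a job; the enclosing class name JavaWordCount, the nested mapper name TokenMapper, and the choice of IntSumReducer as combiner and reducer are illustrative assumptions, not part of the original snippet.

public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "count occurrences of the word Java");
  job.setJarByClass(JavaWordCount.class);              // hypothetical driver class
  job.setMapperClass(JavaWordCount.TokenMapper.class); // the mapper sketched above
  job.setCombinerClass(IntSumReducer.class);           // sums the per-word 1s locally
  job.setReducerClass(IntSumReducer.class);            // and again across mappers
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}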
@Override
protected void setup(Mapper<LongWritable, Text, NullWritable, Text>.Context context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  this.k = conf.getInt("topk", 1);
  this.type = conf.get("type", "min");
  if ("min".equals(this.type)) {
    // Ascending (natural) ordering.
    topkSet = new TreeSet<>();
  } else {
    // Descending ordering: negate the natural comparison.
    topkSet = new TreeSet<>(new Comparator<TFIDFWord>() {
      @Override
      public int compare(TFIDFWord o1, TFIDFWord o2) {
        return -o1.compareTo(o2);
      }
    });
  }
}
@Override
protected void startAligner(Mapper.Context context)
    throws IOException, InterruptedException {
  // make command
  String customArgs = HalvadeConf.getCustomArgs(context.getConfiguration(),
      "bwa", "mem");
  String[] command = CommandGenerator.bwaMem(bin, ref, null, null, isPaired,
      true, threads, customArgs);
  pbw = new ProcessBuilderWrapper(command, bin);
  // run command
  // needs to be streamed to output otherwise the process blocks ...
  pbw.startProcess(null, System.err);
  // check if alive.
  if (!pbw.isAlive())
    throw new ProcessException("BWA mem", pbw.getExitState());
  pbw.getSTDINWriter();
  // make a SAMstream handler
  ssh = new SAMStreamHandler(instance, context, false);
  ssh.start();
}
@Override
protected void startAligner(Mapper.Context context)
    throws IOException, InterruptedException {
  File file1 = new File(getFileName(tmpdir, taskId, 1));
  if (!file1.exists()) {
    file1.createNewFile();
  }
  fastqFile1 = new BufferedWriter(new FileWriter(file1.getAbsoluteFile()));
  if (isPaired) {
    File file2 = new File(getFileName(tmpdir, taskId, 2));
    if (!file2.exists()) {
      file2.createNewFile();
    }
    fastqFile2 = new BufferedWriter(new FileWriter(file2.getAbsoluteFile()));
  }
  // make output dir!
  File starOut = new File(starOutDir);
  starOut.mkdirs();
}
protected AlignerInstance(Mapper.Context context, String bin, int task)
    throws IOException, URISyntaxException {
  AlignerInstance.context = context;
  header = null;
  containers = HalvadeConf.getMapContainerCount(context.getConfiguration());
  tasksLeft = HalvadeConf.getMapTasksLeft(task, context.getConfiguration());
  redistribute = HalvadeConf.getRedistribute(context.getConfiguration());
  mergeBam = HalvadeConf.getMergeBam(context.getConfiguration());
  writableRecord = new SAMRecordWritable();
  writableRegion = new ChromosomeRegion();
  writeableCompactRegion = new GenomeSJ();
  stub = new Text();
  minChrLength = HalvadeConf.getMinChrLength(context.getConfiguration());
  chr = HalvadeConf.getChrList(context.getConfiguration());
  tmpdir = HalvadeConf.getScratchTempDir(context.getConfiguration());
  if (!tmpdir.endsWith("/"))
    tmpdir = tmpdir + "/";
  File tmp = new File(tmpdir);
  tmp.mkdirs();
  this.bin = bin;
  threads = HalvadeConf.getMapThreads(context.getConfiguration());
  isPaired = HalvadeConf.getIsPaired(context.getConfiguration());
  Logger.DEBUG("paired? " + isPaired);
  splitter = new ChromosomeSplitter(
      HalvadeConf.getBedRegions(context.getConfiguration()),
      context.getConfiguration());
  keepChrSplitPairs = HalvadeConf.getkeepChrSplitPairs(context.getConfiguration());
  keep = HalvadeConf.getKeepFiles(context.getConfiguration());
}
/**
 * Shuts down the transfer manager and releases other engaged resources.
 */
@Override
protected void cleanup(Mapper<Text, CopyListingFileStatus, Text, Text>.Context context)
    throws IOException, InterruptedException {
  if (transferManager != null) {
    transferManager.shutdownNow(true);
  }
}
@Override
protected Class<? extends Mapper> getMapperClass() {
  if (isHCatJob) {
    return NetezzaExternalTableHCatExportMapper.class;
  }
  if (inputIsSequenceFiles()) {
    return NetezzaExternalTableRecordExportMapper.class;
  } else {
    return NetezzaExternalTableTextExportMapper.class;
  }
}
@Override
protected Class<? extends Mapper> getMapperClass() {
  if (isHCatJob) {
    return NetezzaExternalTableHCatImportMapper.class;
  } else {
    return NetezzaExternalTableTextImportMapper.class;
  }
}
public ImportJobBase(final SqoopOptions opts,
    final Class<? extends Mapper> mapperClass,
    final Class<? extends InputFormat> inputFormatClass,
    final Class<? extends OutputFormat> outputFormatClass,
    final ImportJobContext context) {
  super(opts, mapperClass, inputFormatClass, outputFormatClass);
  this.context = context;
}
public JobBase(final SqoopOptions opts,
    final Class<? extends Mapper> mapperClass,
    final Class<? extends InputFormat> inputFormatClass,
    final Class<? extends OutputFormat> outputFormatClass) {
  this.options = opts;
  this.mapperClass = mapperClass;
  this.inputFormatClass = inputFormatClass;
  this.outputFormatClass = outputFormatClass;
  isHCatJob = options.getHCatTableName() != null;
}
@Override
protected Class<? extends Mapper> getMapperClass() {
  if (options.getFileLayout() == SqoopOptions.FileLayout.TextFile) {
    return MainframeDatasetImportMapper.class;
  } else {
    return super.getMapperClass();
  }
}
@Override
protected Class<? extends Mapper> getMapperClass() {
  if (inputIsSequenceFiles()) {
    return MySQLRecordExportMapper.class;
  } else {
    return MySQLTextExportMapper.class;
  }
}
@Override
protected Class<? extends Mapper> getMapperClass() {
  if (options.getOdpsTable() != null) {
    return OdpsExportMapper.class;
  }
  if (inputIsSequenceFiles()) {
    return SequenceFileExportMapper.class;
  } else {
    return TextExportMapper.class;
  }
}
/**
 * Convenience method to access {@link #getDataStream(Configuration, Path)}
 * from within a map task that reads this LobRef from a file-based InputSplit.
 * @param mapContext the Mapper.Context instance that encapsulates
 * the current map task.
 * @return an object that lazily streams the record to the client.
 * @throws IllegalArgumentException if the source path for this LOB cannot be
 * determined from the MapContext.
 * @throws IOException if the LOB could not be read from external storage.
 */
public ACCESSORTYPE getDataStream(Mapper.Context mapContext) throws IOException {
  InputSplit split = mapContext.getInputSplit();
  if (split instanceof FileSplit) {
    Path basePath = ((FileSplit) split).getPath().getParent();
    return getDataStream(mapContext.getConfiguration(), basePath);
  } else {
    throw new IllegalArgumentException(
        "Could not ascertain LOB base path from MapContext.");
  }
}
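A minimal sketch of calling this accessor from inside a map task, assuming a concrete LobRef subclass such as BlobRef whose accessor type is an InputStream; the record type, column name, and field-map lookup are illustrative assumptions only.

@Override
protected void map(LongWritable key, SqoopRecord record, Context context)
    throws IOException, InterruptedException {
  // Hypothetical: assume the generated record exposes a BlobRef column "PHOTO".
  BlobRef blob = (BlobRef) record.getFieldMap().get("PHOTO");
  // Resolves the LOB file relative to the parent directory of this task's FileSplit.
  try (InputStream in = blob.getDataStream(context)) {
    // ... consume the lazily streamed LOB data ...
  }
}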
public ImportJobBase(final SqoopOptions opts,
    final Class<? extends Mapper> mapperClass,
    final Class<? extends InputFormat> inputFormatClass,
    final Class<? extends OutputFormat> outputFormatClass,
    final ImportJobContext context) {
  super(opts, mapperClass, inputFormatClass, outputFormatClass, context);
}