private void getSentiFile(Context context) throws IOException {
    Configuration conf = context.getConfiguration();
    String swnPath = conf.get("sentwordnetfile");
    System.out.println("@@@ Path: " + swnPath);
    this.linhas = new ArrayList<String>();
    try {
        Path pt = new Path(swnPath);
        // Use the job configuration rather than a fresh one so the correct
        // default filesystem is resolved.
        FileSystem fs = FileSystem.get(conf);
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(pt)));
        String line = br.readLine();
        while (line != null) {
            linhas.add(line);
            line = br.readLine();
        }
        br.close();
    } catch (Exception e) {
        System.out.println("@@@@ ERROR: " + e.getMessage());
        throw new IOException(e);
    }
    sdc = new SentiWordNetDemoCode(linhas);
}
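// Illustrative driver sketch (not from the source): one way the "sentwordnetfile"
// key read above could be populated before job submission. The class name, job
// name, and argument layout are hypothetical.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class SentimentDriverSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // args[0]: HDFS path of the SentiWordNet lexicon consumed by getSentiFile().
        conf.set("sentwordnetfile", args[0]);
        Job job = Job.getInstance(conf, "sentiment-analysis");
        // ... set mapper/reducer classes and input/output paths here ...
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}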
@Override
public void map(NullWritable key, NullWritable value, Context context)
        throws IOException, InterruptedException {
    int counter = 0;
    System.out.println("starting mapper");
    System.out.println();
    for (int i = 0; i < numberOfRecords; i++) {
        String keyRoot = StringUtils.leftPad(
                Integer.toString(r.nextInt(Short.MAX_VALUE)), 5, '0');
        if (i % 1000 == 0) {
            System.out.print(".");
        }
        for (int j = 0; j < 10; j++) {
            hKey.set(Bytes.toBytes(keyRoot + "|" + runID + "|" + taskId));
            kv = new KeyValue(hKey.get(), columnFamily, Bytes.toBytes("C" + j),
                    Bytes.toBytes("counter:" + counter++));
            context.write(hKey, kv);
        }
    }
    System.out.println("finished mapper");
}
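// Worked example (illustrative values): for the first record with keyRoot "04821",
// runID "r1", and taskId "3", the inner loop above emits ten cells under the row key
// "04821|r1|3" with qualifiers C0..C9 and values "counter:0" through "counter:9";
// the counter keeps incrementing across subsequent records.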
public void map(LongWritable key, Writable value, Context context)
        throws IOException, InterruptedException {
    try {
        String str = value.toString();
        if (value instanceof Text) {
            writer.write(str, 0, str.length());
            writer.newLine();
        } else if (value instanceof SqoopRecord) {
            // A SqoopRecord's toString() presumably already carries its record
            // terminator, so no explicit newline is appended here.
            writer.write(str, 0, str.length());
        }
    } catch (Exception e) {
        // Drop the staging table and clean up before propagating the failure.
        doExecuteUpdate("DROP TABLE " + tmpTableName);
        cleanup(context);
        throw new IOException(e);
    }
}
@Override
public void map(LongWritable key, Writable value, Context context)
        throws IOException, InterruptedException {
    line.setLength(0);
    line.append(value.toString());
    if (value instanceof Text) {
        line.append(System.getProperty("line.separator"));
    }
    try {
        byte[] data = line.toString().getBytes("UTF-8");
        copyin.writeToCopy(data, 0, data.length);
    } catch (SQLException ex) {
        LoggingUtils.logAll(LOG, "Unable to execute copy", ex);
        close();
        throw new IOException(ex);
    }
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();

    // Instantiate a copy of the user's class to hold and parse the record.
    String recordClassName = conf.get(ExportJobBase.SQOOP_EXPORT_TABLE_CLASS_KEY);
    if (null == recordClassName) {
        throw new IOException("Export table class name ("
                + ExportJobBase.SQOOP_EXPORT_TABLE_CLASS_KEY + ") is not set!");
    }

    try {
        Class<?> cls = Class.forName(recordClassName, true,
                Thread.currentThread().getContextClassLoader());
        recordImpl = (SqoopRecord) ReflectionUtils.newInstance(cls, conf);
    } catch (ClassNotFoundException cnfe) {
        throw new IOException(cnfe);
    }

    if (null == recordImpl) {
        throw new IOException("Could not instantiate object of type "
                + recordClassName);
    }

    columnTypes = DefaultStringifier.load(conf, AVRO_COLUMN_TYPES_MAP,
            MapWritable.class);
}
public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    String line = value.toString();
    line = line.trim().toLowerCase();
    line = line.replaceAll("[^a-z]+", " ");
    String[] words = line.split("\\s+"); // split on whitespace: ' ', '\t', '\n', etc.
    if (words.length < 2) {
        return;
    }
    StringBuilder sb;
    for (int i = 0; i < words.length - 1; i++) {
        sb = new StringBuilder();
        // Emit every n-gram of length 1..noGram starting at position i.
        for (int j = 0; i + j < words.length && j < noGram; j++) {
            sb.append(" ");
            sb.append(words[i + j]);
            context.write(new Text(sb.toString().trim()), new IntWritable(1));
        }
    }
}
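// Worked example (illustrative): with noGram = 2, the line "a b c" yields the
// emissions "a", "a b", "b", "b c". Because the outer loop stops at
// words.length - 1, the final unigram "c" is never emitted on its own.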
public void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    multipleOutputs = new MultipleOutputs(context);
    lowerBoundary = conf.get("LOWER_DATE");
    upperBoundary = conf.get("HIGHER_DATE");
}
JobHistoryFileReplayHelper(Context context) throws IOException {
    Configuration conf = context.getConfiguration();
    int taskId = context.getTaskAttemptID().getTaskID().getId();
    int size = conf.getInt(MRJobConfig.NUM_MAPS,
            TimelineServicePerformance.NUM_MAPS_DEFAULT);
    replayMode = conf.getInt(JobHistoryFileReplayHelper.REPLAY_MODE,
            JobHistoryFileReplayHelper.REPLAY_MODE_DEFAULT);
    String processingDir = conf.get(JobHistoryFileReplayHelper.PROCESSING_PATH);
    Path processingPath = new Path(processingDir);
    FileSystem processingFs = processingPath.getFileSystem(conf);
    parser = new JobHistoryFileParser(processingFs);
    jobFiles = selectJobFiles(processingFs, processingPath, taskId, size);
}
@SuppressWarnings({"rawtypes", "unchecked"})
@Test(timeout = 10000)
public void testLoadMapper() throws Exception {
    Configuration conf = new Configuration();
    conf.setInt(JobContext.NUM_REDUCES, 2);

    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

    TaskAttemptID taskId = new TaskAttemptID();
    RecordReader<NullWritable, GridmixRecord> reader = new FakeRecordReader();
    LoadRecordGkGrWriter writer = new LoadRecordGkGrWriter();
    OutputCommitter committer = new CustomOutputCommitter();
    StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
    LoadSplit split = getLoadSplit();

    MapContext<NullWritable, GridmixRecord, GridmixKey, GridmixRecord> mapContext =
            new MapContextImpl<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>(
                    conf, taskId, reader, writer, committer, reporter, split);
    // context
    Context ctx = new WrappedMapper<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>()
            .getMapContext(mapContext);

    reader.initialize(split, ctx);
    ctx.getConfiguration().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
    CompressionEmulationUtil.setCompressionEmulationEnabled(ctx.getConfiguration(), true);

    LoadJob.LoadMapper mapper = new LoadJob.LoadMapper();
    // setup, map, cleanup
    mapper.run(ctx);

    Map<GridmixKey, GridmixRecord> data = writer.getData();
    // check result
    assertEquals(2, data.size());
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    overrideRdfContext = conf.getBoolean(OVERRIDE_CONTEXT_PROPERTY, false);
    String defCtx = conf.get(DEFAULT_CONTEXT_PROPERTY);
    defaultRdfContext = defCtx == null
            ? null : SimpleValueFactory.getInstance().createIRI(defCtx);
    decimationFactor = conf.getInt(DECIMATION_FACTOR_PROPERTY, DEFAULT_DECIMATION_FACTOR);
    for (byte b = 1; b < 6; b++) {
        context.write(new ImmutableBytesWritable(new byte[] {b}), new LongWritable(1));
    }
    timestamp = conf.getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis());
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    // Read the business data into memory on the mapper.
    myMap = new HashMap<String, String>();
    Configuration conf = context.getConfiguration();
    String mybusinessdataPath = conf.get("businessdata"); // e.g. /user/hue/input/
    Path part = new Path("hdfs://cshadoop1" + mybusinessdataPath); // location of the file in HDFS
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] fss = fs.listStatus(part);
    for (FileStatus status : fss) {
        Path pt = status.getPath();
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(pt)));
        String line = br.readLine();
        while (line != null) {
            String[] arr = line.split("\\^");
            if (arr.length == 3) {
                myMap.put(arr[0].trim(), line); // business id -> the remaining data columns
            }
            line = br.readLine();
        }
        br.close();
    }
}
public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    // from ratings
    String[] mydata = value.toString().split("\\^");
    if (mydata.length == 3) {
        if (mydata[1].contains("Palo Alto")) {
            context.write(new Text(mydata[0].trim()), new IntWritable(1));
        }
        /*
        if ("review".compareTo(mydata[mydata.length - 2].trim()) == 0) {
            context.write(new Text(mydata[mydata.length - 2].trim()), new IntWritable(1));
        }
        if ("user".compareTo(mydata[mydata.length - 2].trim()) == 0) {
            context.write(new Text(mydata[mydata.length - 2].trim()), new IntWritable(1));
        }
        */
    }
}
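// Worked example (illustrative; the three-field caret-delimited layout is assumed
// from the split above): given the line "b123^123 University Ave, Palo Alto^Restaurants",
// mydata[1] contains "Palo Alto", so the mapper emits ("b123", 1).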
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    super.cleanup(context);
    Collections.sort(myarray, new MyMovieComparator());
    int count = 0;
    for (MyBusinessData data : myarray) {
        result.set("" + data.rating);
        myKey.set(data.businessId);
        context.write(myKey, result); // emit a pair <business id, rating>
        count++;
        if (count >= 10) {
            break; // only the top 10 are emitted
        }
    }
}
@Override
protected void startAligner(Mapper.Context context) throws IOException, InterruptedException {
    // make command
    String customArgs = HalvadeConf.getCustomArgs(context.getConfiguration(), "bwa", "mem");
    String[] command = CommandGenerator.bwaMem(bin, ref, null, null, isPaired, true,
            threads, customArgs);
    pbw = new ProcessBuilderWrapper(command, bin);
    // run command; output needs to be streamed, otherwise the process blocks
    pbw.startProcess(null, System.err);
    // check if the process is alive
    if (!pbw.isAlive()) {
        throw new ProcessException("BWA mem", pbw.getExitState());
    }
    pbw.getSTDINWriter();
    // make a SAM stream handler
    ssh = new SAMStreamHandler(instance, context, false);
    ssh.start();
}
static public void logEvent(FileSystem fs, Path path, LOGTYPES type, LOGRESULTS result,
        Codec codec, Context context, LogSample sample, String tag) {
    try {
        if (context == null) {
            incrRaidNodeMetricCounter(fs, type, result, tag);
        } else {
            incrLogMetricCounter(context, fs, type, result, tag);
        }
        if (sample == null) {
            sample = new LogSample();
        }
        if (path != null) {
            sample.addNormalValue(LOGKEYS.Path.name(), path.toString());
        }
        if (codec != null) {
            sample.addNormalValue(LOGKEYS.Code.name(), codec.id);
        }
        sample.addNormalValue(LOGKEYS.Type.name(), type.name());
        sample.addNormalValue(LOGKEYS.Cluster.name(), fs.getUri().getAuthority());
        EVENTS_LOG.info(sample.toJSON());
    } catch (Exception e) {
        LOG.warn("Exception when logging the File_Fix_WaitTime metric : "
                + e.getMessage(), e);
    }
}
/**
 * Retrieve stripes from the stripe store.
 */
public StripeInfo retrieveStripe(Block lostBlock, Path p, long lostBlockOffset,
        FileSystem fs, Context context, boolean online) throws IOException {
    StripeInfo si = null;
    if (stripeStore != null) {
        IOException caughtException = null;
        try {
            si = stripeStore.getStripe(codec, lostBlock);
        } catch (IOException ioe) {
            LOG.error(" Failed to get stripe " + codec + " : " + lostBlock, ioe);
            caughtException = ioe;
        }
        if (si == null) {
            // The stripe is not recorded; report it.
            LogUtils.logRaidReconstructionMetrics(LOGRESULTS.FAILURE, 0, codec, p,
                    lostBlockOffset,
                    online ? LOGTYPES.ONLINE_RECONSTRUCTION_GET_STRIPE
                           : LOGTYPES.OFFLINE_RECONSTRUCTION_GET_STRIPE,
                    fs, caughtException, context);
        }
    }
    return si;
}
/**
 * Retrieve checksums from the checksum store, and report a lost checksum
 * when one cannot be found.
 */
public Long retrieveChecksum(Block lostBlock, Path p, long lostBlockOffset,
        FileSystem fs, Context context) throws IOException {
    Long oldCRC = null;
    if (checksumStore != null) {
        IOException caughtException = null;
        try {
            oldCRC = checksumStore.getChecksum(lostBlock);
        } catch (IOException ioe) {
            LOG.error(" Failed to get checksum for block " + lostBlock, ioe);
            caughtException = ioe;
        }
        // The checksum is not recorded; report it.
        if (oldCRC == null) {
            LogUtils.logRaidReconstructionMetrics(LOGRESULTS.FAILURE, 0, codec, p,
                    lostBlockOffset, LOGTYPES.OFFLINE_RECONSTRUCTION_GET_CHECKSUM,
                    fs, caughtException, context);
        }
    }
    return oldCRC;
}
public CRC32 recoverParityBlockToFile(FileSystem srcFs, FileStatus srcStat,
        FileSystem parityFs, Path parityPath, long blockSize, long blockOffset,
        File localBlockFile, StripeInfo si, Context context)
        throws IOException, InterruptedException {
    OutputStream out = null;
    try {
        out = new FileOutputStream(localBlockFile);
        return fixErasedBlock(srcFs, srcStat, parityFs, parityPath, false, blockSize,
                blockOffset, blockSize, false, out, si, context, false);
    } finally {
        if (out != null) {
            out.close();
        }
    }
}
/**
 * Recover a corrupt block to a local file, using the stripe information
 * stored in the stripe store.
 *
 * @param srcFs The filesystem containing the source file.
 * @param srcPath The damaged source file.
 * @param lostBlock The missing/corrupted block.
 * @param localBlockFile The file to write the block to.
 * @param blockSize The block size of the file.
 * @param lostBlockOffset The start offset of the block in the file.
 * @param limit The maximum number of bytes to be written out.
 * @param si The StripeInfo retrieved from the stripe store.
 * @param context The mapper context, used for progress reporting.
 * @return The CRC32 of the recovered block, or null if no checksum store is configured.
 * @throws IOException
 */
public CRC32 recoverBlockToFileFromStripeInfo(FileSystem srcFs, Path srcPath,
        Block lostBlock, File localBlockFile, long blockSize, long lostBlockOffset,
        long limit, StripeInfo si, Context context) throws IOException {
    OutputStream out = null;
    try {
        out = new FileOutputStream(localBlockFile);
        CRC32 crc = null;
        if (checksumStore != null) {
            crc = new CRC32();
        }
        fixErasedBlockImpl(srcFs, srcPath, srcFs, null, true, blockSize,
                lostBlockOffset, limit, false, out, context, crc, si, true, lostBlock);
        return crc;
    } finally {
        if (null != out) {
            out.close();
        }
    }
}
DecoderInputStream generateAlternateStream(FileSystem srcFs, Path srcFile,
        FileSystem parityFs, Path parityFile, long blockSize, long errorOffset,
        long limit, Block lostBlock, StripeInfo si, boolean recoverFromStripeInfo,
        Context context) {
    configureBuffers(blockSize);
    Progressable reporter = context;
    if (reporter == null) {
        reporter = RaidUtils.NULL_PROGRESSABLE;
    }
    DecoderInputStream decoderInputStream = new DecoderInputStream(reporter, limit,
            blockSize, errorOffset, srcFs, srcFile, parityFs, parityFile, lostBlock,
            si, recoverFromStripeInfo);
    return decoderInputStream;
}
public void map(Map<String, ByteBuffer> keys, Map<String, ByteBuffer> columns,
        Context context) throws IOException, InterruptedException {
    for (Entry<String, ByteBuffer> column : columns.entrySet()) {
        if (!"line".equalsIgnoreCase(column.getKey())) {
            continue;
        }
        String value = ByteBufferUtil.string(column.getValue());
        StringTokenizer itr = new StringTokenizer(value);
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, one);
        }
    }
}
protected void setup(Context context) throws IOException, InterruptedException {
    /*
     * @author: Tim and Lism
     * @time: 2013-10-17
     * Check whether a GPU is available for this task.
     * If a GPU is available, you can call GPU code; otherwise call CPU code.
     */
    // The GPU and GPU_ID environment variables are presumably set by the task scheduler.
    if (Integer.valueOf(System.getenv("GPU")) != 0) {
        int gpu_id = Integer.valueOf(System.getenv("GPU_ID"));
        System.out.printf("GPU number: %d\n", Integer.valueOf(System.getenv("GPU")));
        System.out.printf(" GPU ID: %d\n", gpu_id);
        // call GPU code here!
        System.out.println("[GPU INFO] GPU is available for this task; you can call GPU code here!");
    } else {
        // call CPU code here!
        System.out.println("[GPU INFO] GPU is NOT available for this task; you should call CPU code here!");
    }
}
@Override
protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    if (first) {
        FileSplit split = (FileSplit) context.getInputSplit();
        Path path = split.getPath(); // current split path
        lvalue.set(path.getName());
        lkey.set(key.get());
        context.write(lkey, lvalue);
        first = false;
    }
    String line = value.toString();
    if (!line.isEmpty()) {
        Instance instance = converter.convert(line);
        double prediction = ruleBase.classify(instance);
        lkey.set(dataset.getLabel(instance));
        lvalue.set(Double.toString(prediction));
        context.write(lkey, lvalue);
    }
}
public void recoverParityBlockToFile(FileSystem srcFs, Path srcPath, FileSystem parityFs,
        Path parityPath, long blockSize, long blockOffset, File localBlockFile,
        Context context) throws IOException, InterruptedException {
    DistributedFileSystem dfs = (DistributedFileSystem) srcFs;
    long crc32 = dfs.getClient().getBlockChecksum(parityPath.toUri().toString(),
            (int) (blockOffset / blockSize));
    OutputStream out = null;
    try {
        out = new FileOutputStream(localBlockFile);
        fixErasedBlock(srcFs, srcPath, parityFs, parityPath, false, blockSize,
                blockOffset, blockSize, false, out, context, false, crc32);
    } finally {
        if (out != null) {
            out.close();
        }
    }
}
/**
 * Having buffers of the right size is extremely important. If the
 * buffer size is not a divisor of the block size, we may end up reading
 * across block boundaries.
 */
void fixErasedBlock(FileSystem srcFs, Path srcFile, FileSystem parityFs, Path parityFile,
        boolean fixSource, long blockSize, long errorOffset, long limit, boolean partial,
        OutputStream out, Mapper.Context context, boolean skipVerify, long oldCrc)
        throws IOException, InterruptedException {
    // TODO This causes a NullPointerException and it didn't seem to be required
    // configureBuffers(blockSize);
    Progressable reporter = context;
    if (reporter == null) {
        reporter = RaidUtils.NULL_PROGRESSABLE;
    }
    CRC32 crc = new CRC32();
    fixErasedBlockImpl(srcFs, srcFile, parityFs, parityFile, fixSource, blockSize,
            errorOffset, limit, partial, out, reporter, crc);
    if (crc.getValue() != oldCrc) {
        throw new BlockChecksumException(String.format(
                "Repair of %s at offset %d failed. Checksum differs from stored checksum",
                fixSource ? srcFile : parityFile, errorOffset));
    }
}
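// A minimal sketch of the divisor constraint described in the Javadoc above.
// The helper name and parameters are hypothetical, not from the source: it
// shrinks a preferred power-of-two buffer size to the largest value that
// evenly divides the block size, so a full read never crosses a block boundary.
static int chooseBufferSize(long blockSize, int preferredPowerOfTwo) {
    int size = preferredPowerOfTwo;
    // Halve until the buffer evenly divides the block.
    while (size > 1 && blockSize % size != 0) {
        size >>= 1;
    }
    return size;
}
// Example: blockSize = 6 MB, preferred = 4 MB -> 4 MB does not divide 6 MB,
// so the helper returns 2 MB, which does.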
@Override
public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    String line = value.toString();
    int dot = line.indexOf(".", line.indexOf("GET "));
    int end = line.indexOf(" ", dot);
    String resource = line.substring(dot, end).trim();
    if (resource.endsWith("gif")) {
        context.getCounter("ImageCounter", "gif").increment(1);
    } else if (resource.endsWith("jpg")) {
        context.getCounter("ImageCounter", "jpg").increment(1);
    } else {
        context.getCounter("ImageCounter", "other").increment(1);
    }
}
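// Worked example (illustrative access-log line): for '... "GET /img/logo.gif HTTP/1.0" ...',
// the first '.' after "GET " starts ".gif" and the next space ends it, so resource
// becomes ".gif" and the "gif" counter is incremented. Note the code assumes every
// line contains a dotted resource after "GET "; on a line without one, indexOf
// returns -1 and substring throws.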
@Ignore("convenient trial tool for dev") @Test public void test() throws IOException, InterruptedException { Configuration hconf = HadoopUtil.getCurrentConfiguration(); HiveToBaseCuboidMapper mapper = new HiveToBaseCuboidMapper(); Context context = MockupMapContext.create(hconf, metadataUrl, cubeName, null); mapper.doSetup(context); Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath)); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf); Text value = new Text(); while (reader.next(key, value)) { mapper.map(key, value, context); } reader.close(); }
@Ignore("convenient trial tool for dev") @Test public void test() throws IOException, InterruptedException { Configuration hconf = new Configuration(); BaseCuboidMapper mapper = new BaseCuboidMapper(); Context context = MockupMapContext.create(hconf, metadataUrl, cubeName, null); mapper.setup(context); Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath)); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf); Text value = new Text(); while (reader.next(key, value)) { mapper.map(key, value, context); } reader.close(); }
public void map(Long key, Row row, Context context)
        throws IOException, InterruptedException {
    String value = row.getString("line");
    logger.debug("read {}:{}={} from {}",
            new Object[] {key, "line", value, context.getInputSplit()});
    StringTokenizer itr = new StringTokenizer(value);
    while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
    }
}
public void reduce(Text word, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable val : values) {
        sum += val.get();
    }
    keys.put("word", ByteBufferUtil.bytes(word.toString()));
    context.write(keys, getBindVariables(word, sum));
}
@Override
public void map(BytesWritable key, Tuple value, Context context)
        throws IOException, InterruptedException {
    System.out.println("key = " + key);
    System.out.println("value = " + value);
    context.write(key, value);
}
@Override
public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    for (Text val : values) {
        String str = val.toString();
        // Delete the edge in both directions: key -> str and str -> key.
        Delete delRow1 = new Delete(Bytes.toBytes(key.toString()));
        delRow1.deleteColumn("weight".getBytes(), str.getBytes());
        //System.out.println("RemoveEdgesReducer: key = " + key.toString() + " sec_key = " + str);
        Delete delRow2 = new Delete(Bytes.toBytes(str));
        // Use the Text's String form: Text.getBytes() returns the backing array,
        // which may be longer than the valid data.
        delRow2.deleteColumn("weight".getBytes(), Bytes.toBytes(key.toString()));
        //System.out.println("RemoveEdgesReducer: key = " + str + " sec_key = " + key.toString());
        context.write(null, delRow1);
        context.write(null, delRow2);
    }
}
public void reduce(Text key, Iterable<DoubleWritable> values, Context context)
        throws IOException, InterruptedException {
    // int i = 0;
    // for (IntWritable val : values) {
    //     i += val.get();
    // }
    //Put put = new Put(Bytes.toBytes("some"));
    //put.add("cooccurrence".getBytes(), "sdff".getBytes(), Bytes.toBytes(1));
    //context.write(null, put);

    // Sum the partial cosine-similarity contributions for this word pair.
    double cos_sim = 0;
    for (DoubleWritable d : values) {
        cos_sim += d.get();
    }
    String line = key.toString();
    String[] str = line.split(",");
    String w_u = str[0];
    String w_v = str[1];
    //if (cos_sim > theta) {
    // Write the edge in both directions so the graph table stays symmetric.
    Put put1 = new Put(Bytes.toBytes(w_u));
    put1.add("weight".getBytes(), w_v.getBytes(), Bytes.toBytes(cos_sim));
    Put put2 = new Put(Bytes.toBytes(w_v));
    put2.add("weight".getBytes(), w_u.getBytes(), Bytes.toBytes(cos_sim));
    context.write(null, put1);
    context.write(null, put2);
    //}
}
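// Worked example (illustrative values): for key "cat,dog" with partial
// contributions 0.2 and 0.3, cos_sim = 0.5, and the reducer writes
// row "cat" {weight:dog = 0.5} and row "dog" {weight:cat = 0.5}.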
public void map(ImmutableBytesWritable row, Result value, Context context)
        throws InterruptedException, IOException {
    String w_seed = new String(row.get());
    //System.out.println("PosNegScoreCalculationMapper: w_seed =" + w_seed);
    for (KeyValue kv : value.raw()) {
        String word = new String(kv.getQualifier());
        double score = Bytes.toDouble(kv.getValue());
        context.write(new Text(word), new Text(w_seed + "," + String.valueOf(score)));
        //System.out.println("To Reducer goes: key = " + word + " value = w_seed, score which is " + w_seed + "," + score);
    }
}