@Override
public void map(ImmutableBytesWritable key, Result value,
    OutputCollector<NullWritable, NullWritable> output, Reporter reporter)
    throws IOException {
  for (Cell cell : value.listCells()) {
    reporter.getCounter(TestTableInputFormat.class.getName() + ":row",
        Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()))
        .increment(1L);
    reporter.getCounter(TestTableInputFormat.class.getName() + ":family",
        Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()))
        .increment(1L);
    reporter.getCounter(TestTableInputFormat.class.getName() + ":value",
        Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()))
        .increment(1L);
  }
}
@Override
public void map(LongWritable key, SqoopRecord val, Context context)
    throws IOException, InterruptedException {
  try {
    // Loading of LOBs was delayed until we have a Context.
    val.loadLargeObjects(lobLoader);
  } catch (SQLException sqlE) {
    throw new IOException(sqlE);
  }
  Map<String, Object> fields = val.getFieldMap();
  List<Put> putList = putTransformer.getPutCommand(fields);
  for (Put put : putList) {
    context.write(new ImmutableBytesWritable(put.getRow()), put);
  }
}
public int getPartition(ImmutableBytesWritable key, V2 value, int numPartitions) {
  byte[] region = null;
  // If there is only one region, return 0.
  if (this.startKeys.length == 1) {
    return 0;
  }
  try {
    // Not sure if this is cached after a split so we could have problems
    // here if a region splits while mapping.
    region = locator.getRegionLocation(key.get()).getRegionInfo().getStartKey();
  } catch (IOException e) {
    LOG.error(e);
  }
  for (int i = 0; i < this.startKeys.length; i++) {
    if (Bytes.compareTo(region, this.startKeys[i]) == 0) {
      if (i >= numPartitions - 1) {
        // Cover the case where we have fewer reduces than regions.
        return (Integer.toString(i).hashCode() & Integer.MAX_VALUE) % numPartitions;
      }
      return i;
    }
  }
  // If the above fails to find a matching start key, we need to return something.
  return 0;
}
/**
 * @param args The command line arguments.
 * @return the JobConf
 * @throws IOException
 */
public JobConf createSubmittableJob(String[] args) throws IOException {
  JobConf c = new JobConf(getConf(), getClass());
  c.setJobName(NAME);
  // Columns are space delimited.
  StringBuilder sb = new StringBuilder();
  final int columnoffset = 2;
  for (int i = columnoffset; i < args.length; i++) {
    if (i > columnoffset) {
      sb.append(" ");
    }
    sb.append(args[i]);
  }
  // Second argument is the table name.
  TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
      RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, c);
  c.setNumReduceTasks(0);
  // First arg is the output directory.
  FileOutputFormat.setOutputPath(c, new Path(args[0]));
  return c;
}
/**
 * Pass the key, and reversed value to reduce.
 *
 * @param key The row key.
 * @param value The single-column row value to reverse.
 * @param context The task context.
 * @throws IOException
 * @throws InterruptedException
 */
public void map(ImmutableBytesWritable key, Result value, Context context)
    throws IOException, InterruptedException {
  if (value.size() != 1) {
    throw new IOException("There should only be one input column");
  }
  Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf = value.getMap();
  if (!cf.containsKey(INPUT_FAMILY)) {
    throw new IOException("Wrong input columns. Missing: '"
        + Bytes.toString(INPUT_FAMILY) + "'.");
  }

  // Get the original value and reverse it.
  String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, null));
  StringBuilder newValue = new StringBuilder(originalValue);
  newValue.reverse();

  // Now set the value to be collected.
  Put outval = new Put(key.get());
  outval.add(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
  context.write(key, outval);
}
/**
 * Use this before submitting a TableReduce job. It will
 * appropriately set up the JobConf.
 *
 * @param table The output table.
 * @param reducer The reducer class to use.
 * @param job The current job configuration to adjust.
 * @param partitioner Partitioner to use. Pass <code>null</code> to use
 *   default partitioner.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *   job classes via the distributed cache (tmpjars).
 * @throws IOException When determining the region count fails.
 */
public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
    boolean addDependencyJars) throws IOException {
  job.setOutputFormat(TableOutputFormat.class);
  job.setReducerClass(reducer);
  job.set(TableOutputFormat.OUTPUT_TABLE, table);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Put.class);
  job.setStrings("io.serializations", job.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName());
  if (partitioner == HRegionPartitioner.class) {
    job.setPartitionerClass(HRegionPartitioner.class);
    int regions = MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job),
        TableName.valueOf(table));
    if (job.getNumReduceTasks() > regions) {
      job.setNumReduceTasks(regions);
    }
  } else if (partitioner != null) {
    job.setPartitionerClass(partitioner);
  }
  if (addDependencyJars) {
    addDependencyJars(job);
  }
  initCredentials(job);
}
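A minimal usage sketch (not from the source) of wiring up a mapred reduce job with initTableReduceJob; the table name "demo" is an assumption, and the stock IdentityTableReduce stands in for a real reducer.

import java.io.IOException;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapred.HRegionPartitioner;
import org.apache.hadoop.hbase.mapred.IdentityTableReduce;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.mapred.JobConf;

public class ReduceJobSetupSketch {
  public static JobConf createJob() throws IOException {
    JobConf job = new JobConf(HBaseConfiguration.create());
    // Route each output Put to the region owning its key; the call above
    // also caps the number of reduce tasks at the table's region count.
    TableMapReduceUtil.initTableReduceJob("demo", IdentityTableReduce.class, job,
        HRegionPartitioner.class, true);
    return job;
  }
}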
/**
 * Write random values to the writer assuming a table created using
 * {@link #FAMILIES} as column family descriptors.
 */
private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, KeyValue> writer,
    TaskAttemptContext context, Set<byte[]> families, int numRows)
    throws IOException, InterruptedException {
  byte[] keyBytes = new byte[Bytes.SIZEOF_INT];
  int valLength = 10;
  byte[] valBytes = new byte[valLength];

  int taskId = context.getTaskAttemptID().getTaskID().getId();
  assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

  final byte[] qualifier = Bytes.toBytes("data");
  Random random = new Random();
  for (int i = 0; i < numRows; i++) {
    Bytes.putInt(keyBytes, 0, i);
    random.nextBytes(valBytes);
    ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

    for (byte[] family : families) {
      KeyValue kv = new KeyValue(keyBytes, family, qualifier, valBytes);
      writer.write(key, kv);
    }
  }
}
/**
 * Read the next key/hash pair.
 * Returns true if such a pair exists and false when at the end of the data.
 */
public boolean next() throws IOException {
  if (cachedNext) {
    cachedNext = false;
    return true;
  }
  key = new ImmutableBytesWritable();
  hash = new ImmutableBytesWritable();
  while (true) {
    boolean hasNext = mapFileReader.next(key, hash);
    if (hasNext) {
      return true;
    }
    hashFileIndex++;
    if (hashFileIndex < TableHash.this.numHashFiles) {
      mapFileReader.close();
      openHashFile();
    } else {
      key = null;
      hash = null;
      return false;
    }
  }
}
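A hedged sketch of how a caller could drain such a reader. The getCurrentKey()/getCurrentHash() accessor names are assumptions standing in for whatever the enclosing reader exposes for the key and hash fields set by next(), so the sketch declares its own small interface rather than naming the real class.

import java.io.IOException;

import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;

public class HashReaderDrainSketch {
  /** Assumed shape of the reader; not taken from the source. */
  interface KeyHashReader {
    boolean next() throws IOException;
    ImmutableBytesWritable getCurrentKey();
    ImmutableBytesWritable getCurrentHash();
  }

  static void drain(KeyHashReader reader) throws IOException {
    // next() transparently rolls over to the following hash file, so one
    // loop covers all numHashFiles files.
    while (reader.next()) {
      System.out.println(Bytes.toStringBinary(reader.getCurrentKey().get())
          + " -> " + Bytes.toStringBinary(reader.getCurrentHash().get()));
    }
  }
}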
@Override
protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
    throws IOException, InterruptedException {
  for (java.util.Map.Entry<byte[], ImmutableBytesWritable> index : indexes.entrySet()) {
    byte[] qualifier = index.getKey();
    ImmutableBytesWritable tableName = index.getValue();
    byte[] value = result.getValue(family, qualifier);
    if (value != null) {
      // original: row 123 attribute:phone 555-1212
      // index:    row 555-1212 INDEX:ROW 123
      Put put = new Put(value);
      put.add(INDEX_COLUMN, INDEX_QUALIFIER, rowKey.get());
      context.write(tableName, put);
    }
  }
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration configuration = context.getConfiguration();
  String tableName = configuration.get("index.tablename");
  String[] fields = configuration.getStrings("index.fields");
  String familyName = configuration.get("index.familyname");
  family = Bytes.toBytes(familyName);
  indexes = new TreeMap<byte[], ImmutableBytesWritable>(Bytes.BYTES_COMPARATOR);
  for (String field : fields) {
    // If the table is "people" and the field to index is "email", then the
    // index table will be called "people-email".
    indexes.put(Bytes.toBytes(field),
        new ImmutableBytesWritable(Bytes.toBytes(tableName + "-" + field)));
  }
}
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context)
    throws IOException, InterruptedException {
  BytesWritable bwKey = new BytesWritable(key.get());
  BytesWritable bwVal = new BytesWritable();
  for (Cell kv : value.listCells()) {
    if (Bytes.compareTo(TEST_QUALIFIER, 0, TEST_QUALIFIER.length,
        kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength()) == 0) {
      context.write(bwKey, EMPTY);
    } else {
      bwVal.set(kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength());
      context.write(bwVal, bwKey);
    }
  }
}
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context)
    throws IOException, InterruptedException {
  try {
    // First, finish any hash batches that end before the scanned row.
    while (nextSourceKey != null && key.compareTo(nextSourceKey) >= 0) {
      moveToNextBatch(context);
    }

    // Next, add the scanned row (as long as we've reached the first batch).
    if (targetHasher.isBatchStarted()) {
      targetHasher.hashResult(value);
    }
  } catch (Throwable t) {
    mapperException = t;
    Throwables.propagateIfInstanceOf(t, IOException.class);
    Throwables.propagateIfInstanceOf(t, InterruptedException.class);
    Throwables.propagate(t);
  }
}
/**
 * Write random values to the writer assuming a table created using
 * {@link #FAMILIES} as column family descriptors.
 */
private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer,
    TaskAttemptContext context, Set<byte[]> families, int numRows)
    throws IOException, InterruptedException {
  byte[] keyBytes = new byte[Bytes.SIZEOF_INT];
  int valLength = 10;
  byte[] valBytes = new byte[valLength];

  int taskId = context.getTaskAttemptID().getTaskID().getId();
  assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

  final byte[] qualifier = Bytes.toBytes("data");
  Random random = new Random();
  for (int i = 0; i < numRows; i++) {
    Bytes.putInt(keyBytes, 0, i);
    random.nextBytes(valBytes);
    ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

    for (byte[] family : families) {
      Cell kv = new KeyValue(keyBytes, family, qualifier, valBytes);
      writer.write(key, kv);
    }
  }
}
/**
 * Check if the table has an attached co-processor represented by the name className.
 *
 * @param classNameToMatch - Class name of the co-processor
 * @return true if the table has a co-processor className
 */
public boolean hasCoprocessor(String classNameToMatch) {
  Matcher keyMatcher;
  for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> e : this.values.entrySet()) {
    keyMatcher = HConstants.CP_HTD_ATTR_KEY_PATTERN.matcher(Bytes.toString(e.getKey().get()));
    if (!keyMatcher.matches()) {
      continue;
    }
    String className = getCoprocessorClassNameFromSpecStr(Bytes.toString(e.getValue().get()));
    if (className == null) {
      continue;
    }
    if (className.equals(classNameToMatch.trim())) {
      return true;
    }
  }
  return false;
}
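A small, hypothetical usage sketch of the lookup; the table name and coprocessor class names are illustrative assumptions, not taken from the source.

import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;

public class CoprocessorCheckSketch {
  public static void main(String[] args) throws Exception {
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("demo"));
    htd.addCoprocessor("org.example.DemoObserver"); // assumed class name
    // hasCoprocessor() walks the descriptor's values map as shown above.
    System.out.println(htd.hasCoprocessor("org.example.DemoObserver")); // true
    System.out.println(htd.hasCoprocessor("org.example.Missing"));      // false
  }
}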
@Override
protected void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
    int numRegions, int expectedNumSplits) throws Exception {
  setupCluster();
  TableName tableName = TableName.valueOf("testWithMockedMapReduce");
  try {
    createTableAndSnapshot(util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);

    JobConf job = new JobConf(util.getConfiguration());
    Path tmpTableDir = util.getRandomDir();

    TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS,
        TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
        NullWritable.class, job, false, tmpTableDir);

    // mapred doesn't support start and end keys, so verify over the full range.
    verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());
  } finally {
    util.getHBaseAdmin().deleteSnapshot(snapshotName);
    util.deleteTable(tableName);
    tearDownCluster();
  }
}
/**
 * @param row The current table row key.
 * @param value The columns.
 * @param context The current context.
 * @throws IOException When something is broken with the data.
 */
@Override
public void map(ImmutableBytesWritable row, Result value, Context context)
    throws IOException {
  try {
    if (LOG.isTraceEnabled()) {
      LOG.trace("Considering the row: "
          + Bytes.toString(row.get(), row.getOffset(), row.getLength()));
    }
    if (filter == null
        || !filter.filterRowKey(row.get(), row.getOffset(), row.getLength())) {
      for (Cell kv : value.rawCells()) {
        kv = filterKv(filter, kv);
        // Skip if we filtered it out.
        if (kv == null) {
          continue;
        }
        // TODO get rid of ensureKeyValue
        context.write(row, KeyValueUtil.ensureKeyValueTypeForMR(convertKv(kv, cfRenameMap)));
      }
    }
  } catch (InterruptedException e) {
    e.printStackTrace();
  }
}
@Override
public void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
    int numRegions, int expectedNumSplits) throws Exception {
  setupCluster();
  TableName tableName = TableName.valueOf("testWithMockedMapReduce");
  try {
    createTableAndSnapshot(util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);

    Job job = new Job(util.getConfiguration());
    Path tmpTableDir = util.getRandomDir();
    Scan scan = new Scan(getStartRow(), getEndRow()); // limit the scan

    TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, scan,
        TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
        NullWritable.class, job, false, tmpTableDir);

    verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());
  } finally {
    util.getHBaseAdmin().deleteSnapshot(snapshotName);
    util.deleteTable(tableName);
    tearDownCluster();
  }
}
protected static void verifyRowFromMap(ImmutableBytesWritable key, Result result)
    throws IOException {
  byte[] row = key.get();
  CellScanner scanner = result.cellScanner();
  while (scanner.advance()) {
    Cell cell = scanner.current();

    // Assert that all Cells in the Result have the same key.
    Assert.assertEquals(0, Bytes.compareTo(row, 0, row.length,
        cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
  }

  for (int j = 0; j < FAMILIES.length; j++) {
    byte[] actual = result.getValue(FAMILIES[j], null);
    Assert.assertArrayEquals("Row in snapshot does not match, expected: "
        + Bytes.toString(row) + ", actual: " + Bytes.toString(actual), row, actual);
  }
}
@Test @SuppressWarnings("deprecation") public void shoudBeValidMapReduceWithPartitionerEvaluation() throws IOException { Configuration cfg = UTIL.getConfiguration(); JobConf jobConf = new JobConf(cfg); try { jobConf.setJobName("process row task"); jobConf.setNumReduceTasks(2); TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY), ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class, jobConf); TableMapReduceUtil.initTableReduceJob(TABLE_NAME, ClassificatorRowReduce.class, jobConf, HRegionPartitioner.class); RunningJob job = JobClient.runJob(jobConf); assertTrue(job.isSuccessful()); } finally { if (jobConf != null) FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir"))); } }
/**
 * Add coprocessor to values Map.
 *
 * @param specStr The Coprocessor specification all in one String, formatted so it matches
 *   {@link HConstants#CP_HTD_ATTR_VALUE_PATTERN}
 * @return Returns <code>this</code>
 */
private HTableDescriptor addCoprocessorToMap(final String specStr) {
  if (specStr == null) {
    return this;
  }
  // Generate a coprocessor key.
  int maxCoprocessorNumber = 0;
  Matcher keyMatcher;
  for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> e : this.values.entrySet()) {
    keyMatcher = HConstants.CP_HTD_ATTR_KEY_PATTERN.matcher(Bytes.toString(e.getKey().get()));
    if (!keyMatcher.matches()) {
      continue;
    }
    maxCoprocessorNumber = Math.max(Integer.parseInt(keyMatcher.group(1)), maxCoprocessorNumber);
  }
  maxCoprocessorNumber++;
  String key = "coprocessor$" + Integer.toString(maxCoprocessorNumber);
  this.values.put(new ImmutableBytesWritable(Bytes.toBytes(key)),
      new ImmutableBytesWritable(Bytes.toBytes(specStr)));
  return this;
}
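To make the stored layout concrete, a sketch of the documented pipe-delimited "path|class|priority|args" spec-string form that ends up under a generated coprocessor$N key; the jar path, class name, and arguments here are illustrative assumptions.

public class CoprocessorSpecSketch {
  public static void main(String[] args) {
    // A spec string of the documented pipe-delimited form; the method above
    // stores it verbatim under the next free "coprocessor$N" key.
    String specStr = "hdfs:///cp/demo.jar|org.example.DemoObserver|1001|arg1=1,arg2=2";
    System.out.println(specStr);
  }
}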
/**
 * Create table data and run tests on specified htable using the
 * o.a.h.hbase.mapred API.
 *
 * @param table
 * @throws IOException
 */
static void runTestMapred(Table table) throws IOException {
  org.apache.hadoop.hbase.mapred.TableRecordReader trr =
      new org.apache.hadoop.hbase.mapred.TableRecordReader();
  trr.setStartRow("aaa".getBytes());
  trr.setEndRow("zzz".getBytes());
  trr.setHTable(table);
  trr.setInputColumns(columns);

  trr.init();
  Result r = new Result();
  ImmutableBytesWritable key = new ImmutableBytesWritable();

  boolean more = trr.next(key, r);
  assertTrue(more);
  checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());

  more = trr.next(key, r);
  assertTrue(more);
  checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());

  // no more data
  more = trr.next(key, r);
  assertFalse(more);
}
@Test @SuppressWarnings({ "deprecation" }) public void shouldCreateAndRunSubmittableJob() throws Exception { RowCounter rCounter = new RowCounter(); rCounter.setConf(HBaseConfiguration.create()); String[] args = new String[] { "\temp", "tableA", "column1", "column2", "column3" }; JobConf jobConfig = rCounter.createSubmittableJob(args); assertNotNull(jobConfig); assertEquals(0, jobConfig.getNumReduceTasks()); assertEquals("rowcounter", jobConfig.getJobName()); assertEquals(jobConfig.getMapOutputValueClass(), Result.class); assertEquals(jobConfig.getMapperClass(), RowCounterMapper.class); assertEquals(jobConfig.get(TableInputFormat.COLUMN_LIST), Joiner.on(' ') .join("column1", "column2", "column3")); assertEquals(jobConfig.getMapOutputKeyClass(), ImmutableBytesWritable.class); }
@Test @SuppressWarnings({ "deprecation", "unchecked" }) public void shouldCollectPredefinedTimes() throws IOException { int recordNumber = 999; Result resultMock = mock(Result.class); IdentityTableMap identityTableMap = null; try { Reporter reporterMock = mock(Reporter.class); identityTableMap = new IdentityTableMap(); ImmutableBytesWritable bytesWritableMock = mock(ImmutableBytesWritable.class); OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock = mock(OutputCollector.class); for (int i = 0; i < recordNumber; i++) identityTableMap.map(bytesWritableMock, resultMock, outputCollectorMock, reporterMock); verify(outputCollectorMock, times(recordNumber)).collect( Mockito.any(ImmutableBytesWritable.class), Mockito.any(Result.class)); } finally { if (identityTableMap != null) identityTableMap.close(); } }
private void verifyWithMockedMapReduce(Job job, int numRegions, int expectedNumSplits,
    byte[] startRow, byte[] stopRow) throws IOException, InterruptedException {
  TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
  List<InputSplit> splits = tsif.getSplits(job);

  Assert.assertEquals(expectedNumSplits, splits.size());

  HBaseTestingUtility.SeenRowTracker rowTracker =
      new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);

  for (int i = 0; i < splits.size(); i++) {
    // Validate input split.
    InputSplit split = splits.get(i);
    Assert.assertTrue(split instanceof TableSnapshotRegionSplit);

    // Validate record reader.
    TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
    when(taskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
    RecordReader<ImmutableBytesWritable, Result> rr =
        tsif.createRecordReader(split, taskAttemptContext);
    rr.initialize(split, taskAttemptContext);

    // Validate we can read all the data back.
    while (rr.nextKeyValue()) {
      byte[] row = rr.getCurrentKey().get();
      verifyRowFromMap(rr.getCurrentKey(), rr.getCurrentValue());
      rowTracker.addRow(row);
    }

    rr.close();
  }

  // Validate all rows are seen.
  rowTracker.validate();
}
@Override
public void map(ImmutableBytesWritable key, Result value,
    OutputCollector<ImmutableBytesWritable, NullWritable> collector, Reporter reporter)
    throws IOException {
  verifyRowFromMap(key, value);
  collector.collect(key, NullWritable.get());
}
/**
 * Create table data and run tests on specified htable using the
 * o.a.h.hbase.mapreduce API.
 *
 * @param table
 * @throws IOException
 * @throws InterruptedException
 */
static void runTestMapreduce(Table table) throws IOException, InterruptedException {
  org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl trr =
      new org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl();
  Scan s = new Scan();
  s.setStartRow("aaa".getBytes());
  s.setStopRow("zzz".getBytes());
  s.addFamily(FAMILY);
  trr.setScan(s);
  trr.setHTable(table);

  trr.initialize(null, null);

  Result r = new Result();
  ImmutableBytesWritable key = new ImmutableBytesWritable();

  boolean more = trr.nextKeyValue();
  assertTrue(more);
  key = trr.getCurrentKey();
  r = trr.getCurrentValue();
  checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());

  more = trr.nextKeyValue();
  assertTrue(more);
  key = trr.getCurrentKey();
  r = trr.getCurrentValue();
  checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());

  // no more data
  more = trr.nextKeyValue();
  assertFalse(more);
}
/**
 * Create a key by concatenating multiple column values.
 * Override this function in order to produce different types of keys.
 *
 * @param vals The column values to concatenate.
 * @return key generated by concatenating multiple column values
 */
protected ImmutableBytesWritable createGroupKey(byte[][] vals) {
  if (vals == null) {
    return null;
  }
  StringBuilder sb = new StringBuilder();
  for (int i = 0; i < vals.length; i++) {
    if (i > 0) {
      sb.append(" ");
    }
    sb.append(Bytes.toString(vals[i]));
  }
  return new ImmutableBytesWritable(Bytes.toBytes(sb.toString()));
}
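A standalone illustration (the values are assumptions) of the key format this produces: the column values rendered as strings and joined with single spaces.

import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;

public class GroupKeySketch {
  public static void main(String[] args) {
    byte[][] vals = { Bytes.toBytes("smith"), Bytes.toBytes("ny") };
    // Mirrors the concatenation above: values joined by a single space.
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < vals.length; i++) {
      if (i > 0) {
        sb.append(" ");
      }
      sb.append(Bytes.toString(vals[i]));
    }
    ImmutableBytesWritable key = new ImmutableBytesWritable(Bytes.toBytes(sb.toString()));
    System.out.println(Bytes.toString(key.get())); // prints: smith ny
  }
}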
public void map(ImmutableBytesWritable row, Result values,
    OutputCollector<ImmutableBytesWritable, Result> output, Reporter reporter)
    throws IOException {
  // Count every row containing data, whether it's in qualifiers or values.
  reporter.incrCounter(Counters.ROWS, 1);
}
@Override
public boolean next(ImmutableBytesWritable key, Result value) throws IOException {
  if (!delegate.nextKeyValue()) {
    return false;
  }
  ImmutableBytesWritable currentKey = delegate.getCurrentKey();
  key.set(currentKey.get(), currentKey.getOffset(), currentKey.getLength());
  value.copyFrom(delegate.getCurrentValue());
  return true;
}
/**
 * Test SampleUploader from examples.
 */
@SuppressWarnings("unchecked")
@Test
public void testSampleUploader() throws Exception {
  Configuration configuration = new Configuration();
  Uploader uploader = new Uploader();
  Mapper<LongWritable, Text, ImmutableBytesWritable, Put>.Context ctx = mock(Context.class);
  doAnswer(new Answer<Void>() {
    @Override
    public Void answer(InvocationOnMock invocation) throws Throwable {
      ImmutableBytesWritable writer = (ImmutableBytesWritable) invocation.getArguments()[0];
      Put put = (Put) invocation.getArguments()[1];
      assertEquals("row", Bytes.toString(writer.get()));
      assertEquals("row", Bytes.toString(put.getRow()));
      return null;
    }
  }).when(ctx).write(any(ImmutableBytesWritable.class), any(Put.class));
  uploader.map(null, new Text("row,family,qualifier,value"), ctx);

  Path dir = util.getDataTestDirOnTestFS("testSampleUploader");

  String[] args = { dir.toString(), "simpleTable" };
  Job job = SampleUploader.configureJob(configuration, args);
  assertEquals(SequenceFileInputFormat.class, job.getInputFormatClass());
}
/**
 * @param key The key.
 * @return The value.
 */
public byte[] getValue(byte[] key) {
  ImmutableBytesWritable ibw = values.get(new ImmutableBytesWritable(key));
  if (ibw == null) {
    return null;
  }
  return ibw.get();
}
/**
 * Gets the partition number for a given key (hence record) given the total
 * number of partitions, i.e. the number of reduce tasks for the job.
 *
 * <p>Typically a hash function on all or a subset of the key.</p>
 *
 * @param key The key to be partitioned.
 * @param value The entry value.
 * @param numPartitions The total number of partitions.
 * @return The partition number for the <code>key</code>.
 * @see org.apache.hadoop.mapreduce.Partitioner#getPartition(
 *   java.lang.Object, java.lang.Object, int)
 */
@Override
public int getPartition(ImmutableBytesWritable key, VALUE value, int numPartitions) {
  byte[] region = null;
  // If there is only one region, return 0.
  if (this.startKeys.length == 1) {
    return 0;
  }
  try {
    // Not sure if this is cached after a split so we could have problems
    // here if a region splits while mapping.
    region = this.locator.getRegionLocation(key.get()).getRegionInfo().getStartKey();
  } catch (IOException e) {
    LOG.error(e);
  }
  for (int i = 0; i < this.startKeys.length; i++) {
    if (Bytes.compareTo(region, this.startKeys[i]) == 0) {
      if (i >= numPartitions - 1) {
        // Cover the case where we have fewer reduces than regions.
        return (Integer.toString(i).hashCode() & Integer.MAX_VALUE) % numPartitions;
      }
      return i;
    }
  }
  // If the above fails to find a matching start key, we need to return something.
  return 0;
}
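A self-contained demonstration (plain Java, no HBase required; the reducer and region counts are assumptions) of the fallback arithmetic above: region indexes at or beyond numPartitions-1 are hashed back into the valid range, so a job with fewer reducers than regions still returns a legal partition number.

public class OverflowPartitionSketch {
  public static void main(String[] args) {
    int numPartitions = 4; // assumed reducer count for illustration
    int numRegions = 10;   // assumed region count for illustration
    for (int i = 0; i < numRegions; i++) {
      int partition = (i >= numPartitions - 1)
          // Same expression as above: a non-negative hash of the index
          // string, folded into [0, numPartitions).
          ? (Integer.toString(i).hashCode() & Integer.MAX_VALUE) % numPartitions
          : i;
      System.out.println("region " + i + " -> partition " + partition);
    }
  }
}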
@Override
public void map(ImmutableBytesWritable key, Result value,
    OutputCollector<ImmutableBytesWritable, ImmutableBytesWritable> outputCollector,
    Reporter reporter) throws IOException {
  makeAssertions(key, value);
  outputCollector.collect(key, key);
}
private void readPartitionFile(FileSystem fs, Configuration conf, Path path)
    throws IOException {
  @SuppressWarnings("deprecation")
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
  ImmutableBytesWritable key = new ImmutableBytesWritable();
  partitions = new ArrayList<ImmutableBytesWritable>();
  while (reader.next(key)) {
    partitions.add(new ImmutableBytesWritable(key.copyBytes()));
  }
  reader.close();

  if (!Ordering.natural().isOrdered(partitions)) {
    throw new IOException("Partitions are not ordered!");
  }
}
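A hedged companion sketch of how a partitions file this method could read might be written; the path and split keys are assumptions. The split points must be appended in ascending order, since the reader above rejects unordered partitions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;

public class WritePartitionsSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("/tmp/partitions_demo"); // assumed path
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path,
        ImmutableBytesWritable.class, NullWritable.class);
    try {
      // Append split points in ascending key order.
      for (String split : new String[] { "ddd", "mmm", "ttt" }) {
        writer.append(new ImmutableBytesWritable(Bytes.toBytes(split)), NullWritable.get());
      }
    } finally {
      writer.close();
    }
  }
}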
@Override
protected void map(LongWritable key, LongWritable value, Context context)
    throws IOException, InterruptedException {
  long chainId = value.get();
  LOG.info("Starting mapper with chainId:" + chainId);

  byte[] chainIdArray = Bytes.toBytes(chainId);
  long currentRow = 0;

  long chainLength = context.getConfiguration().getLong(CHAIN_LENGTH_KEY, CHAIN_LENGTH);
  long nextRow = getNextRow(0, chainLength);

  for (long i = 0; i < chainLength; i++) {
    byte[] rk = Bytes.toBytes(currentRow);

    // Next link in the chain.
    KeyValue linkKv = new KeyValue(rk, CHAIN_FAM, chainIdArray, Bytes.toBytes(nextRow));
    // What link in the chain this is.
    KeyValue sortKv = new KeyValue(rk, SORT_FAM, chainIdArray, Bytes.toBytes(i));
    // Added data so that large stores are created.
    KeyValue dataKv = new KeyValue(rk, DATA_FAM, chainIdArray,
        Bytes.toBytes(RandomStringUtils.randomAlphabetic(50)));

    // Emit the key values.
    context.write(new ImmutableBytesWritable(rk), linkKv);
    context.write(new ImmutableBytesWritable(rk), sortKv);
    context.write(new ImmutableBytesWritable(rk), dataKv);

    // Move to the next row.
    currentRow = nextRow;
    nextRow = getNextRow(i + 1, chainLength);
  }
}
/**
 * Tests an MR Scan initialized from properties set in the Configuration.
 *
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
protected void testScanFromConfiguration(String start, String stop, String last)
    throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase() : "Empty")
      + "To" + (stop != null ? stop.toUpperCase() : "Empty");
  Configuration c = new Configuration(TEST_UTIL.getConfiguration());
  c.set(TableInputFormat.INPUT_TABLE, Bytes.toString(TABLE_NAME));
  c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILY));
  c.set(KEY_STARTROW, start != null ? start : "");
  c.set(KEY_LASTROW, last != null ? last : "");

  if (start != null) {
    c.set(TableInputFormat.SCAN_ROW_START, start);
  }

  if (stop != null) {
    c.set(TableInputFormat.SCAN_ROW_STOP, stop);
  }

  Job job = new Job(c, jobName);
  job.setMapperClass(ScanMapper.class);
  job.setReducerClass(ScanReducer.class);
  job.setMapOutputKeyClass(ImmutableBytesWritable.class);
  job.setMapOutputValueClass(ImmutableBytesWritable.class);
  job.setInputFormatClass(TableInputFormat.class);
  job.setNumReduceTasks(1);
  FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
  TableMapReduceUtil.addDependencyJars(job);
  assertTrue(job.waitForCompletion(true));
}
/**
 * Remove a coprocessor from those set on the table.
 *
 * @param className Class name of the co-processor
 */
public void removeCoprocessor(String className) {
  ImmutableBytesWritable match = null;
  Matcher keyMatcher;
  Matcher valueMatcher;
  for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> e : this.values.entrySet()) {
    keyMatcher = HConstants.CP_HTD_ATTR_KEY_PATTERN.matcher(Bytes.toString(e.getKey().get()));
    if (!keyMatcher.matches()) {
      continue;
    }
    valueMatcher = HConstants.CP_HTD_ATTR_VALUE_PATTERN.matcher(
        Bytes.toString(e.getValue().get()));
    if (!valueMatcher.matches()) {
      continue;
    }
    // Get the class name and compare; it is the second field of the spec.
    String clazz = valueMatcher.group(2).trim();
    // Remove the CP if it is present.
    if (clazz.equals(className.trim())) {
      match = e.getKey();
      break;
    }
  }
  // If we found a match, remove it.
  if (match != null) {
    remove(match);
  }
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  targetBatchSize = context.getConfiguration()
      .getLong(HASH_BATCH_SIZE_CONF_KEY, DEFAULT_BATCH_SIZE);
  hasher = new ResultHasher();

  TableSplit split = (TableSplit) context.getInputSplit();
  hasher.startBatch(new ImmutableBytesWritable(split.getStartRow()));
}
public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
    String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
    int expectedNumSplits, boolean shutdownCluster) throws Exception {

  // Create the table and snapshot.
  createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);

  if (shutdownCluster) {
    util.shutdownMiniHBaseCluster();
  }

  try {
    // Create the job.
    Job job = new Job(util.getConfiguration());
    Scan scan = new Scan(startRow, endRow); // limit the scan

    job.setJarByClass(util.getClass());
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
        TestTableSnapshotInputFormat.class);

    TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, scan,
        TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
        NullWritable.class, job, true, tableDir);

    job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(NullOutputFormat.class);

    Assert.assertTrue(job.waitForCompletion(true));
  } finally {
    if (!shutdownCluster) {
      util.getHBaseAdmin().deleteSnapshot(snapshotName);
      util.deleteTable(tableName);
    }
  }
}
@Test @SuppressWarnings({ "deprecation", "unchecked" }) public void shouldNotCallCollectonSinceFindUniqueKeyValueMoreThanOnes() throws Exception { GroupingTableMap gTableMap = null; try { Result result = mock(Result.class); Reporter reporter = mock(Reporter.class); gTableMap = new GroupingTableMap(); Configuration cfg = new Configuration(); cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB"); JobConf jobConf = new JobConf(cfg); gTableMap.configure(jobConf); byte[] row = {}; List<Cell> keyValues = ImmutableList.<Cell>of( new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("1111")), new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("2222")), new KeyValue(row, "familyB".getBytes(), "qualifierB".getBytes(), Bytes.toBytes("3333"))); when(result.listCells()).thenReturn(keyValues); OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock = mock(OutputCollector.class); gTableMap.map(null, result, outputCollectorMock, reporter); verify(result).listCells(); verifyZeroInteractions(outputCollectorMock); } finally { if (gTableMap != null) gTableMap.close(); } }