/**
 * Collapses several range scans into one scan that is restricted by a
 * {@link MultiRowRangeFilter}.
 *
 * <p>Every input scan contributes exactly one row range built from its start row
 * (flagged inclusive) and its stop row (flagged exclusive). No other property of the
 * input scans is copied onto the result.
 *
 * @param rangeScans scans whose start/stop rows describe the ranges to combine
 * @return a newly created scan carrying the multi-row-range filter
 * @throws RuntimeException wrapping the {@link IOException} raised while building the filter
 */
public static Scan mergeRangeScans(List<Scan> rangeScans) {
  final List<RowRange> rowRanges = Lists.newArrayList();
  for (final Scan current : rangeScans) {
    rowRanges.add(new RowRange(current.getStartRow(), true, current.getStopRow(), false));
  }

  try {
    final Scan result = new Scan();
    result.setFilter(new MultiRowRangeFilter(rowRanges));
    return result;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
/**
 * Checks that {@code HBaseUtils.mergeRangeScans} turns two range scans into a single
 * scan whose filter is a {@link MultiRowRangeFilter} holding both ranges with their
 * original start and stop rows.
 */
@Test
public void testMergePrefixScans() throws IOException {
  List<Scan> scans = Lists.newArrayList();

  byte[] startRow1 = Bytes.toBytes("hello");
  byte[] stopRow1 = Bytes.toBytes("hellp");
  Scan scan1 = new Scan(startRow1, stopRow1);
  scans.add(scan1);

  byte[] startRow2 = Bytes.toBytes("world");
  byte[] stopRow2 = Bytes.toBytes("worle");
  Scan scan2 = new Scan(startRow2, stopRow2);
  scans.add(scan2);

  Scan merged = HBaseUtils.mergeRangeScans(scans);

  assertEquals(MultiRowRangeFilter.class, merged.getFilter().getClass());
  MultiRowRangeFilter mergedFilter = (MultiRowRangeFilter) merged.getFilter();
  List<RowRange> ranges = mergedFilter.getRowRanges();

  assertEquals(2, ranges.size());
  // Compare array contents: byte[].equals() is reference identity, which would make
  // the test depend on the filter returning the very same array instances.
  assertTrue(Bytes.equals(ranges.get(0).getStartRow(), startRow1));
  assertTrue(Bytes.equals(ranges.get(0).getStopRow(), stopRow1));
  assertTrue(Bytes.equals(ranges.get(1).getStartRow(), startRow2));
  assertTrue(Bytes.equals(ranges.get(1).getStopRow(), stopRow2));
}
/**
 * Scans the table named by the first argument using a {@link MultiRowRangeFilter}
 * over three fixed row-key ranges ("001"-"002", "003"-"004", "005"-"006") and prints
 * the number of rows returned.
 *
 * @param args command-line arguments; {@code args[0]} is the table name
 * @throws Exception if no table name is supplied or the scan fails
 */
public static void main(String[] args) throws Exception {
  if (args.length < 1) {
    throw new Exception("Table name not specified.");
  }
  Configuration conf = HBaseConfiguration.create();
  HTable table = new HTable(conf, args[0]);
  try {
    Scan scan = new Scan();
    List<RowKeyRange> ranges = new ArrayList<RowKeyRange>();
    ranges.add(new RowKeyRange(Bytes.toBytes("001"), Bytes.toBytes("002")));
    ranges.add(new RowKeyRange(Bytes.toBytes("003"), Bytes.toBytes("004")));
    ranges.add(new RowKeyRange(Bytes.toBytes("005"), Bytes.toBytes("006")));
    Filter filter = new MultiRowRangeFilter(ranges);
    scan.setFilter(filter);

    int count = 0;
    ResultScanner scanner = table.getScanner(scan);
    try {
      Result r = scanner.next();
      while (r != null) {
        count++;
        r = scanner.next();
      }
    } finally {
      // Release the scanner even when next() fails part-way through.
      scanner.close();
    }
    System.out
        .println("++ Scanning finished with count : " + count + " ++");
  } finally {
    // The table handle was previously never closed.
    table.close();
  }
}
/**
 * Converts the value of a row-range command-line switch into row ranges.
 *
 * <p>The text following {@code rangeSwitch} is split on {@code ';'} into individual
 * range specifications. Each specification must contain exactly one {@code ','}
 * separating the start key from the end key; either side may be empty. Keys are decoded
 * with {@code Bytes.toBytesBinary}. Each resulting range is created with its start flagged
 * inclusive and its end flagged exclusive.
 *
 * @param arg the full command-line argument, beginning with {@code rangeSwitch}
 * @param rangeSwitch the switch prefix to strip from {@code arg}
 * @return the parsed ranges, in the order they appear in {@code arg}
 * @throws IllegalArgumentException if a specification is malformed; usage is printed first
 */
private static List<MultiRowRangeFilter.RowRange> parseRowRangeParameter(
    String arg, String rangeSwitch) {
  final String rangeSpecs = arg.substring(rangeSwitch.length());
  final List<MultiRowRangeFilter.RowRange> parsed = new ArrayList<>();

  for (final String spec : rangeSpecs.split(";")) {
    final String[] bounds = spec.split(",", 2);
    // Exactly one comma is allowed: two parts, and no further comma in the second part.
    final boolean wellFormed = bounds.length == 2 && !bounds[1].contains(",");
    if (!wellFormed) {
      printUsage("Please specify range in such format as \"--range=a,b\" "
          + "or, with only one boundary, \"--range=,b\" or \"--range=a,\"");
      throw new IllegalArgumentException("Wrong range specification: " + spec);
    }

    final byte[] startKey = Bytes.toBytesBinary(bounds[0]);
    final byte[] endKey = Bytes.toBytesBinary(bounds[1]);
    parsed.add(new MultiRowRangeFilter.RowRange(startKey, true, endKey, false));
  }
  return parsed;
}
/** * Sets filter {@link FilterBase} to the {@link Scan} instance. * If provided rowRangeList contains more than one element, * method sets filter which is instance of {@link MultiRowRangeFilter}. * Otherwise, method sets filter which is instance of {@link FirstKeyOnlyFilter}. * If rowRangeList contains exactly one element, startRow and stopRow are set to the scan. * @param scan * @param rowRangeList */ private static void setScanFilter(Scan scan, List<MultiRowRangeFilter.RowRange> rowRangeList) { final int size = rowRangeList == null ? 0 : rowRangeList.size(); if (size <= 1) { scan.setFilter(new FirstKeyOnlyFilter()); } if (size == 1) { MultiRowRangeFilter.RowRange range = rowRangeList.get(0); scan.setStartRow(range.getStartRow()); //inclusive scan.setStopRow(range.getStopRow()); //exclusive } else if (size > 1) { scan.setFilter(new MultiRowRangeFilter(rowRangeList)); } }
protected Scan getMultiScanner( final FilterList filterList, final Integer limit, final double[] maxResolutionSubsamplingPerDimension ) { // Single scan w/ multiple ranges final Scan multiScanner = createStandardScanner(limit); final List<ByteArrayRange> ranges = getRanges(); final MultiRowRangeFilter filter = getMultiRowRangeFilter(ranges); if (filter != null) { filterList.addFilter(filter); final List<RowRange> rowRanges = filter.getRowRanges(); multiScanner.setStartRow(rowRanges.get( 0).getStartRow()); final RowRange stopRowRange = rowRanges.get(rowRanges.size() - 1); byte[] stopRowExclusive; if (stopRowRange.isStopRowInclusive()) { // because the end is always exclusive, to make an inclusive // stop row into exlusive all we need to do is add a traling 0 stopRowExclusive = new byte[stopRowRange.getStopRow().length + 1]; System.arraycopy( stopRowRange.getStopRow(), 0, stopRowExclusive, 0, stopRowExclusive.length - 1); } else { stopRowExclusive = stopRowRange.getStopRow(); } multiScanner.setStopRow(stopRowExclusive); } return multiScanner; }
private CloseableIterable<Result> createScanner() { // End of input ids if (null != idsIterator && !idsIterator.hasNext()) { return null; } Table table = null; try { final Scan scan = new Scan(); if (null != idsIterator) { final List<MultiRowRangeFilter.RowRange> rowRanges = new ArrayList<>(); final int maxEntriesForBatchScanner = store.getProperties().getMaxEntriesForBatchScanner(); int count = 0; while (idsIterator.hasNext() && count < maxEntriesForBatchScanner) { count++; rowRanges.addAll(rowRangeFactory.getRowRange(idsIterator.next(), operation)); } if (rowRanges.isEmpty()) { return new WrappedCloseableIterable<>(Collections.emptyList()); } scan.setFilter(new MultiRowRangeFilter(rowRanges)); } scan.setAuthorizations(authorisations); scan.setAttribute(HBaseStoreConstants.SCHEMA, store.getSchema().toCompactJson()); scan.setAttribute(HBaseStoreConstants.INCLUDE_MATCHED_VERTEX, Bytes.toBytes(Boolean.toString(includeMatchedVertex))); scan.setAttribute(HBaseStoreConstants.VIEW, operation.getView().toCompactJson()); if (null != operation.getDirectedType()) { scan.setAttribute(HBaseStoreConstants.DIRECTED_TYPE, Bytes.toBytes(operation.getDirectedType().name())); } if (null != extraProcessors) { scan.setAttribute(HBaseStoreConstants.EXTRA_PROCESSORS, extraProcessors); } scan.setMaxVersions(); table = store.getTable(); return new WrappedCloseableIterable<>(table.getScanner(scan)); } catch (final IOException | StoreException e) { if (null != table) { CloseableUtil.close(table); } throw new RuntimeException(e); } }
/** * Sets up the actual job. * * @param conf The current configuration. * @param args The command line parameters. * @return The newly created job. * @throws IOException When setting up the job fails. */ public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException { String tableName = args[0]; List<MultiRowRangeFilter.RowRange> rowRangeList = null; long startTime = 0; long endTime = 0; StringBuilder sb = new StringBuilder(); final String rangeSwitch = "--range="; final String startTimeArgKey = "--starttime="; final String endTimeArgKey = "--endtime="; final String expectedCountArg = "--expected-count="; // First argument is table name, starting from second for (int i = 1; i < args.length; i++) { if (args[i].startsWith(rangeSwitch)) { try { rowRangeList = parseRowRangeParameter(args[i], rangeSwitch); } catch (IllegalArgumentException e) { return null; } continue; } if (args[i].startsWith(startTimeArgKey)) { startTime = Long.parseLong(args[i].substring(startTimeArgKey.length())); continue; } if (args[i].startsWith(endTimeArgKey)) { endTime = Long.parseLong(args[i].substring(endTimeArgKey.length())); continue; } if (args[i].startsWith(expectedCountArg)) { conf.setLong(EXPECTED_COUNT_KEY, Long.parseLong(args[i].substring(expectedCountArg.length()))); continue; } // if no switch, assume column names sb.append(args[i]); sb.append(" "); } if (endTime < startTime) { printUsage("--endtime=" + endTime + " needs to be greater than --starttime=" + startTime); return null; } Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName)); job.setJarByClass(RowCounter.class); Scan scan = new Scan(); scan.setCacheBlocks(false); setScanFilter(scan, rowRangeList); if (sb.length() > 0) { for (String columnName : sb.toString().trim().split(" ")) { String family = StringUtils.substringBefore(columnName, ":"); String qualifier = StringUtils.substringAfter(columnName, ":"); if (StringUtils.isBlank(qualifier)) { 
scan.addFamily(Bytes.toBytes(family)); } else { scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier)); } } } scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime); job.setOutputFormatClass(NullOutputFormat.class); TableMapReduceUtil.initTableMapperJob(tableName, scan, RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job); job.setNumReduceTasks(0); return job; }
protected MultiRowRangeFilter getMultiRowRangeFilter( final List<ByteArrayRange> ranges ) { // create the multi-row filter final List<RowRange> rowRanges = new ArrayList<RowRange>(); if ((ranges == null) || ranges.isEmpty()) { rowRanges.add(new RowRange( HConstants.EMPTY_BYTE_ARRAY, true, HConstants.EMPTY_BYTE_ARRAY, false)); } else { for (final ByteArrayRange range : ranges) { if (range.getStart() != null) { final byte[] startRow = range.getStart().getBytes(); byte[] stopRow; if (!range.isSingleValue()) { stopRow = HBaseUtils.getNextPrefix(range.getEnd().getBytes()); } else { stopRow = HBaseUtils.getNextPrefix(range.getStart().getBytes()); } final RowRange rowRange = new RowRange( startRow, true, stopRow, false); rowRanges.add(rowRange); } } } // Create the multi-range filter try { return new MultiRowRangeFilter( rowRanges); } catch (final IOException e) { LOGGER.error( "Error creating range filter.", e); } return null; }