@Override public void configure(JobConf job) { try { HTable exampleTable = new HTable(HBaseConfiguration.create(job), Bytes.toBytes("exampleDeprecatedTable")); // mandatory setHTable(exampleTable); byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }; // mandatory setInputColumns(inputColumns); Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*")); // optional setRowFilter(exampleFilter); } catch (IOException exception) { throw new RuntimeException("Failed to configure for job.", exception); } }
@Override public void configure(JobConf job) { try { HTable exampleTable = new HTable(HBaseConfiguration.create(job), Bytes.toBytes("exampleDeprecatedTable")); // mandatory setHTable(exampleTable); byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }; // optional Scan scan = new Scan(); for (byte[] family : inputColumns) { scan.addFamily(family); } Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*")); scan.setFilter(exampleFilter); setScan(scan); } catch (IOException exception) { throw new RuntimeException("Failed to configure for job.", exception); } }
@Override public void configure(JobConf job) { try { Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job)); TableName tableName = TableName.valueOf("exampleJobConfigurableTable"); // mandatory initializeTable(connection, tableName); byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }; //optional Scan scan = new Scan(); for (byte[] family : inputColumns) { scan.addFamily(family); } Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*")); scan.setFilter(exampleFilter); setScan(scan); } catch (IOException exception) { throw new RuntimeException("Failed to initialize.", exception); } }
@Override protected void initialize(JobContext job) throws IOException { Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create( job.getConfiguration())); TableName tableName = TableName.valueOf("exampleTable"); // mandatory initializeTable(connection, tableName); byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }; //optional Scan scan = new Scan(); for (byte[] family : inputColumns) { scan.addFamily(family); } Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*")); scan.setFilter(exampleFilter); setScan(scan); }
/**
 * Builds the model from a comparator instance.
 *
 * <p>The comparator's simple class name is stored as {@code type} and must be the name of
 * a {@code ComparatorType} constant. Binary, binary-prefix and bit comparators store
 * their value Base64-encoded (the bit comparator also records its operator); regex and
 * substring comparators store their value as a string; the null comparator stores no value.
 *
 * @param comparator the comparator to describe
 * @throws RuntimeException if the comparator type has no handling in this constructor
 */
public ByteArrayComparableModel(ByteArrayComparable comparator) {
  final String simpleName = comparator.getClass().getSimpleName();
  final ComparatorType kind = ComparatorType.valueOf(simpleName);
  this.type = simpleName;

  switch (kind) {
    case NullComparator:
      // Nothing besides the type name is recorded.
      break;
    case RegexStringComparator:
    case SubstringComparator:
      this.value = Bytes.toString(comparator.getValue());
      break;
    case BinaryComparator:
    case BinaryPrefixComparator:
      this.value = Base64.encodeBytes(comparator.getValue());
      break;
    case BitComparator:
      this.value = Base64.encodeBytes(comparator.getValue());
      this.op = ((BitComparator) comparator).getOperator().toString();
      break;
    default:
      throw new RuntimeException("unhandled filter type: " + kind);
  }
}
/**
 * Converts a regex comparator into a column-qualifier regex filter.
 *
 * @param compareOp the comparison operator; {@code EQUAL} yields a qualifier regex filter
 *     and {@code NO_OP} yields the accept-all filter
 * @param comparator the comparator the regex pattern is extracted from
 * @return the adapted filter
 * @throws IllegalStateException for any other comparison operator
 */
private RowFilter adaptRegexStringComparator(
    CompareOp compareOp, RegexStringComparator comparator) {
  final String regex = FilterAdapterHelper.extractRegexPattern(comparator);
  switch (compareOp) {
    case NO_OP:
      return FilterAdapterHelper.ACCEPT_ALL_FILTER;
    case EQUAL:
      return RowFilter.newBuilder()
          .setColumnQualifierRegexFilter(ByteString.copyFromUtf8(regex))
          .build();
    default:
      // LESS, LESS_OR_EQUAL, NOT_EQUAL, GREATER_OR_EQUAL and GREATER are rejected here.
      throw new IllegalStateException(
          String.format("Cannot adapt regex filter with compare op %s", compareOp));
  }
}
/**
 * Converts a regex comparator into a value regex filter.
 *
 * @param compareOp the comparison operator; {@code EQUAL} yields a value regex filter and
 *     {@code NO_OP} yields the accept-all filter
 * @param comparator the comparator the regex pattern is extracted from
 * @return the adapted filter
 * @throws IllegalStateException for any other comparison operator
 */
private RowFilter adaptRegexStringComparator(
    CompareOp compareOp, RegexStringComparator comparator) {
  final String regex = FilterAdapterHelper.extractRegexPattern(comparator);
  switch (compareOp) {
    case NO_OP:
      return FilterAdapterHelper.ACCEPT_ALL_FILTER;
    case EQUAL:
      return RowFilter.newBuilder()
          .setValueRegexFilter(ByteString.copyFromUtf8(regex))
          .build();
    default:
      // LESS, LESS_OR_EQUAL, NOT_EQUAL, GREATER_OR_EQUAL and GREATER are rejected here.
      throw new IllegalStateException(
          String.format("Cannot adapt regex filter with compare op %s", compareOp));
  }
}
@Override public void configure(JobConf job) { try { Connection connection = ConnectionFactory.createConnection(job); Table exampleTable = connection.getTable(TableName.valueOf("exampleDeprecatedTable")); // mandatory initializeTable(connection, exampleTable.getName()); byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }; // mandatory setInputColumns(inputColumns); Filter exampleFilter = new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*")); // optional setRowFilter(exampleFilter); } catch (IOException exception) { throw new RuntimeException("Failed to configure for job.", exception); } }
@Override public void configure(JobConf job) { try { Connection connection = ConnectionFactory.createConnection(job); Table exampleTable = connection.getTable(TableName.valueOf(("exampleDeprecatedTable"))); // mandatory initializeTable(connection, exampleTable.getName()); byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }; // optional Scan scan = new Scan(); for (byte[] family : inputColumns) { scan.addFamily(family); } Filter exampleFilter = new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*")); scan.setFilter(exampleFilter); setScan(scan); } catch (IOException exception) { throw new RuntimeException("Failed to configure for job.", exception); } }
@Override public void configure(JobConf job) { try { Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job)); TableName tableName = TableName.valueOf("exampleJobConfigurableTable"); // mandatory initializeTable(connection, tableName); byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }; //optional Scan scan = new Scan(); for (byte[] family : inputColumns) { scan.addFamily(family); } Filter exampleFilter = new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*")); scan.setFilter(exampleFilter); setScan(scan); } catch (IOException exception) { throw new RuntimeException("Failed to initialize.", exception); } }
@Override protected void initialize(JobContext job) throws IOException { Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create( job.getConfiguration())); TableName tableName = TableName.valueOf("exampleTable"); // mandatory initializeTable(connection, tableName); byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }; //optional Scan scan = new Scan(); for (byte[] family : inputColumns) { scan.addFamily(family); } Filter exampleFilter = new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*")); scan.setFilter(exampleFilter); setScan(scan); }
/**
 * Creates a filter that compares one mapped column against a regular expression.
 *
 * @param entitySchema schema used to look up the mapping of {@code fieldName}
 * @param entitySerDe not read by this constructor
 * @param fieldName name of the field whose column is filtered
 * @param regex regular expression handed to the {@code RegexStringComparator}
 * @param isEqual {@code true} to compare with {@code EQUAL}, {@code false} for
 *     {@code NOT_EQUAL}
 * @throws DatasetException if the field is not mapped with the {@code COLUMN} mapping type
 */
public RegexEntityFilter(
    EntitySchema entitySchema,
    EntitySerDe<?> entitySerDe,
    String fieldName,
    String regex,
    boolean isEqual) {
  FieldMapping mapping = entitySchema.getFieldMapping(fieldName);
  if (MappingType.COLUMN != mapping.getMappingType()) {
    throw new DatasetException(
        "SingleColumnValueFilter only compatible with COLUMN mapping types.");
  }

  CompareFilter.CompareOp compareOp;
  if (isEqual) {
    compareOp = CompareFilter.CompareOp.EQUAL;
  } else {
    compareOp = CompareFilter.CompareOp.NOT_EQUAL;
  }

  this.filter =
      new org.apache.hadoop.hbase.filter.SingleColumnValueFilter(
          mapping.getFamily(),
          mapping.getQualifier(),
          compareOp,
          new RegexStringComparator(regex));
}
protected void initialize(JobConf job, String table) throws IOException { Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job)); TableName tableName = TableName.valueOf(table); // mandatory initializeTable(connection, tableName); byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }; // mandatory setInputColumns(inputColumns); Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*")); // optional setRowFilter(exampleFilter); }
/**
 * Writes ten rows, each with its own qualifier {@code col0…} to {@code col9…}, and checks
 * that a {@code QualifierFilter} with the regex {@code col[1-5]} returns rows 1 through 5
 * in order, one cell per row.
 *
 * <p>The scanner that is opened is the one that is iterated, and both the scanner and the
 * table are closed in {@code finally} blocks so a failing assertion does not leak them.
 */
@Test
public void testFilters() throws Exception {
  byte[] tableName = Bytes.toBytes("testFilters");
  Table ht = TEST_UTIL.createTable(tableName, FAMILY);
  try {
    byte[][] rows = makeN(ROW, 10);
    byte[][] qualifiers = {
      Bytes.toBytes("col0-<d2v1>-<d3v2>"), Bytes.toBytes("col1-<d2v1>-<d3v2>"),
      Bytes.toBytes("col2-<d2v1>-<d3v2>"), Bytes.toBytes("col3-<d2v1>-<d3v2>"),
      Bytes.toBytes("col4-<d2v1>-<d3v2>"), Bytes.toBytes("col5-<d2v1>-<d3v2>"),
      Bytes.toBytes("col6-<d2v1>-<d3v2>"), Bytes.toBytes("col7-<d2v1>-<d3v2>"),
      Bytes.toBytes("col8-<d2v1>-<d3v2>"), Bytes.toBytes("col9-<d2v1>-<d3v2>")
    };
    for (int i = 0; i < 10; i++) {
      Put put = new Put(rows[i]);
      put.setDurability(Durability.SKIP_WAL);
      put.add(FAMILY, qualifiers[i], VALUE);
      ht.put(put);
    }

    Scan scan = new Scan();
    scan.addFamily(FAMILY);
    Filter filter = new QualifierFilter(CompareOp.EQUAL, new RegexStringComparator("col[1-5]"));
    scan.setFilter(filter);

    ResultScanner scanner = ht.getScanner(scan);
    try {
      int expectedIndex = 1;
      // Iterate the scanner opened above rather than opening a second, unclosed one.
      for (Result result : scanner) {
        assertEquals(1, result.size());
        assertTrue(Bytes.equals(CellUtil.cloneRow(result.rawCells()[0]), rows[expectedIndex]));
        assertTrue(
            Bytes.equals(
                CellUtil.cloneQualifier(result.rawCells()[0]), qualifiers[expectedIndex]));
        expectedIndex++;
      }
      // Rows 1..5 matched, so the index ends one past the last match.
      assertEquals(6, expectedIndex);
    } finally {
      scanner.close();
    }
  } finally {
    ht.close();
  }
}
/**
 * Writes ten rows, each with its own qualifier, and checks that a reversed scan with a
 * {@code QualifierFilter} on the regex {@code col[1-5]} returns rows 5 down to 1, one cell
 * per row.
 */
@Test
public void testFiltersWithReverseScan() throws Exception {
  TableName tableName = TableName.valueOf("testFiltersWithReverseScan");
  Table table = TEST_UTIL.createTable(tableName, FAMILY);
  byte[][] rows = makeN(ROW, 10);
  byte[][] qualifiers = {
    Bytes.toBytes("col0-<d2v1>-<d3v2>"), Bytes.toBytes("col1-<d2v1>-<d3v2>"),
    Bytes.toBytes("col2-<d2v1>-<d3v2>"), Bytes.toBytes("col3-<d2v1>-<d3v2>"),
    Bytes.toBytes("col4-<d2v1>-<d3v2>"), Bytes.toBytes("col5-<d2v1>-<d3v2>"),
    Bytes.toBytes("col6-<d2v1>-<d3v2>"), Bytes.toBytes("col7-<d2v1>-<d3v2>"),
    Bytes.toBytes("col8-<d2v1>-<d3v2>"), Bytes.toBytes("col9-<d2v1>-<d3v2>")
  };

  // One put per row, each row carrying exactly one of the qualifiers.
  for (int idx = 0; idx < qualifiers.length; idx++) {
    Put rowPut = new Put(rows[idx]);
    rowPut.add(FAMILY, qualifiers[idx], VALUE);
    table.put(rowPut);
  }

  Scan reverseScan = new Scan();
  reverseScan.setReversed(true);
  reverseScan.addFamily(FAMILY);
  reverseScan.setFilter(
      new QualifierFilter(CompareOp.EQUAL, new RegexStringComparator("col[1-5]")));

  ResultScanner results = table.getScanner(reverseScan);
  // The scan is reversed, so the matches are expected from index 5 downwards.
  int expectedIndex = 5;
  for (Result row : results) {
    assertEquals(row.size(), 1);
    assertTrue(Bytes.equals(row.raw()[0].getRow(), rows[expectedIndex]));
    assertTrue(Bytes.equals(row.raw()[0].getQualifier(), qualifiers[expectedIndex]));
    expectedIndex -= 1;
  }
  assertEquals(expectedIndex, 0);
  results.close();
  table.close();
}
/**
 * Reads a freshly written table through a row filter built from the regex {@code .*17.*}
 * and checks the read against a length of 20 (presumably the number of matching rows
 * among the 1001 written; see {@code runReadTestLength}).
 */
@Test
public void testReadingWithFilter() throws Exception {
  final String table = "TEST-FILTER-TABLE";
  createTable(table);
  writeData(table, 1001);

  Filter rowKeyFilter =
      new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(".*17.*"));
  HBaseIO.Read filteredRead =
      HBaseIO.read().withConfiguration(conf).withTableId(table).withFilter(rowKeyFilter);

  runReadTestLength(filteredRead, 20);
}
/**
 * Writes ten rows, each with its own qualifier {@code col0…} to {@code col9…}, and checks
 * that a {@code QualifierFilter} with the regex {@code col[1-5]} returns rows 1 through 5
 * in order, one cell per row.
 *
 * <p>The scanner that is opened is the one that is iterated, and both the scanner and the
 * table are closed in {@code finally} blocks so a failing assertion does not leak them.
 */
@Test
public void testFilters() throws Exception {
  byte[] tableName = Bytes.toBytes("testFilters");
  HTable ht = TEST_UTIL.createTable(tableName, FAMILY);
  try {
    byte[][] rows = makeN(ROW, 10);
    byte[][] qualifiers = {
      Bytes.toBytes("col0-<d2v1>-<d3v2>"), Bytes.toBytes("col1-<d2v1>-<d3v2>"),
      Bytes.toBytes("col2-<d2v1>-<d3v2>"), Bytes.toBytes("col3-<d2v1>-<d3v2>"),
      Bytes.toBytes("col4-<d2v1>-<d3v2>"), Bytes.toBytes("col5-<d2v1>-<d3v2>"),
      Bytes.toBytes("col6-<d2v1>-<d3v2>"), Bytes.toBytes("col7-<d2v1>-<d3v2>"),
      Bytes.toBytes("col8-<d2v1>-<d3v2>"), Bytes.toBytes("col9-<d2v1>-<d3v2>")
    };
    for (int i = 0; i < 10; i++) {
      Put put = new Put(rows[i]);
      put.setWriteToWAL(false);
      put.add(FAMILY, qualifiers[i], VALUE);
      ht.put(put);
    }

    Scan scan = new Scan();
    scan.addFamily(FAMILY);
    Filter filter = new QualifierFilter(CompareOp.EQUAL, new RegexStringComparator("col[1-5]"));
    scan.setFilter(filter);

    ResultScanner scanner = ht.getScanner(scan);
    try {
      int expectedIndex = 1;
      // Iterate the scanner opened above rather than opening a second, unclosed one.
      for (Result result : scanner) {
        assertEquals(1, result.size());
        assertTrue(Bytes.equals(result.raw()[0].getRow(), rows[expectedIndex]));
        assertTrue(Bytes.equals(result.raw()[0].getQualifier(), qualifiers[expectedIndex]));
        expectedIndex++;
      }
      // Rows 1..5 matched, so the index ends one past the last match.
      assertEquals(6, expectedIndex);
    } finally {
      scanner.close();
    }
  } finally {
    ht.close();
  }
}
/**
 * Adapts a {@code QualifierFilter} by dispatching on the type of its comparator.
 *
 * @param context the adapter context, forwarded to the binary-comparator adaptation
 * @param filter the qualifier filter to adapt
 * @return the adapted filter for a regex or binary comparator
 * @throws IOException declared by the interface; propagated from the delegate calls
 * @throws IllegalStateException if the comparator is neither a regex nor a binary comparator
 */
@Override
public RowFilter adapt(FilterAdapterContext context, QualifierFilter filter)
    throws IOException {
  if (filter.getComparator() instanceof RegexStringComparator) {
    RegexStringComparator regexComparator = (RegexStringComparator) filter.getComparator();
    return adaptRegexStringComparator(filter.getOperator(), regexComparator);
  }

  if (filter.getComparator() instanceof BinaryComparator) {
    BinaryComparator binaryComparator = (BinaryComparator) filter.getComparator();
    return adaptBinaryComparator(context, filter.getOperator(), binaryComparator);
  }

  String comparatorClassName = filter.getComparator().getClass().getCanonicalName();
  throw new IllegalStateException(
      String.format("Cannot adapt comparator %s", comparatorClassName));
}
/**
 * Adapts a {@code ValueFilter} by dispatching on the type of its comparator.
 *
 * @param context the adapter context (not read by this method)
 * @param filter the value filter to adapt
 * @return the adapted filter for a binary or regex comparator
 * @throws IOException declared by the interface; propagated from the delegate calls
 * @throws IllegalStateException if the comparator is neither a binary nor a regex comparator
 */
@Override
public RowFilter adapt(FilterAdapterContext context, ValueFilter filter) throws IOException {
  if (filter.getComparator() instanceof BinaryComparator) {
    return adaptBinaryComparator(
        filter.getOperator(), (BinaryComparator) filter.getComparator());
  } else if (filter.getComparator() instanceof RegexStringComparator) {
    return adaptRegexStringComparator(
        filter.getOperator(), (RegexStringComparator) filter.getComparator());
  }
  // The space before %s keeps the comparator from being fused to the word "comparator".
  throw new IllegalStateException(
      String.format("Cannot adapt filter with comparator %s", filter.getComparator()));
}
/**
 * Reports whether a {@code ValueFilter} can be adapted.
 *
 * <p>Supported combinations are a {@code BinaryComparator} with any operator, or a
 * {@code RegexStringComparator} with the {@code EQUAL} operator.
 *
 * @param context the adapter context (not read by this method)
 * @param filter the value filter to check
 * @return {@code SUPPORTED}, or a not-supported status naming the comparator class and
 *     operator that were found
 */
@Override
public FilterSupportStatus isFilterSupported(
    FilterAdapterContext context, ValueFilter filter) {
  if (filter.getComparator() instanceof BinaryComparator) {
    return FilterSupportStatus.SUPPORTED;
  }
  if (filter.getComparator() instanceof RegexStringComparator
      && filter.getOperator() == CompareOp.EQUAL) {
    return FilterSupportStatus.SUPPORTED;
  }

  String comparatorName = filter.getComparator().getClass().getSimpleName();
  return FilterSupportStatus.newNotSupported(
      String.format(
          "ValueFilter must have either a BinaryComparator with any compareOp "
              + "or a RegexStringComparator with an EQUAL compareOp. Found (%s, %s)",
          comparatorName,
          filter.getOperator()));
}
/**
 * Checks that a regex comparator with {@code EQUAL} is adapted to a value regex filter
 * carrying the same pattern.
 */
@Test
public void testRegexValueFilter() throws IOException {
  final String regex = "Foo\\d+";
  RegexStringComparator comparator = new RegexStringComparator(regex);
  RowFilter expected =
      RowFilter.newBuilder().setValueRegexFilter(ByteString.copyFromUtf8(regex)).build();

  assertAdaptedForm(comparator, CompareOp.EQUAL, expected);
}
/**
 * Checks that a regex comparator with {@code EQUAL} is adapted to a column-qualifier regex
 * filter carrying the same pattern.
 */
@Test
public void testRegexQualifierFilter() throws IOException {
  final String regex = "Foo\\d+";
  RegexStringComparator comparator = new RegexStringComparator(regex);
  RowFilter expected =
      RowFilter.newBuilder()
          .setColumnQualifierRegexFilter(ByteString.copyFromUtf8(regex))
          .build();

  assertAdaptedForm(comparator, CompareOp.EQUAL, expected);
}
/**
 * Writes ten rows, each with its own qualifier {@code col0…} to {@code col9…}, and checks
 * that a {@code QualifierFilter} with the regex {@code col[1-5]} returns rows 1 through 5
 * in order, one cell per row.
 *
 * <p>The scanner that is opened is the one that is iterated, and both the scanner and the
 * table are closed in {@code finally} blocks so a failing assertion does not leak them.
 */
@Test
public void testFilters() throws Exception {
  byte[] tableName = Bytes.toBytes("testFilters");
  HTable ht = TEST_UTIL.createTable(tableName, FAMILY);
  try {
    byte[][] rows = makeN(ROW, 10);
    byte[][] qualifiers = {
      Bytes.toBytes("col0-<d2v1>-<d3v2>"), Bytes.toBytes("col1-<d2v1>-<d3v2>"),
      Bytes.toBytes("col2-<d2v1>-<d3v2>"), Bytes.toBytes("col3-<d2v1>-<d3v2>"),
      Bytes.toBytes("col4-<d2v1>-<d3v2>"), Bytes.toBytes("col5-<d2v1>-<d3v2>"),
      Bytes.toBytes("col6-<d2v1>-<d3v2>"), Bytes.toBytes("col7-<d2v1>-<d3v2>"),
      Bytes.toBytes("col8-<d2v1>-<d3v2>"), Bytes.toBytes("col9-<d2v1>-<d3v2>")
    };
    for (int i = 0; i < 10; i++) {
      Put put = new Put(rows[i]);
      put.setDurability(Durability.SKIP_WAL);
      put.add(FAMILY, qualifiers[i], VALUE);
      ht.put(put);
    }

    Scan scan = new Scan();
    scan.addFamily(FAMILY);
    Filter filter = new QualifierFilter(CompareOp.EQUAL, new RegexStringComparator("col[1-5]"));
    scan.setFilter(filter);

    ResultScanner scanner = ht.getScanner(scan);
    try {
      int expectedIndex = 1;
      // Iterate the scanner opened above rather than opening a second, unclosed one.
      for (Result result : scanner) {
        assertEquals(1, result.size());
        assertTrue(Bytes.equals(CellUtil.cloneRow(result.rawCells()[0]), rows[expectedIndex]));
        assertTrue(
            Bytes.equals(
                CellUtil.cloneQualifier(result.rawCells()[0]), qualifiers[expectedIndex]));
        expectedIndex++;
      }
      // Rows 1..5 matched, so the index ends one past the last match.
      assertEquals(6, expectedIndex);
    } finally {
      scanner.close();
    }
  } finally {
    ht.close();
  }
}
/**
 * Writes ten rows, each with its own qualifier, and checks that a reversed scan with a
 * {@code QualifierFilter} on the regex {@code col[1-5]} returns rows 5 down to 1, one cell
 * per row.
 */
@Test
public void testFiltersWithReverseScan() throws Exception {
  byte[] tableName = Bytes.toBytes("testFiltersWithReverseScan");
  HTable table = TEST_UTIL.createTable(tableName, FAMILY);
  byte[][] rows = makeN(ROW, 10);
  byte[][] qualifiers = {
    Bytes.toBytes("col0-<d2v1>-<d3v2>"), Bytes.toBytes("col1-<d2v1>-<d3v2>"),
    Bytes.toBytes("col2-<d2v1>-<d3v2>"), Bytes.toBytes("col3-<d2v1>-<d3v2>"),
    Bytes.toBytes("col4-<d2v1>-<d3v2>"), Bytes.toBytes("col5-<d2v1>-<d3v2>"),
    Bytes.toBytes("col6-<d2v1>-<d3v2>"), Bytes.toBytes("col7-<d2v1>-<d3v2>"),
    Bytes.toBytes("col8-<d2v1>-<d3v2>"), Bytes.toBytes("col9-<d2v1>-<d3v2>")
  };

  // One put per row, each row carrying exactly one of the qualifiers.
  for (int idx = 0; idx < qualifiers.length; idx++) {
    Put rowPut = new Put(rows[idx]);
    rowPut.add(FAMILY, qualifiers[idx], VALUE);
    table.put(rowPut);
  }

  Scan reverseScan = new Scan();
  reverseScan.setReversed(true);
  reverseScan.addFamily(FAMILY);
  reverseScan.setFilter(
      new QualifierFilter(CompareOp.EQUAL, new RegexStringComparator("col[1-5]")));

  ResultScanner results = table.getScanner(reverseScan);
  // The scan is reversed, so the matches are expected from index 5 downwards.
  int expectedIndex = 5;
  for (Result row : results) {
    assertEquals(row.size(), 1);
    assertTrue(Bytes.equals(row.raw()[0].getRow(), rows[expectedIndex]));
    assertTrue(Bytes.equals(row.raw()[0].getQualifier(), qualifiers[expectedIndex]));
    expectedIndex -= 1;
  }
  assertEquals(expectedIndex, 0);
  results.close();
  table.close();
}
/**
 * Builds a {@code SingleColumnValueFilter} that compares a String column against a regex.
 *
 * @param hbaseColumnSchema schema supplying the family, the qualifier and the column type
 * @param compareOp must be {@code EQUAL} or {@code NOT_EQUAL}
 * @param object the regular expression; must be a {@code String}
 * @return the filter, with filter-if-missing switched on
 * @throws SimpleHBaseException if the operator, the object or the column type is not
 *     usable for a regex match
 */
private static Filter constructFilterWithRegex(
    HBaseColumnSchema hbaseColumnSchema, CompareOp compareOp, Object object) {
  Util.checkNull(hbaseColumnSchema);
  Util.checkNull(compareOp);
  Util.checkNull(object);

  boolean regexCapableOp = compareOp == CompareOp.EQUAL || compareOp == CompareOp.NOT_EQUAL;
  if (!regexCapableOp) {
    throw new SimpleHBaseException(
        "only EQUAL or NOT_EQUAL can use regex match. compareOp = " + compareOp);
  }
  if (String.class != object.getClass()) {
    throw new SimpleHBaseException("only String can use regex match. object = " + object);
  }
  if (String.class != hbaseColumnSchema.getType()) {
    throw new SimpleHBaseException(
        "only String can use regex match. hbaseColumnSchema = " + hbaseColumnSchema);
  }

  SingleColumnValueFilter regexFilter =
      new SingleColumnValueFilter(
          hbaseColumnSchema.getFamilyBytes(),
          hbaseColumnSchema.getQualifierBytes(),
          compareOp,
          new RegexStringComparator((String) object));
  regexFilter.setFilterIfMissing(true);
  return regexFilter;
}
@Test public void testFilterList() throws Exception { // Test getting a single row, single key using Row, Qualifier, and Value // regular expression and substring filters // Use must pass all List<Filter> filters = new ArrayList<Filter>(); filters.add(new RowFilter(CompareOp.EQUAL, new RegexStringComparator(".+-2"))); filters.add(new QualifierFilter(CompareOp.EQUAL, new RegexStringComparator(".+-2"))); filters.add(new ValueFilter(CompareOp.EQUAL, new SubstringComparator("One"))); Filter f = new FilterList(Operator.MUST_PASS_ALL, filters); Scan s = new Scan(); s.addFamily(FAMILIES[0]); s.setFilter(f); KeyValue [] kvs = { new KeyValue(ROWS_ONE[2], FAMILIES[0], QUALIFIERS_ONE[2], VALUES[0]) }; verifyScanFull(s, kvs); // Test getting everything with a MUST_PASS_ONE filter including row, qf, // val, regular expression and substring filters filters.clear(); filters.add(new RowFilter(CompareOp.EQUAL, new RegexStringComparator(".+Two.+"))); filters.add(new QualifierFilter(CompareOp.EQUAL, new RegexStringComparator(".+-2"))); filters.add(new ValueFilter(CompareOp.EQUAL, new SubstringComparator("One"))); f = new FilterList(Operator.MUST_PASS_ONE, filters); s = new Scan(); s.setFilter(f); verifyScanNoEarlyOut(s, numRows, colsPerRow); }
/**
 * Deletes, from the row keyed by the table name, every ACL-list qualifier that refers to
 * the given column.
 *
 * <p>The scan selects qualifiers matching {@code <delimiter>column<delimiter>} anywhere, or
 * ending in {@code <delimiter>column}. The column name is quoted before it is placed in
 * the regular expression, so regex metacharacters in the name are matched literally and
 * cannot select qualifiers that belong to other columns.
 *
 * @param tableName table whose row in the ACL table is modified
 * @param column column name whose permission entries are removed
 * @param table the table to scan and delete from
 * @param closeTable whether {@code table} is closed before returning
 * @throws IOException if scanning, deleting or closing fails
 */
static private void removeTablePermissions(TableName tableName, byte[] column, Table table,
    boolean closeTable) throws IOException {
  Scan scan = new Scan();
  scan.addFamily(ACL_LIST_FAMILY);

  // String.valueOf keeps the previous "%s" rendering of a null column as the text "null".
  String columnName = java.util.regex.Pattern.quote(String.valueOf(Bytes.toString(column)));
  scan.setFilter(new QualifierFilter(CompareOperator.EQUAL,
      new RegexStringComparator(String.format("(%s%s%s)|(%s%s)$", ACL_KEY_DELIMITER,
          columnName, ACL_KEY_DELIMITER, ACL_KEY_DELIMITER, columnName))));

  Set<byte[]> qualifierSet = new TreeSet<>(Bytes.BYTES_COMPARATOR);
  ResultScanner scanner = null;
  try {
    scanner = table.getScanner(scan);
    for (Result res : scanner) {
      qualifierSet.addAll(res.getFamilyMap(ACL_LIST_FAMILY).navigableKeySet());
    }

    if (!qualifierSet.isEmpty()) {
      // All matching qualifiers are removed with a single delete on the table's row.
      Delete d = new Delete(tableName.getName());
      for (byte[] qualifier : qualifierSet) {
        d.addColumns(ACL_LIST_FAMILY, qualifier);
      }
      table.delete(d);
    }
  } finally {
    if (scanner != null) {
      scanner.close();
    }
    if (closeTable) {
      table.close();
    }
  }
}