/** * Returns IndexOperator from string representation * @param operator - string representing IndexOperator (=, >=, >, <, <=) * @return IndexOperator - enum value of IndexOperator or null if not found */ public static IndexOperator getIndexOperator(String operator) { if (operator.equals("=")) { return IndexOperator.EQ; } else if (operator.equals(">=")) { return IndexOperator.GTE; } else if (operator.equals(">")) { return IndexOperator.GT; } else if (operator.equals("<")) { return IndexOperator.LT; } else if (operator.equals("<=")) { return IndexOperator.LTE; } return null; }
private static int getPriority(IndexOperator op) { switch (op) { case EQ: return 4; case GTE: case GT: return 3; case LTE: case LT: return 2; case NOT_EQ: return 1; default: return 0; } }
@Test public void testSatisfiedByWithMultipleTerms() { final ByteBuffer comment = UTF8Type.instance.decompose("comment"); final ColumnFamilyStore store = Keyspace.open("sasecondaryindex").getColumnFamilyStore("saindexed1"); final IPartitioner<?> partitioner = StorageService.getPartitioner(); ColumnFamily cf = ArrayBackedSortedColumns.factory.create(store.metadata); cf.addColumn(new Column(comment, UTF8Type.instance.decompose("software engineer is working on a project"), System.currentTimeMillis())); Operation.Builder builder = new Operation.Builder(OperationType.AND, UTF8Type.instance, controller, new IndexExpression(comment, IndexOperator.EQ, UTF8Type.instance.decompose("eng is a work"))); Operation op = builder.complete(); Assert.assertTrue(op.satisfiedBy(new Row(partitioner.decorateKey(UTF8Type.instance.decompose("key1")), cf), null, false)); builder = new Operation.Builder(OperationType.AND, UTF8Type.instance, controller, new IndexExpression(comment, IndexOperator.EQ, UTF8Type.instance.decompose("soft works fine"))); op = builder.complete(); Assert.assertTrue(op.satisfiedBy(new Row(partitioner.decorateKey(UTF8Type.instance.decompose("key1")), cf), null, false)); }
public static boolean satisfies(int comparison, IndexOperator op) { switch (op) { case EQ: return comparison == 0; case GTE: return comparison >= 0; case GT: return comparison > 0; case LTE: return comparison <= 0; case LT: return comparison < 0; default: throw new IllegalStateException(); } }
private IndexExpression highestSelectivityPredicate(IndexClause clause) { IndexExpression best = null; int bestMeanCount = Integer.MAX_VALUE; for (IndexExpression expression : clause.expressions) { ColumnFamilyStore cfs = getIndexedColumnFamilyStore(expression.column_name); if (cfs == null || !expression.op.equals(IndexOperator.EQ)) continue; int columns = cfs.getMeanColumns(); if (columns < bestMeanCount) { best = expression; bestMeanCount = columns; } } return best; }
private static boolean satisfies(int comparison, IndexOperator op) { switch (op) { case EQ: return comparison == 0; case GTE: return comparison >= 0; case GT: return comparison > 0; case LTE: return comparison <= 0; case LT: return comparison < 0; default: throw new IllegalStateException(); } }
private static Expression expressionFor(long lower, boolean lowerInclusive, long upper, boolean upperInclusive) { Expression expression = new Expression(ByteBufferUtil.EMPTY_BYTE_BUFFER, LongType.instance); expression.add(lowerInclusive ? IndexOperator.GTE : IndexOperator.GT, LongType.instance.decompose(lower)); expression.add(upperInclusive ? IndexOperator.LTE : IndexOperator.LT, LongType.instance.decompose(upper)); return expression; }
private static Expression expressionForNot(AbstractType<?> validator, ByteBuffer lower, ByteBuffer upper, Iterable<ByteBuffer> terms) { Expression expression = new Expression(ByteBufferUtil.EMPTY_BYTE_BUFFER, validator); expression.setOp(Expression.Op.RANGE); expression.setLower(new Expression.Bound(lower, true)); expression.setUpper(new Expression.Bound(upper, true)); for (ByteBuffer term : terms) expression.add(IndexOperator.NOT_EQ, term); return expression; }
private static Expression rangeWithExclusions(long lower, boolean lowerInclusive, long upper, boolean upperInclusive, Set<Long> exclusions) { Expression expression = expressionFor(lower, lowerInclusive, upper, upperInclusive); for (long e : exclusions) expression.add(IndexOperator.NOT_EQ, LongType.instance.decompose(e)); return expression; }
public IndexOperator getIndexOperator(Bound b) { switch (b) { case START: return boundInclusive[b.idx] ? IndexOperator.GTE : IndexOperator.GT; case END: return boundInclusive[b.idx] ? IndexOperator.LTE : IndexOperator.LT; } throw new AssertionError(); }
@VisibleForTesting protected static ListMultimap<ByteBuffer, Expression> analyzeGroup(QueryController controller, final AbstractType<?> comparator, OperationType op, List<IndexExpression> expressions) { ListMultimap<ByteBuffer, Expression> analyzed = ArrayListMultimap.create(); // sort all of the expressions in the operation by name and priority of the logical operator // this gives us an efficient way to handle inequality and combining into ranges without extra processing // and converting expressions from one type to another. Collections.sort(expressions, new Comparator<IndexExpression>() { @Override public int compare(IndexExpression a, IndexExpression b) { int cmp = comparator.compare(ByteBuffer.wrap(a.getColumn_name()), ByteBuffer.wrap(b.getColumn_name())); return cmp == 0 ? -Integer.compare(getPriority(a.getOp()), getPriority(b.getOp())) : cmp; } }); for (final IndexExpression e : expressions) { if (e.isSetLogicalOp()) continue; ByteBuffer name = ByteBuffer.wrap(e.getColumn_name()); ColumnIndex columnIndex = controller.getIndex(name); List<Expression> perColumn = analyzed.get(name); if (columnIndex == null) { ColumnDefinition nonIndexedColumn = controller.getColumn(name); columnIndex = new ColumnIndex(controller.getKeyValidator(), nonIndexedColumn, controller.getComparator(nonIndexedColumn)); } AbstractAnalyzer analyzer = columnIndex.getAnalyzer(); analyzer.reset(ByteBuffer.wrap(e.getValue())); // EQ/NOT_EQ can have multiple expressions e.g. text = "Hello World", // becomes text = "Hello" OR text = "World" because "space" is always interpreted as a split point (by analyzer), // NOT_EQ is made an independent expression only in case of pre-existing multiple EQ expressions, or // if there is no EQ operations and NOT_EQ is met or a single NOT_EQ expression present, // in such case we know exactly that there would be no more EQ/RANGE expressions for given column // since NOT_EQ has the lowest priority. if (e.getOp() == IndexOperator.EQ || (e.getOp() == IndexOperator.NOT_EQ && (perColumn.size() == 0 || perColumn.size() > 1 || (perColumn.size() == 1 && perColumn.get(0).getOp() == Op.NOT_EQ)))) { while (analyzer.hasNext()) { final ByteBuffer token = analyzer.next(); perColumn.add(new Expression(controller, columnIndex) {{ add(e.op, token); }}); } } else // "range" or not-equals operator, combines both bounds together into the single expression, // iff operation of the group is AND, otherwise we are forced to create separate expressions, // not-equals is combined with the range iff operator is AND. { Expression range; if (perColumn.size() == 0 || op != OperationType.AND) perColumn.add((range = new Expression(controller, columnIndex))); else range = Iterables.getLast(perColumn); while (analyzer.hasNext()) range.add(e.op, analyzer.next()); } } return analyzed; }
public Expression add(IndexOperator op, ByteBuffer value) { boolean lowerInclusive = false, upperInclusive = false; switch (op) { case EQ: lower = new Bound(value, true); upper = lower; operation = Op.EQ; break; case NOT_EQ: // index expressions are priority sorted // and NOT_EQ is the lowest priority, which means that operation type // is always going to be set before reaching it in case of RANGE or EQ. if (operation == null) { operation = Op.NOT_EQ; lower = new Bound(value, true); upper = lower; } else exclusions.add(value); break; case LTE: upperInclusive = true; case LT: operation = Op.RANGE; upper = new Bound(value, upperInclusive); break; case GTE: lowerInclusive = true; case GT: operation = Op.RANGE; lower = new Bound(value, lowerInclusive); break; } return this; }
@Test public void testRangeQueryWithExclusions() throws Exception { final long lower = 0; final long upper = 100000; OnDiskIndexBuilder builder = new OnDiskIndexBuilder(UTF8Type.instance, LongType.instance, OnDiskIndexBuilder.Mode.SPARSE); for (long i = lower; i <= upper; i++) builder.add(LongType.instance.decompose(i), keyAt(i), i); File index = File.createTempFile("on-disk-sa-except-long-ranges", "db"); index.deleteOnExit(); builder.finish(index); OnDiskIndex onDisk = new OnDiskIndex(index, LongType.instance, new KeyConverter()); ThreadLocalRandom random = ThreadLocalRandom.current(); // single exclusion // let's do small range first to figure out if searchPoint works properly validateExclusions(onDisk, lower, 50, Sets.newHashSet(42L)); // now let's do whole data set to test SPARSE searching validateExclusions(onDisk, lower, upper, Sets.newHashSet(31337L)); // pair of exclusions which would generate a split validateExclusions(onDisk, lower, random.nextInt(400, 800), Sets.newHashSet(42L, 154L)); validateExclusions(onDisk, lower, upper, Sets.newHashSet(31337L, 54631L)); // 3 exclusions which would generate a split and change bounds validateExclusions(onDisk, lower, random.nextInt(400, 800), Sets.newHashSet(42L, 154L)); validateExclusions(onDisk, lower, upper, Sets.newHashSet(31337L, 54631L)); validateExclusions(onDisk, lower, random.nextLong(400, upper), Sets.newHashSet(42L, 55L)); validateExclusions(onDisk, lower, random.nextLong(400, upper), Sets.newHashSet(42L, 55L, 93L)); validateExclusions(onDisk, lower, random.nextLong(400, upper), Sets.newHashSet(42L, 55L, 93L, 205L)); Set<Long> exclusions = Sets.newHashSet(3L, 12L, 13L, 14L, 27L, 54L, 81L, 125L, 384L, 771L, 1054L, 2048L, 78834L); // test that exclusions are properly bound by lower/upper of the expression Assert.assertEquals(392, validateExclusions(onDisk, lower, 400, exclusions, false)); Assert.assertEquals(101, validateExclusions(onDisk, lower, 100, Sets.newHashSet(-10L, -5L, -1L), false)); validateExclusions(onDisk, lower, upper, exclusions); Assert.assertEquals(100000, convert(onDisk.search(new Expression(ByteBufferUtil.EMPTY_BYTE_BUFFER, LongType.instance) .add(IndexOperator.NOT_EQ, LongType.instance.decompose(100L)))).size()); Assert.assertEquals(49, convert(onDisk.search(new Expression(ByteBufferUtil.EMPTY_BYTE_BUFFER, LongType.instance) .add(IndexOperator.LT, LongType.instance.decompose(50L)) .add(IndexOperator.NOT_EQ, LongType.instance.decompose(10L)))).size()); Assert.assertEquals(99998, convert(onDisk.search(new Expression(ByteBufferUtil.EMPTY_BYTE_BUFFER, LongType.instance) .add(IndexOperator.GT, LongType.instance.decompose(1L)) .add(IndexOperator.NOT_EQ, LongType.instance.decompose(20L)))).size()); onDisk.close(); }
private static Expression expressionFor(AbstractType<?> validator, ByteBuffer term) { Expression expression = new Expression(ByteBufferUtil.EMPTY_BYTE_BUFFER, validator); expression.add(IndexOperator.EQ, term); return expression; }
@Test public void testAnalyzeNotIndexedButDefinedColumn() throws Exception { final ByteBuffer firstName = UTF8Type.instance.decompose("first_name"); final ByteBuffer height = UTF8Type.instance.decompose("height"); final ByteBuffer notDefined = UTF8Type.instance.decompose("not-defined"); // first_name = 'a' AND height != 10 Map<Expression.Op, Expression> expressions; expressions = convert(Operation.analyzeGroup(controller, UTF8Type.instance, OperationType.AND, Arrays.asList(new IndexExpression(firstName, IndexOperator.EQ, UTF8Type.instance.decompose("a")), new IndexExpression(height, IndexOperator.NOT_EQ, Int32Type.instance.decompose(5))))); Assert.assertEquals(2, expressions.size()); Assert.assertEquals(new Expression(height, Int32Type.instance) {{ operation = Op.NOT_EQ; lower = new Bound(Int32Type.instance.decompose(5), true); upper = lower; }}, expressions.get(Expression.Op.NOT_EQ)); expressions = convert(Operation.analyzeGroup(controller, UTF8Type.instance, OperationType.AND, Arrays.asList(new IndexExpression(firstName, IndexOperator.EQ, UTF8Type.instance.decompose("a")), new IndexExpression(height, IndexOperator.GT, Int32Type.instance.decompose(0)), new IndexExpression(height, IndexOperator.NOT_EQ, Int32Type.instance.decompose(5))))); Assert.assertEquals(2, expressions.size()); Assert.assertEquals(new Expression(height, Int32Type.instance) {{ operation = Op.RANGE; lower = new Bound(Int32Type.instance.decompose(0), false); exclusions.add(Int32Type.instance.decompose(5)); }}, expressions.get(Expression.Op.RANGE)); expressions = convert(Operation.analyzeGroup(controller, UTF8Type.instance, OperationType.AND, Arrays.asList(new IndexExpression(firstName, IndexOperator.EQ, UTF8Type.instance.decompose("a")), new IndexExpression(height, IndexOperator.NOT_EQ, Int32Type.instance.decompose(5)), new IndexExpression(height, IndexOperator.GTE, Int32Type.instance.decompose(0)), new IndexExpression(height, IndexOperator.LT, Int32Type.instance.decompose(10))))); Assert.assertEquals(2, expressions.size()); Assert.assertEquals(new Expression(height, Int32Type.instance) {{ operation = Op.RANGE; lower = new Bound(Int32Type.instance.decompose(0), true); upper = new Bound(Int32Type.instance.decompose(10), false); exclusions.add(Int32Type.instance.decompose(5)); }}, expressions.get(Expression.Op.RANGE)); expressions = convert(Operation.analyzeGroup(controller, UTF8Type.instance, OperationType.AND, new ArrayList<IndexExpression>() {{ add(new IndexExpression(notDefined, IndexOperator.EQ, UTF8Type.instance.decompose("a"))); }})); Assert.assertEquals(1, expressions.size()); }
private static List<org.apache.cassandra.db.Row> multiRangeSlice(CFMetaData metadata, SelectStatement select, List<ByteBuffer> variables) throws ReadTimeoutException, UnavailableException, InvalidRequestException { IPartitioner<?> p = StorageService.getPartitioner(); AbstractType<?> keyType = Schema.instance.getCFMetaData(metadata.ksName, select.getColumnFamily()).getKeyValidator(); ByteBuffer startKeyBytes = (select.getKeyStart() != null) ? select.getKeyStart().getByteBuffer(keyType,variables) : null; ByteBuffer finishKeyBytes = (select.getKeyFinish() != null) ? select.getKeyFinish().getByteBuffer(keyType,variables) : null; RowPosition startKey = RowPosition.forKey(startKeyBytes, p), finishKey = RowPosition.forKey(finishKeyBytes, p); if (startKey.compareTo(finishKey) > 0 && !finishKey.isMinimum(p)) { if (p instanceof RandomPartitioner) throw new InvalidRequestException("Start key sorts after end key. This is not allowed; you probably should not specify end key at all, under RandomPartitioner"); else throw new InvalidRequestException("Start key must sort before (or equal to) finish key in your partitioner!"); } AbstractBounds<RowPosition> bounds = new Bounds<RowPosition>(startKey, finishKey); IDiskAtomFilter columnFilter = filterFromSelect(select, metadata, variables); validateFilter(metadata, columnFilter); List<Relation> columnRelations = select.getColumnRelations(); List<IndexExpression> expressions = new ArrayList<IndexExpression>(columnRelations.size()); for (Relation columnRelation : columnRelations) { // Left and right side of relational expression encoded according to comparator/validator. ByteBuffer entity = columnRelation.getEntity().getByteBuffer(metadata.comparator, variables); ByteBuffer value = columnRelation.getValue().getByteBuffer(select.getValueValidator(metadata.ksName, entity), variables); expressions.add(new IndexExpression(entity, IndexOperator.valueOf(columnRelation.operator().toString()), value)); } int limit = select.isKeyRange() && select.getKeyStart() != null ? select.getNumRecords() + 1 : select.getNumRecords(); List<org.apache.cassandra.db.Row> rows = StorageProxy.getRangeSlice(new RangeSliceCommand(metadata.ksName, select.getColumnFamily(), null, columnFilter, bounds, expressions, limit), select.getConsistencyLevel()); // if start key was set and relation was "greater than" if (select.getKeyStart() != null && !select.includeStartKey() && !rows.isEmpty()) { if (rows.get(0).key.key.equals(startKeyBytes)) rows.remove(0); } // if finish key was set and relation was "less than" if (select.getKeyFinish() != null && !select.includeFinishKey() && !rows.isEmpty()) { int lastIndex = rows.size() - 1; if (rows.get(lastIndex).key.key.equals(finishKeyBytes)) rows.remove(lastIndex); } return rows.subList(0, select.getNumRecords() < rows.size() ? select.getNumRecords() : rows.size()); }