@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
  int max = reader.maxDoc();
  // Start with every doc set, then clear any doc matching one of the terms:
  // the result is an exclusion ("NOT") filter.
  OpenBitSet good = new OpenBitSet(max);
  good.set(0, max);
  for (List<Field> values : terms) {
    for (Field nv : values) {
      Term term = new Term(nv.getField(), nv.getValue());
      TermDocs docs = reader.termDocs(term);
      while (docs.next()) {
        good.clear(docs.doc());
      }
      docs.close();
    }
  }
  return good;
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
  DocIdSet childrenDocSet = childrenFilter.getDocIdSet(context, null);
  // we forcefully apply live docs here so that deleted children don't give matching parents
  childrenDocSet = BitsFilteredDocIdSet.wrap(childrenDocSet, context.reader().getLiveDocs());
  if (Lucene.isEmpty(childrenDocSet)) {
    return null;
  }
  final DocIdSetIterator childIterator = childrenDocSet.iterator();
  if (childIterator == null) {
    return null;
  }
  SortedDocValues bytesValues = globalIfd.load(context).getOrdinalsValues(parentType);
  if (bytesValues == null) {
    return null;
  }
  return new ChildScorer(this, parentIdxs, scores, childIterator, bytesValues);
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
  DocIdSet childrenDocIdSet = childrenFilter.getDocIdSet(context, null);
  if (Lucene.isEmpty(childrenDocIdSet)) {
    return null;
  }
  SortedDocValues globalValues = globalIfd.load(context).getOrdinalsValues(parentType);
  if (globalValues != null) {
    // we forcefully apply live docs here so that deleted children don't give matching parents
    childrenDocIdSet = BitsFilteredDocIdSet.wrap(childrenDocIdSet, context.reader().getLiveDocs());
    DocIdSetIterator innerIterator = childrenDocIdSet.iterator();
    if (innerIterator != null) {
      ChildrenDocIdIterator childrenDocIdIterator = new ChildrenDocIdIterator(
          innerIterator, parentOrds, globalValues);
      return ConstantScorer.create(childrenDocIdIterator, this, queryWeight);
    }
  }
  return null;
}
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
  Terms terms = context.reader().terms(term.field());
  if (terms == null) {
    return null;
  }
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(term.bytes())) {
    return null;
  }
  return new DocIdSet() {
    @Override
    public DocIdSetIterator iterator() throws IOException {
      return termsEnum.docs(acceptDocs, null, DocsEnum.FLAG_NONE);
    }
  };
}
/**
 * Creates a {@link Docs} to record hits. The default uses {@link FixedBitSet}
 * to record hits and you can override to e.g. record the docs in your own
 * {@link DocIdSet}.
 */
protected Docs createDocs(final int maxDoc) {
  return new Docs() {
    private final FixedBitSet bits = new FixedBitSet(maxDoc);

    @Override
    public void addDoc(int docId) throws IOException {
      bits.set(docId);
    }

    @Override
    public DocIdSet getDocIdSet() {
      return bits;
    }
  };
}
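// A hedged sketch (not from the original source) of the override the javadoc
// above invites: a subclass records sparse hits in a growable int buffer
// instead of a maxDoc-sized FixedBitSet. Assumes doc IDs arrive in increasing
// order, which per-segment collection guarantees.
@Override
protected Docs createDocs(final int maxDoc) {
  return new Docs() {
    private int[] docs = new int[16];
    private int size = 0;

    @Override
    public void addDoc(int docId) throws IOException {
      if (size == docs.length) {
        docs = ArrayUtil.grow(docs, size + 1); // org.apache.lucene.util.ArrayUtil
      }
      docs[size++] = docId;
    }

    @Override
    public DocIdSet getDocIdSet() {
      return new DocIdSet() {
        @Override
        public DocIdSetIterator iterator() {
          return new DocIdSetIterator() {
            private int idx = -1;

            @Override
            public int docID() {
              if (idx < 0) {
                return -1;
              }
              return idx < size ? docs[idx] : NO_MORE_DOCS;
            }

            @Override
            public int nextDoc() {
              return ++idx < size ? docs[idx] : NO_MORE_DOCS;
            }

            @Override
            public int advance(int target) {
              // linear scan is enough for a sketch; use binary search in production
              while (++idx < size) {
                if (docs[idx] >= target) {
                  return docs[idx];
                }
              }
              return NO_MORE_DOCS;
            }

            @Override
            public long cost() {
              return size;
            }
          };
        }
      };
    }
  };
}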
@Override
protected DocIdSet docIdSetToCache(DocIdSet docIdSet, AtomicReader reader) throws IOException {
  if (docIdSet == null) {
    return EMPTY;
  } else if (docIdSet instanceof FixedBitSet) {
    // this is different from CachingWrapperFilter: even when the DocIdSet is
    // cacheable, we convert it to a FixedBitSet since we require all the
    // cached filters to be FixedBitSets
    return docIdSet;
  } else {
    final DocIdSetIterator it = docIdSet.iterator();
    if (it == null) {
      return EMPTY;
    } else {
      final FixedBitSet copy = new FixedBitSet(reader.maxDoc());
      copy.or(it);
      return copy;
    }
  }
}
@Override
public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
  if (filter == null) {
    boolean debug = rb != null && rb.isDebug();
    long start = System.currentTimeMillis();
    resultSet = getDocSet();
    long delta = System.currentTimeMillis() - start;
    if (debug) {
      System.out.println("Graph Traverse took : " + delta + " ms.");
    }
    filter = resultSet.getTopFilter();
  }
  // TODO: understand this comment.
  // Although this set only includes live docs, other filters can be pushed down to queries.
  DocIdSet readerSet = filter.getDocIdSet(context, acceptDocs);
  // create a scorer on the result set; if results from the right query are empty, use an empty iterator.
  return new GraphScorer(this, readerSet == null ? DocIdSetIterator.empty() : readerSet.iterator(), getBoost());
}
private Filter getNullDISIFilter() {
  return new Filter() {
    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) {
      return new DocIdSet() {
        @Override
        public DocIdSetIterator iterator() {
          return null;
        }

        @Override
        public boolean isCacheable() {
          return true;
        }
      };
    }
  };
}
/**
 * Creates the complement of the input set. The returned {@link ScoredDocIDs}
 * does not contain any scores, which makes sense given that the complementing
 * documents were not scored.
 *
 * Note: the complement set does NOT contain doc ids which are noted as deleted by the given reader.
 *
 * @param docids the doc IDs to be complemented.
 * @param reader the reader, which supplies the number of documents and information about deletions.
 */
public final static ScoredDocIDs getComplementSet(final ScoredDocIDs docids, final IndexReader reader)
    throws IOException {
  final int maxDoc = reader.maxDoc();
  DocIdSet docIdSet = docids.getDocIDs();
  final FixedBitSet complement;
  if (docIdSet instanceof FixedBitSet) {
    // That is the most common case, if ScoredDocIdsCollector was used.
    complement = ((FixedBitSet) docIdSet).clone();
  } else {
    complement = new FixedBitSet(maxDoc);
    DocIdSetIterator iter = docIdSet.iterator();
    int doc;
    // The (doc < maxDoc) bound also terminates on exhaustion, because
    // NO_MORE_DOCS is Integer.MAX_VALUE.
    while ((doc = iter.nextDoc()) < maxDoc) {
      complement.set(doc);
    }
  }
  complement.flip(0, maxDoc);
  clearDeleted(reader, complement);
  return createScoredDocIds(complement, maxDoc);
}
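// Worked micro-example (illustrative, not from the source): with maxDoc = 5 and
// docids = {1, 3}, the code above builds bits 01010, and flip(0, 5) turns that
// into 10101, i.e. the complement {0, 2, 4}, before clearDeleted drops deletions.
FixedBitSet example = new FixedBitSet(5);
example.set(1);
example.set(3);
example.flip(0, 5); // example now contains exactly {0, 2, 4}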
public static DocIdSet getFullyEmptyDocIdSet(int maxDoc) {
  Bits bits = getFullyEmptyBits(maxDoc);
  return new DocIdSet() {
    @Override
    public DocIdSetIterator iterator() throws IOException {
      return getFullyEmptyDocIdSetIterator(maxDoc);
    }

    @Override
    public Bits bits() throws IOException {
      return bits;
    }

    @Override
    public boolean isCacheable() {
      return true;
    }
  };
}
private DocIdSet docIdSetToCache(DocIdSet docIdSet, AtomicReader reader, String segmentName,
    Directory directory) throws IOException {
  if (docIdSet == null) {
    // this is better than returning null, as the non-null result can be cached
    return DocIdSet.EMPTY_DOCIDSET;
  } else if (docIdSet.isCacheable()) {
    return docIdSet;
  } else {
    final DocIdSetIterator it = docIdSet.iterator();
    // null is allowed to be returned by iterator(); in this case we wrap
    // with the empty set, which is cacheable.
    if (it == null) {
      return DocIdSet.EMPTY_DOCIDSET;
    } else {
      final IndexFileBitSet bits = new IndexFileBitSet(reader.maxDoc(), _id, segmentName, directory);
      if (!bits.exists()) {
        bits.create(it);
      }
      bits.load();
      return bits;
    }
  }
}
@Override
protected DocIdSet docIdSetToCache(DocIdSet docIdSet, AtomicReader reader) throws IOException {
  if (docIdSet == null) {
    return EMPTY_DOCIDSET;
  } else if (docIdSet instanceof FixedBitSet) {
    // this is different from CachingWrapperFilter: even when the DocIdSet is
    // cacheable, we convert it to a FixedBitSet since we require all the
    // cached filters to be FixedBitSets
    return docIdSet;
  } else {
    final DocIdSetIterator it = docIdSet.iterator();
    if (it == null) {
      return EMPTY_DOCIDSET;
    } else {
      final FixedBitSet copy = new FixedBitSet(reader.maxDoc());
      copy.or(it);
      return copy;
    }
  }
}
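// Illustrative helper (not part of the original class) showing the payoff of
// forcing every cached entry to be a FixedBitSet: intersecting two cached
// filters becomes a word-wise AND over the underlying long[] words instead of
// a merge of two iterators.
static FixedBitSet intersectCached(FixedBitSet a, FixedBitSet b) {
  FixedBitSet result = a.clone();
  result.and(b); // in-place intersection
  return result;
}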
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
  return new ConstantScoreWeight(this) {
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      final DocIdSet disi = build(context.reader());
      final DocIdSetIterator leafIt = disi.iterator();
      return new ConstantScoreScorer(this, score(), leafIt);
    }
  };
}
/**
 * Returns a per-segment DocIdSet containing the matching docs for the specified slice.
 */
private DocIdSet build(LeafReader reader) throws IOException {
  final DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
  final Terms terms = reader.terms(getField());
  if (terms == null) {
    // field is absent in this segment: no matches
    return builder.build();
  }
  final TermsEnum te = terms.iterator();
  PostingsEnum docsEnum = null;
  for (BytesRef term = te.next(); term != null; term = te.next()) {
    int hashCode = term.hashCode();
    if (contains(hashCode)) {
      docsEnum = te.postings(docsEnum, PostingsEnum.NONE);
      builder.add(docsEnum);
    }
  }
  return builder.build();
}
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
  int max = reader.maxDoc();
  OpenBitSet good = new OpenBitSet(max);
  Institution institution = CurrentInstitution.get();
  Term term = new Term(FreeTextQuery.FIELD_INSTITUTION, Long.toString(institution.getUniqueId()));
  TermDocs docs = reader.termDocs(term);
  while (docs.next()) {
    good.set(docs.doc());
  }
  docs.close();
  return good;
}
private static long applyQueryDeletes(Iterable<QueryAndLimit> queriesIter, ReadersAndUpdates rld,
    final SegmentReader reader) throws IOException {
  long delCount = 0;
  final AtomicReaderContext readerContext = reader.getContext();
  boolean any = false;
  for (QueryAndLimit ent : queriesIter) {
    Query query = ent.query;
    int limit = ent.limit;
    final DocIdSet docs = new QueryWrapperFilter(query).getDocIdSet(readerContext, reader.getLiveDocs());
    if (docs != null) {
      final DocIdSetIterator it = docs.iterator();
      if (it != null) {
        while (true) {
          int doc = it.nextDoc();
          // the delete only applies to docs below the limit; this check also
          // covers exhaustion, since NO_MORE_DOCS exceeds any limit
          if (doc >= limit) {
            break;
          }
          if (!any) {
            rld.initWritableLiveDocs();
            any = true;
          }
          if (rld.delete(doc)) {
            delCount++;
          }
        }
      }
    }
  }
  return delCount;
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
  DocIdSet parentsSet = parentFilter.getDocIdSet(context, null);
  if (Lucene.isEmpty(parentsSet) || remaining == 0) {
    return null;
  }
  // We can't be sure of the fact that liveDocs have been applied, so we apply it here. The "remaining"
  // count down (short circuit) logic will then work as expected.
  DocIdSetIterator parents = BitsFilteredDocIdSet.wrap(parentsSet, context.reader().getLiveDocs()).iterator();
  if (parents != null) {
    SortedDocValues bytesValues = collector.globalIfd.load(context).getOrdinalsValues(parentType);
    if (bytesValues == null) {
      return null;
    }
    if (minChildren > 0 || maxChildren != 0 || scoreType == ScoreType.NONE) {
      switch (scoreType) {
        case NONE:
          DocIdSetIterator parentIdIterator =
              new CountParentOrdIterator(this, parents, collector, bytesValues, minChildren, maxChildren);
          return ConstantScorer.create(parentIdIterator, this, queryWeight);
        case AVG:
          return new AvgParentCountScorer(this, parents, collector, bytesValues, minChildren, maxChildren);
        default:
          return new ParentCountScorer(this, parents, collector, bytesValues, minChildren, maxChildren);
      }
    }
    switch (scoreType) {
      case AVG:
        return new AvgParentScorer(this, parents, collector, bytesValues);
      default:
        return new ParentScorer(this, parents, collector, bytesValues);
    }
  }
  return null;
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
  if (remaining == 0) {
    return null;
  }
  if (shortCircuitFilter != null) {
    DocIdSet docIdSet = shortCircuitFilter.getDocIdSet(context, null);
    if (!Lucene.isEmpty(docIdSet)) {
      DocIdSetIterator iterator = docIdSet.iterator();
      if (iterator != null) {
        return ConstantScorer.create(iterator, this, queryWeight);
      }
    }
    return null;
  }
  DocIdSet parentDocIdSet = this.parentFilter.getDocIdSet(context, null);
  if (!Lucene.isEmpty(parentDocIdSet)) {
    // We can't be sure of the fact that liveDocs have been applied, so we apply it here. The "remaining"
    // count down (short circuit) logic will then work as expected.
    parentDocIdSet = BitsFilteredDocIdSet.wrap(parentDocIdSet, context.reader().getLiveDocs());
    DocIdSetIterator innerIterator = parentDocIdSet.iterator();
    if (innerIterator != null) {
      LongBitSet parentOrds = collector.parentOrds;
      SortedDocValues globalValues = globalIfd.load(context).getOrdinalsValues(parentType);
      if (globalValues != null) {
        DocIdSetIterator parentIdIterator = new ParentOrdIterator(innerIterator, parentOrds, globalValues, this);
        return ConstantScorer.create(parentIdIterator, this, queryWeight);
      }
    }
  }
  return null;
}
@Override
public DocIdSet getDocIdSet(AtomicReaderContext arc, Bits bits) throws IOException {
  if (!isValid()) {
    return null;
  }
  final Filter[] array = filters.toArray(new Filter[filters.size()]);
  return and(array).getDocIdSet(arc, bits);
}
public void testFlattenFilteredQuery() throws Exception {
  initBoost();
  Query query = new FilteredQuery(pqF("A"), new Filter() {
    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
      return null;
    }
  });
  query.setBoost(boost);
  FieldQuery fq = new FieldQuery(query, true, true);
  Set<Query> flatQueries = new HashSet<>();
  fq.flatten(query, reader, flatQueries);
  assertCollectionQueries(flatQueries, tq(boost, "A"));
}
/**
 * {@link Filter#getDocIdSet}.
 */
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
  int[] index = new int[1]; // use array as reference to a modifiable int;
  index[0] = 0;             // an object attribute would not be thread-safe.
  if (logic != -1) {
    return BitsFilteredDocIdSet.wrap(getDocIdSet(context, logic, index), acceptDocs);
  } else if (logicArray != null) {
    return BitsFilteredDocIdSet.wrap(getDocIdSet(context, logicArray, index), acceptDocs);
  }
  return BitsFilteredDocIdSet.wrap(getDocIdSet(context, DEFAULT, index), acceptDocs);
}
private DocIdSetIterator getDISI(Filter filter, AtomicReaderContext context) throws IOException {
  // we don't pass acceptDocs; we will filter at the end using an additional filter
  DocIdSet docIdSet = filter.getDocIdSet(context, null);
  if (docIdSet == null) {
    return DocIdSetIterator.empty();
  }
  DocIdSetIterator iter = docIdSet.iterator();
  if (iter == null) {
    return DocIdSetIterator.empty();
  }
  return iter;
}
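// Typical call site (filter and context are illustrative names): because
// getDISI never returns null, callers can iterate without any null checks.
DocIdSetIterator it = getDISI(filter, context);
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
  // consume doc ...
}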
/**
 * Delegates to each filter in the chain.
 *
 * @param context the {@link AtomicReaderContext}
 * @param logic the logical operation to apply
 * @param index shared cursor into the chain
 * @return the resulting DocIdSet
 */
private DocIdSet getDocIdSet(AtomicReaderContext context, int logic, int[] index) throws IOException {
  FixedBitSet result = initialResult(context, logic, index);
  for (; index[0] < chain.length; index[0]++) {
    // we don't pass acceptDocs; we will filter at the end using an additional filter
    doChain(result, logic, chain[index[0]].getDocIdSet(context, null));
  }
  return result;
}
/**
 * Delegates to each filter in the chain, applying a per-filter logical operation.
 *
 * @param context the {@link AtomicReaderContext}
 * @param logic array of logical operations, one per filter in the chain
 * @param index shared cursor into the chain
 * @return the resulting DocIdSet
 */
private DocIdSet getDocIdSet(AtomicReaderContext context, int[] logic, int[] index) throws IOException {
  if (logic.length != chain.length) {
    throw new IllegalArgumentException("Invalid number of elements in logic array");
  }
  FixedBitSet result = initialResult(context, logic[0], index);
  for (; index[0] < chain.length; index[0]++) {
    // we don't pass acceptDocs; we will filter at the end using an additional filter
    doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(context, null));
  }
  return result;
}
private Filter getEmptyFilter() {
  return new Filter() {
    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) {
      return new FixedBitSet(context.reader().maxDoc());
    }
  };
}
private Filter getNullDISFilter() {
  return new Filter() {
    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) {
      return null;
    }
  };
}
private Filter getNullDISIFilter() {
  return new Filter() {
    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) {
      return DocIdSet.EMPTY;
    }
  };
}
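// Contract recap for these test helpers (a summary, not original code): a Filter
// can signal "no matches" at several levels, and consumers must cope with each:
// getDocIdSet(...) itself returning null (getNullDISFilter), a DocIdSet whose
// iterator() is null (the anonymous variant of getNullDISIFilter earlier), or an
// always-empty set (DocIdSet.EMPTY). Helpers like getDISI above normalize all of
// these to an empty iterator.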
private void tstFilterCard(String mes, int expected, Filter filt) throws Exception {
  final DocIdSet docIdSet = filt.getDocIdSet(reader.getContext(), reader.getLiveDocs());
  int actual = 0;
  if (docIdSet != null) {
    DocIdSetIterator disi = docIdSet.iterator();
    if (disi != null) { // iterator() may return null for an empty set
      while (disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        actual++;
      }
    }
  }
  assertEquals(mes, expected, actual);
}
public void testMissingTermAndField() throws Exception {
  String fieldName = "field1";
  Directory rd = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), rd);
  Document doc = new Document();
  doc.add(newStringField(fieldName, "value1", Field.Store.NO));
  w.addDocument(doc);
  IndexReader reader = SlowCompositeReaderWrapper.wrap(w.getReader());
  assertTrue(reader.getContext() instanceof AtomicReaderContext);
  AtomicReaderContext context = (AtomicReaderContext) reader.getContext();
  w.close();

  DocIdSet idSet = termFilter(fieldName, "value1").getDocIdSet(context, context.reader().getLiveDocs());
  assertNotNull("must not be null", idSet);
  DocIdSetIterator iter = idSet.iterator();
  assertEquals(iter.nextDoc(), 0);
  assertEquals(iter.nextDoc(), DocIdSetIterator.NO_MORE_DOCS);

  idSet = termFilter(fieldName, "value2").getDocIdSet(context, context.reader().getLiveDocs());
  assertNull("must be null", idSet);

  idSet = termFilter("field2", "value1").getDocIdSet(context, context.reader().getLiveDocs());
  assertNull("must be null", idSet);

  reader.close();
  rd.close();
}
public void testMissingField() throws Exception {
  String fieldName = "field1";
  Directory rd1 = newDirectory();
  RandomIndexWriter w1 = new RandomIndexWriter(random(), rd1);
  Document doc = new Document();
  doc.add(newStringField(fieldName, "content1", Field.Store.YES));
  w1.addDocument(doc);
  IndexReader reader1 = w1.getReader();
  w1.close();

  fieldName = "field2";
  Directory rd2 = newDirectory();
  RandomIndexWriter w2 = new RandomIndexWriter(random(), rd2);
  doc = new Document();
  doc.add(newStringField(fieldName, "content2", Field.Store.YES));
  w2.addDocument(doc);
  IndexReader reader2 = w2.getReader();
  w2.close();

  TermsFilter tf = new TermsFilter(new Term(fieldName, "content1"));
  MultiReader multi = new MultiReader(reader1, reader2);
  for (AtomicReaderContext context : multi.leaves()) {
    DocIdSet docIdSet = tf.getDocIdSet(context, context.reader().getLiveDocs());
    if (context.reader().docFreq(new Term(fieldName, "content1")) == 0) {
      assertNull(docIdSet);
    } else {
      FixedBitSet bits = (FixedBitSet) docIdSet;
      assertTrue("Must be >= 0", bits.cardinality() >= 0);
    }
  }
  multi.close();
  reader1.close();
  reader2.close();
  rd1.close();
  rd2.close();
}
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
  Bits docsWithField;
  if (field == null) {
    docsWithField = null; // all docs
  } else {
    // NOTE By using the FieldCache we re-use a cache, which is nice, but loading
    // it in this way might be slower than say using an intersects filter against
    // the world bounds. So do we add a method to the strategy, perhaps? But the
    // strategy can't cache it.
    docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), field);
    final int maxDoc = context.reader().maxDoc();
    if (docsWithField.length() != maxDoc) {
      throw new IllegalStateException("Bits length should be maxDoc (" + maxDoc + ") but wasn't: " + docsWithField);
    }
    if (docsWithField instanceof Bits.MatchNoBits) {
      return null; // match nothing
    } else if (docsWithField instanceof Bits.MatchAllBits) {
      docsWithField = null; // all docs
    }
  }
  // not so much a chain but a way to conveniently invert the Filter
  DocIdSet docIdSet = new ChainedFilter(new Filter[]{intersectsFilter}, ChainedFilter.ANDNOT)
      .getDocIdSet(context, acceptDocs);
  return BitsFilteredDocIdSet.wrap(docIdSet, docsWithField);
}
@Override
public DocIdSet getDocIdSet(final AtomicReaderContext context, final Bits acceptDocs) throws IOException {
  return new DocIdSet() {
    @Override
    public DocIdSetIterator iterator() throws IOException {
      throw new UnsupportedOperationException(
          "Iteration is too slow; instead try FilteredQuery.QUERY_FIRST_FILTER_STRATEGY");
      // Note that if you're truly bent on doing this, then see FunctionValues.getRangeScorer
    }

    @Override
    public Bits bits() throws IOException {
      // null Map context -- we simply don't have one. That's ok.
      final FunctionValues predFuncValues = predicateValueSource.getValues(null, context);
      return new Bits() {
        @Override
        public boolean get(int index) {
          if (acceptDocs != null && !acceptDocs.get(index)) {
            return false;
          }
          return predFuncValues.boolVal(index);
        }

        @Override
        public int length() {
          return context.reader().maxDoc();
        }
      };
    }
  };
}
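// Hedged usage sketch (innerQuery and predicateFilter are illustrative names):
// a bits()-only DocIdSet like the one above must be consumed via random access,
// so pair its filter with the query-first strategy the exception recommends.
Query filtered = new FilteredQuery(innerQuery, predicateFilter,
    FilteredQuery.QUERY_FIRST_FILTER_STRATEGY);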
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
  final FunctionValues values = source.getValues(null, context);
  return new FilteredDocIdSet(startingFilter.getDocIdSet(context, acceptDocs)) {
    @Override
    public boolean match(int doc) {
      double val = values.doubleVal(doc);
      return val >= min && val <= max;
    }
  };
}
@Override
protected DocIdSet cacheImpl(DocIdSetIterator iterator, AtomicReader reader) throws IOException {
  final FixedBitSet cached = new FixedBitSet(reader.maxDoc());
  cached.or(iterator);
  return cached;
}
/** Sole constructor. */
public MatchingDocs(AtomicReaderContext context, DocIdSet bits, int totalHits, float[] scores) {
  this.context = context;
  this.bits = bits;
  this.scores = scores;
  this.totalHits = totalHits;
}
@Override
public Scorer scorer(AtomicReaderContext readerContext, Bits acceptDocs) throws IOException {
  final Scorer childScorer = childWeight.scorer(readerContext, readerContext.reader().getLiveDocs());
  if (childScorer == null) {
    // No matches
    return null;
  }
  final int firstChildDoc = childScorer.nextDoc();
  if (firstChildDoc == DocIdSetIterator.NO_MORE_DOCS) {
    // No matches
    return null;
  }
  // NOTE: we cannot pass acceptDocs here because this
  // will (most likely, justifiably) cause the filter to
  // not return a FixedBitSet but rather a
  // BitsFilteredDocIdSet. Instead, we filter by
  // acceptDocs when we score:
  final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, null);
  if (parents == null) {
    // No matches
    return null;
  }
  if (!(parents instanceof FixedBitSet)) {
    throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
  }
  return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode, acceptDocs);
}