/**
 * Walks every term of the given {@link TermsEnum}, assigns each term the next
 * ordinal and registers all documents of that term under the ordinal. The
 * work happens one term per {@code next()} call, so the caller must exhaust
 * the returned {@link BytesRefIterator}; the first value it returns is
 * associated with the ordinal <tt>1</tt>, the second with <tt>2</tt>, etc.
 * <p>
 * If the {@link TermsEnum} contains prefix coded numerical values, wrap it
 * with either {@link #wrapNumeric32Bit(TermsEnum)} or
 * {@link #wrapNumeric64Bit(TermsEnum)} depending on its precision. An
 * unwrapped enum makes the returned {@link BytesRefIterator} yield partial
 * precision terms in addition to the full-precision ones.
 * </p>
 *
 * @param termsEnum the terms to consume
 * @return an iterator over the consumed terms; {@code next()} returns {@code null} once the terms are exhausted
 */
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
    return new BytesRefIterator() {
        // Reused across terms to avoid allocating a new enum per term.
        private PostingsEnum postingsEnum = null;

        @Override
        public BytesRef next() throws IOException {
            final BytesRef term = termsEnum.next();
            if (term == null) {
                return null;
            }
            postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
            nextOrdinal();
            for (int docId = postingsEnum.nextDoc();
                 docId != DocIdSetIterator.NO_MORE_DOCS;
                 docId = postingsEnum.nextDoc()) {
                addDoc(docId);
            }
            return term;
        }
    };
}
@Override public TwoPhaseIterator twoPhaseIterator() { final TwoPhaseIterator inTwoPhase = this.in.twoPhaseIterator(); final DocIdSetIterator approximation = inTwoPhase == null ? in.iterator() : inTwoPhase.approximation(); return new TwoPhaseIterator(approximation) { @Override public boolean matches() throws IOException { // we need to check the two-phase iterator first // otherwise calling score() is illegal if (inTwoPhase != null && inTwoPhase.matches() == false) { return false; } return in.score() >= minScore; } @Override public float matchCost() { return 1000f // random constant for the score computation + (inTwoPhase == null ? 0 : inTwoPhase.matchCost()); } }; }
/** Return null if id is not found. */ public DocIdAndVersion lookup(BytesRef id, Bits liveDocs, LeafReaderContext context) throws IOException { if (termsEnum.seekExact(id)) { // there may be more than one matching docID, in the case of nested docs, so we want the last one: docsEnum = termsEnum.postings(docsEnum, 0); int docID = DocIdSetIterator.NO_MORE_DOCS; for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum.nextDoc()) { if (liveDocs != null && liveDocs.get(d) == false) { continue; } docID = d; } if (docID != DocIdSetIterator.NO_MORE_DOCS) { return new DocIdAndVersion(docID, versions.get(docID), context); } } return null; }
/** * Check whether there is one or more documents matching the provided query. */ public static boolean exists(IndexSearcher searcher, Query query) throws IOException { final Weight weight = searcher.createNormalizedWeight(query, false); // the scorer API should be more efficient at stopping after the first // match than the bulk scorer API for (LeafReaderContext context : searcher.getIndexReader().leaves()) { final Scorer scorer = weight.scorer(context); if (scorer == null) { continue; } final Bits liveDocs = context.reader().getLiveDocs(); final DocIdSetIterator iterator = scorer.iterator(); for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) { if (liveDocs == null || liveDocs.get(doc)) { return true; } } } return false; }
/**
 * Return a Scorer that throws an {@link IllegalStateException}
 * on all operations with the given message.
 *
 * @param message the exception message used by every method of the returned scorer
 * @return a scorer whose {@code score()}, {@code freq()}, {@code docID()} and
 *         {@code iterator()} always throw
 */
public static Scorer illegalScorer(final String message) {
    // The scorer is constructed with a null argument (presumably its weight — TODO confirm).
    return new Scorer(null) {
        @Override
        public float score() throws IOException {
            throw new IllegalStateException(message);
        }

        @Override
        public int freq() throws IOException {
            throw new IllegalStateException(message);
        }

        @Override
        public int docID() {
            throw new IllegalStateException(message);
        }

        @Override
        public DocIdSetIterator iterator() {
            throw new IllegalStateException(message);
        }
    };
}
/**
 * Creates a scorer over the conjunction of the given term postings.
 *
 * @param weight      the owning weight, also kept as {@code selfWeight}
 * @param postings    one entry per term; each contributes its postings to the conjunction
 *                    and is paired with its position
 * @param docScorer   the similarity scorer to store
 * @param needsScores whether scores are needed
 * @param matchCost   the match cost to store
 */
SeqSpanScorer(SeqSpanWeight weight, PostingsAndFreq[] postings, Similarity.SimScorer docScorer, boolean needsScores, float matchCost) throws IOException {
    super(weight);
    this.selfWeight = weight;
    this.docScorer = docScorer;
    this.needsScores = needsScores;
    this.matchCost = matchCost;

    final int termCount = postings.length;
    final List<DocIdSetIterator> iterators = new ArrayList<>(termCount);
    final PostingsAndPosition[] positioned = new PostingsAndPosition[termCount];
    for (int i = 0; i < termCount; i++) {
        final PostingsAndFreq entry = postings[i];
        iterators.add(entry.postings);
        positioned[i] = new PostingsAndPosition(entry.postings, entry.position);
    }
    conjunction = ConjunctionDISI.intersectIterators(iterators);
    this.postings = positioned;
}
/**
 * Moves to the next position, stepping to the next document first when all
 * positions of the current document have been consumed.
 *
 * @return {@code false} when there are no postings or no further document,
 *         {@code true} once {@code position} has been updated
 */
@Override
public boolean next() throws IOException {
    final boolean currentDocConsumed = (count == freq);
    if (currentDocConsumed) {
        if (postings == null) {
            return false;
        }
        doc = postings.nextDoc();
        if (doc == DocIdSetIterator.NO_MORE_DOCS) {
            return false;
        }
        // Start counting the positions of the new document from zero.
        freq = postings.freq();
        count = 0;
    }
    position = postings.nextPosition();
    count += 1;
    // The payload of the new position has not been read yet.
    readPayload = false;
    return true;
}
@Override int nextDoc() { if (idx >= size) { offset = -1; return doc = DocIdSetIterator.NO_MORE_DOCS; } doc = (int) docs.get(idx); ++idx; while (idx < size && docs.get(idx) == doc) { ++idx; } // idx points to the "next" element long prevIdx = idx - 1; // cannot change 'value' here because nextDoc is called before the // value is used, and it's a waste to clone the BytesRef when we // obtain the value offset = (int) offsets.get(prevIdx); length = (int) lengths.get(prevIdx); return doc; }
/** Does in-place OR of the bits provided by the * iterator. */ public void or(DocIdSetIterator iter) throws IOException { if (iter instanceof OpenBitSetIterator && iter.docID() == -1) { final OpenBitSetIterator obs = (OpenBitSetIterator) iter; or(obs.arr, obs.words); // advance after last doc that would be accepted if standard // iteration is used (to exhaust it): obs.advance(numBits); } else if (iter instanceof FixedBitSetIterator && iter.docID() == -1) { final FixedBitSetIterator fbs = (FixedBitSetIterator) iter; or(fbs.bits, fbs.numWords); // advance after last doc that would be accepted if standard // iteration is used (to exhaust it): fbs.advance(numBits); } else { int doc; while ((doc = iter.nextDoc()) < numBits) { set(doc); } } }
/** Does in-place AND NOT of the bits provided by the * iterator. */ public void andNot(DocIdSetIterator iter) throws IOException { if (iter instanceof OpenBitSetIterator && iter.docID() == -1) { final OpenBitSetIterator obs = (OpenBitSetIterator) iter; andNot(obs.arr, obs.words); // advance after last doc that would be accepted if standard // iteration is used (to exhaust it): obs.advance(numBits); } else if (iter instanceof FixedBitSetIterator && iter.docID() == -1) { final FixedBitSetIterator fbs = (FixedBitSetIterator) iter; andNot(fbs.bits, fbs.numWords); // advance after last doc that would be accepted if standard // iteration is used (to exhaust it): fbs.advance(numBits); } else { int doc; while ((doc = iter.nextDoc()) < numBits) { clear(doc); } } }
/**
 * Consumes all terms of the given {@link TermsEnum}: each term receives the
 * next ordinal and every document of the term is added under that ordinal.
 * Terms are processed one per {@code next()} call, so the caller must exhaust
 * the returned {@link BytesRefIterator}; the first returned value is
 * associated with the ordinal <tt>1</tt>, the next with <tt>2</tt>, etc.
 * <p>
 * If the {@link TermsEnum} contains prefix coded numerical values, wrap it
 * with either {@link #wrapNumeric32Bit(TermsEnum)} or
 * {@link #wrapNumeric64Bit(TermsEnum)} depending on its precision. If the
 * enum is not wrapped, the returned {@link BytesRefIterator} will contain
 * partial precision terms rather than only full-precision terms.
 * </p>
 *
 * @param termsEnum the terms to consume
 * @return an iterator over the consumed terms; {@code next()} returns {@code null} once the terms are exhausted
 */
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
    return new BytesRefIterator() {
        // Handed back to postings() on every term so it can be reused.
        private PostingsEnum reusedPostings = null;

        @Override
        public BytesRef next() throws IOException {
            final BytesRef current = termsEnum.next();
            if (current == null) {
                return null;
            }
            reusedPostings = termsEnum.postings(reusedPostings, PostingsEnum.NONE);
            nextOrdinal();
            int docId = reusedPostings.nextDoc();
            while (docId != DocIdSetIterator.NO_MORE_DOCS) {
                addDoc(docId);
                docId = reusedPostings.nextDoc();
            }
            return current;
        }
    };
}
@Override public Scorer scorer(LeafReaderContext context) throws IOException { final Scorer parentScorer = parentWeight.scorer(context); // no matches if (parentScorer == null) { return null; } BitSet parents = parentsFilter.getBitSet(context); if (parents == null) { // No matches return null; } int firstParentDoc = parentScorer.iterator().nextDoc(); if (firstParentDoc == DocIdSetIterator.NO_MORE_DOCS) { // No matches return null; } return new IncludeNestedDocsScorer(this, parentScorer, parents, firstParentDoc); }
@Override public Scorer scorer(LeafReaderContext context) throws IOException { DocIdSet childrenDocSet = childrenFilter.getDocIdSet(context, null); // we forcefully apply live docs here so that deleted children don't give matching parents childrenDocSet = BitsFilteredDocIdSet.wrap(childrenDocSet, context.reader().getLiveDocs()); if (Lucene.isEmpty(childrenDocSet)) { return null; } final DocIdSetIterator childIterator = childrenDocSet.iterator(); if (childIterator == null) { return null; } SortedDocValues bytesValues = globalIfd.load(context).getOrdinalsValues(parentType); if (bytesValues == null) { return null; } return new ChildScorer(this, parentIdxs, scores, childIterator, bytesValues); }
@Override public Scorer scorer(LeafReaderContext context) throws IOException { DocIdSet childrenDocIdSet = childrenFilter.getDocIdSet(context, null); if (Lucene.isEmpty(childrenDocIdSet)) { return null; } SortedDocValues globalValues = globalIfd.load(context).getOrdinalsValues(parentType); if (globalValues != null) { // we forcefully apply live docs here so that deleted children don't give matching parents childrenDocIdSet = BitsFilteredDocIdSet.wrap(childrenDocIdSet, context.reader().getLiveDocs()); DocIdSetIterator innerIterator = childrenDocIdSet.iterator(); if (innerIterator != null) { ChildrenDocIdIterator childrenDocIdIterator = new ChildrenDocIdIterator( innerIterator, parentOrds, globalValues ); return ConstantScorer.create(childrenDocIdIterator, this, queryWeight); } } return null; }
/** * Tests only with a single empty <code>{@link PostingsEnumMock}</code> as basis for the <code>{@link MultiDocIdSetIterator}</code> */ @Test public void test_SingleEmpty() throws IOException { // arrange PostingsEnumMock mainPostings = new PostingsEnumMock(new int[0], new int[0]); MultiDocIdSetIterator postingsEnum = new MultiDocIdSetIterator( new PostingsEnumWeightTuple[]{ new PostingsEnumWeightTuple(mainPostings,0) }); // act + assert postingsEnum.nextDoc(); Assert.assertEquals(DocIdSetIterator.NO_MORE_DOCS, postingsEnum.docID()); Assert.assertEquals(DocIdSetIterator.NO_MORE_DOCS, mainPostings.docID()); }
/**
 * Returns the next document, moving on to the following atomic context
 * whenever the current one is exhausted.
 *
 * @return the next doc id (after {@code setupPerDoc()} has run for it), or
 *         {@code NO_MORE_DOCS} when the last context is exhausted or the next
 *         context could not be initialised
 */
public int nextDoc() throws IOException {
    int current = nextDocAtomicContext();
    while (current == DocIdSetIterator.NO_MORE_DOCS) {
        if (isAtLastContext()) {
            return DocIdSetIterator.NO_MORE_DOCS;
        }
        if (!initAtomicContextPositionsFreqs()) {
            return DocIdSetIterator.NO_MORE_DOCS;
        }
        current = nextDocAtomicContext();
    }
    setupPerDoc();
    return current;
}
/**
 * Runs a boolean join over every document of the reader, prints one CSV line
 * ({@code queryId,joinTypeId,numResultDocs,operatorCount,elapsedMillis}) and
 * records the join type under its number of matching documents.
 *
 * @param join            the join to run
 * @param r               the reader the join is set up on
 * @param joinType        the join type being measured
 * @param queryId         identifier printed as the first CSV column
 * @param numDocsJoinType maps a result count to the join types that produced it; updated in place
 */
private void execute(ComputesBooleanResult join, IndexReader r, JoinType joinType, int queryId, Map<Integer, List<JoinType>> numDocsJoinType) throws IOException {
    final long startTime = System.nanoTime();
    join.setup(r);

    int numResultDocs = 0;
    for (int docId = join.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = join.nextDoc()) {
        if (join.match()) {
            numResultDocs++;
        }
    }
    // Nanoseconds to milliseconds.
    final long elapsedMillis = (System.nanoTime() - startTime) / 1000000;

    System.out.println(queryId + "," + joinType.getId() + "," + numResultDocs + ","
            + countingNPA.getCountingOperatorAware().getCount() + "," + elapsedMillis);

    final Integer bucket = numResultDocs;
    if (!numDocsJoinType.containsKey(bucket)) {
        numDocsJoinType.put(bucket, new ArrayList<JoinType>());
    }
    numDocsJoinType.get(bucket).add(joinType);
}
/**
 * Prints, as one comma-separated line, the average term frequency of every
 * query term over the documents returned by a {@code DocIter} built from the
 * query labels. Nothing is printed when the query has no terms.
 */
private void printAvgTfInDocsWithAllTerms() throws IOException {
    final DocIter docIter = new DocIter(query.labels());
    docIter.setup(reader);

    final int numQueryTerms = query.labels().length;
    // A new long[] is already zero-filled, so no explicit initialisation is needed.
    final long[] totals = new long[numQueryTerms];
    int numDocs = 0;
    while (docIter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        numDocs++;
        for (int term = 0; term < numQueryTerms; term++) {
            totals[term] += docIter.getTermFreq(term);
        }
    }

    if (numQueryTerms <= 0) {
        return;
    }
    // With numDocs == 0 the totals are all zero, so each average is 0.0 / 0, i.e. NaN.
    final StringBuilder line = new StringBuilder();
    for (int term = 0; term < numQueryTerms; term++) {
        if (term > 0) {
            line.append(",");
        }
        line.append(format.format(totals[term] * 1.0 / numDocs));
    }
    System.out.println(line.toString());
}
/**
 * Runs a boolean join over every document of the reader and prints one CSV
 * line: {@code queryId,joinTypeId,numResultDocs,elapsedMillis}.
 *
 * @param join     the join to run
 * @param r        the reader the join is set up on
 * @param joinType the join type being measured
 * @param queryId  identifier printed as the first CSV column
 */
private void execute(ComputesBooleanResult join, IndexReader r, JoinType joinType, int queryId) throws IOException {
    final long startTime = System.nanoTime();
    join.setup(r);

    int numResultDocs = 0;
    for (int docId = join.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = join.nextDoc()) {
        if (join.match()) {
            numResultDocs++;
        }
    }
    // Nanoseconds to milliseconds.
    final long elapsedMillis = (System.nanoTime() - startTime) / 1000000;

    System.out.println(queryId + "," + joinType.getId() + "," + numResultDocs + "," + elapsedMillis);
}
/**
 * Runs a full-result join over every document of the reader and prints one
 * CSV line: {@code queryId,joinTypeId,numResultDocs,elapsedMillis,totalMatches}.
 *
 * @param join     the join to run
 * @param r        the reader the join is set up on
 * @param joinType the join type being measured
 * @param queryId  identifier printed as the first CSV column
 */
private void execute(ComputesFullResults join, IndexReader r, JoinType joinType, int queryId) throws IOException {
    final long startTime = System.nanoTime();
    join.setup(r);

    int numResultDocs = 0;
    int totalMatches = 0;
    for (int docId = join.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = join.nextDoc()) {
        final List<int[]> matches = join.match();
        final int matchCount = matches.size();
        // Only documents with at least one match count as result documents.
        if (matchCount > 0) {
            numResultDocs++;
            totalMatches += matchCount;
        }
    }
    // Nanoseconds to milliseconds.
    final long elapsedMillis = (System.nanoTime() - startTime) / 1000000;

    System.out.println(queryId + "," + joinType.getId() + "," + numResultDocs + "," + elapsedMillis + "," + totalMatches);
}
/**
 * Indexes enough documents for the reader to have more than one leaf, then
 * checks that {@code nextDoc()} returns every one of the 250000 documents
 * containing the required terms before the current atomic context reports
 * exhaustion.
 */
public void testNextDocSeamlesslyIteratesOverMultipleAtomicContexts() throws Exception {
    final int repetitions = 250000;
    AbstractJoin join = new AbstractJoinTestStub(new String[] { "AA", "BB", "DD" });

    IndexWriter writer = setupIndex();
    for (int n = 0; n < repetitions; n++) {
        // Only the first of the three documents holds all required terms.
        writer.addDocument(getDoc("(AA(BB DD))"));
        writer.addDocument(getDoc("(KK(LL MM)(NN OO)(PP QQ))"));
        writer.addDocument(getDoc("(RR(SS TT)(ZZ(UU VV))(WW YY))"));
    }
    IndexReader reader = commitIndexAndOpenReader(writer);
    assertTrue(reader.getContext().leaves().size() > 1);

    join.setup(reader);
    assertFalse("Should not be at last context on start", join.isAtLastContext());

    for (int i = 0; i < repetitions; i++) {
        final int doc = join.nextDoc();
        assertTrue("Expected 250000 docs with reqd terms but found " + (i + 1),
                doc != DocIdSetIterator.NO_MORE_DOCS);
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, join.nextDocAtomicContext());
}
/**
 * Moves to the next document and loads all of its positions into
 * {@code positions}, leaving {@code position} on the first one.
 *
 * @return {@code false} when there are no postings or no further document, {@code true} otherwise
 */
public boolean next() throws IOException {
    if (postings == null) {
        doc = DocIdSetIterator.NO_MORE_DOCS;
        return false;
    }

    doc = postings.nextDoc();
    if (doc == DocIdSetIterator.NO_MORE_DOCS) {
        return false;
    }

    freq = postings.freq();
    positions = new ArrayList<>();
    int loaded = 0;
    while (loaded < freq) {
        positions.add(postings.nextPosition());
        loaded++;
    }
    position = positions.get(0);
    return true;
}
@Override public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { if (!needsScores) { // If scores are not needed simply return a constant score on all docs return new ConstantScoreWeight(this) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc())); } }; } List<Weight> weights = new ArrayList<>(queries.size()); for (Query q : queries) { weights.add(searcher.createWeight(q, needsScores)); } return new RankerWeight(weights); }
/**
 * Builds the response entry for one term: the given count followed by one
 * {@code docN} sub-entry per live document containing the term, each listing
 * the payload (decoded as UTF-8) of every position as {@code positionM}.
 * <p>
 * Leaves in which the term does not occur are skipped, and a {@code null}
 * live-docs value is treated as "no deletions" (every document is live).
 *
 * @param count  the value stored under {@code "count"}
 * @param t      the term whose postings are read
 * @param leaves pairs of leaf reader and that leaf's live docs (live docs may be {@code null})
 * @return the populated entry
 */
private NamedList<Object> buildEntryValue(long count, Term t, List<Entry<LeafReader, Bits>> leaves) throws IOException {
    NamedList<Object> entry = new NamedList<>();
    entry.add("count", count);
    // Running document number across all leaves; incremented before use, so numbering starts at 0.
    int i = -1;
    for (Entry<LeafReader, Bits> e : leaves) {
        PostingsEnum postings = e.getKey().postings(t, PostingsEnum.PAYLOADS);
        if (postings == null) {
            // The field or term does not exist in this leaf.
            continue;
        }
        Bits liveDocs = e.getValue();
        while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            if (liveDocs != null && !liveDocs.get(postings.docID())) {
                continue;
            }
            i++;
            NamedList<Object> documentEntry = new NamedList<>();
            entry.add("doc" + i, documentEntry);
            // freq() is constant for the current document, so read it once.
            final int freq = postings.freq();
            for (int j = 0; j < freq; j++) {
                postings.nextPosition();
                // NOTE(review): assumes every position carries a payload; a position
                // without one would presumably make getPayload() return null — confirm.
                String extra = postings.getPayload().utf8ToString();
                documentEntry.add("position" + j, extra);
            }
        }
    }
    return entry;
}
/** "typical" single-valued faceting: not too many unique values, no prefixing. maps to global ordinals as a separate step */ static void accumSingleSeg(int counts[], SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { // First count in seg-ord space: final int segCounts[]; if (map == null) { segCounts = counts; } else { segCounts = new int[1+si.getValueCount()]; } int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { segCounts[1+si.getOrd(doc)]++; } // migrate to global ords (if necessary) if (map != null) { migrateGlobal(counts, segCounts, subIndex, map); } }
/** accumulates per-segment multi-valued facet counts, mapping to global ordinal space on-the-fly */ static void accumMultiGeneric(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex); int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { si.setDocument(doc); // strange do-while to collect the missing count (first ord is NO_MORE_ORDS) int term = (int) si.nextOrd(); if (term < 0) { if (startTermIndex == -1) { counts[0]++; // missing count } continue; } do { if (map != null) { term = (int) ordMap.get(term); } int arrIdx = term-startTermIndex; if (arrIdx>=0 && arrIdx<counts.length) counts[arrIdx]++; } while ((term = (int) si.nextOrd()) >= 0); } }
public void testDocsEnumStart() throws Exception { Analyzer analyzer = new MockAnalyzer(random()); MemoryIndex memory = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024); memory.addField("foo", "bar", analyzer); AtomicReader reader = (AtomicReader) memory.createSearcher().getIndexReader(); DocsEnum disi = TestUtil.docs(random(), reader, "foo", new BytesRef("bar"), null, null, DocsEnum.FLAG_NONE); int docid = disi.docID(); assertEquals(-1, docid); assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); // now reuse and check again TermsEnum te = reader.terms("foo").iterator(null); assertTrue(te.seekExact(new BytesRef("bar"))); disi = te.docs(null, disi, DocsEnum.FLAG_NONE); docid = disi.docID(); assertEquals(-1, docid); assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); reader.close(); }
/**
 * Returns the documents of the segment that contain {@code term}, or
 * {@code null} when the segment has no such field or no such term.
 *
 * @param context    the segment to look at
 * @param acceptDocs the bits passed to the terms enum when the iterator is created
 */
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
    final Terms fieldTerms = context.reader().terms(term.field());
    if (fieldTerms == null) {
        return null;
    }

    final TermsEnum positioned = fieldTerms.iterator(null);
    final boolean termExists = positioned.seekExact(term.bytes());
    if (!termExists) {
        return null;
    }

    // The enum stays positioned on the term; the postings are only opened on demand.
    return new DocIdSet() {
        @Override
        public DocIdSetIterator iterator() throws IOException {
            return positioned.docs(acceptDocs, null, DocsEnum.FLAG_NONE);
        }
    };
}
/**
 * Collects every document of the current term into a {@code SmallDocSet}.
 *
 * @param acceptContains the bits passed to the terms enum when opening the postings
 * @return the collected documents, or {@code null} when the term has none
 */
private SmallDocSet collectDocs(Bits acceptContains) throws IOException {
    docsEnum = termsEnum.docs(acceptContains, docsEnum, DocsEnum.FLAG_NONE);

    int docid = docsEnum.nextDoc();
    if (docid == DocIdSetIterator.NO_MORE_DOCS) {
        return null;
    }

    // The set is only created once a first doc exists; it is sized by docFreq,
    // falling back to 16 when docFreq is not positive.
    int capacity = termsEnum.docFreq();
    if (capacity <= 0) {
        capacity = 16;
    }
    final SmallDocSet collected = new SmallDocSet(capacity);
    do {
        collected.set(docid);
        docid = docsEnum.nextDoc();
    } while (docid != DocIdSetIterator.NO_MORE_DOCS);
    return collected;
}
/**
 * Returns the id of the single live document containing the term, asserting
 * that there is at most one such document.
 *
 * @param r the reader to search
 * @param t the term to look up
 * @return the matching doc id, or -1 when the reader has no fields, no such field,
 *         no such term, or no live document for the term
 */
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
    final Fields fields = MultiFields.getFields(r);
    if (fields == null) {
        return -1;
    }
    final Terms fieldTerms = fields.terms(t.field());
    if (fieldTerms == null) {
        return -1;
    }

    final BytesRef termBytes = t.bytes();
    final TermsEnum termsEnum = fieldTerms.iterator(null);
    if (!termsEnum.seekExact(termBytes)) {
        return -1;
    }

    final DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
    final int first = docs.nextDoc();
    if (first == DocIdSetIterator.NO_MORE_DOCS) {
        return -1;
    }
    // A second match would mean the term is not unique.
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docs.nextDoc());
    return first;
}
@Override protected DocIdSet docIdSetToCache(DocIdSet docIdSet, AtomicReader reader) throws IOException { if (docIdSet == null) { return EMPTY; } else if (docIdSet instanceof FixedBitSet) { // this is different from CachingWrapperFilter: even when the DocIdSet is // cacheable, we convert it to a FixedBitSet since we require all the // cached filters to be FixedBitSets return docIdSet; } else { final DocIdSetIterator it = docIdSet.iterator(); if (it == null) { return EMPTY; } else { final FixedBitSet copy = new FixedBitSet(reader.maxDoc()); copy.or(it); return copy; } } }
/**
 * Returns the next doc id, draining the postings of one term after another.
 * <p>
 * While a postings enum is open, its next document is returned. Once it is
 * exhausted it is dropped and the next term (in {@code ords} order) is looked
 * up; terms that are not found are skipped. Because documents come back
 * term by term, the returned ids are presumably not globally sorted, as the
 * method name suggests.
 *
 * @return the next doc id (also stored in {@code doc}), or
 *         {@code NO_MORE_DOCS} once all terms have been processed
 */
int nextDocOutOfOrder() throws IOException {
    while (true) {
        if (docsEnum != null) {
            int docId = docsEnumNextDoc();
            if (docId == DocIdSetIterator.NO_MORE_DOCS) {
                // Current term is exhausted; fall through to pick the next term.
                docsEnum = null;
            } else {
                return doc = docId;
            }
        }
        if (upto == terms.size()) {
            // Every term has been consumed.
            return doc = DocIdSetIterator.NO_MORE_DOCS;
        }
        // Remember the index of the term about to be iterated before upto is incremented below.
        scoreUpto = upto;
        if (termsEnum.seekExact(terms.get(ords[upto++], spare))) {
            // The previous enum is handed back for reuse and the result is kept in both fields.
            docsEnum = reuse = termsEnum.docs(acceptDocs, reuse, DocsEnum.FLAG_NONE);
        }
    }
}
/**
 * Builds the explanation for the given document by walking the terms via
 * {@code nextDocOutOfOrder()} until one of them contains {@code target}.
 *
 * @param target the doc id to explain
 * @return an explanation carrying the score of the term at {@code scoreUpto}
 *         and that term's text
 */
private Explanation explain(int target) throws IOException {
    int docId;
    do {
        docId = nextDocOutOfOrder();
        if (docId < target) {
            // The current term starts before target: jump ahead within the same term.
            int tempDocId = docsEnum.advance(target);
            if (tempDocId == target) {
                docId = tempDocId;
                break;
            }
        } else if (docId == target) {
            break;
        }
        docsEnum = null; // goto the next ord.
    } while (docId != DocIdSetIterator.NO_MORE_DOCS);
    // NOTE(review): a matching explanation is returned even when the loop ends on
    // NO_MORE_DOCS without finding target; presumably callers only pass docs that
    // are known to match — confirm.
    return new ComplexExplanation(true, scores[ords[scoreUpto]], "Score based on join value " + termsEnum.term().utf8ToString());
}
protected void fillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs, TermsEnum termsEnum) throws IOException { BytesRef spare = new BytesRef(); DocsEnum docsEnum = null; for (int i = 0; i < terms.size(); i++) { if (termsEnum.seekExact(terms.get(ords[i], spare))) { docsEnum = termsEnum.docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE); float score = TermsIncludingScoreQuery.this.scores[ords[i]]; for (int doc = docsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.nextDoc()) { matchingDocs.set(doc); // In the case the same doc is also related to a another doc, a score might be overwritten. I think this // can only happen in a many-to-many relation scores[doc] = score; } } } }
@Override protected void fillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs, TermsEnum termsEnum) throws IOException { BytesRef spare = new BytesRef(); DocsEnum docsEnum = null; for (int i = 0; i < terms.size(); i++) { if (termsEnum.seekExact(terms.get(ords[i], spare))) { docsEnum = termsEnum.docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE); float score = TermsIncludingScoreQuery.this.scores[ords[i]]; for (int doc = docsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.nextDoc()) { // I prefer this: /*if (scores[doc] < score) { scores[doc] = score; matchingDocs.set(doc); }*/ // But this behaves the same as MVInnerScorer and only then the tests will pass: if (!matchingDocs.get(doc)) { scores[doc] = score; matchingDocs.set(doc); } } } } }
/**
 * Checks that a {@code DuplicateFilter} in {@code KM_USE_LAST_OCCURRENCE} mode
 * only lets through, for each key, the last document carrying that key.
 */
public void testKeepsLastFilter() throws Throwable {
    DuplicateFilter keepLast = new DuplicateFilter(KEY_FIELD);
    keepLast.setKeepMode(DuplicateFilter.KeepMode.KM_USE_LAST_OCCURRENCE);

    ScoreDoc[] hits = searcher.search(tq, keepLast, 1000).scoreDocs;
    assertTrue("Filtered searching should have found some matches", hits.length > 0);

    for (int h = 0; h < hits.length; h++) {
        final ScoreDoc hit = hits[h];
        final String url = searcher.doc(hit.doc).get(KEY_FIELD);
        DocsEnum sameKeyDocs = TestUtil.docs(random(), reader,
                KEY_FIELD,
                new BytesRef(url),
                MultiFields.getLiveDocs(reader),
                null,
                0);

        // Walk to the end of the postings to find the last doc with this key.
        int lastDoc = 0;
        while (sameKeyDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            lastDoc = sameKeyDocs.docID();
        }
        assertEquals("Duplicate urls should return last doc", lastDoc, hit.doc);
    }
}
private static Document getFirstLiveDoc(Terms terms, AtomicReader reader) throws IOException { DocsEnum docsEnum = null; TermsEnum termsEnum = terms.iterator(null); BytesRef text; // Deal with the chance that the first bunch of terms are in deleted documents. Is there a better way? for (int idx = 0; idx < 1000 && docsEnum == null; ++idx) { text = termsEnum.next(); if (text == null) { // Ran off the end of the terms enum without finding any live docs with that field in them. return null; } docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE); if (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { return reader.document(docsEnum.docID()); } } return null; }
/**
 * Checks docs + freqs + positions + payloads + offsets, sequentially.
 * If either enum is {@code null}, both are required to be {@code null}.
 *
 * @param info      message used for the comparing assertions
 * @param leftDocs  the enum driving the iteration
 * @param rightDocs the enum that must agree with {@code leftDocs} at every step
 */
public void assertDocsAndPositionsEnumEquals(String info, DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs) throws IOException {
    if (leftDocs == null || rightDocs == null) {
        assertNull(leftDocs);
        assertNull(rightDocs);
        return;
    }

    // Both enums must start unpositioned.
    assertEquals(info, -1, leftDocs.docID());
    assertEquals(info, -1, rightDocs.docID());

    for (int docid = leftDocs.nextDoc(); docid != DocIdSetIterator.NO_MORE_DOCS; docid = leftDocs.nextDoc()) {
        assertEquals(info, docid, rightDocs.nextDoc());
        final int freq = leftDocs.freq();
        assertEquals(info, freq, rightDocs.freq());
        int remaining = freq;
        while (remaining-- > 0) {
            assertEquals(info, leftDocs.nextPosition(), rightDocs.nextPosition());
            assertEquals(info, leftDocs.getPayload(), rightDocs.getPayload());
            assertEquals(info, leftDocs.startOffset(), rightDocs.startOffset());
            assertEquals(info, leftDocs.endOffset(), rightDocs.endOffset());
        }
    }
    // The right enum must be exhausted as well.
    assertEquals(info, DocIdSetIterator.NO_MORE_DOCS, rightDocs.nextDoc());
}
/**
 * Checks docs + freqs, sequentially.
 * If either enum is {@code null}, both are required to be {@code null}; a
 * one-sided {@code null} fails as an assertion instead of a
 * {@code NullPointerException}.
 *
 * @param info      message used for the comparing assertions
 * @param leftDocs  the enum driving the iteration
 * @param rightDocs the enum that must agree with {@code leftDocs} at every step
 * @param hasFreqs  whether frequencies are compared as well
 */
public void assertDocsEnumEquals(String info, DocsEnum leftDocs, DocsEnum rightDocs, boolean hasFreqs) throws IOException {
    if (leftDocs == null || rightDocs == null) {
        assertNull(leftDocs);
        assertNull(rightDocs);
        return;
    }

    // Both enums must start unpositioned.
    assertEquals(info, -1, leftDocs.docID());
    assertEquals(info, -1, rightDocs.docID());

    int docid;
    while ((docid = leftDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        assertEquals(info, docid, rightDocs.nextDoc());
        if (hasFreqs) {
            assertEquals(info, leftDocs.freq(), rightDocs.freq());
        }
    }
    // The right enum must be exhausted as well.
    assertEquals(info, DocIdSetIterator.NO_MORE_DOCS, rightDocs.nextDoc());
}