private MonotonicAppendingLongBuffer getDeletes(List<AtomicReader> readers) {
  MonotonicAppendingLongBuffer deletes = new MonotonicAppendingLongBuffer();
  int deleteCount = 0;
  for (AtomicReader reader : readers) {
    final int maxDoc = reader.maxDoc();
    final Bits liveDocs = reader.getLiveDocs();
    for (int i = 0; i < maxDoc; ++i) {
      if (liveDocs != null && !liveDocs.get(i)) {
        // deleted doc: no entry, but bumps the running count for all later live docs
        ++deleteCount;
      } else {
        // live doc: record how many deletes precede it (never decreases, hence monotonic)
        deletes.add(deleteCount);
      }
    }
  }
  deletes.freeze();
  return deletes;
}
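// A minimal usage sketch (hypothetical helper, not part of the source). By construction,
// entry k of the returned buffer is the number of deleted docs preceding the k-th live doc,
// so a doc ID in the deletes-compacted space maps back to the full doc-ID space like this:
static int compactedToFull(MonotonicAppendingLongBuffer deletes, int compactedDocID) {
  return compactedDocID + (int) deletes.get(compactedDocID);
}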
WAH8DocIdSet(byte[] data, int cardinality, int indexInterval,
    MonotonicAppendingLongBuffer positions, MonotonicAppendingLongBuffer wordNums) {
  this.data = data;
  this.cardinality = cardinality;
  this.indexInterval = indexInterval;
  this.positions = positions;
  this.wordNums = wordNums;
}
Iterator(byte[] data, int cardinality, int indexInterval,
    MonotonicAppendingLongBuffer positions, MonotonicAppendingLongBuffer wordNums) {
  this.in = new ByteArrayDataInput(data);
  this.cardinality = cardinality;
  this.indexInterval = indexInterval;
  this.positions = positions;
  this.wordNums = wordNums;
  wordNum = -1;
  word = 0;
  bitList = 0;
  sequenceNum = -1;
  docID = -1;
  indexThreshold = indexThreshold(cardinality, indexInterval);
}
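// A hedged sketch of how the two buffers are presumably consumed (helper names and layout
// are assumptions, not the source): positions is taken to hold the byte offset in `data` of
// every (indexInterval)-th word sequence, and wordNums the word number found there, so
// advance() can binary-search the index and jump the stream instead of decoding every
// intervening sequence. floorIndex uses only the buffer's public get()/size() methods:
static int floorIndex(MonotonicAppendingLongBuffer buf, long target) {
  int lo = 0, hi = (int) buf.size() - 1, res = -1;
  while (lo <= hi) {
    int mid = (lo + hi) >>> 1;
    if (buf.get(mid) <= target) { res = mid; lo = mid + 1; }
    else { hi = mid - 1; }
  }
  return res; // greatest index whose value is <= target, or -1 if none
}

void skipTo(int targetWordNum) { // hypothetical skip, assuming the layout above
  int idx = floorIndex(wordNums, targetWordNum);
  if (idx >= 0) {
    in.setPosition((int) positions.get(idx)); // jump to the indexed sequence
    wordNum = (int) wordNums.get(idx) - 1;    // decoding resumes just before it
  }
}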
PForDeltaDocIdSet(byte[] data, int cardinality, int indexInterval,
    MonotonicAppendingLongBuffer docIDs, MonotonicAppendingLongBuffer offsets) {
  this.data = data;
  this.cardinality = cardinality;
  this.indexInterval = indexInterval;
  this.docIDs = docIDs;
  this.offsets = offsets;
}
Iterator(byte[] data, int cardinality, int indexInterval,
    MonotonicAppendingLongBuffer docIDs, MonotonicAppendingLongBuffer offsets) {
  this.data = data;
  this.cardinality = cardinality;
  this.indexInterval = indexInterval;
  this.docIDs = docIDs;
  this.offsets = offsets;
  offset = 0;
  nextDocs = new int[BLOCK_SIZE];
  Arrays.fill(nextDocs, -1);
  i = BLOCK_SIZE;
  nextExceptions = new int[BLOCK_SIZE];
  blockIdx = -1;
  docID = -1;
}
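// A similar hedged sketch for this skip list (hypothetical, reusing the floorIndex helper
// sketched above): docIDs is assumed to keep the first doc ID of every (indexInterval)-th
// block and offsets that block's byte offset in `data`, so advance(target) can skip whole
// blocks without decoding them:
void skipToBlock(int target) {
  int idx = floorIndex(docIDs, target);
  if (idx >= 0) {
    offset = (int) offsets.get(idx);     // reposition the byte stream at the block start
    docID = (int) docIDs.get(idx) - 1;   // decoding resumes just before the block's first doc
    blockIdx = idx * indexInterval - 1;  // assumed bookkeeping: blocks consumed so far
    i = BLOCK_SIZE;                      // force the next read to decode a fresh block
  }
}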
/**
 * Creates an ordinal map that allows mapping ords to/from a merged
 * space from <code>subs</code>.
 * @param owner a cache key
 * @param subs TermsEnums that support {@link TermsEnum#ord()}. They need
 *        not be dense (e.g. can be FilteredTermsEnums).
 * @throws IOException if an I/O error occurred.
 */
public OrdinalMap(Object owner, TermsEnum subs[]) throws IOException {
  // create the ordinal mappings by pulling a termsenum over each sub's
  // unique terms, and walking a multitermsenum over those
  this.owner = owner;
  globalOrdDeltas = new MonotonicAppendingLongBuffer();
  subIndexes = new AppendingLongBuffer();
  ordDeltas = new MonotonicAppendingLongBuffer[subs.length];
  for (int i = 0; i < ordDeltas.length; i++) {
    ordDeltas[i] = new MonotonicAppendingLongBuffer();
  }
  long segmentOrds[] = new long[subs.length];
  ReaderSlice slices[] = new ReaderSlice[subs.length];
  TermsEnumIndex indexes[] = new TermsEnumIndex[slices.length];
  for (int i = 0; i < slices.length; i++) {
    slices[i] = new ReaderSlice(0, 0, i);
    indexes[i] = new TermsEnumIndex(subs[i], i);
  }
  MultiTermsEnum mte = new MultiTermsEnum(slices);
  mte.reset(indexes);
  long globalOrd = 0;
  while (mte.next() != null) {
    TermsEnumWithSlice matches[] = mte.getMatchArray();
    for (int i = 0; i < mte.getMatchCount(); i++) {
      int subIndex = matches[i].index;
      long segmentOrd = matches[i].terms.ord();
      long delta = globalOrd - segmentOrd;
      // for each unique term, just mark the first subindex/delta where it occurs
      if (i == 0) {
        subIndexes.add(subIndex);
        globalOrdDeltas.add(delta);
      }
      // for each per-segment ord, map it back to the global term.
      while (segmentOrds[subIndex] <= segmentOrd) {
        ordDeltas[subIndex].add(delta);
        segmentOrds[subIndex]++;
      }
    }
    globalOrd++;
  }
}
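// A minimal readback sketch (hedged): every value stored above is a (globalOrd - segmentOrd)
// delta, so the mappings invert by simple addition and subtraction:
public long getGlobalOrd(int subIndex, long segmentOrd) {
  return segmentOrd + ordDeltas[subIndex].get(segmentOrd);
}

public long getFirstSegmentOrd(long globalOrd) {
  return globalOrd - globalOrdDeltas.get(globalOrd);
}

public int getFirstSegmentNumber(long globalOrd) {
  return (int) subIndexes.get(globalOrd);
}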
public SortedDocValuesImpl(PagedBytes.Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset,
    PackedInts.Reader docToTermOrd, int numOrd) {
  this.bytes = bytes;
  this.docToTermOrd = docToTermOrd;
  this.termOrdToBytesOffset = termOrdToBytesOffset;
  this.numOrd = numOrd;
}
@Override
protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField /* ignored */)
    throws IOException {
  final int maxDoc = reader.maxDoc();
  Terms terms = reader.terms(key.field);

  final float acceptableOverheadRatio = ((Float) key.custom).floatValue();

  final PagedBytes bytes = new PagedBytes(15);

  int startTermsBPV;

  final int termCountHardLimit;
  if (maxDoc == Integer.MAX_VALUE) {
    termCountHardLimit = Integer.MAX_VALUE;
  } else {
    termCountHardLimit = maxDoc+1;
  }

  // TODO: use Uninvert?
  if (terms != null) {
    // Try for coarse estimate for number of bits; this
    // should be an underestimate most of the time, which
    // is fine -- GrowableWriter will reallocate as needed
    long numUniqueTerms = terms.size();
    if (numUniqueTerms != -1L) {
      if (numUniqueTerms > termCountHardLimit) {
        // app is misusing the API (there is more than
        // one term per doc); in this case we make best
        // effort to load what we can (see LUCENE-2142)
        numUniqueTerms = termCountHardLimit;
      }
      startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
    } else {
      startTermsBPV = 1;
    }
  } else {
    startTermsBPV = 1;
  }

  MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer();
  final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);

  int termOrd = 0;

  // TODO: use Uninvert?
  if (terms != null) {
    final TermsEnum termsEnum = terms.iterator(null);
    DocsEnum docs = null;

    while(true) {
      final BytesRef term = termsEnum.next();
      if (term == null) {
        break;
      }
      if (termOrd >= termCountHardLimit) {
        break;
      }

      termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
      docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
      while (true) {
        final int docID = docs.nextDoc();
        if (docID == DocIdSetIterator.NO_MORE_DOCS) {
          break;
        }
        // Store 1+ ord into packed bits
        docToTermOrd.set(docID, 1+termOrd);
      }
      termOrd++;
    }
  }
  termOrdToBytesOffset.freeze();

  // maybe an int-only impl?
  return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset, docToTermOrd.getMutable(), termOrd);
}
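// The matching read side (a hedged sketch of the accessor on SortedDocValuesImpl): because
// ords were stored as 1+ord above, docs that matched no term (stored 0) decode to ord -1:
@Override
public int getOrd(int docID) {
  // subtract the 1 added at index time so missing docs come back as -1
  return (int) docToTermOrd.get(docID) - 1;
}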
/**
 * Creates an ordinal map that allows mapping ords to/from a merged
 * space from <code>subs</code>.
 * @param owner a cache key
 * @param subs TermsEnums that support {@link TermsEnum#ord()}. They need
 *        not be dense (e.g. can be FilteredTermsEnums).
 * @throws IOException if an I/O error occurred.
 */
public OrdinalMap(Object owner, TermsEnum subs[]) throws IOException {
  // create the ordinal mappings by pulling a termsenum over each sub's
  // unique terms, and walking a multitermsenum over those
  this.owner = owner;
  globalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
  firstSegments = new AppendingPackedLongBuffer(PackedInts.COMPACT);
  ordDeltas = new MonotonicAppendingLongBuffer[subs.length];
  for (int i = 0; i < ordDeltas.length; i++) {
    ordDeltas[i] = new MonotonicAppendingLongBuffer();
  }
  long segmentOrds[] = new long[subs.length];
  ReaderSlice slices[] = new ReaderSlice[subs.length];
  TermsEnumIndex indexes[] = new TermsEnumIndex[slices.length];
  for (int i = 0; i < slices.length; i++) {
    slices[i] = new ReaderSlice(0, 0, i);
    indexes[i] = new TermsEnumIndex(subs[i], i);
  }
  MultiTermsEnum mte = new MultiTermsEnum(slices);
  mte.reset(indexes);
  long globalOrd = 0;
  while (mte.next() != null) {
    TermsEnumWithSlice matches[] = mte.getMatchArray();
    for (int i = 0; i < mte.getMatchCount(); i++) {
      int segmentIndex = matches[i].index;
      long segmentOrd = matches[i].terms.ord();
      long delta = globalOrd - segmentOrd;
      // for each unique term, just mark the first segment index/delta where it occurs
      if (i == 0) {
        firstSegments.add(segmentIndex);
        globalOrdDeltas.add(delta);
      }
      // for each per-segment ord, map it back to the global term.
      while (segmentOrds[segmentIndex] <= segmentOrd) {
        ordDeltas[segmentIndex].add(delta);
        segmentOrds[segmentIndex]++;
      }
    }
    globalOrd++;
  }
  firstSegments.freeze();
  globalOrdDeltas.freeze();
  for (int i = 0; i < ordDeltas.length; ++i) {
    ordDeltas[i].freeze();
  }
}
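// Readback mirrors the sketch after the earlier constructor, with firstSegments taking the
// place of subIndexes (hedged; the delta-based accessors are unchanged):
public int getFirstSegmentNumber(long globalOrd) {
  return (int) firstSegments.get(globalOrd);
}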