@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException {
  finishLeaf();

  context = ctx;
  docDeltas = PackedLongValues.packedBuilder(PackedInts.DEFAULT);
  buckets = PackedLongValues.packedBuilder(PackedInts.DEFAULT);

  return new LeafBucketCollector() {
    int lastDoc = 0;

    @Override
    public void collect(int doc, long bucket) throws IOException {
      docDeltas.add(doc - lastDoc);
      buckets.add(bucket);
      lastDoc = doc;
      maxBucket = Math.max(maxBucket, bucket);
    }
  };
}
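// A minimal sketch (not part of the original source) of how the recorded deltas could be
// replayed after the builders above are frozen with build(); the "replay" name and the
// collector callback are hypothetical.
void replay(PackedLongValues docDeltas, PackedLongValues buckets) {
  final PackedLongValues.Iterator docIt = docDeltas.iterator();
  final PackedLongValues.Iterator bucketIt = buckets.iterator();
  int doc = 0;
  while (docIt.hasNext()) {
    doc += (int) docIt.next();          // deltas decode back into absolute doc IDs
    final long bucket = bucketIt.next();
    // ... hand (doc, bucket) to the real collector here
  }
}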
private void addAddresses(FieldInfo field, Iterable<Number> values) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(Lucene49DocValuesFormat.NUMERIC);
  meta.writeVInt(MONOTONIC_COMPRESSED);
  meta.writeLong(-1L);
  meta.writeLong(data.getFilePointer());
  meta.writeVLong(maxDoc);
  meta.writeVInt(PackedInts.VERSION_CURRENT);
  meta.writeVInt(BLOCK_SIZE);

  final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
  long addr = 0;
  writer.add(addr);
  for (Number v : values) {
    addr += v.longValue();
    writer.add(addr);
  }
  writer.finish();
  meta.writeLong(data.getFilePointer());
}
private void addAddresses(FieldInfo field, Iterable<Number> values) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(Lucene410DocValuesFormat.NUMERIC);
  meta.writeVInt(MONOTONIC_COMPRESSED);
  meta.writeLong(-1L);
  meta.writeLong(data.getFilePointer());
  meta.writeVLong(maxDoc);
  meta.writeVInt(PackedInts.VERSION_CURRENT);
  meta.writeVInt(BLOCK_SIZE);

  final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
  long addr = 0;
  writer.add(addr);
  for (Number v : values) {
    addr += v.longValue();
    writer.add(addr);
  }
  writer.finish();
  meta.writeLong(data.getFilePointer());
}
void pforDecompress(byte token) {
  final int bitsPerValue = token & 0x1F;
  if (bitsPerValue == 0) {
    Arrays.fill(nextDocs, 0);
  } else {
    DECODERS[bitsPerValue].decode(data, offset, nextDocs, 0, ITERATIONS[bitsPerValue]);
    offset += BYTE_BLOCK_COUNTS[bitsPerValue];
  }
  if ((token & HAS_EXCEPTIONS) != 0) {
    // there are exceptions
    final int numExceptions = data[offset++];
    final int bitsPerException = data[offset++];
    final int numIterations = (numExceptions + DECODERS[bitsPerException].byteValueCount() - 1) / DECODERS[bitsPerException].byteValueCount();
    DECODERS[bitsPerException].decode(data, offset, nextExceptions, 0, numIterations);
    offset += PackedInts.Format.PACKED.byteCount(PackedInts.VERSION_CURRENT, numExceptions, bitsPerException);
    for (int i = 0; i < numExceptions; ++i) {
      nextDocs[data[offset++]] |= nextExceptions[i] << bitsPerValue;
    }
  }
  for (int previousDoc = docID, i = 0; i < BLOCK_SIZE; ++i) {
    final int doc = previousDoc + 1 + nextDocs[i];
    previousDoc = nextDocs[i] = doc;
  }
}
private long getPageMemoryUsage(PackedLongValues values, float acceptableOverheadRatio, int pageSize, long pageMinOrdinal, long pageMaxOrdinal) {
  int bitsRequired;
  long pageMemorySize = 0;
  PackedInts.FormatAndBits formatAndBits;
  if (pageMaxOrdinal == Long.MIN_VALUE) {
    // empty page - will use the null reader which just stores size
    pageMemorySize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
  } else {
    long pageMinValue = values.get(pageMinOrdinal);
    long pageMaxValue = values.get(pageMaxOrdinal);
    long pageDelta = pageMaxValue - pageMinValue;
    if (pageDelta != 0) {
      bitsRequired = pageDelta < 0 ? 64 : PackedInts.bitsRequired(pageDelta);
      formatAndBits = PackedInts.fastestFormatAndBits(pageSize, bitsRequired, acceptableOverheadRatio);
      pageMemorySize += formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, pageSize, formatAndBits.bitsPerValue) * RamUsageEstimator.NUM_BYTES_LONG;
      pageMemorySize += RamUsageEstimator.NUM_BYTES_LONG; // min value per page storage
    } else {
      // empty page
      pageMemorySize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
    }
  }
  return pageMemorySize;
}
/**
 * Builds the final automaton from a list of entries.
 */
private FST<Object> buildAutomaton(BytesRefSorter sorter) throws IOException {
  // Build the automaton.
  final Outputs<Object> outputs = NoOutputs.getSingleton();
  final Object empty = outputs.getNoOutput();
  final Builder<Object> builder = new Builder<>(
      FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
      shareMaxTailLength, outputs, false,
      PackedInts.DEFAULT, true, 15);

  BytesRefBuilder scratch = new BytesRefBuilder();
  BytesRef entry;
  final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
  int count = 0;
  BytesRefIterator iter = sorter.iterator();
  while ((entry = iter.next()) != null) {
    count++;
    if (scratch.get().compareTo(entry) != 0) {
      builder.add(Util.toIntsRef(entry, scratchIntsRef), empty);
      scratch.copyBytes(entry);
    }
  }

  return count == 0 ? null : builder.finish();
}
private PackedLongValues getDeletes(List<AtomicReader> readers) {
  PackedLongValues.Builder deletes = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
  int deleteCount = 0;
  for (AtomicReader reader : readers) {
    final int maxDoc = reader.maxDoc();
    final Bits liveDocs = reader.getLiveDocs();
    for (int i = 0; i < maxDoc; ++i) {
      if (liveDocs != null && !liveDocs.get(i)) {
        ++deleteCount;
      } else {
        deletes.add(deleteCount);
      }
    }
  }
  return deletes.build();
}
public void testDateCompression() throws IOException {
  final Directory dir = new RAMDirectory();
  final IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  final IndexWriter iwriter = new IndexWriter(dir, iwc);

  final long base = 13; // prime
  final long day = 1000L * 60 * 60 * 24;

  final Document doc = new Document();
  final NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
  doc.add(dvf);
  for (int i = 0; i < 300; ++i) {
    dvf.setLongValue(base + random().nextInt(1000) * day);
    iwriter.addDocument(doc);
  }
  iwriter.forceMerge(1);
  final long size1 = dirSize(dir);

  for (int i = 0; i < 50; ++i) {
    dvf.setLongValue(base + random().nextInt(1000) * day);
    iwriter.addDocument(doc);
  }
  iwriter.forceMerge(1);
  final long size2 = dirSize(dir);

  // make sure the new longs cost less than if they had only been packed
  assertTrue(size2 < size1 + (PackedInts.bitsRequired(day) * 50) / 8);
}
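// Worked out from the constants above (not part of the original test): day == 86,400,000 ms,
// so PackedInts.bitsRequired(day) == 27 and the assertion allows at most (27 * 50) / 8 = 168
// additional bytes for the 50 extra documents.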
public void testInternalFinalState() throws Exception {
  final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
  final boolean willRewrite = random().nextBoolean();
  final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
      Integer.MAX_VALUE, outputs, willRewrite, PackedInts.DEFAULT, true, 15);
  builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRefBuilder()), outputs.getNoOutput());
  builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRefBuilder()), outputs.getNoOutput());
  final FST<Long> fst = builder.finish();

  StringWriter w = new StringWriter();
  //Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp/out.dot"));
  Util.toDot(fst, w, false, false);
  w.close();
  //System.out.println(w.toString());

  // check for accept state at label t
  assertTrue(w.toString().indexOf("[label=\"t\" style=\"bold\"") != -1);
  // check for accept state at label n
  assertTrue(w.toString().indexOf("[label=\"n\" style=\"bold\"") != -1);
}
TermsWriter(FieldInfo fieldInfo) {
  this.fieldInfo = fieldInfo;

  noOutputs = NoOutputs.getSingleton();

  // This Builder is just used transiently to fragment
  // terms into "good" blocks; we don't save the
  // resulting FST:
  blockBuilder = new Builder<Object>(FST.INPUT_TYPE.BYTE1,
      0, 0, true, true, Integer.MAX_VALUE,
      noOutputs, new FindBlocks(), false,
      PackedInts.COMPACT, true, 15);

  postingsWriter.setField(fieldInfo);
}
public void testInternalFinalState() throws Exception {
  final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
  final boolean willRewrite = random().nextBoolean();
  final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
      Integer.MAX_VALUE, outputs, null, willRewrite, PackedInts.DEFAULT, true, 15);
  builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRef()), outputs.getNoOutput());
  builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRef()), outputs.getNoOutput());
  final FST<Long> fst = builder.finish();

  StringWriter w = new StringWriter();
  //Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp/out.dot"));
  Util.toDot(fst, w, false, false);
  w.close();
  //System.out.println(w.toString());

  // check for accept state at label t
  assertTrue(w.toString().indexOf("[label=\"t\" style=\"bold\"") != -1);
  // check for accept state at label n
  assertTrue(w.toString().indexOf("[label=\"n\" style=\"bold\"") != -1);
}
@Test
public void testWritePerformance() throws Exception {
  System.out.println("Writing " + NumberFormat.getInstance().format(MAX_DOCS) + " values.");
  System.out.println("Float array bytes: " + NumberFormat.getInstance().format(RamUsageEstimator.sizeOf(new float[MAX_DOCS])));

  OffsetGrowableFloatWriter writer = new OffsetGrowableFloatWriter(
      OffsetGrowableFloatWriter.DEFAULT_PRECISION, 2, MAX_DOCS, PackedInts.DEFAULT);

  long start = System.currentTimeMillis();
  for (int i = 0; i < MAX_DOCS; i++) {
    float value = RandomUtils.nextFloat() * RandomUtils.nextFloat();
    int j = RandomUtils.nextInt(MAX_DOCS);
    writer.setFloat(j, value);
    assertEquals(value, writer.getFloat(j), OffsetGrowableFloatWriter.DEFAULT_PRECISION);
  }

  System.out.println("OffsetGrowableFloatWriter bytes: "
      + NumberFormat.getInstance().format(writer.ramBytesUsed())
      + " in " + (System.currentTimeMillis() - start) + "ms ...");
}
@Override
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(DiskDocValuesFormat.SORTED_SET);

  // write the ord -> byte[] as a binary field
  addBinaryField(field, values);

  // write the stream of ords as a numeric field
  // NOTE: we could return an iterator that delta-encodes these within a doc
  addNumericField(field, ords);

  // write the doc -> ord count as an absolute index to the stream
  meta.writeVInt(field.number);
  meta.writeByte(DiskDocValuesFormat.NUMERIC);
  meta.writeVInt(PackedInts.VERSION_CURRENT);
  meta.writeLong(data.getFilePointer());
  meta.writeVLong(maxDoc);
  meta.writeVInt(BLOCK_SIZE);

  final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
  long addr = 0;
  for (Number v : docToOrdCount) {
    addr += v.longValue();
    writer.add(addr);
  }
  writer.finish();
}
TermsWriter(FieldInfo fieldInfo) {
  this.fieldInfo = fieldInfo;

  noOutputs = NoOutputs.getSingleton();

  // This Builder is just used transiently to fragment
  // terms into "good" blocks; we don't save the
  // resulting FST:
  blockBuilder = new Builder<Object>(FST.INPUT_TYPE.BYTE1,
      0, 0, true, true, Integer.MAX_VALUE,
      noOutputs, new FindBlocks(), false,
      PackedInts.COMPACT, true, 15);

  this.longsSize = postingsWriter.setField(fieldInfo);
}
/**
 * Builds the final automaton from a list of entries.
 */
private FST<Object> buildAutomaton(BytesRefSorter sorter) throws IOException {
  // Build the automaton.
  final Outputs<Object> outputs = NoOutputs.getSingleton();
  final Object empty = outputs.getNoOutput();
  final Builder<Object> builder = new Builder<Object>(
      FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
      shareMaxTailLength, outputs, null, false,
      PackedInts.DEFAULT, true, 15);

  BytesRef scratch = new BytesRef();
  BytesRef entry;
  final IntsRef scratchIntsRef = new IntsRef();
  int count = 0;
  BytesRefIterator iter = sorter.iterator();
  while ((entry = iter.next()) != null) {
    count++;
    if (scratch.compareTo(entry) != 0) {
      builder.add(Util.toIntsRef(entry, scratchIntsRef), empty);
      scratch.copyBytes(entry);
    }
  }

  return count == 0 ? null : builder.finish();
}
/**
 * Compute the required precision so that <code>count</code> distinct entries
 * would be counted with linear counting.
 */
public static int precisionFromThreshold(long count) {
  final long hashTableEntries = (long) Math.ceil(count / MAX_LOAD_FACTOR);
  int precision = PackedInts.bitsRequired(hashTableEntries * Integer.BYTES);
  precision = Math.max(precision, MIN_PRECISION);
  precision = Math.min(precision, MAX_PRECISION);
  return precision;
}
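// A worked example (not from the original source), assuming MAX_LOAD_FACTOR == 0.75 and
// Integer.BYTES == 4: for count = 1000, hashTableEntries = ceil(1000 / 0.75) = 1334,
// 1334 * 4 = 5336, and PackedInts.bitsRequired(5336) == 13, so the method returns a
// precision of 13 (after clamping to [MIN_PRECISION, MAX_PRECISION]).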
/**
 * Builds an {@link Ordinals} instance from the builder's current state.
 */
public Ordinals build() {
  final float acceptableOverheadRatio = PackedInts.DEFAULT;
  if (numMultiValuedDocs > 0
      || MultiOrdinals.significantlySmallerThanSinglePackedOrdinals(maxDoc, numDocsWithValue, getValueCount(), acceptableOverheadRatio)) {
    // MultiOrdinals can be smaller than SinglePackedOrdinals for sparse fields
    return new MultiOrdinals(this, acceptableOverheadRatio);
  } else {
    return new SinglePackedOrdinals(this, acceptableOverheadRatio);
  }
}
/**
 * Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%.
 */
public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds, float acceptableOverheadRatio) {
  int bitsPerOrd = PackedInts.bitsRequired(numOrds);
  bitsPerOrd = PackedInts.fastestFormatAndBits(numDocsWithValue, bitsPerOrd, acceptableOverheadRatio).bitsPerValue;

  // Compute the worst-case number of bits per value for offsets, e.g. if no docs have a value at
  // the beginning of the block and all docs have one at the end of the block
  final float avgValuesPerDoc = (float) numDocsWithValue / maxDoc;
  final int maxDelta = (int) Math.ceil(OFFSETS_PAGE_SIZE * (1 - avgValuesPerDoc) * avgValuesPerDoc);
  int bitsPerOffset = PackedInts.bitsRequired(maxDelta) + 1; // +1 because of the sign
  bitsPerOffset = PackedInts.fastestFormatAndBits(maxDoc, bitsPerOffset, acceptableOverheadRatio).bitsPerValue;

  final long expectedMultiSizeInBytes = (long) numDocsWithValue * bitsPerOrd + (long) maxDoc * bitsPerOffset;
  final long expectedSingleSizeInBytes = (long) maxDoc * bitsPerOrd;
  return expectedMultiSizeInBytes < 0.8f * expectedSingleSizeInBytes;
}
private static OrdinalMap buildOrdinalMap(AtomicParentChildFieldData[] atomicFD, String parentType) throws IOException {
  final SortedDocValues[] ordinals = new SortedDocValues[atomicFD.length];
  for (int i = 0; i < ordinals.length; ++i) {
    ordinals[i] = atomicFD[i].getOrdinalsValues(parentType);
  }
  return OrdinalMap.build(null, ordinals, PackedInts.DEFAULT);
}
private SortedDocValues loadBytesFixedSorted(FieldInfo field, IndexInput data, IndexInput index) throws IOException {
  CodecUtil.checkHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT,
      Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START,
      Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
  CodecUtil.checkHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX,
      Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START,
      Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);

  final int fixedLength = data.readInt();
  final int valueCount = index.readInt();

  PagedBytes bytes = new PagedBytes(16);
  bytes.copy(data, fixedLength * (long) valueCount);
  final PagedBytes.Reader bytesReader = bytes.freeze(true);
  final PackedInts.Reader reader = PackedInts.getReader(index);
  ramBytesUsed.addAndGet(bytesReader.ramBytesUsed() + reader.ramBytesUsed());

  return correctBuggyOrds(new SortedDocValues() {
    @Override
    public int getOrd(int docID) {
      return (int) reader.get(docID);
    }

    @Override
    public BytesRef lookupOrd(int ord) {
      final BytesRef term = new BytesRef();
      bytesReader.fillSlice(term, fixedLength * (long) ord, fixedLength);
      return term;
    }

    @Override
    public int getValueCount() {
      return valueCount;
    }
  });
}
private SortedDocValues loadBytesVarSorted(FieldInfo field, IndexInput data, IndexInput index) throws IOException {
  CodecUtil.checkHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT,
      Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START,
      Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
  CodecUtil.checkHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX,
      Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START,
      Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

  long maxAddress = index.readLong();
  PagedBytes bytes = new PagedBytes(16);
  bytes.copy(data, maxAddress);
  final PagedBytes.Reader bytesReader = bytes.freeze(true);
  final PackedInts.Reader addressReader = PackedInts.getReader(index);
  final PackedInts.Reader ordsReader = PackedInts.getReader(index);

  final int valueCount = addressReader.size() - 1;
  ramBytesUsed.addAndGet(bytesReader.ramBytesUsed() + addressReader.ramBytesUsed() + ordsReader.ramBytesUsed());

  return correctBuggyOrds(new SortedDocValues() {
    @Override
    public int getOrd(int docID) {
      return (int) ordsReader.get(docID);
    }

    @Override
    public BytesRef lookupOrd(int ord) {
      final BytesRef term = new BytesRef();
      long startAddress = addressReader.get(ord);
      long endAddress = addressReader.get(ord + 1);
      bytesReader.fillSlice(term, startAddress, (int) (endAddress - startAddress));
      return term;
    }

    @Override
    public int getValueCount() {
      return valueCount;
    }
  });
}
@Override
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, final Iterable<Number> docToOrdCount, final Iterable<Number> ords) throws IOException {
  checkCanWrite(field);
  meta.writeVInt(field.number);
  meta.writeByte(Lucene45DocValuesFormat.SORTED_SET);

  if (isSingleValued(docToOrdCount)) {
    meta.writeVInt(SORTED_SET_SINGLE_VALUED_SORTED);
    // The field is single-valued, we can encode it as SORTED
    addSortedField(field, values, singletonView(docToOrdCount, ords, -1L));
    return;
  }

  meta.writeVInt(SORTED_SET_WITH_ADDRESSES);

  // write the ord -> byte[] as a binary field
  addTermsDict(field, values);

  // write the stream of ords as a numeric field
  // NOTE: we could return an iterator that delta-encodes these within a doc
  addNumericField(field, ords, false);

  // write the doc -> ord count as an absolute index to the stream
  meta.writeVInt(field.number);
  meta.writeByte(Lucene45DocValuesFormat.NUMERIC);
  meta.writeVInt(DELTA_COMPRESSED);
  meta.writeLong(-1L);
  meta.writeVInt(PackedInts.VERSION_CURRENT);
  meta.writeLong(data.getFilePointer());
  meta.writeVLong(maxDoc);
  meta.writeVInt(BLOCK_SIZE);

  final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
  long addr = 0;
  for (Number v : docToOrdCount) {
    addr += v.longValue();
    writer.add(addr);
  }
  writer.finish();
}
void reset(int len) {
  final int bitsPerOffset = PackedInts.bitsRequired(len - LAST_LITERALS);
  final int bitsPerOffsetLog = 32 - Integer.numberOfLeadingZeros(bitsPerOffset - 1);
  hashLog = MEMORY_USAGE + 3 - bitsPerOffsetLog;
  if (hashTable == null || hashTable.size() < 1 << hashLog || hashTable.getBitsPerValue() < bitsPerOffset) {
    hashTable = PackedInts.getMutable(1 << hashLog, bitsPerOffset, PackedInts.DEFAULT);
  } else {
    hashTable.clear();
  }
}
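// A worked example (not from the original source), assuming LAST_LITERALS == 5 and
// MEMORY_USAGE == 14: for len = 65,536, bitsPerOffset = PackedInts.bitsRequired(65_531) == 16,
// bitsPerOffsetLog = 32 - Integer.numberOfLeadingZeros(15) == 4, so hashLog = 14 + 3 - 4 = 13
// and the hash table is sized to 1 << 13 = 8,192 slots at 16 bits per value.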
CompressingStoredFieldsIndexWriter(IndexOutput indexOutput) throws IOException {
  this.fieldsIndexOut = indexOutput;
  reset();
  totalDocs = 0;
  docBaseDeltas = new int[BLOCK_SIZE];
  startPointerDeltas = new long[BLOCK_SIZE];
  fieldsIndexOut.writeVInt(PackedInts.VERSION_CURRENT);
}
public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.bytes = new PagedBytes(BLOCK_BITS);
  this.bytesOut = bytes.getDataOutput();
  this.lengths = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  this.iwBytesUsed = iwBytesUsed;
  this.docsWithField = new FixedBitSet(64);
  this.bytesUsed = docsWithFieldBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  hash = new BytesRefHash(
      new ByteBlockPool(
          new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
      BytesRefHash.DEFAULT_CAPACITY,
      new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.packedBuilder(PackedInts.COMPACT);
  pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  hash = new BytesRefHash(
      new ByteBlockPool(
          new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
      BytesRefHash.DEFAULT_CAPACITY,
      new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  bytesUsed = pending.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
public BinaryDocValuesFieldUpdates(String field, int maxDoc) {
  super(field, FieldInfo.DocValuesType.BINARY);
  bitsPerValue = PackedInts.bitsRequired(maxDoc - 1);
  docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.COMPACT);
  offsets = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
  lengths = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
  values = new BytesRefBuilder();
  size = 0;
}
public NumericDocValuesFieldUpdates(String field, int maxDoc) {
  super(field, FieldInfo.DocValuesType.NUMERIC);
  bitsPerValue = PackedInts.bitsRequired(maxDoc - 1);
  docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.COMPACT);
  values = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
  size = 0;
}
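// A minimal sketch (not from the original source) of how entries might be appended to the
// paged structures created above; the "add" helper and the grow-by-one strategy are assumptions.
void add(int doc, long value) {
  if (docs.size() == size) {          // out of room: grow both paged structures
    docs = docs.grow(size + 1);
    values = values.grow(size + 1);
  }
  docs.set(size, doc);                // doc IDs fit in bitsPerValue bits (bitsRequired(maxDoc - 1))
  values.set(size, value);            // PagedGrowableWriter widens its bits per value on demand
  ++size;
}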
/** Returns a SortedDocValues for a reader's docvalues (potentially doing extremely slow things).
 * <p>
 * This is an extremely slow way to access sorted values. Instead, access them per-segment
 * with {@link AtomicReader#getSortedDocValues(String)}
 * </p>
 */
public static SortedDocValues getSortedValues(final IndexReader r, final String field) throws IOException {
  final List<AtomicReaderContext> leaves = r.leaves();
  final int size = leaves.size();

  if (size == 0) {
    return null;
  } else if (size == 1) {
    return leaves.get(0).reader().getSortedDocValues(field);
  }

  boolean anyReal = false;
  final SortedDocValues[] values = new SortedDocValues[size];
  final int[] starts = new int[size + 1];
  for (int i = 0; i < size; i++) {
    AtomicReaderContext context = leaves.get(i);
    SortedDocValues v = context.reader().getSortedDocValues(field);
    if (v == null) {
      v = DocValues.emptySorted();
    } else {
      anyReal = true;
    }
    values[i] = v;
    starts[i] = context.docBase;
  }
  starts[size] = r.maxDoc();

  if (!anyReal) {
    return null;
  } else {
    OrdinalMap mapping = OrdinalMap.build(r.getCoreCacheKey(), values, PackedInts.DEFAULT);
    return new MultiSortedDocValues(values, starts, mapping);
  }
}
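// A minimal sketch (not from the original source) of the per-segment access pattern the javadoc
// above recommends instead of the slow merged view; "field" is a hypothetical field name.
for (AtomicReaderContext ctx : reader.leaves()) {
  SortedDocValues dv = ctx.reader().getSortedDocValues("field");
  if (dv == null) {
    continue;                        // this segment has no values for the field
  }
  for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) {
    int ord = dv.getOrd(doc);        // -1 means the document has no value
    // ... work with the segment-local ord; ctx.docBase + doc is the global doc ID
  }
}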
/** Returns a SortedSetDocValues for a reader's docvalues (potentially doing extremely slow things).
 * <p>
 * This is an extremely slow way to access sorted values. Instead, access them per-segment
 * with {@link AtomicReader#getSortedSetDocValues(String)}
 * </p>
 */
public static SortedSetDocValues getSortedSetValues(final IndexReader r, final String field) throws IOException {
  final List<AtomicReaderContext> leaves = r.leaves();
  final int size = leaves.size();

  if (size == 0) {
    return null;
  } else if (size == 1) {
    return leaves.get(0).reader().getSortedSetDocValues(field);
  }

  boolean anyReal = false;
  final SortedSetDocValues[] values = new SortedSetDocValues[size];
  final int[] starts = new int[size + 1];
  for (int i = 0; i < size; i++) {
    AtomicReaderContext context = leaves.get(i);
    SortedSetDocValues v = context.reader().getSortedSetDocValues(field);
    if (v == null) {
      v = DocValues.emptySortedSet();
    } else {
      anyReal = true;
    }
    values[i] = v;
    starts[i] = context.docBase;
  }
  starts[size] = r.maxDoc();

  if (!anyReal) {
    return null;
  } else {
    OrdinalMap mapping = OrdinalMap.build(r.getCoreCacheKey(), values, PackedInts.DEFAULT);
    return new MultiSortedSetDocValues(values, starts, mapping);
  }
}