/** Creates a {@code MultiLevelSkipListWriter}. */ protected MultiLevelSkipListWriter(int skipInterval, int skipMultiplier, int maxSkipLevels, int df) { this.skipInterval = skipInterval; this.skipMultiplier = skipMultiplier; // calculate the maximum number of skip levels for this document frequency if (df <= skipInterval) { numberOfSkipLevels = 1; } else { numberOfSkipLevels = 1+MathUtil.log(df/skipInterval, skipMultiplier); } // make sure it does not exceed maxSkipLevels if (numberOfSkipLevels > maxSkipLevels) { numberOfSkipLevels = maxSkipLevels; } }
/**
 * Loads the skip levels.
 *
 * <p>Recomputes the number of levels the writer produced for this document
 * count, then positions one stream per level. The upper levels are stored
 * highest-first, each prefixed with its byte length; the lowest level is
 * read directly from the base stream.
 */
private void loadSkipLevels() throws IOException {
  // Mirror the writer's level calculation (see MultiLevelSkipListWriter),
  // capped at the maximum number of levels.
  if (docCount <= skipInterval[0]) {
    numberOfSkipLevels = 1;
  } else {
    numberOfSkipLevels = 1+MathUtil.log(docCount/skipInterval[0], skipMultiplier);
  }
  if (numberOfSkipLevels > maxNumberOfSkipLevels) {
    numberOfSkipLevels = maxNumberOfSkipLevels;
  }

  skipStream[0].seek(skipPointer[0]);

  // Only the first numberOfLevelsToBuffer upper levels are copied into
  // in-memory buffers; the rest get clones of the base stream.
  int toBuffer = numberOfLevelsToBuffer;

  for (int i = numberOfSkipLevels - 1; i > 0; i--) {
    // the length of the current level
    long length = skipStream[0].readVLong();

    // the start pointer of the current level
    skipPointer[i] = skipStream[0].getFilePointer();

    if (toBuffer > 0) {
      // buffer this level
      skipStream[i] = new SkipBuffer(skipStream[0], (int) length);
      toBuffer--;
    } else {
      // clone this stream, it is already at the start of the current level
      skipStream[i] = skipStream[0].clone();
      if (inputIsBuffered && length < BufferedIndexInput.BUFFER_SIZE) {
        // Shrink the clone's buffer to the level size, but never below the
        // minimum buffer size the buffered input supports.
        ((BufferedIndexInput) skipStream[i]).setBufferSize(Math.max(BufferedIndexInput.MIN_BUFFER_SIZE, (int) length));
      }

      // move base stream beyond the current level
      skipStream[0].seek(skipStream[0].getFilePointer() + length);
    }
  }

  // use base stream for the lowest level
  skipPointer[0] = skipStream[0].getFilePointer();
}
/**
 * Loads the segment information at segment load time.
 *
 * @param indexEnum
 *          the term enum.
 * @param indexDivisor
 *          the index divisor.
 * @param tiiFileLength
 *          the size of the tii file, used to approximate the size of the
 *          buffer.
 * @param totalIndexInterval
 *          the total index interval.
 */
TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval) throws IOException {
  this.totalIndexInterval = totalIndexInterval;
  // Number of index terms retained after sampling every indexDivisor-th entry.
  indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor;
  skipInterval = indexEnum.skipInterval;
  // this is only an inital size, it will be GCed once the build is complete
  long initialSize = (long) (tiiFileLength * 1.5) / indexDivisor;
  PagedBytes dataPagedBytes = new PagedBytes(estimatePageBits(initialSize));
  PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput();

  // Bits needed to address any offset in the tii file; used as the initial
  // width of the packed offsets array.
  final int bitEstimate = 1+MathUtil.log(tiiFileLength, 2);
  GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, PackedInts.DEFAULT);

  String currentField = null;
  List<String> fieldStrs = new ArrayList<>();
  int fieldCounter = -1;
  // Serialize every sampled term into the paged byte store, recording the
  // byte offset of each entry in indexToTerms.
  for (int i = 0; indexEnum.next(); i++) {
    Term term = indexEnum.term();
    if (currentField == null || !currentField.equals(term.field())) {
      // Terms arrive sorted by field, so a field change means a new field.
      currentField = term.field();
      fieldStrs.add(currentField);
      fieldCounter++;
    }
    TermInfo termInfo = indexEnum.termInfo();
    indexToTerms.set(i, dataOutput.getPosition());
    dataOutput.writeVInt(fieldCounter);
    dataOutput.writeString(term.text());
    dataOutput.writeVInt(termInfo.docFreq);
    if (termInfo.docFreq >= skipInterval) {
      // skipOffset is only written when a skip list exists for the term.
      dataOutput.writeVInt(termInfo.skipOffset);
    }
    dataOutput.writeVLong(termInfo.freqPointer);
    dataOutput.writeVLong(termInfo.proxPointer);
    dataOutput.writeVLong(indexEnum.indexPointer);
    // Skip over the (indexDivisor - 1) terms between samples.
    for (int j = 1; j < indexDivisor; j++) {
      if (!indexEnum.next()) {
        break;
      }
    }
  }

  fields = new Term[fieldStrs.size()];
  for (int i = 0; i < fields.length; i++) {
    fields[i] = new Term(fieldStrs.get(i));
  }

  dataPagedBytes.freeze(true);
  dataInput = dataPagedBytes.getDataInput();
  indexToDataOffset = indexToTerms.getMutable();

  // Account for the retained heap: the fields array and its Term instances,
  // the frozen paged bytes, the data input view, and the packed offsets.
  long ramBytesUsed = RamUsageEstimator.shallowSizeOf(fields);
  ramBytesUsed += RamUsageEstimator.shallowSizeOf(dataInput);
  ramBytesUsed += fields.length * RamUsageEstimator.shallowSizeOfInstance(Term.class);
  ramBytesUsed += dataPagedBytes.ramBytesUsed();
  ramBytesUsed += indexToDataOffset.ramBytesUsed();
  this.ramBytesUsed = ramBytesUsed;
}
/** Loads the skip levels */ private void loadSkipLevels() throws IOException { if (docCount <= skipInterval[0]) { numberOfSkipLevels = 1; } else { numberOfSkipLevels = 1+MathUtil.log(docCount/skipInterval[0], skipMultiplier); } if (numberOfSkipLevels > maxNumberOfSkipLevels) { numberOfSkipLevels = maxNumberOfSkipLevels; } skipStream[0].seek(skipPointer[0]); int toBuffer = numberOfLevelsToBuffer; for (int i = numberOfSkipLevels - 1; i > 0; i--) { // the length of the current level long length = skipStream[0].readVLong(); // the start pointer of the current level skipPointer[i] = skipStream[0].getFilePointer(); if (toBuffer > 0) { // buffer this level skipStream[i] = new SkipBuffer(skipStream[0], (int) length); toBuffer--; } else { // clone this stream, it is already at the start of the current level skipStream[i] = skipStream[0].clone(); if (inputIsBuffered && length < BufferedIndexInput.BUFFER_SIZE) { ((BufferedIndexInput) skipStream[i]).setBufferSize((int) length); } // move base stream beyond the current level skipStream[0].seek(skipStream[0].getFilePointer() + length); } } // use base stream for the lowest level skipPointer[0] = skipStream[0].getFilePointer(); }
/** * Loads the segment information at segment load time. * * @param indexEnum * the term enum. * @param indexDivisor * the index divisor. * @param tiiFileLength * the size of the tii file, used to approximate the size of the * buffer. * @param totalIndexInterval * the total index interval. */ TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval) throws IOException { this.totalIndexInterval = totalIndexInterval; indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor; skipInterval = indexEnum.skipInterval; // this is only an inital size, it will be GCed once the build is complete long initialSize = (long) (tiiFileLength * 1.5) / indexDivisor; PagedBytes dataPagedBytes = new PagedBytes(estimatePageBits(initialSize)); PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput(); final int bitEstimate = 1+MathUtil.log(tiiFileLength, 2); GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, PackedInts.DEFAULT); String currentField = null; List<String> fieldStrs = new ArrayList<String>(); int fieldCounter = -1; for (int i = 0; indexEnum.next(); i++) { Term term = indexEnum.term(); if (currentField == null || !currentField.equals(term.field())) { currentField = term.field(); fieldStrs.add(currentField); fieldCounter++; } TermInfo termInfo = indexEnum.termInfo(); indexToTerms.set(i, dataOutput.getPosition()); dataOutput.writeVInt(fieldCounter); dataOutput.writeString(term.text()); dataOutput.writeVInt(termInfo.docFreq); if (termInfo.docFreq >= skipInterval) { dataOutput.writeVInt(termInfo.skipOffset); } dataOutput.writeVLong(termInfo.freqPointer); dataOutput.writeVLong(termInfo.proxPointer); dataOutput.writeVLong(indexEnum.indexPointer); for (int j = 1; j < indexDivisor; j++) { if (!indexEnum.next()) { break; } } } fields = new Term[fieldStrs.size()]; for (int i = 0; i < fields.length; i++) { fields[i] = new Term(fieldStrs.get(i)); } dataPagedBytes.freeze(true); dataInput = 
dataPagedBytes.getDataInput(); indexToDataOffset = indexToTerms.getMutable(); }
/**
 * Loads the segment information at segment load time.
 *
 * @param indexEnum
 *          the term enum.
 * @param indexDivisor
 *          the index divisor.
 * @param tiiFileLength
 *          the size of the tii file, used to approximate the size of the
 *          buffer.
 * @param totalIndexInterval
 *          the total index interval.
 */
TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval) throws IOException {
  this.totalIndexInterval = totalIndexInterval;
  // Number of index terms retained after sampling every indexDivisor-th entry.
  indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor;
  skipInterval = indexEnum.skipInterval;
  // this is only an inital size, it will be GCed once the build is complete
  long initialSize = (long) (tiiFileLength * 1.5) / indexDivisor;
  PagedBytes dataPagedBytes = new PagedBytes(estimatePageBits(initialSize));
  PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput();

  // Bits needed to address any offset in the tii file; used as the initial
  // width of the packed offsets array.
  final int bitEstimate = 1+MathUtil.log(tiiFileLength, 2);
  GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, PackedInts.DEFAULT);

  String currentField = null;
  List<String> fieldStrs = new ArrayList<String>();
  int fieldCounter = -1;
  // Serialize every sampled term into the paged byte store, recording the
  // byte offset of each entry in indexToTerms.
  for (int i = 0; indexEnum.next(); i++) {
    Term term = indexEnum.term();
    if (currentField == null || !currentField.equals(term.field())) {
      // Terms arrive sorted by field, so a field change means a new field.
      currentField = term.field();
      fieldStrs.add(currentField);
      fieldCounter++;
    }
    TermInfo termInfo = indexEnum.termInfo();
    indexToTerms.set(i, dataOutput.getPosition());
    dataOutput.writeVInt(fieldCounter);
    dataOutput.writeString(term.text());
    dataOutput.writeVInt(termInfo.docFreq);
    if (termInfo.docFreq >= skipInterval) {
      // skipOffset is only written when a skip list exists for the term.
      dataOutput.writeVInt(termInfo.skipOffset);
    }
    dataOutput.writeVLong(termInfo.freqPointer);
    dataOutput.writeVLong(termInfo.proxPointer);
    dataOutput.writeVLong(indexEnum.indexPointer);
    // Skip over the (indexDivisor - 1) terms between samples.
    for (int j = 1; j < indexDivisor; j++) {
      if (!indexEnum.next()) {
        break;
      }
    }
  }

  fields = new Term[fieldStrs.size()];
  for (int i = 0; i < fields.length; i++) {
    fields[i] = new Term(fieldStrs.get(i));
  }

  dataPagedBytes.freeze(true);
  dataInput = dataPagedBytes.getDataInput();
  indexToDataOffset = indexToTerms.getMutable();

  // Estimate retained heap: per-field Term references + Term instances,
  // the frozen paged bytes, and the packed offsets array.
  // NOTE(review): unlike another variant of this constructor, this estimate
  // omits the fields array header and the dataInput object itself — confirm
  // whether that is intentional.
  ramBytesUsed = fields.length * (RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.shallowSizeOfInstance(Term.class))
      + dataPagedBytes.ramBytesUsed() + indexToDataOffset.ramBytesUsed();
}