/** * Loads the segment information at segment load time. * * @param indexEnum * the term enum. * @param indexDivisor * the index divisor. * @param tiiFileLength * the size of the tii file, used to approximate the size of the * buffer. * @param totalIndexInterval * the total index interval. */ TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval) throws IOException { this.totalIndexInterval = totalIndexInterval; indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor; skipInterval = indexEnum.skipInterval; // this is only an inital size, it will be GCed once the build is complete long initialSize = (long) (tiiFileLength * 1.5) / indexDivisor; PagedBytes dataPagedBytes = new PagedBytes(estimatePageBits(initialSize)); PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput(); final int bitEstimate = 1+MathUtil.log(tiiFileLength, 2); GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, PackedInts.DEFAULT); String currentField = null; List<String> fieldStrs = new ArrayList<>(); int fieldCounter = -1; for (int i = 0; indexEnum.next(); i++) { Term term = indexEnum.term(); if (currentField == null || !currentField.equals(term.field())) { currentField = term.field(); fieldStrs.add(currentField); fieldCounter++; } TermInfo termInfo = indexEnum.termInfo(); indexToTerms.set(i, dataOutput.getPosition()); dataOutput.writeVInt(fieldCounter); dataOutput.writeString(term.text()); dataOutput.writeVInt(termInfo.docFreq); if (termInfo.docFreq >= skipInterval) { dataOutput.writeVInt(termInfo.skipOffset); } dataOutput.writeVLong(termInfo.freqPointer); dataOutput.writeVLong(termInfo.proxPointer); dataOutput.writeVLong(indexEnum.indexPointer); for (int j = 1; j < indexDivisor; j++) { if (!indexEnum.next()) { break; } } } fields = new Term[fieldStrs.size()]; for (int i = 0; i < fields.length; i++) { fields[i] = new Term(fieldStrs.get(i)); } dataPagedBytes.freeze(true); dataInput = dataPagedBytes.getDataInput(); indexToDataOffset = indexToTerms.getMutable(); long ramBytesUsed = RamUsageEstimator.shallowSizeOf(fields); ramBytesUsed += RamUsageEstimator.shallowSizeOf(dataInput); ramBytesUsed += fields.length * RamUsageEstimator.shallowSizeOfInstance(Term.class); ramBytesUsed += dataPagedBytes.ramBytesUsed(); ramBytesUsed += indexToDataOffset.ramBytesUsed(); this.ramBytesUsed = ramBytesUsed; }
/** * Loads the segment information at segment load time. * * @param indexEnum * the term enum. * @param indexDivisor * the index divisor. * @param tiiFileLength * the size of the tii file, used to approximate the size of the * buffer. * @param totalIndexInterval * the total index interval. */ TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval) throws IOException { this.totalIndexInterval = totalIndexInterval; indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor; skipInterval = indexEnum.skipInterval; // this is only an inital size, it will be GCed once the build is complete long initialSize = (long) (tiiFileLength * 1.5) / indexDivisor; PagedBytes dataPagedBytes = new PagedBytes(estimatePageBits(initialSize)); PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput(); final int bitEstimate = 1+MathUtil.log(tiiFileLength, 2); GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, PackedInts.DEFAULT); String currentField = null; List<String> fieldStrs = new ArrayList<String>(); int fieldCounter = -1; for (int i = 0; indexEnum.next(); i++) { Term term = indexEnum.term(); if (currentField == null || !currentField.equals(term.field())) { currentField = term.field(); fieldStrs.add(currentField); fieldCounter++; } TermInfo termInfo = indexEnum.termInfo(); indexToTerms.set(i, dataOutput.getPosition()); dataOutput.writeVInt(fieldCounter); dataOutput.writeString(term.text()); dataOutput.writeVInt(termInfo.docFreq); if (termInfo.docFreq >= skipInterval) { dataOutput.writeVInt(termInfo.skipOffset); } dataOutput.writeVLong(termInfo.freqPointer); dataOutput.writeVLong(termInfo.proxPointer); dataOutput.writeVLong(indexEnum.indexPointer); for (int j = 1; j < indexDivisor; j++) { if (!indexEnum.next()) { break; } } } fields = new Term[fieldStrs.size()]; for (int i = 0; i < fields.length; i++) { fields[i] = new Term(fieldStrs.get(i)); } dataPagedBytes.freeze(true); dataInput = dataPagedBytes.getDataInput(); indexToDataOffset = indexToTerms.getMutable(); }
/** * Loads the segment information at segment load time. * * @param indexEnum * the term enum. * @param indexDivisor * the index divisor. * @param tiiFileLength * the size of the tii file, used to approximate the size of the * buffer. * @param totalIndexInterval * the total index interval. */ TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval) throws IOException { this.totalIndexInterval = totalIndexInterval; indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor; skipInterval = indexEnum.skipInterval; // this is only an inital size, it will be GCed once the build is complete long initialSize = (long) (tiiFileLength * 1.5) / indexDivisor; PagedBytes dataPagedBytes = new PagedBytes(estimatePageBits(initialSize)); PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput(); final int bitEstimate = 1+MathUtil.log(tiiFileLength, 2); GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, PackedInts.DEFAULT); String currentField = null; List<String> fieldStrs = new ArrayList<String>(); int fieldCounter = -1; for (int i = 0; indexEnum.next(); i++) { Term term = indexEnum.term(); if (currentField == null || !currentField.equals(term.field())) { currentField = term.field(); fieldStrs.add(currentField); fieldCounter++; } TermInfo termInfo = indexEnum.termInfo(); indexToTerms.set(i, dataOutput.getPosition()); dataOutput.writeVInt(fieldCounter); dataOutput.writeString(term.text()); dataOutput.writeVInt(termInfo.docFreq); if (termInfo.docFreq >= skipInterval) { dataOutput.writeVInt(termInfo.skipOffset); } dataOutput.writeVLong(termInfo.freqPointer); dataOutput.writeVLong(termInfo.proxPointer); dataOutput.writeVLong(indexEnum.indexPointer); for (int j = 1; j < indexDivisor; j++) { if (!indexEnum.next()) { break; } } } fields = new Term[fieldStrs.size()]; for (int i = 0; i < fields.length; i++) { fields[i] = new Term(fieldStrs.get(i)); } dataPagedBytes.freeze(true); dataInput = dataPagedBytes.getDataInput(); indexToDataOffset = indexToTerms.getMutable(); ramBytesUsed = fields.length * (RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.shallowSizeOfInstance(Term.class)) + dataPagedBytes.ramBytesUsed() + indexToDataOffset.ramBytesUsed(); }