void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException { PagedBytesDataInput input = dataInput.clone(); input.setPosition(indexToDataOffset.get(indexOffset)); // read the term int fieldId = input.readVInt(); Term field = fields[fieldId]; Term term = new Term(field.field(), input.readString()); // read the terminfo TermInfo termInfo = new TermInfo(); termInfo.docFreq = input.readVInt(); if (termInfo.docFreq >= skipInterval) { termInfo.skipOffset = input.readVInt(); } else { termInfo.skipOffset = 0; } termInfo.freqPointer = input.readVLong(); termInfo.proxPointer = input.readVLong(); long pointer = input.readVLong(); // perform the seek enumerator.seek(pointer, ((long) indexOffset * totalIndexInterval) - 1, term, termInfo); }
/**
 * Binary-searches the term index for the given term.
 *
 * @param term
 *          the term to locate.
 * @return the position of the index entry that compares equal to
 *         {@code term}; if there is none, the upper bound of the search
 *         range at the moment it becomes empty. That value is {@code -1}
 *         when the index is empty or the term compares below the first
 *         entry.
 * @throws IOException If there is a low-level I/O error.
 */
int getIndexOffset(Term term) throws IOException {
  // one clone and one scratch buffer are shared by every probe
  final PagedBytesDataInput probeInput = dataInput.clone();
  final BytesRefBuilder probeBytes = new BytesRefBuilder();

  int low = 0;
  int high = indexSize - 1;
  while (low <= high) {
    final int middle = (low + high) >>> 1; // unsigned shift: no overflow
    final int cmp = compareTo(term, middle, probeInput, probeBytes);
    if (cmp == 0) {
      return middle;
    }
    if (cmp < 0) {
      high = middle - 1;
    } else {
      low = middle + 1;
    }
  }
  return high;
}
/**
 * Locates the given term in the term index using binary search.
 *
 * @param term
 *          the term to locate.
 * @return the position of the matching index entry, or, when no entry
 *         compares equal, the last value of the upper search bound
 *         ({@code -1} if the index is empty or the term compares below the
 *         first entry).
 * @throws IOException If there is a low-level I/O error.
 */
int getIndexOffset(Term term) throws IOException {
  final PagedBytesDataInput searchInput = dataInput.clone();
  final BytesRef termBuffer = new BytesRef();

  int upper = indexSize - 1;
  for (int lower = 0; lower <= upper;) {
    final int probe = (lower + upper) >>> 1; // overflow-safe midpoint
    final int order = compareTo(term, probe, searchInput, termBuffer);
    if (order == 0) {
      return probe;
    } else if (order > 0) {
      lower = probe + 1;
    } else {
      upper = probe - 1;
    }
  }
  return upper;
}
/** * Gets the term at the given position. For testing. * * @param termIndex * the position to read the term from the index. * @return the term. * @throws IOException If there is a low-level I/O error. */ Term getTerm(int termIndex) throws IOException { PagedBytesDataInput input = dataInput.clone(); input.setPosition(indexToDataOffset.get(termIndex)); // read the term int fieldId = input.readVInt(); Term field = fields[fieldId]; return new Term(field.field(), input.readString()); }
/**
 * Writes a sampled index over {@code values}.
 * <p>
 * Every value whose running ordinal masks to zero under
 * {@code REVERSE_INTERVAL_MASK} contributes one key: the prefix of the value
 * whose length is given by {@code StringHelper.sortKeyLength} against the
 * remembered prior term. Keys are collected length-prefixed in a
 * {@link PagedBytes}; the pointer returned for each key is appended to a
 * monotonic address writer on {@code data}. Afterwards the start file
 * pointer goes to {@code meta}, and the key byte count followed by the key
 * bytes go to {@code data}.
 *
 * @param field
 *          the field being written (not used by this method).
 * @param values
 *          the terms to index, in iteration order.
 * @param maxLength
 *          initial capacity reserved for the prior-term buffer.
 * @throws IOException If there is a low-level I/O error.
 */
private void addReverseTermIndex(FieldInfo field, final Iterable<BytesRef> values, int maxLength) throws IOException {
  long ordinal = 0;

  final BytesRefBuilder previous = new BytesRefBuilder();
  previous.grow(maxLength);

  // reusable view over the current value, truncated to the key length
  final BytesRef key = new BytesRef();

  final long indexStart = data.getFilePointer();
  final PagedBytes keyBytes = new PagedBytes(15);
  final MonotonicBlockPackedWriter keyAddresses = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);

  for (BytesRef value : values) {
    final int slot = (int) (ordinal & REVERSE_INTERVAL_MASK);
    if (slot == 0) {
      // sampled position: store only as many bytes as sortKeyLength reports
      key.bytes = value.bytes;
      key.offset = value.offset;
      key.length = StringHelper.sortKeyLength(previous.get(), value);
      keyAddresses.add(keyBytes.copyUsingLengthPrefix(key));
    } else if (slot == REVERSE_INTERVAL_MASK) {
      // remember this value; the next iteration is a sampled position
      previous.copyBytes(value);
    }
    ordinal += 1;
  }
  keyAddresses.finish();

  final long keyByteCount = keyBytes.getPointer();
  keyBytes.freeze(true);
  final PagedBytesDataInput frozenKeys = keyBytes.getDataInput();

  meta.writeLong(indexStart);
  data.writeVLong(keyByteCount);
  data.copyBytes(frozenKeys, keyByteCount);
}
/** * Compare the fields of the terms first, and if not equals return from * compare. If equal compare terms. * * @param term * the term to compare. * @param termIndex * the position of the term in the input to compare * @param input * the input buffer. * @return int. * @throws IOException If there is a low-level I/O error. */ private int compareTo(Term term, int termIndex, PagedBytesDataInput input, BytesRefBuilder reuse) throws IOException { // if term field does not equal mid's field index, then compare fields // else if they are equal, compare term's string values... int c = compareField(term, termIndex, input); if (c == 0) { reuse.setLength(input.readVInt()); reuse.grow(reuse.length()); input.readBytes(reuse.bytes(), 0, reuse.length()); return comparator.compare(term.bytes(), reuse.get()); } return c; }
/** * Compare the fields of the terms first, and if not equals return from * compare. If equal compare terms. * * @param term * the term to compare. * @param termIndex * the position of the term in the input to compare * @param input * the input buffer. * @return int. * @throws IOException If there is a low-level I/O error. */ private int compareTo(Term term, int termIndex, PagedBytesDataInput input, BytesRef reuse) throws IOException { // if term field does not equal mid's field index, then compare fields // else if they are equal, compare term's string values... int c = compareField(term, termIndex, input); if (c == 0) { reuse.length = input.readVInt(); reuse.grow(reuse.length); input.readBytes(reuse.bytes, 0, reuse.length); return comparator.compare(term.bytes(), reuse); } return c; }
/**
 * Compares the field of {@code term} with the field of the index entry at
 * {@code termIndex}, leaving {@code input} positioned just after that
 * entry's field id.
 *
 * @param term
 *          the given term.
 * @param termIndex
 *          the term that exists in the data block.
 * @param input
 *          the data block.
 * @return the {@code compareTo} result of the two field names.
 * @throws IOException If there is a low-level I/O error.
 */
private int compareField(Term term, int termIndex, PagedBytesDataInput input) throws IOException {
  input.setPosition(indexToDataOffset.get(termIndex));

  final String wantedField = term.field();
  // the entry begins with the id of its field
  final String indexedField = fields[input.readVInt()].field();
  return wantedField.compareTo(indexedField);
}