/**
 * Writes the metadata entry for {@code field} and serializes an FST that maps each value to
 * its zero-based position in {@code values}.
 *
 * <p>Metadata written, in order: the field number (vInt), the {@code FST} type byte, the
 * current {@code data} file pointer (long) and, after the FST has been handled, the number
 * of values (vLong).
 *
 * @param field field whose number is recorded in the metadata
 * @param values values added to the FST in iteration order
 * @throws IOException if writing to {@code meta} or {@code data} fails
 */
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  // Metadata header; the data file pointer is recorded before any FST bytes are saved.
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());

  final Builder<Long> fstBuilder =
      new Builder<>(INPUT_TYPE.BYTE1, PositiveIntOutputs.getSingleton());
  final IntsRefBuilder intsScratch = new IntsRefBuilder();

  long valueCount = 0;
  for (BytesRef value : values) {
    fstBuilder.add(Util.toIntsRef(value, intsScratch), valueCount);
    valueCount++;
  }

  // Nothing is saved when finish() hands back null.
  final FST<Long> built = fstBuilder.finish();
  if (built != null) {
    built.save(data);
  }
  meta.writeVLong(valueCount);
}
/**
 * Reads this instance's serialized state from {@code input}.
 *
 * <p>Read order: codec header, {@code count} (vLong), separator byte, grams (vInt),
 * {@code totTokens} (vLong), then the FST. The stored separator and grams must equal the
 * values of this instance's {@code separator} and {@code grams}.
 *
 * @param input source of the serialized data
 * @return always {@code true}
 * @throws IOException if reading fails
 * @throws IllegalStateException if the stored separator or grams value differs
 */
@Override
public boolean load(DataInput input) throws IOException {
  CodecUtil.checkHeader(input, CODEC_NAME, VERSION_START, VERSION_START);
  count = input.readVLong();

  final byte storedSeparator = input.readByte();
  if (storedSeparator != separator) {
    throw new IllegalStateException(
        "separator=" + separator
            + " is incorrect: original model was built with separator=" + storedSeparator);
  }

  final int storedGrams = input.readVInt();
  if (storedGrams != grams) {
    throw new IllegalStateException(
        "grams=" + grams
            + " is incorrect: original model was built with grams=" + storedGrams);
  }

  totTokens = input.readVLong();
  fst = new FST<>(input, PositiveIntOutputs.getSingleton());
  return true;
}
/**
 * Loads the FST from the resource named by {@code FST_FILENAME_SUFFIX} and wraps it in a
 * {@code TokenInfoFST}.
 *
 * <p>The stream is closed in every case: with {@code IOUtils.close} after a successful
 * read, with {@code IOUtils.closeWhileHandlingException} when the read did not complete.
 *
 * @throws IOException if the resource cannot be opened, read or closed
 */
private TokenInfoDictionary() throws IOException {
  super();
  InputStream resource = null;
  FST<Long> loaded = null;
  boolean completed = false;
  try {
    resource = getResource(FST_FILENAME_SUFFIX);
    resource = new BufferedInputStream(resource);
    loaded = new FST<>(new InputStreamDataInput(resource), PositiveIntOutputs.getSingleton());
    completed = true;
  } finally {
    if (!completed) {
      IOUtils.closeWhileHandlingException(resource);
    } else {
      IOUtils.close(resource);
    }
  }
  // TODO: some way to configure?
  this.fst = new TokenInfoFST(loaded, true);
}
@Override public void build(TermFreqIterator iterator) throws IOException { BytesRef scratch = new BytesRef(); TermFreqIterator iter = new WFSTTermFreqIteratorWrapper(iterator); IntsRef scratchInts = new IntsRef(); BytesRef previous = null; PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true); Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs); while ((scratch = iter.next()) != null) { long cost = iter.weight(); if (previous == null) { previous = new BytesRef(); } else if (scratch.equals(previous)) { continue; // for duplicate suggestions, the best weight is actually // added } Util.toIntsRef(scratch, scratchInts); builder.add(scratchInts, cost); previous.copyBytes(scratch); } fst = builder.finish(); }
/**
 * Loads the FST from the resource named by {@code FST_FILENAME_SUFFIX} and wraps it in a
 * {@code TokenInfoFST}.
 *
 * <p>An {@code IOException} raised while opening or reading is remembered and passed, along
 * with the stream, to {@code IOUtils.closeWhileHandlingException}.
 * NOTE(review): that helper presumably rethrows the remembered exception after closing;
 * confirm against IOUtils.
 *
 * @throws IOException if the resource cannot be opened or read
 */
private TokenInfoDictionary() throws IOException {
  super();
  InputStream resource = null;
  FST<Long> loaded = null;
  IOException failure = null;
  try {
    resource = getResource(FST_FILENAME_SUFFIX);
    resource = new BufferedInputStream(resource);
    loaded = new FST<Long>(
        new InputStreamDataInput(resource), PositiveIntOutputs.getSingleton(true));
  } catch (IOException e) {
    failure = e;
  } finally {
    IOUtils.closeWhileHandlingException(failure, resource);
  }
  // TODO: some way to configure?
  this.fst = new TokenInfoFST(loaded, true);
}
/**
 * Emits the metadata entry for {@code field} and saves an FST mapping each value to its
 * zero-based position in {@code values}.
 *
 * <p>Metadata layout: field number (vInt), the {@code FST} type byte, the {@code data} file
 * pointer at the time of the call (long), and finally the number of values (vLong).
 *
 * @param field field whose number is written to the metadata
 * @param values values to index, consumed in iteration order
 * @throws IOException if writing to {@code meta} or {@code data} fails
 */
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());

  final Builder<Long> fstBuilder =
      new Builder<Long>(INPUT_TYPE.BYTE1, PositiveIntOutputs.getSingleton(true));
  final IntsRef intsScratch = new IntsRef();

  long nextOrd = 0;
  for (BytesRef value : values) {
    fstBuilder.add(Util.toIntsRef(value, intsScratch), nextOrd);
    nextOrd++;
  }

  final FST<Long> built = fstBuilder.finish();
  if (built != null) {
    // Only a non-null FST is written to the data output.
    built.save(data);
  }
  meta.writeVLong(nextOrd);
}
/**
 * Writes {@code field}'s metadata entry and an FST whose output for each value is that
 * value's zero-based position in {@code values}.
 *
 * <p>Written to {@code meta}, in order: field number (vInt), {@code FST} type byte, current
 * {@code data} file pointer (long), number of values (vLong, written last).
 *
 * @param field field being written
 * @param values values to add, in iteration order
 * @throws IOException if writing to {@code meta} or {@code data} fails
 */
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());

  final PositiveIntOutputs ordOutputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> fstBuilder = new Builder<Long>(INPUT_TYPE.BYTE1, ordOutputs);
  final IntsRef intsScratch = new IntsRef();

  long total = 0;
  for (BytesRef value : values) {
    fstBuilder.add(Util.toIntsRef(value, intsScratch), total);
    total++;
  }

  // Save only when the builder actually produced an FST.
  final FST<Long> built = fstBuilder.finish();
  if (built != null) {
    built.save(data);
  }
  meta.writeVLong(total);
}
/**
 * Reads this instance's serialized state from {@code input}.
 *
 * <p>Consumes, in order: the codec header, {@code count} (vLong), the separator byte, the
 * grams value (vInt), {@code totTokens} (vLong) and the FST. Loading is rejected when the
 * stored separator or grams differ from this instance's {@code separator} / {@code grams}.
 *
 * @param input source of the serialized data
 * @return always {@code true}
 * @throws IOException if reading fails
 * @throws IllegalStateException if the stored separator or grams value differs
 */
@Override
public boolean load(DataInput input) throws IOException {
  CodecUtil.checkHeader(input, CODEC_NAME, VERSION_START, VERSION_START);
  count = input.readVLong();

  final byte storedSeparator = input.readByte();
  if (storedSeparator != separator) {
    throw new IllegalStateException(
        "separator=" + separator
            + " is incorrect: original model was built with separator=" + storedSeparator);
  }

  final int storedGrams = input.readVInt();
  if (storedGrams != grams) {
    throw new IllegalStateException(
        "grams=" + grams
            + " is incorrect: original model was built with grams=" + storedGrams);
  }

  totTokens = input.readVLong();
  fst = new FST<Long>(input, PositiveIntOutputs.getSingleton());
  return true;
}
/**
 * Reads the FST from the resource identified by {@code FST_FILENAME_SUFFIX} and stores it
 * wrapped in a {@code TokenInfoFST}.
 *
 * <p>Any {@code IOException} from opening or reading is captured and forwarded, together
 * with the stream, to {@code IOUtils.closeWhileHandlingException}.
 * NOTE(review): that helper presumably rethrows the captured exception once the stream is
 * closed; confirm against IOUtils.
 *
 * @throws IOException if the resource cannot be opened or read
 */
private TokenInfoDictionary() throws IOException {
  super();
  InputStream stream = null;
  FST<Long> dictionaryFst = null;
  IOException caught = null;
  try {
    stream = getResource(FST_FILENAME_SUFFIX);
    stream = new BufferedInputStream(stream);
    dictionaryFst =
        new FST<Long>(new InputStreamDataInput(stream), PositiveIntOutputs.getSingleton());
  } catch (IOException e) {
    caught = e;
  } finally {
    IOUtils.closeWhileHandlingException(caught, stream);
  }
  // TODO: some way to configure?
  this.fst = new TokenInfoFST(dictionaryFst, true);
}
/**
 * Reads this instance's serialized state from {@code input}.
 *
 * <p>Consumes, in order: the codec header, the separator byte, the grams value (vInt),
 * {@code totTokens} (vLong) and the FST. The stream is not closed by this method.
 *
 * @param input stream holding the serialized data
 * @return always {@code true}
 * @throws IOException if reading fails
 * @throws IllegalStateException if the stored separator or grams differ from this
 *     instance's {@code separator} / {@code grams}
 */
@Override
public boolean load(InputStream input) throws IOException {
  final DataInput data = new InputStreamDataInput(input);
  CodecUtil.checkHeader(data, CODEC_NAME, VERSION_START, VERSION_START);

  final byte storedSeparator = data.readByte();
  if (storedSeparator != separator) {
    throw new IllegalStateException(
        "separator=" + separator
            + " is incorrect: original model was built with separator=" + storedSeparator);
  }

  final int storedGrams = data.readVInt();
  if (storedGrams != grams) {
    throw new IllegalStateException(
        "grams=" + grams
            + " is incorrect: original model was built with grams=" + storedGrams);
  }

  totTokens = data.readVLong();
  fst = new FST<Long>(data, PositiveIntOutputs.getSingleton());
  return true;
}
@Override public void build(InputIterator iterator) throws IOException { if (iterator.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } BytesRef scratch = new BytesRef(); InputIterator iter = new WFSTInputIterator(iterator); IntsRef scratchInts = new IntsRef(); BytesRef previous = null; PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs); while ((scratch = iter.next()) != null) { long cost = iter.weight(); if (previous == null) { previous = new BytesRef(); } else if (scratch.equals(previous)) { continue; // for duplicate suggestions, the best weight is actually // added } Util.toIntsRef(scratch, scratchInts); builder.add(scratchInts, cost); previous.copyBytes(scratch); } fst = builder.finish(); }
/**
 * Reads the FST, {@code maxAnalyzedPathsForOneInput} (vInt) and the payload flag (one byte,
 * {@code 1} meaning payloads are present) from {@code input}, in that order.
 *
 * <p>{@code input} is closed via {@code IOUtils.close} whether or not reading succeeds.
 *
 * @param input stream holding the serialized data; closed by this method
 * @return always {@code true}
 * @throws IOException if reading or closing fails
 */
@Override
public boolean load(InputStream input) throws IOException {
  final DataInput data = new InputStreamDataInput(input);
  try {
    this.fst = new FST<>(
        data,
        new PairOutputs<>(
            PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
    maxAnalyzedPathsForOneInput = data.readVInt();
    final byte payloadFlag = data.readByte();
    hasPayloads = (payloadFlag == 1);
  } finally {
    IOUtils.close(input);
  }
  return true;
}
/**
 * Reads, in order, {@code count} (vLong), the FST, {@code maxAnalyzedPathsForOneInput}
 * (vInt) and the payload flag (one byte, {@code 1} meaning payloads are present).
 *
 * @param input source of the serialized data
 * @return always {@code true}
 * @throws IOException if reading fails
 */
@Override
public boolean load(DataInput input) throws IOException {
  count = input.readVLong();
  this.fst = new FST<>(
      input,
      new PairOutputs<>(
          PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
  maxAnalyzedPathsForOneInput = input.readVInt();

  final byte payloadFlag = input.readByte();
  hasPayloads = (payloadFlag == 1);
  return true;
}
/**
 * Creates a builder whose FST uses byte inputs and pair outputs combining
 * {@code PositiveIntOutputs} with {@code ByteSequenceOutputs}.
 *
 * @param maxSurfaceFormsPerAnalyzedForm stored as-is and used as the length of the
 *     {@code surfaceFormsAndPayload} array
 * @param hasPayloads stored as-is
 * @param payloadSep stored as-is
 */
public XBuilder(int maxSurfaceFormsPerAnalyzedForm, boolean hasPayloads, int payloadSep) {
  // Plain configuration first.
  this.payloadSep = payloadSep;
  this.hasPayloads = hasPayloads;
  this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;

  // The FST builder needs the outputs instance, so the outputs are created before it.
  this.outputs = new PairOutputs<>(
      PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
  this.builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);

  this.surfaceFormsAndPayload = new SurfaceFormAndPayload[maxSurfaceFormsPerAnalyzedForm];
}
/**
 * Loads the FST followed by {@code maxAnalyzedPathsForOneInput} (vInt) and the payload flag
 * (one byte; the flag is set when the byte equals {@code 1}) from {@code input}.
 *
 * <p>{@code IOUtils.close(input)} runs in a {@code finally} block, so the stream is closed
 * on success and on failure alike.
 *
 * @param input stream holding the serialized data; closed by this method
 * @return always {@code true}
 * @throws IOException if reading or closing fails
 */
@Override
public boolean load(InputStream input) throws IOException {
  final DataInput source = new InputStreamDataInput(input);
  try {
    this.fst = new FST<>(
        source,
        new PairOutputs<>(
            PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
    maxAnalyzedPathsForOneInput = source.readVInt();
    if (source.readByte() == 1) {
      hasPayloads = true;
    } else {
      hasPayloads = false;
    }
  } finally {
    IOUtils.close(input);
  }
  return true;
}
/**
 * Creates the writer state for {@code fieldInfo}.
 *
 * <p>The field is announced to {@code postingsWriter} through {@code setField}; the value it
 * returns becomes {@code longsSize} and sizes the two {@code long[]} buffers. All counters
 * and file-pointer trackers start at zero.
 *
 * @param fieldInfo field whose terms will be written
 */
TermsWriter(FieldInfo fieldInfo) {
  this.numTerms = 0;
  this.fieldInfo = fieldInfo;
  this.longsSize = postingsWriter.setField(fieldInfo);

  // FST builder over byte inputs with positive-int outputs.
  this.outputs = PositiveIntOutputs.getSingleton();
  this.builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);

  // File-pointer trackers, all starting at zero.
  this.lastBlockStatsFP = 0;
  this.lastBlockMetaLongsFP = 0;
  this.lastBlockMetaBytesFP = 0;
  this.lastMetaBytesFP = 0;

  // Buffers sized by what the postings writer reported for this field.
  this.lastBlockLongs = new long[longsSize];
  this.lastLongs = new long[longsSize];
}
public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer) throws IOException { this.fieldInfo = fieldInfo; fstOutputs = PositiveIntOutputs.getSingleton(); fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, fstOutputs); indexStart = out.getFilePointer(); ////System.out.println("VGW: field=" + fieldInfo.name); // Always put empty string in fstBuilder.add(new IntsRef(), termsFilePointer); startTermsFilePointer = termsFilePointer; }
private void loadTermsIndex() throws IOException { if (fst == null) { IndexInput clone = in.clone(); clone.seek(indexStart); fst = new FST<>(clone, fstOutputs); clone.close(); /* final String dotFileName = segment + "_" + fieldInfo.name + ".dot"; Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName)); Util.toDot(fst, w, false, false); System.out.println("FST INDEX: SAVED to " + dotFileName); w.close(); */ if (indexDivisor > 1) { // subsample final IntsRefBuilder scratchIntsRef = new IntsRefBuilder(); final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs); final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst); BytesRefFSTEnum.InputOutput<Long> result; int count = indexDivisor; while((result = fstEnum.next()) != null) { if (count == indexDivisor) { builder.add(Util.toIntsRef(result.input, scratchIntsRef), result.output); count = 0; } count++; } fst = builder.finish(); } } }
/**
 * Begins the FST index for a single field.
 *
 * <p>The current file pointer of {@code out} is remembered as {@code indexStart}, and the
 * builder's first entry is the empty input with {@code termsFilePointer} as its output.
 *
 * @param fieldInfo field being indexed
 * @param termsFilePointer output for the empty input; also kept as
 *     {@code startTermsFilePointer}
 * @throws IOException if adding the first entry to the FST builder fails
 */
public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer) throws IOException {
  this.fieldInfo = fieldInfo;

  this.fstOutputs = PositiveIntOutputs.getSingleton(true);
  this.fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, fstOutputs);
  this.indexStart = out.getFilePointer();

  // Always put empty string in
  this.fstBuilder.add(new IntsRef(), termsFilePointer);
  this.startTermsFilePointer = termsFilePointer;
}
private void loadTermsIndex() throws IOException { if (fst == null) { IndexInput clone = in.clone(); clone.seek(indexStart); fst = new FST<Long>(clone, fstOutputs); clone.close(); /* final String dotFileName = segment + "_" + fieldInfo.name + ".dot"; Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName)); Util.toDot(fst, w, false, false); System.out.println("FST INDEX: SAVED to " + dotFileName); w.close(); */ if (indexDivisor > 1) { // subsample final IntsRef scratchIntsRef = new IntsRef(); final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true); final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs); final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst); BytesRefFSTEnum.InputOutput<Long> result; int count = indexDivisor; while((result = fstEnum.next()) != null) { if (count == indexDivisor) { builder.add(Util.toIntsRef(result.input, scratchIntsRef), result.output); count = 0; } count++; } fst = builder.finish(); } } }
/**
 * Reads the FST and then {@code maxAnalyzedPathsForOneInput} (vInt) from {@code input}.
 *
 * <p>{@code input} is closed via {@code IOUtils.close} whether or not reading succeeds.
 *
 * @param input stream holding the serialized data; closed by this method
 * @return always {@code true}
 * @throws IOException if reading or closing fails
 */
@Override
public boolean load(InputStream input) throws IOException {
  final DataInput data = new InputStreamDataInput(input);
  try {
    final PairOutputs<Long,BytesRef> pairOutputs = new PairOutputs<Long,BytesRef>(
        PositiveIntOutputs.getSingleton(true), ByteSequenceOutputs.getSingleton());
    this.fst = new FST<Pair<Long,BytesRef>>(data, pairOutputs);
    maxAnalyzedPathsForOneInput = data.readVInt();
  } finally {
    IOUtils.close(input);
  }
  return true;
}
/**
 * Reads the FST from {@code input} and closes the stream afterwards, on success and on
 * failure alike.
 *
 * @param input stream holding the serialized FST; closed by this method
 * @return always {@code true}
 * @throws IOException if reading or closing fails
 */
@Override
public boolean load(InputStream input) throws IOException {
  try {
    final InputStreamDataInput data = new InputStreamDataInput(input);
    this.fst = new FST<Long>(data, PositiveIntOutputs.getSingleton(true));
  } finally {
    IOUtils.close(input);
  }
  return true;
}
/**
 * Sets up the per-field writer state.
 *
 * <p>{@code postingsWriter.setField(fieldInfo)} is invoked and its result stored as
 * {@code longsSize}, which is also the length of {@code lastBlockLongs} and
 * {@code lastLongs}. The term counter and every file-pointer tracker begin at zero.
 *
 * @param fieldInfo field whose terms will be written
 */
TermsWriter(FieldInfo fieldInfo) {
  this.numTerms = 0;
  this.fieldInfo = fieldInfo;
  this.longsSize = postingsWriter.setField(fieldInfo);

  this.outputs = PositiveIntOutputs.getSingleton();
  this.builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);

  // All file-pointer trackers start at zero.
  this.lastBlockStatsFP = 0;
  this.lastBlockMetaLongsFP = 0;
  this.lastBlockMetaBytesFP = 0;
  this.lastMetaBytesFP = 0;

  this.lastBlockLongs = new long[longsSize];
  this.lastLongs = new long[longsSize];
}
/**
 * Opens the terms index and terms block files of a segment and reads the per-field
 * summaries.
 *
 * <p>After both headers are read, the postings reader is initialized from the block input
 * and {@code seekDir} positions it. For each field the block input supplies, in order:
 * field number, term count, total term frequency (only when the field's index options are
 * not {@code DOCS_ONLY}; otherwise {@code -1} is used), document frequency sum, document
 * count and longs size. The field's FST is read from the index input. Each resulting
 * {@code TermsReader} is registered in {@code fields} under the field name and passed to
 * {@code checkFieldSummary} together with any reader it displaced.
 *
 * <p>Both inputs are closed in the {@code finally} block, on success as well as on failure.
 *
 * @param state segment state providing the directory, context, segment info and field infos
 * @param postingsReader postings reader to initialize from the block input
 * @throws IOException if opening or reading either file fails
 */
public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
  final String termsIndexFileName = IndexFileNames.segmentFileName(
      state.segmentInfo.name, state.segmentSuffix, FSTOrdTermsWriter.TERMS_INDEX_EXTENSION);
  final String termsBlockFileName = IndexFileNames.segmentFileName(
      state.segmentInfo.name, state.segmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);

  this.postingsReader = postingsReader;
  try {
    this.indexIn = state.directory.openInput(termsIndexFileName, state.context);
    this.blockIn = state.directory.openInput(termsBlockFileName, state.context);
    readHeader(indexIn);
    readHeader(blockIn);
    this.postingsReader.init(blockIn);
    seekDir(blockIn);

    final FieldInfos infos = state.fieldInfos;
    final int fieldCount = blockIn.readVInt();
    for (int fieldIdx = 0; fieldIdx < fieldCount; fieldIdx++) {
      // The reads below must stay in exactly this order.
      final FieldInfo info = infos.fieldInfo(blockIn.readVInt());
      final boolean hasFreq = info.getIndexOptions() != IndexOptions.DOCS_ONLY;
      final long numTerms = blockIn.readVLong();
      final long sumTotalTermFreq;
      if (hasFreq) {
        sumTotalTermFreq = blockIn.readVLong();
      } else {
        sumTotalTermFreq = -1;
      }
      final long sumDocFreq = blockIn.readVLong();
      final int docCount = blockIn.readVInt();
      final int longsSize = blockIn.readVInt();
      final FST<Long> index = new FST<Long>(indexIn, PositiveIntOutputs.getSingleton());

      final TermsReader current = new TermsReader(
          info, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);
      final TermsReader previous = fields.put(info.name, current);
      checkFieldSummary(state.segmentInfo, current, previous);
    }
  } finally {
    IOUtils.closeWhileHandlingException(indexIn, blockIn);
  }
}
public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer) throws IOException { this.fieldInfo = fieldInfo; fstOutputs = PositiveIntOutputs.getSingleton(); fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, fstOutputs); indexStart = out.getFilePointer(); ////System.out.println("VGW: field=" + fieldInfo.name); // Always put empty string in fstBuilder.add(new IntsRef(), termsFilePointer); startTermsFilePointer = termsFilePointer; }
private void loadTermsIndex() throws IOException { if (fst == null) { IndexInput clone = in.clone(); clone.seek(indexStart); fst = new FST<Long>(clone, fstOutputs); clone.close(); /* final String dotFileName = segment + "_" + fieldInfo.name + ".dot"; Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName)); Util.toDot(fst, w, false, false); System.out.println("FST INDEX: SAVED to " + dotFileName); w.close(); */ if (indexDivisor > 1) { // subsample final IntsRef scratchIntsRef = new IntsRef(); final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs); final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst); BytesRefFSTEnum.InputOutput<Long> result; int count = indexDivisor; while((result = fstEnum.next()) != null) { if (count == indexDivisor) { builder.add(Util.toIntsRef(result.input, scratchIntsRef), result.output); count = 0; } count++; } fst = builder.finish(); } } }
/**
 * Reads, in order, {@code count} (vLong), the FST, {@code maxAnalyzedPathsForOneInput}
 * (vInt) and the payload flag (one byte; the flag is set when the byte equals {@code 1}).
 *
 * @param input source of the serialized data
 * @return always {@code true}
 * @throws IOException if reading fails
 */
@Override
public boolean load(DataInput input) throws IOException {
  count = input.readVLong();

  final PairOutputs<Long,BytesRef> pairOutputs = new PairOutputs<Long,BytesRef>(
      PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
  this.fst = new FST<Pair<Long,BytesRef>>(input, pairOutputs);

  maxAnalyzedPathsForOneInput = input.readVInt();
  final byte payloadFlag = input.readByte();
  hasPayloads = (payloadFlag == 1);
  return true;
}
@Override public void build(InputIterator iterator) throws IOException { if (iterator.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } count = 0; BytesRef scratch = new BytesRef(); InputIterator iter = new WFSTInputIterator(iterator); IntsRef scratchInts = new IntsRef(); BytesRef previous = null; PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs); while ((scratch = iter.next()) != null) { long cost = iter.weight(); if (previous == null) { previous = new BytesRef(); } else if (scratch.equals(previous)) { continue; // for duplicate suggestions, the best weight is actually // added } Util.toIntsRef(scratch, scratchInts); builder.add(scratchInts, cost); previous.copyBytes(scratch); count++; } fst = builder.finish(); }