Java 类org.apache.lucene.util.fst.Builder 实例源码

项目:lams    文件:NormalizeCharMap.java   
/**
 * Compiles all pending mappings into an FST and wraps it in a new
 * NormalizeCharMap; call this once you are done calling {@link #add}.
 * The pending mappings are cleared after the FST has been finished.
 *
 * @return a NormalizeCharMap backed by the freshly built FST
 * @throws RuntimeException wrapping any IOException raised while building
 */
public NormalizeCharMap build() {
  try {
    final Outputs<CharsRef> charOutputs = CharSequenceOutputs.getSingleton();
    // NOTE(review): the FST Builder is fully qualified, presumably to avoid
    // a clash with an enclosing class of the same simple name -- confirm.
    final org.apache.lucene.util.fst.Builder<CharsRef> fstBuilder =
        new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, charOutputs);
    final IntsRefBuilder keyScratch = new IntsRefBuilder();

    for (Map.Entry<String,String> pair : pendingPairs.entrySet()) {
      final CharsRef replacement = new CharsRef(pair.getValue());
      fstBuilder.add(Util.toUTF16(pair.getKey(), keyScratch), replacement);
    }

    final FST<CharsRef> compiled = fstBuilder.finish();
    pendingPairs.clear();
    return new NormalizeCharMap(compiled);
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }
}
项目:lams    文件:Dictionary.java   
/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them
 * into an FST that maps the second whitespace-separated field of each line
 * to its third field.
 *
 * @param reader source of the conversion lines
 * @param num number of lines to consume
 * @return the FST returned by the builder for the collected mappings
 * @throws IOException if reading fails or the FST builder reports an I/O error
 * @throws ParseException if the input ends before {@code num} lines were
 *         read, or a line does not split into exactly three fields
 * @throws IllegalStateException if the same source string is mapped twice
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  // NOTE(review): a TreeMap keeps the keys sorted; presumably the FST
  // builder requires its inputs in sorted order -- confirm.
  Map<String,String> mappings = new TreeMap<>();

  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    if (line == null) {
      // readLine() signals end of stream with null; report it as a syntax
      // problem instead of failing with a NullPointerException below.
      throw new ParseException("unexpected end of input: expected " + num
          + " conversion lines but found only " + i, reader.getLineNumber());
    }
    String[] parts = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }

  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(), scratchInts);
    builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }

  return builder.finish();
}
项目:search    文件:MemoryDocValuesConsumer.java   
/**
 * Writes an FST that maps each value to its ordinal (its zero-based
 * position in iteration order).
 *
 * The field number, the {@code FST} type constant and the current data file
 * pointer go to {@code meta} first; the FST itself is saved to {@code data}
 * when the builder produced one; finally the number of values is appended
 * to {@code meta}.
 *
 * @param field field whose number is recorded in the metadata
 * @param values values to add to the FST, in iteration order
 * @throws IOException if writing or building fails
 */
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  // Metadata header for this field.
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());

  final PositiveIntOutputs ordOutputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> fstBuilder = new Builder<>(INPUT_TYPE.BYTE1, ordOutputs);
  final IntsRefBuilder intsScratch = new IntsRefBuilder();

  long count = 0;
  for (BytesRef value : values) {
    fstBuilder.add(Util.toIntsRef(value, intsScratch), count);
    count++;
  }

  // finish() may yield null, in which case nothing is saved.
  final FST<Long> built = fstBuilder.finish();
  if (built != null) {
    built.save(data);
  }
  meta.writeVLong(count);
}
项目:search    文件:NormalizeCharMap.java   
/**
 * Turns the pending mappings into an FST-backed NormalizeCharMap; call this
 * once you are done calling {@link #add}. After the FST is finished the
 * pending mappings are cleared.
 *
 * @return a new NormalizeCharMap wrapping the built FST
 * @throws RuntimeException wrapping any IOException raised while building
 */
public NormalizeCharMap build() {
  final FST<CharsRef> compiledMap;
  try {
    final Outputs<CharsRef> charOutputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> fstBuilder =
        new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, charOutputs);
    final IntsRefBuilder utf16Scratch = new IntsRefBuilder();
    for (Map.Entry<String,String> mapping : pendingPairs.entrySet()) {
      final String match = mapping.getKey();
      final String replacement = mapping.getValue();
      fstBuilder.add(Util.toUTF16(match, utf16Scratch), new CharsRef(replacement));
    }
    compiledMap = fstBuilder.finish();
    pendingPairs.clear();
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }
  return new NormalizeCharMap(compiledMap);
}
项目:search    文件:Dictionary.java   
/**
 * Parses {@code num} conversion lines and builds an FST from them. Each
 * line must split on whitespace into exactly three fields; the second field
 * is mapped to the third.
 *
 * @param reader source of the conversion lines
 * @param num number of lines to consume
 * @return the FST returned by the builder for the collected mappings
 * @throws IOException if reading fails or the FST builder reports an I/O error
 * @throws ParseException if the input ends early or a line is malformed
 * @throws IllegalStateException if a source string appears more than once
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  Map<String,String> mappings = new TreeMap<>();

  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    if (line == null) {
      // End of stream before all announced lines were seen: fail with a
      // ParseException rather than a NullPointerException on split().
      throw new ParseException("unexpected end of input: expected " + num
          + " conversion lines but found only " + i, reader.getLineNumber());
    }
    String[] parts = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }

  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  // Entries are visited in the TreeMap's key order.
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(), scratchInts);
    builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }

  return builder.finish();
}
项目:search    文件:Lucene42DocValuesConsumer.java   
/**
 * Serializes the given values as an FST keyed by value with the value's
 * ordinal (zero-based iteration position) as output.
 *
 * Writes the field number, the {@code FST} type constant and the data file
 * pointer to {@code meta}, saves the FST to {@code data} if one was
 * produced, and finally records the value count in {@code meta}.
 *
 * @param field field whose number is recorded in the metadata
 * @param values values to add, in iteration order
 * @throws IOException if writing or building fails
 */
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());

  final PositiveIntOutputs ordOutputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> fstBuilder = new Builder<>(INPUT_TYPE.BYTE1, ordOutputs);
  final IntsRefBuilder keyScratch = new IntsRefBuilder();
  long nextOrd = 0;
  for (BytesRef term : values) {
    fstBuilder.add(Util.toIntsRef(term, keyScratch), nextOrd);
    nextOrd++;
  }

  final FST<Long> result = fstBuilder.finish();
  if (result != null) {
    result.save(data);
  }
  // nextOrd now equals the number of values that were added.
  meta.writeVLong(nextOrd);
}
项目:NYBC    文件:WFSTCompletionLookup.java   
/**
 * Builds the lookup FST from the supplied terms, using each term's weight
 * as the FST output. A term equal to the previously added one is skipped.
 *
 * @param iterator source of terms and weights; wrapped in a
 *        WFSTTermFreqIteratorWrapper before being consumed
 * @throws IOException if iteration or FST construction fails
 */
@Override
public void build(TermFreqIterator iterator) throws IOException {
  final TermFreqIterator wrapped = new WFSTTermFreqIteratorWrapper(iterator);
  final IntsRef termInts = new IntsRef();
  final PositiveIntOutputs weightOutputs = PositiveIntOutputs.getSingleton(true);
  final Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, weightOutputs);

  BytesRef lastAdded = null;
  for (BytesRef term = wrapped.next(); term != null; term = wrapped.next()) {
    final long cost = wrapped.weight();

    if (lastAdded == null) {
      lastAdded = new BytesRef();
    } else if (term.equals(lastAdded)) {
      // for duplicate suggestions, the best weight is actually added
      continue;
    }
    Util.toIntsRef(term, termInts);
    fstBuilder.add(termInts, cost);
    lastAdded.copyBytes(term);
  }
  fst = fstBuilder.finish();
}
项目:NYBC    文件:NormalizeCharMap.java   
/**
 * Compiles all pending mappings into an FST and wraps it in a new
 * NormalizeCharMap; call this once you are done calling {@link #add}.
 * The pending mappings are cleared after the FST has been finished.
 *
 * @return a NormalizeCharMap backed by the freshly built FST
 * @throws RuntimeException wrapping any IOException raised while building
 */
public NormalizeCharMap build() {
  try {
    final Outputs<CharsRef> charOutputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> fstBuilder =
        new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, charOutputs);
    final IntsRef keyScratch = new IntsRef();

    for (Map.Entry<String,String> pair : pendingPairs.entrySet()) {
      final CharsRef replacement = new CharsRef(pair.getValue());
      fstBuilder.add(Util.toUTF16(pair.getKey(), keyScratch), replacement);
    }

    final FST<CharsRef> compiled = fstBuilder.finish();
    pendingPairs.clear();
    return new NormalizeCharMap(compiled);
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }
}
项目:NYBC    文件:BlockTreeTermsWriter.java   
TermsWriter(FieldInfo fieldInfo) {
  this.fieldInfo = fieldInfo;

  noOutputs = NoOutputs.getSingleton();

  // This Builder is just used transiently to fragment
  // terms into "good" blocks; we don't save the
  // resulting FST:
  blockBuilder = new Builder<Object>(FST.INPUT_TYPE.BYTE1,
                                     0, 0, true,
                                     true, Integer.MAX_VALUE,
                                     noOutputs,
                                     new FindBlocks(), false,
                                     PackedInts.COMPACT,
                                     true, 15);

  postingsWriter.setField(fieldInfo);
}
项目:NYBC    文件:Lucene42DocValuesConsumer.java   
/**
 * Writes an FST that maps each value to its ordinal (its zero-based
 * position in iteration order).
 *
 * The field number, the {@code FST} type constant and the current data file
 * pointer go to {@code meta} first; the FST is saved to {@code data} when
 * the builder produced one; finally the value count is appended to
 * {@code meta}.
 *
 * @param field field whose number is recorded in the metadata
 * @param values values to add to the FST, in iteration order
 * @throws IOException if writing or building fails
 */
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());

  final PositiveIntOutputs ordOutputs = PositiveIntOutputs.getSingleton(true);
  final Builder<Long> fstBuilder = new Builder<Long>(INPUT_TYPE.BYTE1, ordOutputs);
  final IntsRef intsScratch = new IntsRef();

  long count = 0;
  for (BytesRef value : values) {
    fstBuilder.add(Util.toIntsRef(value, intsScratch), count);
    count++;
  }

  // finish() may yield null, in which case nothing is saved.
  final FST<Long> built = fstBuilder.finish();
  if (built != null) {
    built.save(data);
  }
  meta.writeVLong(count);
}
项目:read-open-source-code    文件:MemoryDocValuesConsumer.java   
/**
 * Serializes the given values as an FST keyed by value with the value's
 * ordinal (zero-based iteration position) as output.
 *
 * Writes the field number, the {@code FST} type constant and the data file
 * pointer to {@code meta}, saves the FST to {@code data} if one was
 * produced, and finally records the value count in {@code meta}.
 *
 * @param field field whose number is recorded in the metadata
 * @param values values to add, in iteration order
 * @throws IOException if writing or building fails
 */
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());

  final PositiveIntOutputs ordOutputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> fstBuilder = new Builder<Long>(INPUT_TYPE.BYTE1, ordOutputs);
  final IntsRef keyScratch = new IntsRef();
  long nextOrd = 0;
  for (BytesRef term : values) {
    fstBuilder.add(Util.toIntsRef(term, keyScratch), nextOrd);
    nextOrd++;
  }

  final FST<Long> result = fstBuilder.finish();
  if (result != null) {
    result.save(data);
  }
  // nextOrd now equals the number of values that were added.
  meta.writeVLong(nextOrd);
}
项目:read-open-source-code    文件:BlockTreeTermsWriter.java   
TermsWriter(FieldInfo fieldInfo) {
  this.fieldInfo = fieldInfo;

  noOutputs = NoOutputs.getSingleton();

  // This Builder is just used transiently to fragment
  // terms into "good" blocks; we don't save the
  // resulting FST:
  blockBuilder = new Builder<Object>(FST.INPUT_TYPE.BYTE1,
                                     0, 0, true,
                                     true, Integer.MAX_VALUE,
                                     noOutputs,
                                     new FindBlocks(), false,
                                     PackedInts.COMPACT,
                                     true, 15);

  this.longsSize = postingsWriter.setField(fieldInfo);
}
项目:read-open-source-code    文件:NormalizeCharMap.java   
/**
 * Turns the pending mappings into an FST-backed NormalizeCharMap; call this
 * once you are done calling {@link #add}. After the FST is finished the
 * pending mappings are cleared.
 *
 * @return a new NormalizeCharMap wrapping the built FST
 * @throws RuntimeException wrapping any IOException raised while building
 */
public NormalizeCharMap build() {
  final FST<CharsRef> compiledMap;
  try {
    final Outputs<CharsRef> charOutputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> fstBuilder =
        new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, charOutputs);
    final IntsRef utf16Scratch = new IntsRef();
    for (Map.Entry<String,String> mapping : pendingPairs.entrySet()) {
      final String match = mapping.getKey();
      final String replacement = mapping.getValue();
      fstBuilder.add(Util.toUTF16(match, utf16Scratch), new CharsRef(replacement));
    }
    compiledMap = fstBuilder.finish();
    pendingPairs.clear();
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }
  return new NormalizeCharMap(compiledMap);
}
项目:read-open-source-code    文件:MemoryDocValuesConsumer.java   
/**
 * Writes an FST that maps each value to its ordinal (its zero-based
 * position in iteration order).
 *
 * The field number, the {@code FST} type constant and the current data file
 * pointer go to {@code meta} first; the FST is saved to {@code data} when
 * the builder produced one; finally the value count is appended to
 * {@code meta}.
 *
 * @param field field whose number is recorded in the metadata
 * @param values values to add to the FST, in iteration order
 * @throws IOException if writing or building fails
 */
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  // Metadata header for this field.
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());

  final PositiveIntOutputs ordOutputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> fstBuilder = new Builder<Long>(INPUT_TYPE.BYTE1, ordOutputs);
  final IntsRef intsScratch = new IntsRef();

  long count = 0;
  for (BytesRef value : values) {
    fstBuilder.add(Util.toIntsRef(value, intsScratch), count);
    count++;
  }

  // finish() may yield null, in which case nothing is saved.
  final FST<Long> built = fstBuilder.finish();
  if (built != null) {
    built.save(data);
  }
  meta.writeVLong(count);
}
项目:read-open-source-code    文件:BlockTreeTermsWriter.java   
TermsWriter(FieldInfo fieldInfo) {
  this.fieldInfo = fieldInfo;

  noOutputs = NoOutputs.getSingleton();

  // This Builder is just used transiently to fragment
  // terms into "good" blocks; we don't save the
  // resulting FST:
  blockBuilder = new Builder<Object>(FST.INPUT_TYPE.BYTE1,
                                     0, 0, true,
                                     true, Integer.MAX_VALUE,
                                     noOutputs,
                                     new FindBlocks(), false,
                                     PackedInts.COMPACT,
                                     true, 15);

  this.longsSize = postingsWriter.setField(fieldInfo);
}
项目:read-open-source-code    文件:NormalizeCharMap.java   
/**
 * Compiles all pending mappings into an FST and wraps it in a new
 * NormalizeCharMap; call this once you are done calling {@link #add}.
 * The pending mappings are cleared after the FST has been finished.
 *
 * @return a NormalizeCharMap backed by the freshly built FST
 * @throws RuntimeException wrapping any IOException raised while building
 */
public NormalizeCharMap build() {
  try {
    final Outputs<CharsRef> charOutputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> fstBuilder =
        new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, charOutputs);
    final IntsRef keyScratch = new IntsRef();

    for (Map.Entry<String,String> pair : pendingPairs.entrySet()) {
      final CharsRef replacement = new CharsRef(pair.getValue());
      fstBuilder.add(Util.toUTF16(pair.getKey(), keyScratch), replacement);
    }

    final FST<CharsRef> compiled = fstBuilder.finish();
    pendingPairs.clear();
    return new NormalizeCharMap(compiled);
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }
}
项目:read-open-source-code    文件:MemoryDocValuesConsumer.java   
/**
 * Serializes the given values as an FST keyed by value with the value's
 * ordinal (zero-based iteration position) as output.
 *
 * Writes the field number, the {@code FST} type constant and the data file
 * pointer to {@code meta}, saves the FST to {@code data} if one was
 * produced, and finally records the value count in {@code meta}.
 *
 * @param field field whose number is recorded in the metadata
 * @param values values to add, in iteration order
 * @throws IOException if writing or building fails
 */
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());

  final PositiveIntOutputs ordOutputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> fstBuilder = new Builder<>(INPUT_TYPE.BYTE1, ordOutputs);
  final IntsRefBuilder keyScratch = new IntsRefBuilder();
  long nextOrd = 0;
  for (BytesRef term : values) {
    fstBuilder.add(Util.toIntsRef(term, keyScratch), nextOrd);
    nextOrd++;
  }

  final FST<Long> result = fstBuilder.finish();
  if (result != null) {
    result.save(data);
  }
  // nextOrd now equals the number of values that were added.
  meta.writeVLong(nextOrd);
}
项目:read-open-source-code    文件:NormalizeCharMap.java   
/**
 * Turns the pending mappings into an FST-backed NormalizeCharMap; call this
 * once you are done calling {@link #add}. After the FST is finished the
 * pending mappings are cleared.
 *
 * @return a new NormalizeCharMap wrapping the built FST
 * @throws RuntimeException wrapping any IOException raised while building
 */
public NormalizeCharMap build() {
  final FST<CharsRef> compiledMap;
  try {
    final Outputs<CharsRef> charOutputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> fstBuilder =
        new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, charOutputs);
    final IntsRefBuilder utf16Scratch = new IntsRefBuilder();
    for (Map.Entry<String,String> mapping : pendingPairs.entrySet()) {
      final String match = mapping.getKey();
      final String replacement = mapping.getValue();
      fstBuilder.add(Util.toUTF16(match, utf16Scratch), new CharsRef(replacement));
    }
    compiledMap = fstBuilder.finish();
    pendingPairs.clear();
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }
  return new NormalizeCharMap(compiledMap);
}
项目:read-open-source-code    文件:Dictionary.java   
/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them
 * into an FST that maps the second whitespace-separated field of each line
 * to its third field.
 *
 * @param reader source of the conversion lines
 * @param num number of lines to consume
 * @return the FST returned by the builder for the collected mappings
 * @throws IOException if reading fails or the FST builder reports an I/O error
 * @throws ParseException if the input ends before {@code num} lines were
 *         read, or a line does not split into exactly three fields
 * @throws IllegalStateException if the same source string is mapped twice
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  Map<String,String> mappings = new TreeMap<>();

  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    if (line == null) {
      // readLine() returns null at end of stream; surface that as a parse
      // error instead of letting split() throw a NullPointerException.
      throw new ParseException("unexpected end of input: expected " + num
          + " conversion lines but found only " + i, reader.getLineNumber());
    }
    String[] parts = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }

  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  // Entries are visited in the TreeMap's key order.
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(), scratchInts);
    builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }

  return builder.finish();
}
项目:Maskana-Gestor-de-Conocimiento    文件:MemoryDocValuesConsumer.java   
/**
 * Writes an FST that maps each value to its ordinal (its zero-based
 * position in iteration order).
 *
 * The field number, the {@code FST} type constant and the current data file
 * pointer go to {@code meta} first; the FST is saved to {@code data} when
 * the builder produced one; finally the value count is appended to
 * {@code meta}.
 *
 * @param field field whose number is recorded in the metadata
 * @param values values to add to the FST, in iteration order
 * @throws IOException if writing or building fails
 */
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  // Metadata header for this field.
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());

  final PositiveIntOutputs ordOutputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> fstBuilder = new Builder<Long>(INPUT_TYPE.BYTE1, ordOutputs);
  final IntsRef intsScratch = new IntsRef();

  long count = 0;
  for (BytesRef value : values) {
    fstBuilder.add(Util.toIntsRef(value, intsScratch), count);
    count++;
  }

  // finish() may yield null, in which case nothing is saved.
  final FST<Long> built = fstBuilder.finish();
  if (built != null) {
    built.save(data);
  }
  meta.writeVLong(count);
}
项目:Maskana-Gestor-de-Conocimiento    文件:WFSTCompletionLookup.java   
/**
 * Builds the lookup FST from the supplied entries, using each entry's
 * weight as the FST output. An entry equal to the previously added one is
 * skipped.
 *
 * @param iterator source of terms and weights; wrapped in a
 *        WFSTInputIterator before being consumed
 * @throws IllegalArgumentException if the iterator reports payloads
 * @throws IOException if iteration or FST construction fails
 */
@Override
public void build(InputIterator iterator) throws IOException {
  if (iterator.hasPayloads()) {
    throw new IllegalArgumentException("this suggester doesn't support payloads");
  }

  final InputIterator wrapped = new WFSTInputIterator(iterator);
  final IntsRef termInts = new IntsRef();
  final PositiveIntOutputs weightOutputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, weightOutputs);

  BytesRef lastAdded = null;
  for (BytesRef term = wrapped.next(); term != null; term = wrapped.next()) {
    final long cost = wrapped.weight();

    if (lastAdded == null) {
      lastAdded = new BytesRef();
    } else if (term.equals(lastAdded)) {
      // for duplicate suggestions, the best weight is actually added
      continue;
    }
    Util.toIntsRef(term, termInts);
    fstBuilder.add(termInts, cost);
    lastAdded.copyBytes(term);
  }
  fst = fstBuilder.finish();
}
项目:Maskana-Gestor-de-Conocimiento    文件:NormalizeCharMap.java   
/**
 * Turns the pending mappings into an FST-backed NormalizeCharMap; call this
 * once you are done calling {@link #add}. After the FST is finished the
 * pending mappings are cleared.
 *
 * @return a new NormalizeCharMap wrapping the built FST
 * @throws RuntimeException wrapping any IOException raised while building
 */
public NormalizeCharMap build() {
  final FST<CharsRef> compiledMap;
  try {
    final Outputs<CharsRef> charOutputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> fstBuilder =
        new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, charOutputs);
    final IntsRef utf16Scratch = new IntsRef();
    for (Map.Entry<String,String> mapping : pendingPairs.entrySet()) {
      final String match = mapping.getKey();
      final String replacement = mapping.getValue();
      fstBuilder.add(Util.toUTF16(match, utf16Scratch), new CharsRef(replacement));
    }
    compiledMap = fstBuilder.finish();
    pendingPairs.clear();
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }
  return new NormalizeCharMap(compiledMap);
}
项目:Maskana-Gestor-de-Conocimiento    文件:Lucene42DocValuesConsumer.java   
/**
 * Serializes the given values as an FST keyed by value with the value's
 * ordinal (zero-based iteration position) as output.
 *
 * Writes the field number, the {@code FST} type constant and the data file
 * pointer to {@code meta}, saves the FST to {@code data} if one was
 * produced, and finally records the value count in {@code meta}.
 *
 * @param field field whose number is recorded in the metadata
 * @param values values to add, in iteration order
 * @throws IOException if writing or building fails
 */
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());

  final PositiveIntOutputs ordOutputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> fstBuilder = new Builder<Long>(INPUT_TYPE.BYTE1, ordOutputs);
  final IntsRef keyScratch = new IntsRef();
  long nextOrd = 0;
  for (BytesRef term : values) {
    fstBuilder.add(Util.toIntsRef(term, keyScratch), nextOrd);
    nextOrd++;
  }

  final FST<Long> result = fstBuilder.finish();
  if (result != null) {
    result.save(data);
  }
  // nextOrd now equals the number of values that were added.
  meta.writeVLong(nextOrd);
}
项目:Maskana-Gestor-de-Conocimiento    文件:BlockTreeTermsWriter.java   
TermsWriter(FieldInfo fieldInfo) {
  this.fieldInfo = fieldInfo;

  noOutputs = NoOutputs.getSingleton();

  // This Builder is just used transiently to fragment
  // terms into "good" blocks; we don't save the
  // resulting FST:
  blockBuilder = new Builder<Object>(FST.INPUT_TYPE.BYTE1,
                                     0, 0, true,
                                     true, Integer.MAX_VALUE,
                                     noOutputs,
                                     new FindBlocks(), false,
                                     PackedInts.COMPACT,
                                     true, 15);

  postingsWriter.setField(fieldInfo);
}
项目:elasticsearch_my    文件:XAnalyzingSuggester.java   
/**
 * Creates a builder whose FST outputs are pairs of a positive integer and a
 * byte sequence.
 *
 * @param maxSurfaceFormsPerAnalyzedForm capacity of the surface-form buffer
 * @param hasPayloads stored as-is on this builder
 * @param payloadSep stored as-is on this builder
 */
public XBuilder(int maxSurfaceFormsPerAnalyzedForm, boolean hasPayloads, int payloadSep) {
    // Plain configuration values first.
    this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;
    this.hasPayloads = hasPayloads;
    this.payloadSep = payloadSep;

    // FST machinery: the builder is created over the paired outputs.
    this.outputs = new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
    this.builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);

    // One slot per permitted surface form.
    surfaceFormsAndPayload = new SurfaceFormAndPayload[maxSurfaceFormsPerAnalyzedForm];
}
项目:lams    文件:BlockTreeTermsWriter.java   
/**
 * Copies every input/output pair of {@code subIndex} into {@code builder},
 * in the order the FST enumeration returns them.
 *
 * @param builder destination FST builder
 * @param subIndex FST whose entries are appended
 * @param scratchIntsRef reusable buffer for converting each input
 * @throws IOException if enumerating or adding fails
 */
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<BytesRef> entries = new BytesRefFSTEnum<>(subIndex);
  for (BytesRefFSTEnum.InputOutput<BytesRef> entry = entries.next();
       entry != null;
       entry = entries.next()) {
    //if (DEBUG) {
    //  System.out.println("      add sub=" + entry.input + " " + entry.input + " output=" + entry.output);
    //}
    builder.add(Util.toIntsRef(entry.input, scratchIntsRef), entry.output);
  }
}
项目:lams    文件:Dictionary.java   
/**
 * Builds an FST that maps each affix key to the list of integers stored
 * for it, in the same order as the list.
 *
 * @param affixes affix keys with their integer lists; iterated in the
 *        TreeMap's key order
 * @return the FST returned by the builder
 * @throws IOException if the FST builder reports an I/O error
 */
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
  final IntSequenceOutputs intOutputs = IntSequenceOutputs.getSingleton();
  final Builder<IntsRef> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE4, intOutputs);
  final IntsRefBuilder keyScratch = new IntsRefBuilder();

  for (Map.Entry<String,List<Integer>> affix : affixes.entrySet()) {
    Util.toUTF32(affix.getKey(), keyScratch);

    // Copy the boxed list into an IntsRef sized to fit it exactly.
    final List<Integer> ids = affix.getValue();
    final IntsRef packed = new IntsRef(ids.size());
    for (int id : ids) {
      packed.ints[packed.length] = id;
      packed.length++;
    }

    fstBuilder.add(keyScratch.get(), packed);
  }
  return fstBuilder.finish();
}
项目:Elasticsearch    文件:XAnalyzingSuggester.java   
/**
 * Sets up the FST builder (outputs are pairs of a positive integer and a
 * byte sequence) and the buffer that holds surface forms.
 *
 * @param maxSurfaceFormsPerAnalyzedForm length of the surface-form buffer
 * @param hasPayloads stored as-is on this builder
 * @param payloadSep stored as-is on this builder
 */
public XBuilder(int maxSurfaceFormsPerAnalyzedForm, boolean hasPayloads, int payloadSep) {
    this.hasPayloads = hasPayloads;
    this.payloadSep = payloadSep;
    this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;

    this.outputs = new PairOutputs<>(
            PositiveIntOutputs.getSingleton(),
            ByteSequenceOutputs.getSingleton());
    this.builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);

    surfaceFormsAndPayload = new SurfaceFormAndPayload[maxSurfaceFormsPerAnalyzedForm];
}
项目:search    文件:FSTTermsWriter.java   
/**
 * Prepares a writer for one field: registers the field with the postings
 * writer, keeps the value it returns as {@code longsSize}, and creates the
 * term outputs and FST builder from it.
 *
 * @param fieldInfo the field this writer handles
 */
TermsWriter(FieldInfo fieldInfo) {
  this.fieldInfo = fieldInfo;
  numTerms = 0;

  // longsSize must be known before the outputs can be created.
  longsSize = postingsWriter.setField(fieldInfo);
  outputs = new FSTTermOutputs(fieldInfo, longsSize);
  builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
}
项目:search    文件:MemoryPostingsFormat.java   
/**
 * Stores the given settings and creates the FST builder for this field.
 *
 * @param out output stored on this writer
 * @param field field stored on this writer
 * @param doPackFST forwarded to the FST builder
 * @param acceptableOverheadRatio forwarded to the FST builder
 */
public TermsWriter(IndexOutput out, FieldInfo field, boolean doPackFST, float acceptableOverheadRatio) {
  this.field = field;
  this.out = out;
  this.acceptableOverheadRatio = acceptableOverheadRatio;
  this.doPackFST = doPackFST;

  // NOTE(review): the meaning of the positional literals is defined by the
  // Builder constructor, which is not visible here -- confirm there.
  builder = new Builder<>(
      FST.INPUT_TYPE.BYTE1,
      0, 0,
      true, true,
      Integer.MAX_VALUE,
      outputs,
      doPackFST,
      acceptableOverheadRatio,
      true,
      15);
}
项目:search    文件:FSTOrdTermsWriter.java   
/**
 * Prepares a writer for one field: registers the field with the postings
 * writer, creates the FST builder, and resets all block bookkeeping.
 *
 * @param fieldInfo the field this writer handles
 */
TermsWriter(FieldInfo fieldInfo) {
  this.fieldInfo = fieldInfo;
  numTerms = 0;

  // The value returned by setField sizes both long[] buffers below.
  longsSize = postingsWriter.setField(fieldInfo);
  outputs = PositiveIntOutputs.getSingleton();
  builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);

  // Per-block state starts at zero.
  lastBlockStatsFP = 0;
  lastBlockMetaLongsFP = 0;
  lastBlockMetaBytesFP = 0;
  lastBlockLongs = new long[longsSize];

  // Per-term state starts at zero.
  lastLongs = new long[longsSize];
  lastMetaBytesFP = 0;
}
项目:search    文件:OrdsBlockTreeTermsWriter.java   
/**
 * Copies every entry of {@code subIndex} into {@code builder}, rewriting
 * each output's ordinals by {@code termOrdOffset}.
 *
 * @param builder destination FST builder
 * @param subIndex FST whose entries are appended
 * @param termOrdOffset offset applied to the ordinals of each output
 * @param scratchIntsRef reusable buffer for converting each input
 * @throws IOException if enumerating or adding fails
 */
private void append(Builder<Output> builder, FST<Output> subIndex, long termOrdOffset, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<Output> subIndexEnum = new BytesRefFSTEnum<>(subIndex);
  BytesRefFSTEnum.InputOutput<Output> indexEnt;
  while ((indexEnt = subIndexEnum.next()) != null) {
    //if (DEBUG) {
    //  System.out.println("      add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
    //}
    Output output = indexEnt.output;
    // NOTE(review): the offset is added to startOrd but subtracted from
    // endOrd; presumably endOrd is stored inverted (counted down from
    // Long.MAX_VALUE) -- confirm against the Output definition.
    Output newOutput = FST_OUTPUTS.newOutput(output.bytes, termOrdOffset+output.startOrd, output.endOrd-termOrdOffset);
    builder.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), newOutput);
  }
}
项目:search    文件:VariableGapTermsIndexWriter.java   
public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer) throws IOException {
  this.fieldInfo = fieldInfo;
  fstOutputs = PositiveIntOutputs.getSingleton();
  fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, fstOutputs);
  indexStart = out.getFilePointer();
  ////System.out.println("VGW: field=" + fieldInfo.name);

  // Always put empty string in
  fstBuilder.add(new IntsRef(), termsFilePointer);
  startTermsFilePointer = termsFilePointer;
}
项目:search    文件:VariableGapTermsIndexReader.java   
/**
 * Loads the terms-index FST on first use; does nothing when it is already
 * loaded.
 *
 * The FST is read from a clone of {@code in} positioned at
 * {@code indexStart}. When {@code indexDivisor} is greater than one the
 * loaded FST is replaced by a subsampled one that keeps the first entry and
 * then every {@code indexDivisor}-th entry after it.
 *
 * @throws IOException if reading or rebuilding the FST fails
 */
private void loadTermsIndex() throws IOException {
  if (fst == null) {
    final IndexInput clone = in.clone();
    try {
      clone.seek(indexStart);
      fst = new FST<>(clone, fstOutputs);
    } finally {
      // Close the clone even when seeking or decoding the FST fails.
      clone.close();
    }

    /*
    final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
    Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
    Util.toDot(fst, w, false, false);
    System.out.println("FST INDEX: SAVED to " + dotFileName);
    w.close();
    */

    if (indexDivisor > 1) {
      // subsample
      final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
      final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
      final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
      BytesRefFSTEnum.InputOutput<Long> result;
      // Starting at indexDivisor makes the very first entry qualify.
      int count = indexDivisor;
      while((result = fstEnum.next()) != null) {
        if (count == indexDivisor) {
          builder.add(Util.toIntsRef(result.input, scratchIntsRef), result.output);
          count = 0;
        }
        count++;
      }
      fst = builder.finish();
    }
  }
}
项目:search    文件:VersionBlockTreeTermsWriter.java   
/**
 * Copies every input/output pair of {@code subIndex} into {@code builder},
 * in the order the FST enumeration returns them.
 *
 * @param builder destination FST builder
 * @param subIndex FST whose entries are appended
 * @param scratchIntsRef reusable buffer for converting each input
 * @throws IOException if enumerating or adding fails
 */
private void append(Builder<Pair<BytesRef,Long>> builder, FST<Pair<BytesRef,Long>> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<Pair<BytesRef,Long>> entries = new BytesRefFSTEnum<>(subIndex);
  for (BytesRefFSTEnum.InputOutput<Pair<BytesRef,Long>> entry = entries.next();
       entry != null;
       entry = entries.next()) {
    //if (DEBUG) {
    //  System.out.println("      add sub=" + entry.input + " " + entry.input + " output=" + entry.output);
    //}
    builder.add(Util.toIntsRef(entry.input, scratchIntsRef), entry.output);
  }
}
项目:search    文件:Dictionary.java   
/**
 * Converts the affix table into an FST whose output for each key is the
 * key's integer list, preserved in list order.
 *
 * @param affixes affix keys with their integer lists; iterated in the
 *        TreeMap's key order
 * @return the FST returned by the builder
 * @throws IOException if the FST builder reports an I/O error
 */
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
  final IntSequenceOutputs intOutputs = IntSequenceOutputs.getSingleton();
  final Builder<IntsRef> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE4, intOutputs);
  final IntsRefBuilder utf32Scratch = new IntsRefBuilder();

  for (Map.Entry<String,List<Integer>> affix : affixes.entrySet()) {
    final List<Integer> values = affix.getValue();
    Util.toUTF32(affix.getKey(), utf32Scratch);

    // Append each list element to an IntsRef sized for the whole list.
    final IntsRef packed = new IntsRef(values.size());
    for (int value : values) {
      packed.ints[packed.length] = value;
      packed.length++;
    }
    fstBuilder.add(utf32Scratch.get(), packed);
  }
  return fstBuilder.finish();
}
项目:search    文件:BlockTreeTermsWriter.java   
/**
 * Adds all entries of {@code subIndex} to {@code builder}, preserving the
 * enumeration order and each entry's output.
 *
 * @param builder destination FST builder
 * @param subIndex FST whose entries are appended
 * @param scratchIntsRef reusable buffer for converting each input
 * @throws IOException if enumerating or adding fails
 */
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<BytesRef> cursor = new BytesRefFSTEnum<>(subIndex);
  BytesRefFSTEnum.InputOutput<BytesRef> current = cursor.next();
  while (current != null) {
    //if (DEBUG) {
    //  System.out.println("      add sub=" + current.input + " " + current.input + " output=" + current.output);
    //}
    builder.add(Util.toIntsRef(current.input, scratchIntsRef), current.output);
    current = cursor.next();
  }
}
项目:NYBC    文件:MemoryPostingsFormat.java   
/**
 * Stores the given settings and creates the FST builder for this field.
 *
 * @param out output stored on this writer
 * @param field field stored on this writer
 * @param doPackFST forwarded to the FST builder
 * @param acceptableOverheadRatio forwarded to the FST builder
 */
public TermsWriter(IndexOutput out, FieldInfo field, boolean doPackFST, float acceptableOverheadRatio) {
  this.field = field;
  this.out = out;
  this.acceptableOverheadRatio = acceptableOverheadRatio;
  this.doPackFST = doPackFST;

  // NOTE(review): the meaning of the positional literals (and of the null
  // argument) is defined by the Builder constructor, which is not visible
  // here -- confirm there.
  builder = new Builder<BytesRef>(
      FST.INPUT_TYPE.BYTE1,
      0, 0,
      true, true,
      Integer.MAX_VALUE,
      outputs,
      null,
      doPackFST,
      acceptableOverheadRatio,
      true,
      15);
}
项目:NYBC    文件:VariableGapTermsIndexWriter.java   
public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer) throws IOException {
  this.fieldInfo = fieldInfo;
  fstOutputs = PositiveIntOutputs.getSingleton(true);
  fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, fstOutputs);
  indexStart = out.getFilePointer();
  ////System.out.println("VGW: field=" + fieldInfo.name);

  // Always put empty string in
  fstBuilder.add(new IntsRef(), termsFilePointer);
  startTermsFilePointer = termsFilePointer;
}
项目:NYBC    文件:VariableGapTermsIndexReader.java   
/**
 * Loads the terms-index FST on first use; does nothing when it is already
 * loaded.
 *
 * The FST is read from a clone of {@code in} positioned at
 * {@code indexStart}. When {@code indexDivisor} is greater than one the
 * loaded FST is replaced by a subsampled one that keeps the first entry and
 * then every {@code indexDivisor}-th entry after it.
 *
 * @throws IOException if reading or rebuilding the FST fails
 */
private void loadTermsIndex() throws IOException {
  if (fst == null) {
    final IndexInput clone = in.clone();
    try {
      clone.seek(indexStart);
      fst = new FST<Long>(clone, fstOutputs);
    } finally {
      // Close the clone even when seeking or decoding the FST fails.
      clone.close();
    }

    /*
    final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
    Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
    Util.toDot(fst, w, false, false);
    System.out.println("FST INDEX: SAVED to " + dotFileName);
    w.close();
    */

    if (indexDivisor > 1) {
      // subsample
      final IntsRef scratchIntsRef = new IntsRef();
      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
      final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
      final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
      BytesRefFSTEnum.InputOutput<Long> result;
      // Starting at indexDivisor makes the very first entry qualify.
      int count = indexDivisor;
      while((result = fstEnum.next()) != null) {
        if (count == indexDivisor) {
          builder.add(Util.toIntsRef(result.input, scratchIntsRef), result.output);
          count = 0;
        }
        count++;
      }
      fst = builder.finish();
    }
  }
}