Example source code for the Java class org.apache.lucene.util.fst.PairOutputs
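
PairOutputs composes two Outputs implementations so that every FST arc carries a pair of values, for example a weight plus a payload, or a file pointer plus per-term statistics, which is exactly how the snippets on this page use it. As a quick orientation, here is a minimal, self-contained sketch; it assumes a Lucene 5.x/6.x-era FST API (the same generation as the "search" snippets below), and the class name, terms and values are invented for illustration.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

public class PairOutputsDemo {
  public static void main(String[] args) throws Exception {
    // Pair a long weight with a BytesRef payload, the same output type the
    // AnalyzingSuggester/XAnalyzingSuggester snippets below deserialize.
    PairOutputs<Long, BytesRef> outputs =
        new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
    Builder<Pair<Long, BytesRef>> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);

    IntsRefBuilder scratch = new IntsRefBuilder();
    // Inputs must be added in sorted order.
    builder.add(Util.toIntsRef(new BytesRef("cat"), scratch),
                outputs.newPair(5L, new BytesRef("payload-cat")));
    builder.add(Util.toIntsRef(new BytesRef("dog"), scratch),
                outputs.newPair(7L, new BytesRef("payload-dog")));
    FST<Pair<Long, BytesRef>> fst = builder.finish();

    // Exact lookup returns the pair stored for the key.
    Pair<Long, BytesRef> hit = Util.get(fst, new BytesRef("dog"));
    System.out.println(hit.output1 + " / " + hit.output2.utf8ToString());  // 7 / payload-dog
  }
}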

Project: elasticsearch_my    File: XFuzzySuggester.java
@Override
protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths(
    List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths, Automaton lookupAutomaton,
    FST<PairOutputs.Pair<Long,BytesRef>> fst)
        throws IOException {

    // TODO: right now there's no penalty for fuzzy/edits,
    // ie a completion whose prefix matched exactly what the
    // user typed gets no boost over completions that
    // required an edit, which get no boost over completions
    // requiring two edits.  I suspect a multiplicative
    // factor is appropriate (eg, say a fuzzy match must be at
    // least 2X better weight than the non-fuzzy match to
    // "compete") ... in which case I think the wFST needs
    // to be log weights or something ...

    Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
/*
  Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
  w.write(levA.toDot());
  w.close();
  System.out.println("Wrote LevA to out.dot");
*/
    return FSTUtil.intersectPrefixPaths(levA, fst);
}
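
The fuzzy step above turns the analyzed lookup key into a Levenshtein automaton before intersecting it with the suggester FST. Below is a hedged sketch of that automaton piece in isolation, assuming the Lucene 5.x/6.x automaton API; the real toLevenshteinAutomata also applies nonFuzzyPrefix/minFuzzyLength handling that is not shown here, and the sample strings are made up.

import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.LevenshteinAutomata;

public class LevenshteinSketch {
  public static void main(String[] args) {
    // Accept every string within edit distance 1 of "lucene", treating a
    // transposition as a single primitive edit (second constructor argument).
    Automaton levA = new LevenshteinAutomata("lucene", true).toAutomaton(1);

    CharacterRunAutomaton run = new CharacterRunAutomaton(levA);
    System.out.println(run.run("lucene"));   // true  (0 edits)
    System.out.println(run.run("lucenes"));  // true  (1 insertion)
    System.out.println(run.run("lcuene"));   // true  (1 transposition)
    System.out.println(run.run("lucre"));    // false (2 edits)
  }
}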
Project: Elasticsearch    File: XFuzzySuggester.java
@Override
protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths,
                                                                                 Automaton lookupAutomaton,
                                                                                 FST<PairOutputs.Pair<Long,BytesRef>> fst)
        throws IOException {

    // TODO: right now there's no penalty for fuzzy/edits,
    // ie a completion whose prefix matched exactly what the
    // user typed gets no boost over completions that
    // required an edit, which get no boost over completions
    // requiring two edits.  I suspect a multiplicative
    // factor is appropriate (eg, say a fuzzy match must be at
    // least 2X better weight than the non-fuzzy match to
    // "compete") ... in which case I think the wFST needs
    // to be log weights or something ...

    Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
/*
  Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
  w.write(levA.toDot());
  w.close();
  System.out.println("Wrote LevA to out.dot");
*/
    return FSTUtil.intersectPrefixPaths(levA, fst);
}
Project: search    File: SimpleTextFieldsReader.java
@Override
public SeekStatus seekCeil(BytesRef text) throws IOException {

  //System.out.println("seek to text=" + text.utf8ToString());
  final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekCeil(text);
  if (result == null) {
    //System.out.println("  end");
    return SeekStatus.END;
  } else {
    //System.out.println("  got text=" + term.utf8ToString());
    PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
    PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
    docsStart = pair1.output1;
    docFreq = pair2.output1.intValue();
    totalTermFreq = pair2.output2;

    if (result.input.equals(text)) {
      //System.out.println("  match docsStart=" + docsStart);
      return SeekStatus.FOUND;
    } else {
      //System.out.println("  not match docsStart=" + docsStart);
      return SeekStatus.NOT_FOUND;
    }
  }
}
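
For reference, the nested output these SimpleTextFieldsReader snippets unpack is term -> (docsStart, (docFreq, totalTermFreq)). Here is a self-contained, hedged sketch that builds that exact shape directly and exercises seekCeil, again assuming a Lucene 5.x/6.x-era FST API; the terms and numbers are invented.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

public class NestedPairSeekCeilDemo {
  public static void main(String[] args) throws Exception {
    PositiveIntOutputs longs = PositiveIntOutputs.getSingleton();
    PairOutputs<Long, Long> inner = new PairOutputs<>(longs, longs);              // (docFreq, totalTermFreq)
    PairOutputs<Long, Pair<Long, Long>> outer = new PairOutputs<>(longs, inner);  // (docsStart, inner)

    Builder<Pair<Long, Pair<Long, Long>>> b = new Builder<>(FST.INPUT_TYPE.BYTE1, outer);
    IntsRefBuilder scratch = new IntsRefBuilder();
    b.add(Util.toIntsRef(new BytesRef("apple"), scratch), outer.newPair(100L, inner.newPair(3L, 7L)));
    b.add(Util.toIntsRef(new BytesRef("banana"), scratch), outer.newPair(250L, inner.newPair(1L, 1L)));
    FST<Pair<Long, Pair<Long, Long>>> fst = b.finish();

    BytesRefFSTEnum<Pair<Long, Pair<Long, Long>>> fstEnum = new BytesRefFSTEnum<>(fst);
    // "apricot" is not present; seekCeil lands on the next term in order, "banana".
    BytesRefFSTEnum.InputOutput<Pair<Long, Pair<Long, Long>>> result = fstEnum.seekCeil(new BytesRef("apricot"));
    if (result != null) {
      System.out.println("ceil term     = " + result.input.utf8ToString());
      System.out.println("docsStart     = " + result.output.output1);
      System.out.println("docFreq       = " + result.output.output2.output1);
      System.out.println("totalTermFreq = " + result.output.output2.output2);
    }
  }
}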
Project: NYBC    File: SimpleTextFieldsReader.java
@Override
public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {

  //System.out.println("seek to text=" + text.utf8ToString());
  final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekCeil(text);
  if (result == null) {
    //System.out.println("  end");
    return SeekStatus.END;
  } else {
    //System.out.println("  got text=" + term.utf8ToString());
    PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
    PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
    docsStart = pair1.output1;
    docFreq = pair2.output1.intValue();
    totalTermFreq = pair2.output2;

    if (result.input.equals(text)) {
      //System.out.println("  match docsStart=" + docsStart);
      return SeekStatus.FOUND;
    } else {
      //System.out.println("  not match docsStart=" + docsStart);
      return SeekStatus.NOT_FOUND;
    }
  }
}
Project: read-open-source-code    File: SimpleTextFieldsReader.java
@Override
public SeekStatus seekCeil(BytesRef text) throws IOException {

  //System.out.println("seek to text=" + text.utf8ToString());
  final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekCeil(text);
  if (result == null) {
    //System.out.println("  end");
    return SeekStatus.END;
  } else {
    //System.out.println("  got text=" + term.utf8ToString());
    PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
    PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
    docsStart = pair1.output1;
    docFreq = pair2.output1.intValue();
    totalTermFreq = pair2.output2;

    if (result.input.equals(text)) {
      //System.out.println("  match docsStart=" + docsStart);
      return SeekStatus.FOUND;
    } else {
      //System.out.println("  not match docsStart=" + docsStart);
      return SeekStatus.NOT_FOUND;
    }
  }
}
Project: read-open-source-code    File: SimpleTextFieldsReader.java
@Override
public SeekStatus seekCeil(BytesRef text) throws IOException {

  //System.out.println("seek to text=" + text.utf8ToString());
  final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekCeil(text);
  if (result == null) {
    //System.out.println("  end");
    return SeekStatus.END;
  } else {
    //System.out.println("  got text=" + term.utf8ToString());
    PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
    PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
    docsStart = pair1.output1;
    docFreq = pair2.output1.intValue();
    totalTermFreq = pair2.output2;

    if (result.input.equals(text)) {
      //System.out.println("  match docsStart=" + docsStart);
      return SeekStatus.FOUND;
    } else {
      //System.out.println("  not match docsStart=" + docsStart);
      return SeekStatus.NOT_FOUND;
    }
  }
}
Project: read-open-source-code    File: SimpleTextFieldsReader.java
@Override
public SeekStatus seekCeil(BytesRef text) throws IOException {

  //System.out.println("seek to text=" + text.utf8ToString());
  final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekCeil(text);
  if (result == null) {
    //System.out.println("  end");
    return SeekStatus.END;
  } else {
    //System.out.println("  got text=" + term.utf8ToString());
    PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
    PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
    docsStart = pair1.output1;
    docFreq = pair2.output1.intValue();
    totalTermFreq = pair2.output2;

    if (result.input.equals(text)) {
      //System.out.println("  match docsStart=" + docsStart);
      return SeekStatus.FOUND;
    } else {
      //System.out.println("  not match docsStart=" + docsStart);
      return SeekStatus.NOT_FOUND;
    }
  }
}
Project: Maskana-Gestor-de-Conocimiento    File: SimpleTextFieldsReader.java
@Override
public SeekStatus seekCeil(BytesRef text) throws IOException {

  //System.out.println("seek to text=" + text.utf8ToString());
  final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekCeil(text);
  if (result == null) {
    //System.out.println("  end");
    return SeekStatus.END;
  } else {
    //System.out.println("  got text=" + term.utf8ToString());
    PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
    PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
    docsStart = pair1.output1;
    docFreq = pair2.output1.intValue();
    totalTermFreq = pair2.output2;

    if (result.input.equals(text)) {
      //System.out.println("  match docsStart=" + docsStart);
      return SeekStatus.FOUND;
    } else {
      //System.out.println("  not match docsStart=" + docsStart);
      return SeekStatus.NOT_FOUND;
    }
  }
}
Project: elasticsearch_my    File: XAnalyzingSuggester.java
@Override
public boolean load(InputStream input) throws IOException {
  DataInput dataIn = new InputStreamDataInput(input);
  try {
    this.fst = new FST<>(dataIn, new PairOutputs<>(
        PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
    maxAnalyzedPathsForOneInput = dataIn.readVInt();
    hasPayloads = dataIn.readByte() == 1;
  } finally {
    IOUtils.close(input);
  }
  return true;
}
Project: elasticsearch_my    File: XAnalyzingSuggester.java
@Override
public boolean load(DataInput input) throws IOException {
  count = input.readVLong();
  this.fst = new FST<>(input, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
  maxAnalyzedPathsForOneInput = input.readVInt();
  hasPayloads = input.readByte() == 1;
  return true;
}
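
The load methods above only work if the FST is reconstructed with the same composite Outputs it was written with. Here is a hedged round-trip sketch under that assumption (Lucene 5.x/6.x-era API; the in-memory byte stream and the toy entry stand in for the suggester's real storage).

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

public class FstRoundTripSketch {
  public static void main(String[] args) throws Exception {
    PairOutputs<Long, BytesRef> outputs =
        new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
    Builder<Pair<Long, BytesRef>> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
    IntsRefBuilder scratch = new IntsRefBuilder();
    builder.add(Util.toIntsRef(new BytesRef("foo"), scratch), outputs.newPair(42L, new BytesRef("bar")));
    FST<Pair<Long, BytesRef>> original = builder.finish();

    // Serialize the FST...
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    original.save(new OutputStreamDataOutput(bytes));

    // ...and read it back with a freshly constructed, identical PairOutputs,
    // exactly as the load(...) implementations above do.
    FST<Pair<Long, BytesRef>> loaded = new FST<>(
        new InputStreamDataInput(new ByteArrayInputStream(bytes.toByteArray())), outputs);
    Pair<Long, BytesRef> hit = Util.get(loaded, new BytesRef("foo"));
    System.out.println(hit.output1 + " / " + hit.output2.utf8ToString());  // 42 / bar
  }
}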
Project: elasticsearch_my    File: XAnalyzingSuggester.java
public XBuilder(int maxSurfaceFormsPerAnalyzedForm, boolean hasPayloads, int payloadSep) {
    this.payloadSep = payloadSep;
    this.outputs = new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
    this.builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
    this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;
    this.hasPayloads = hasPayloads;
    surfaceFormsAndPayload = new SurfaceFormAndPayload[maxSurfaceFormsPerAnalyzedForm];

}
Project: Elasticsearch    File: XAnalyzingSuggester.java
@Override
public boolean load(InputStream input) throws IOException {
  DataInput dataIn = new InputStreamDataInput(input);
  try {
    this.fst = new FST<>(dataIn, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
    maxAnalyzedPathsForOneInput = dataIn.readVInt();
    hasPayloads = dataIn.readByte() == 1;
  } finally {
    IOUtils.close(input);
  }
  return true;
}
Project: Elasticsearch    File: XAnalyzingSuggester.java
@Override
public boolean load(DataInput input) throws IOException {
  count = input.readVLong();
  this.fst = new FST<>(input, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
  maxAnalyzedPathsForOneInput = input.readVInt();
  hasPayloads = input.readByte() == 1;
  return true;
}
Project: Elasticsearch    File: XAnalyzingSuggester.java
public XBuilder(int maxSurfaceFormsPerAnalyzedForm, boolean hasPayloads, int payloadSep) {
    this.payloadSep = payloadSep;
    this.outputs = new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
    this.builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
    this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;
    this.hasPayloads = hasPayloads;
    surfaceFormsAndPayload = new SurfaceFormAndPayload[maxSurfaceFormsPerAnalyzedForm];

}
Project: search    File: SimpleTextFieldsReader.java
@Override
public boolean seekExact(BytesRef text) throws IOException {

  final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekExact(text);
  if (result != null) {
    PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
    PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
    docsStart = pair1.output1;
    docFreq = pair2.output1.intValue();
    totalTermFreq = pair2.output2;
    return true;
  } else {
    return false;
  }
}
Project: search    File: AnalyzingSuggester.java
@Override
public boolean load(DataInput input) throws IOException {
  count = input.readVLong();
  this.fst = new FST<>(input, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
  maxAnalyzedPathsForOneInput = input.readVInt();
  hasPayloads = input.readByte() == 1;
  return true;
}
Project: NYBC    File: SimpleTextFieldsReader.java
@Override
public boolean seekExact(BytesRef text, boolean useCache /* ignored */) throws IOException {

  final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekExact(text);
  if (result != null) {
    PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
    PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
    docsStart = pair1.output1;
    docFreq = pair2.output1.intValue();
    totalTermFreq = pair2.output2;
    return true;
  } else {
    return false;
  }
}
Project: NYBC    File: AnalyzingSuggester.java
@Override
public boolean load(InputStream input) throws IOException {
  DataInput dataIn = new InputStreamDataInput(input);
  try {
    this.fst = new FST<Pair<Long,BytesRef>>(dataIn, new PairOutputs<Long,BytesRef>(PositiveIntOutputs.getSingleton(true), ByteSequenceOutputs.getSingleton()));
    maxAnalyzedPathsForOneInput = dataIn.readVInt();
  } finally {
    IOUtils.close(input);
  }
  return true;
}
Project: read-open-source-code    File: SimpleTextFieldsReader.java
@Override
public boolean seekExact(BytesRef text) throws IOException {

  final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekExact(text);
  if (result != null) {
    PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
    PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
    docsStart = pair1.output1;
    docFreq = pair2.output1.intValue();
    totalTermFreq = pair2.output2;
    return true;
  } else {
    return false;
  }
}
Project: read-open-source-code    File: AnalyzingSuggester.java
@Override
public boolean load(DataInput input) throws IOException {
  count = input.readVLong();
  this.fst = new FST<Pair<Long,BytesRef>>(input, new PairOutputs<Long,BytesRef>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
  maxAnalyzedPathsForOneInput = input.readVInt();
  hasPayloads = input.readByte() == 1;
  return true;
}
Project: read-open-source-code    File: SimpleTextFieldsReader.java
@Override
public boolean seekExact(BytesRef text) throws IOException {

  final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekExact(text);
  if (result != null) {
    PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
    PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
    docsStart = pair1.output1;
    docFreq = pair2.output1.intValue();
    totalTermFreq = pair2.output2;
    return true;
  } else {
    return false;
  }
}
Project: read-open-source-code    File: AnalyzingSuggester.java
@Override
public boolean load(DataInput input) throws IOException {
  count = input.readVLong();
  this.fst = new FST<Pair<Long,BytesRef>>(input, new PairOutputs<Long,BytesRef>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
  maxAnalyzedPathsForOneInput = input.readVInt();
  hasPayloads = input.readByte() == 1;
  return true;
}
Project: read-open-source-code    File: SimpleTextFieldsReader.java
@Override
public boolean seekExact(BytesRef text) throws IOException {

  final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekExact(text);
  if (result != null) {
    PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
    PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
    docsStart = pair1.output1;
    docFreq = pair2.output1.intValue();
    totalTermFreq = pair2.output2;
    return true;
  } else {
    return false;
  }
}
Project: read-open-source-code    File: AnalyzingSuggester.java
@Override
public boolean load(DataInput input) throws IOException {
  count = input.readVLong();
  this.fst = new FST<>(input, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
  maxAnalyzedPathsForOneInput = input.readVInt();
  hasPayloads = input.readByte() == 1;
  return true;
}
Project: Maskana-Gestor-de-Conocimiento    File: SimpleTextFieldsReader.java
@Override
public boolean seekExact(BytesRef text) throws IOException {

  final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekExact(text);
  if (result != null) {
    PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
    PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
    docsStart = pair1.output1;
    docFreq = pair2.output1.intValue();
    totalTermFreq = pair2.output2;
    return true;
  } else {
    return false;
  }
}
Project: Maskana-Gestor-de-Conocimiento    File: AnalyzingSuggester.java
@Override
public boolean load(InputStream input) throws IOException {
  DataInput dataIn = new InputStreamDataInput(input);
  try {
    this.fst = new FST<Pair<Long,BytesRef>>(dataIn, new PairOutputs<Long,BytesRef>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
    maxAnalyzedPathsForOneInput = dataIn.readVInt();
    hasPayloads = dataIn.readByte() == 1;
  } finally {
    IOUtils.close(input);
  }
  return true;
}
Project: elasticsearch_my    File: XFuzzySuggester.java
/**
 * Creates an {@link XFuzzySuggester} instance.
 *
 * @param indexAnalyzer Analyzer that will be used for
 *        analyzing suggestions while building the index.
 * @param queryAnalyzer Analyzer that will be used for
 *        analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *        surface forms to keep for a single analyzed form.
 *        When there are too many surface forms we discard the
 *        lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *        to expand from the analyzed form.  Set this to -1 for
 *        no limit.
 * @param maxEdits must be &gt;= 0 and &lt;= {@link org.apache.lucene.util.automaton.LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} .
 * @param transpositions <code>true</code> if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 * @param nonFuzzyPrefix length of common (non-fuzzy) prefix (see default {@link #DEFAULT_NON_FUZZY_PREFIX})
 * @param minFuzzyLength minimum length of lookup key before any edits are allowed (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
 * @param sepLabel separation label
 * @param payloadSep payload separator byte
 * @param endByte end byte marker byte
 */
public XFuzzySuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer,
                       int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
                       int maxEdits, boolean transpositions, int nonFuzzyPrefix, int minFuzzyLength,
                       boolean unicodeAware, FST<PairOutputs.Pair<Long, BytesRef>> fst, boolean hasPayloads,
                       int maxAnalyzedPathsForOneInput, int sepLabel, int payloadSep, int endByte, int holeCharacter) {
    super(indexAnalyzer, queryPrefix, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions,
        true, fst, hasPayloads, maxAnalyzedPathsForOneInput, sepLabel, payloadSep, endByte, holeCharacter);
    if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
        throw new IllegalArgumentException(
            "maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
    }
    if (nonFuzzyPrefix < 0) {
        throw new IllegalArgumentException("nonFuzzyPrefix must be >= 0 (got " + nonFuzzyPrefix + ")");
    }
    if (minFuzzyLength < 0) {
        throw new IllegalArgumentException("minFuzzyLength must be >= 0 (got " + minFuzzyLength + ")");
    }

    this.maxEdits = maxEdits;
    this.transpositions = transpositions;
    this.nonFuzzyPrefix = nonFuzzyPrefix;
    this.minFuzzyLength = minFuzzyLength;
    this.unicodeAware = unicodeAware;
}
Project: Elasticsearch    File: XFuzzySuggester.java
/**
 * Creates an {@link XFuzzySuggester} instance.
 *
 * @param indexAnalyzer Analyzer that will be used for
 *        analyzing suggestions while building the index.
 * @param queryAnalyzer Analyzer that will be used for
 *        analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *        surface forms to keep for a single analyzed form.
 *        When there are too many surface forms we discard the
 *        lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *        to expand from the analyzed form.  Set this to -1 for
 *        no limit.
 * @param maxEdits must be &gt;= 0 and &lt;= {@link org.apache.lucene.util.automaton.LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} .
 * @param transpositions <code>true</code> if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 * @param nonFuzzyPrefix length of common (non-fuzzy) prefix (see default {@link #DEFAULT_NON_FUZZY_PREFIX})
 * @param minFuzzyLength minimum length of lookup key before any edits are allowed (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
 * @param sepLabel separation label
 * @param payloadSep payload separator byte
 * @param endByte end byte marker byte
 */
public XFuzzySuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
                       int maxEdits, boolean transpositions, int nonFuzzyPrefix, int minFuzzyLength, boolean unicodeAware,
                       FST<PairOutputs.Pair<Long, BytesRef>> fst, boolean hasPayloads, int maxAnalyzedPathsForOneInput,
                       int sepLabel, int payloadSep, int endByte, int holeCharacter) {
    super(indexAnalyzer, queryPrefix, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, true, fst, hasPayloads, maxAnalyzedPathsForOneInput, sepLabel, payloadSep, endByte, holeCharacter);
    if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
        throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
    }
    if (nonFuzzyPrefix < 0) {
        throw new IllegalArgumentException("nonFuzzyPrefix must be >= 0 (got " + nonFuzzyPrefix + ")");
    }
    if (minFuzzyLength < 0) {
        throw new IllegalArgumentException("minFuzzyLength must be >= 0 (got " + minFuzzyLength + ")");
    }

    this.maxEdits = maxEdits;
    this.transpositions = transpositions;
    this.nonFuzzyPrefix = nonFuzzyPrefix;
    this.minFuzzyLength = minFuzzyLength;
    this.unicodeAware = unicodeAware;
}
Project: search    File: SimpleTextFieldsReader.java
public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst, IndexOptions indexOptions) {
  this.indexOptions = indexOptions;
  fstEnum = new BytesRefFSTEnum<>(fst);
}
Project: search    File: SimpleTextFieldsReader.java
private void loadTerms() throws IOException {
  PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
  final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
  final PairOutputs<Long,Long> outputsInner = new PairOutputs<>(posIntOutputs, posIntOutputs);
  final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<>(posIntOutputs, outputsInner);
  b = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
  IndexInput in = SimpleTextFieldsReader.this.in.clone();
  in.seek(termsStart);
  final BytesRefBuilder lastTerm = new BytesRefBuilder();
  long lastDocsStart = -1;
  int docFreq = 0;
  long totalTermFreq = 0;
  FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
  final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
  while(true) {
    SimpleTextUtil.readLine(in, scratch);
    if (scratch.get().equals(END) || StringHelper.startsWith(scratch.get(), FIELD)) {
      if (lastDocsStart != -1) {
        b.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef),
              outputs.newPair(lastDocsStart,
                              outputsInner.newPair((long) docFreq, totalTermFreq)));
        sumTotalTermFreq += totalTermFreq;
      }
      break;
    } else if (StringHelper.startsWith(scratch.get(), DOC)) {
      docFreq++;
      sumDocFreq++;
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length);
      int docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
      visitedDocs.set(docID);
    } else if (StringHelper.startsWith(scratch.get(), FREQ)) {
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length);
      totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
    } else if (StringHelper.startsWith(scratch.get(), TERM)) {
      if (lastDocsStart != -1) {
        b.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef), outputs.newPair(lastDocsStart,
                                                                        outputsInner.newPair((long) docFreq, totalTermFreq)));
      }
      lastDocsStart = in.getFilePointer();
      final int len = scratch.length() - TERM.length;
      lastTerm.grow(len);
      System.arraycopy(scratch.bytes(), TERM.length, lastTerm.bytes(), 0, len);
      lastTerm.setLength(len);
      docFreq = 0;
      sumTotalTermFreq += totalTermFreq;
      totalTermFreq = 0;
      termCount++;
    }
  }
  docCount = visitedDocs.cardinality();
  fst = b.finish();
  /*
  PrintStream ps = new PrintStream("out.dot");
  fst.toDot(ps);
  ps.close();
  System.out.println("SAVED out.dot");
  */
  //System.out.println("FST " + fst.sizeInBytes());
}
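
loadTerms above streams the plain-text postings file and records, per term, the file pointer plus (docFreq, totalTermFreq) as a nested pair. A hedged sketch of walking an FST of that shape back in term order with BytesRefFSTEnum, assuming the same Lucene 5.x/6.x-era API; the toy terms and statistics are invented.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

public class WalkTermsFstSketch {
  public static void main(String[] args) throws Exception {
    PositiveIntOutputs longs = PositiveIntOutputs.getSingleton();
    PairOutputs<Long, Long> inner = new PairOutputs<>(longs, longs);               // (docFreq, totalTermFreq)
    PairOutputs<Long, Pair<Long, Long>> outer = new PairOutputs<>(longs, inner);   // (docsStart, inner)

    Builder<Pair<Long, Pair<Long, Long>>> b = new Builder<>(FST.INPUT_TYPE.BYTE1, outer);
    IntsRefBuilder scratch = new IntsRefBuilder();
    b.add(Util.toIntsRef(new BytesRef("alpha"), scratch), outer.newPair(10L, inner.newPair(2L, 5L)));
    b.add(Util.toIntsRef(new BytesRef("beta"), scratch), outer.newPair(90L, inner.newPair(1L, 1L)));
    FST<Pair<Long, Pair<Long, Long>>> fst = b.finish();

    // Terms come back in sorted order together with their composite outputs.
    BytesRefFSTEnum<Pair<Long, Pair<Long, Long>>> it = new BytesRefFSTEnum<>(fst);
    BytesRefFSTEnum.InputOutput<Pair<Long, Pair<Long, Long>>> entry;
    while ((entry = it.next()) != null) {
      System.out.println(entry.input.utf8ToString()
          + " docsStart=" + entry.output.output1
          + " docFreq=" + entry.output.output2.output1
          + " totalTermFreq=" + entry.output.output2.output2);
    }
  }
}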
Project: NYBC    File: SimpleTextFieldsReader.java
public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst, IndexOptions indexOptions) {
  this.indexOptions = indexOptions;
  fstEnum = new BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(fst);
}
Project: NYBC    File: SimpleTextFieldsReader.java
private void loadTerms() throws IOException {
  PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton(false);
  final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
  final PairOutputs<Long,Long> outputsInner = new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs);
  final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<Long,PairOutputs.Pair<Long,Long>>(posIntOutputs,
                                                                                                                  outputsInner);
  b = new Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(FST.INPUT_TYPE.BYTE1, outputs);
  IndexInput in = SimpleTextFieldsReader.this.in.clone();
  in.seek(termsStart);
  final BytesRef lastTerm = new BytesRef(10);
  long lastDocsStart = -1;
  int docFreq = 0;
  long totalTermFreq = 0;
  OpenBitSet visitedDocs = new OpenBitSet();
  final IntsRef scratchIntsRef = new IntsRef();
  while(true) {
    SimpleTextUtil.readLine(in, scratch);
    if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) {
      if (lastDocsStart != -1) {
        b.add(Util.toIntsRef(lastTerm, scratchIntsRef),
              outputs.newPair(lastDocsStart,
                              outputsInner.newPair((long) docFreq, totalTermFreq)));
        sumTotalTermFreq += totalTermFreq;
      }
      break;
    } else if (StringHelper.startsWith(scratch, DOC)) {
      docFreq++;
      sumDocFreq++;
      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
      int docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
      visitedDocs.set(docID);
    } else if (StringHelper.startsWith(scratch, FREQ)) {
      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
      totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
    } else if (StringHelper.startsWith(scratch, TERM)) {
      if (lastDocsStart != -1) {
        b.add(Util.toIntsRef(lastTerm, scratchIntsRef), outputs.newPair(lastDocsStart,
                                                                        outputsInner.newPair((long) docFreq, totalTermFreq)));
      }
      lastDocsStart = in.getFilePointer();
      final int len = scratch.length - TERM.length;
      if (len > lastTerm.length) {
        lastTerm.grow(len);
      }
      System.arraycopy(scratch.bytes, TERM.length, lastTerm.bytes, 0, len);
      lastTerm.length = len;
      docFreq = 0;
      sumTotalTermFreq += totalTermFreq;
      totalTermFreq = 0;
      termCount++;
    }
  }
  docCount = (int) visitedDocs.cardinality();
  fst = b.finish();
  /*
  PrintStream ps = new PrintStream("out.dot");
  fst.toDot(ps);
  ps.close();
  System.out.println("SAVED out.dot");
  */
  //System.out.println("FST " + fst.sizeInBytes());
}
Project: read-open-source-code    File: SimpleTextFieldsReader.java
public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst, IndexOptions indexOptions) {
  this.indexOptions = indexOptions;
  fstEnum = new BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(fst);
}
Project: read-open-source-code    File: SimpleTextFieldsReader.java
private void loadTerms() throws IOException {
  PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
  final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
  final PairOutputs<Long,Long> outputsInner = new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs);
  final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<Long,PairOutputs.Pair<Long,Long>>(posIntOutputs,
                                                                                                                  outputsInner);
  b = new Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(FST.INPUT_TYPE.BYTE1, outputs);
  IndexInput in = SimpleTextFieldsReader.this.in.clone();
  in.seek(termsStart);
  final BytesRef lastTerm = new BytesRef(10);
  long lastDocsStart = -1;
  int docFreq = 0;
  long totalTermFreq = 0;
  FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
  final IntsRef scratchIntsRef = new IntsRef();
  while(true) {
    SimpleTextUtil.readLine(in, scratch);
    if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) {
      if (lastDocsStart != -1) {
        b.add(Util.toIntsRef(lastTerm, scratchIntsRef),
              outputs.newPair(lastDocsStart,
                              outputsInner.newPair((long) docFreq, totalTermFreq)));
        sumTotalTermFreq += totalTermFreq;
      }
      break;
    } else if (StringHelper.startsWith(scratch, DOC)) {
      docFreq++;
      sumDocFreq++;
      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
      int docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
      visitedDocs.set(docID);
    } else if (StringHelper.startsWith(scratch, FREQ)) {
      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
      totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
    } else if (StringHelper.startsWith(scratch, TERM)) {
      if (lastDocsStart != -1) {
        b.add(Util.toIntsRef(lastTerm, scratchIntsRef), outputs.newPair(lastDocsStart,
                                                                        outputsInner.newPair((long) docFreq, totalTermFreq)));
      }
      lastDocsStart = in.getFilePointer();
      final int len = scratch.length - TERM.length;
      if (len > lastTerm.length) {
        lastTerm.grow(len);
      }
      System.arraycopy(scratch.bytes, TERM.length, lastTerm.bytes, 0, len);
      lastTerm.length = len;
      docFreq = 0;
      sumTotalTermFreq += totalTermFreq;
      totalTermFreq = 0;
      termCount++;
    }
  }
  docCount = (int) visitedDocs.cardinality();
  fst = b.finish();
  /*
  PrintStream ps = new PrintStream("out.dot");
  fst.toDot(ps);
  ps.close();
  System.out.println("SAVED out.dot");
  */
  //System.out.println("FST " + fst.sizeInBytes());
}
Project: read-open-source-code    File: SimpleTextFieldsReader.java
public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst, IndexOptions indexOptions) {
  this.indexOptions = indexOptions;
  fstEnum = new BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(fst);
}
Project: read-open-source-code    File: SimpleTextFieldsReader.java
private void loadTerms() throws IOException {
  PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
  final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
  final PairOutputs<Long,Long> outputsInner = new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs);
  final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<Long,PairOutputs.Pair<Long,Long>>(posIntOutputs,
                                                                                                                  outputsInner);
  b = new Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(FST.INPUT_TYPE.BYTE1, outputs);
  IndexInput in = SimpleTextFieldsReader.this.in.clone();
  in.seek(termsStart);
  final BytesRef lastTerm = new BytesRef(10);
  long lastDocsStart = -1;
  int docFreq = 0;
  long totalTermFreq = 0;
  FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
  final IntsRef scratchIntsRef = new IntsRef();
  while(true) {
    SimpleTextUtil.readLine(in, scratch);
    if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) {
      if (lastDocsStart != -1) {
        b.add(Util.toIntsRef(lastTerm, scratchIntsRef),
              outputs.newPair(lastDocsStart,
                              outputsInner.newPair((long) docFreq, totalTermFreq)));
        sumTotalTermFreq += totalTermFreq;
      }
      break;
    } else if (StringHelper.startsWith(scratch, DOC)) {
      docFreq++;
      sumDocFreq++;
      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
      int docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
      visitedDocs.set(docID);
    } else if (StringHelper.startsWith(scratch, FREQ)) {
      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
      totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
    } else if (StringHelper.startsWith(scratch, TERM)) {
      if (lastDocsStart != -1) {
        b.add(Util.toIntsRef(lastTerm, scratchIntsRef), outputs.newPair(lastDocsStart,
                                                                        outputsInner.newPair((long) docFreq, totalTermFreq)));
      }
      lastDocsStart = in.getFilePointer();
      final int len = scratch.length - TERM.length;
      if (len > lastTerm.length) {
        lastTerm.grow(len);
      }
      System.arraycopy(scratch.bytes, TERM.length, lastTerm.bytes, 0, len);
      lastTerm.length = len;
      docFreq = 0;
      sumTotalTermFreq += totalTermFreq;
      totalTermFreq = 0;
      termCount++;
    }
  }
  docCount = (int) visitedDocs.cardinality();
  fst = b.finish();
  /*
  PrintStream ps = new PrintStream("out.dot");
  fst.toDot(ps);
  ps.close();
  System.out.println("SAVED out.dot");
  */
  //System.out.println("FST " + fst.sizeInBytes());
}
Project: read-open-source-code    File: SimpleTextFieldsReader.java
public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst, IndexOptions indexOptions) {
  this.indexOptions = indexOptions;
  fstEnum = new BytesRefFSTEnum<>(fst);
}
Project: read-open-source-code    File: SimpleTextFieldsReader.java
private void loadTerms() throws IOException {
  PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
  final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
  final PairOutputs<Long,Long> outputsInner = new PairOutputs<>(posIntOutputs, posIntOutputs);
  final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<>(posIntOutputs, outputsInner);
  b = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
  IndexInput in = SimpleTextFieldsReader.this.in.clone();
  in.seek(termsStart);
  final BytesRefBuilder lastTerm = new BytesRefBuilder();
  long lastDocsStart = -1;
  int docFreq = 0;
  long totalTermFreq = 0;
  FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
  final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
  while(true) {
    SimpleTextUtil.readLine(in, scratch);
    if (scratch.get().equals(END) || StringHelper.startsWith(scratch.get(), FIELD)) {
      if (lastDocsStart != -1) {
        b.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef),
              outputs.newPair(lastDocsStart,
                              outputsInner.newPair((long) docFreq, totalTermFreq)));
        sumTotalTermFreq += totalTermFreq;
      }
      break;
    } else if (StringHelper.startsWith(scratch.get(), DOC)) {
      docFreq++;
      sumDocFreq++;
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length);
      int docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
      visitedDocs.set(docID);
    } else if (StringHelper.startsWith(scratch.get(), FREQ)) {
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length);
      totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
    } else if (StringHelper.startsWith(scratch.get(), TERM)) {
      if (lastDocsStart != -1) {
        b.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef), outputs.newPair(lastDocsStart,
                                                                        outputsInner.newPair((long) docFreq, totalTermFreq)));
      }
      lastDocsStart = in.getFilePointer();
      final int len = scratch.length() - TERM.length;
      lastTerm.grow(len);
      System.arraycopy(scratch.bytes(), TERM.length, lastTerm.bytes(), 0, len);
      lastTerm.setLength(len);
      docFreq = 0;
      sumTotalTermFreq += totalTermFreq;
      totalTermFreq = 0;
      termCount++;
    }
  }
  docCount = visitedDocs.cardinality();
  fst = b.finish();
  /*
  PrintStream ps = new PrintStream("out.dot");
  fst.toDot(ps);
  ps.close();
  System.out.println("SAVED out.dot");
  */
  //System.out.println("FST " + fst.sizeInBytes());
}
Project: Maskana-Gestor-de-Conocimiento    File: SimpleTextFieldsReader.java
public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst, IndexOptions indexOptions) {
  this.indexOptions = indexOptions;
  fstEnum = new BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(fst);
}
Project: Maskana-Gestor-de-Conocimiento    File: SimpleTextFieldsReader.java
private void loadTerms() throws IOException {
  PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
  final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
  final PairOutputs<Long,Long> outputsInner = new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs);
  final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<Long,PairOutputs.Pair<Long,Long>>(posIntOutputs,
                                                                                                                  outputsInner);
  b = new Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(FST.INPUT_TYPE.BYTE1, outputs);
  IndexInput in = SimpleTextFieldsReader.this.in.clone();
  in.seek(termsStart);
  final BytesRef lastTerm = new BytesRef(10);
  long lastDocsStart = -1;
  int docFreq = 0;
  long totalTermFreq = 0;
  OpenBitSet visitedDocs = new OpenBitSet();
  final IntsRef scratchIntsRef = new IntsRef();
  while(true) {
    SimpleTextUtil.readLine(in, scratch);
    if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) {
      if (lastDocsStart != -1) {
        b.add(Util.toIntsRef(lastTerm, scratchIntsRef),
              outputs.newPair(lastDocsStart,
                              outputsInner.newPair((long) docFreq, totalTermFreq)));
        sumTotalTermFreq += totalTermFreq;
      }
      break;
    } else if (StringHelper.startsWith(scratch, DOC)) {
      docFreq++;
      sumDocFreq++;
      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
      int docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
      visitedDocs.set(docID);
    } else if (StringHelper.startsWith(scratch, FREQ)) {
      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
      totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
    } else if (StringHelper.startsWith(scratch, TERM)) {
      if (lastDocsStart != -1) {
        b.add(Util.toIntsRef(lastTerm, scratchIntsRef), outputs.newPair(lastDocsStart,
                                                                        outputsInner.newPair((long) docFreq, totalTermFreq)));
      }
      lastDocsStart = in.getFilePointer();
      final int len = scratch.length - TERM.length;
      if (len > lastTerm.length) {
        lastTerm.grow(len);
      }
      System.arraycopy(scratch.bytes, TERM.length, lastTerm.bytes, 0, len);
      lastTerm.length = len;
      docFreq = 0;
      sumTotalTermFreq += totalTermFreq;
      totalTermFreq = 0;
      termCount++;
    }
  }
  docCount = (int) visitedDocs.cardinality();
  fst = b.finish();
  /*
  PrintStream ps = new PrintStream("out.dot");
  fst.toDot(ps);
  ps.close();
  System.out.println("SAVED out.dot");
  */
  //System.out.println("FST " + fst.sizeInBytes());
}