Example source code for the Java class org.apache.lucene.util.fst.PairOutputs.Pair
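The snippets below are grouped by project and file. As a quick orientation, here is a minimal sketch (not taken from any of the listed projects; the method name and values are illustrative) of how a PairOutputs.Pair is created and read back, using the same PositiveIntOutputs/ByteSequenceOutputs combination the suggesters below rely on:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.PositiveIntOutputs;

static Pair<Long,BytesRef> examplePair() {
  // Compose two output types into one FST output: a long weight plus a BytesRef payload.
  PairOutputs<Long,BytesRef> outputs = new PairOutputs<>(
      PositiveIntOutputs.getSingleton(),   // output1: the weight
      ByteSequenceOutputs.getSingleton()); // output2: the payload bytes
  // newPair() builds the composite value that gets stored on FST arcs.
  Pair<Long,BytesRef> pair = outputs.newPair(15L, new BytesRef("payload"));
  long weight = pair.output1;        // 15
  BytesRef payload = pair.output2;   // "payload"
  return pair;
}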

Project: search    File: FuzzySuggester.java
@Override
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
                                                                     Automaton lookupAutomaton,
                                                                     FST<Pair<Long,BytesRef>> fst)
  throws IOException {

  // TODO: right now there's no penalty for fuzzy/edits,
  // ie a completion whose prefix matched exactly what the
  // user typed gets no boost over completions that
  // required an edit, which get no boost over completions
  // requiring two edits.  I suspect a multiplicative
  // factor is appropriate (eg, say a fuzzy match must be at
  // least 2X better weight than the non-fuzzy match to
  // "compete") ... in which case I think the wFST needs
  // to be log weights or something ...

  Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
  /*
    Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), StandardCharsets.UTF_8);
    w.write(levA.toDot());
    w.close();
    System.out.println("Wrote LevA to out.dot");
  */
  return FSTUtil.intersectPrefixPaths(levA, fst);
}
Project: search    File: IDVersionSegmentTermsEnum.java
IDVersionSegmentTermsEnumFrame pushFrame(FST.Arc<Pair<BytesRef,Long>> arc, Pair<BytesRef,Long> frameData, int length) throws IOException {
  scratchReader.reset(frameData.output1.bytes, frameData.output1.offset, frameData.output1.length);
  final long code = scratchReader.readVLong();
  final long fpSeek = code >>> VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
  final IDVersionSegmentTermsEnumFrame f = getFrame(1+currentFrame.ord);
  f.maxIDVersion = Long.MAX_VALUE - frameData.output2;
  f.hasTerms = (code & VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS) != 0;
  f.hasTermsOrig = f.hasTerms;
  f.isFloor = (code & VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0;
  if (f.isFloor) {
    f.setFloorData(scratchReader, frameData.output1);
  }
  pushFrame(arc, fpSeek, length);

  return f;
}
Project: NYBC    File: FuzzySuggester.java
@Override
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
                                                                     Automaton lookupAutomaton,
                                                                     FST<Pair<Long,BytesRef>> fst)
  throws IOException {

  // TODO: right now there's no penalty for fuzzy/edits,
  // ie a completion whose prefix matched exactly what the
  // user typed gets no boost over completions that
  // required an edit, which get no boost over completions
  // requiring two edits.  I suspect a multiplicative
  // factor is appropriate (eg, say a fuzzy match must be at
  // least 2X better weight than the non-fuzzy match to
  // "compete") ... in which case I think the wFST needs
  // to be log weights or something ...

  Automaton levA = toLevenshteinAutomata(lookupAutomaton);
  /*
    Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
    w.write(levA.toDot());
    w.close();
    System.out.println("Wrote LevA to out.dot");
  */
  return FSTUtil.intersectPrefixPaths(levA, fst);
}
Project: read-open-source-code    File: FuzzySuggester.java
@Override
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
                                                                     Automaton lookupAutomaton,
                                                                     FST<Pair<Long,BytesRef>> fst)
  throws IOException {

  // TODO: right now there's no penalty for fuzzy/edits,
  // ie a completion whose prefix matched exactly what the
  // user typed gets no boost over completions that
  // required an edit, which get no boost over completions
  // requiring two edits.  I suspect a multiplicative
  // factor is appropriate (eg, say a fuzzy match must be at
  // least 2X better weight than the non-fuzzy match to
  // "compete") ... in which case I think the wFST needs
  // to be log weights or something ...

  Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
  /*
    Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
    w.write(levA.toDot());
    w.close();
    System.out.println("Wrote LevA to out.dot");
  */
  return FSTUtil.intersectPrefixPaths(levA, fst);
}
Project: read-open-source-code    File: IDVersionSegmentTermsEnum.java
IDVersionSegmentTermsEnumFrame pushFrame(FST.Arc<Pair<BytesRef,Long>> arc, Pair<BytesRef,Long> frameData, int length) throws IOException {
  scratchReader.reset(frameData.output1.bytes, frameData.output1.offset, frameData.output1.length);
  final long code = scratchReader.readVLong();
  final long fpSeek = code >>> VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
  final IDVersionSegmentTermsEnumFrame f = getFrame(1+currentFrame.ord);
  f.maxIDVersion = Long.MAX_VALUE - frameData.output2;
  f.hasTerms = (code & VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS) != 0;
  f.hasTermsOrig = f.hasTerms;
  f.isFloor = (code & VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0;
  if (f.isFloor) {
    f.setFloorData(scratchReader, frameData.output1);
  }
  pushFrame(arc, fpSeek, length);

  return f;
}
Project: read-open-source-code    File: FuzzySuggester.java
@Override
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
                                                                     Automaton lookupAutomaton,
                                                                     FST<Pair<Long,BytesRef>> fst)
  throws IOException {

  // TODO: right now there's no penalty for fuzzy/edits,
  // ie a completion whose prefix matched exactly what the
  // user typed gets no boost over completions that
  // required an edit, which get no boost over completions
  // requiring two edits.  I suspect a multiplicative
  // factor is appropriate (eg, say a fuzzy match must be at
  // least 2X better weight than the non-fuzzy match to
  // "compete") ... in which case I think the wFST needs
  // to be log weights or something ...

  Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
  /*
    Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), StandardCharsets.UTF_8);
    w.write(levA.toDot());
    w.close();
    System.out.println("Wrote LevA to out.dot");
  */
  return FSTUtil.intersectPrefixPaths(levA, fst);
}
Project: Maskana-Gestor-de-Conocimiento    File: FuzzySuggester.java
@Override
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
                                                                     Automaton lookupAutomaton,
                                                                     FST<Pair<Long,BytesRef>> fst)
  throws IOException {

  // TODO: right now there's no penalty for fuzzy/edits,
  // ie a completion whose prefix matched exactly what the
  // user typed gets no boost over completions that
  // required an edit, which get no boost over completions
  // requiring two edits.  I suspect a multiplicative
  // factor is appropriate (eg, say a fuzzy match must be at
  // least 2X better weight than the non-fuzzy match to
  // "compete") ... in which case I think the wFST needs
  // to be log weights or something ...

  Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
  /*
    Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
    w.write(levA.toDot());
    w.close();
    System.out.println("Wrote LevA to out.dot");
  */
  return FSTUtil.intersectPrefixPaths(levA, fst);
}
Project: elasticsearch_my    File: XAnalyzingSuggester.java
/** Returns all completion paths to initialize the search. */
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
                                                                     Automaton lookupAutomaton,
                                                                     FST<Pair<Long,BytesRef>> fst)
  throws IOException {
  return prefixPaths;
}
Project: Elasticsearch    File: XAnalyzingSuggester.java
/**
 * Creates a new suggester.
 *
 * @param indexAnalyzer Analyzer that will be used for
 *   analyzing suggestions while building the index.
 * @param queryAnalyzer Analyzer that will be used for
 *   analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *   surface forms to keep for a single analyzed form.
 *   When there are too many surface forms we discard the
 *   lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *   to expand from the analyzed form.  Set this to -1 for
 *   no limit.
 */
public XAnalyzingSuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
                           boolean preservePositionIncrements, FST<Pair<Long, BytesRef>> fst, boolean hasPayloads, int maxAnalyzedPathsForOneInput,
                           int sepLabel, int payloadSep, int endByte, int holeCharacter) {
    // SIMON EDIT: I added fst, hasPayloads and maxAnalyzedPathsForOneInput
  this.indexAnalyzer = indexAnalyzer;
  this.queryAnalyzer = queryAnalyzer;
  this.fst = fst;
  this.hasPayloads = hasPayloads;
  if ((options & ~(EXACT_FIRST | PRESERVE_SEP)) != 0) {
    throw new IllegalArgumentException("options should only contain EXACT_FIRST and PRESERVE_SEP; got " + options);
  }
  this.exactFirst = (options & EXACT_FIRST) != 0;
  this.preserveSep = (options & PRESERVE_SEP) != 0;

  // FLORIAN EDIT: I added <code>queryPrefix</code> for context dependent suggestions
  this.queryPrefix = queryPrefix;

  // NOTE: this is just an implementation limitation; if
  // somehow this is a problem we could fix it by using
  // more than one byte to disambiguate ... but 256 seems
  // like it should be way more than enough.
  if (maxSurfaceFormsPerAnalyzedForm <= 0 || maxSurfaceFormsPerAnalyzedForm > 256) {
    throw new IllegalArgumentException("maxSurfaceFormsPerAnalyzedForm must be > 0 and < 256 (got: " + maxSurfaceFormsPerAnalyzedForm + ")");
  }
  this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;

  if (maxGraphExpansions < 1 && maxGraphExpansions != -1) {
    throw new IllegalArgumentException("maxGraphExpansions must -1 (no limit) or > 0 (got: " + maxGraphExpansions + ")");
  }
  this.maxGraphExpansions = maxGraphExpansions;
  this.maxAnalyzedPathsForOneInput = maxAnalyzedPathsForOneInput;
  this.preservePositionIncrements = preservePositionIncrements;
  this.sepLabel = sepLabel;
  this.payloadSep = payloadSep;
  this.endByte = endByte;
  this.holeCharacter = holeCharacter;
}
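For orientation, a hedged caller-side sketch of this constructor follows; it only illustrates how the EXACT_FIRST and PRESERVE_SEP flags compose and which arguments the validation above applies to. Every other value is a hypothetical placeholder, not what Elasticsearch actually passes:

// Hypothetical sketch; analyzers, fst and the byte labels are placeholders, and the
// constant names are the ones referenced by the Javadoc above.
int options = XAnalyzingSuggester.EXACT_FIRST | XAnalyzingSuggester.PRESERVE_SEP;
XAnalyzingSuggester suggester = new XAnalyzingSuggester(
    indexAnalyzer,   // analyzer used while building the index
    null,            // queryPrefix: no context prefix automaton
    queryAnalyzer,   // analyzer used at lookup time
    options,
    256,             // maxSurfaceFormsPerAnalyzedForm: must be in (0, 256]
    -1,              // maxGraphExpansions: -1 means no limit
    true,            // preservePositionIncrements
    fst,             // a previously loaded FST<Pair<Long,BytesRef>>; only stored here
    false,           // hasPayloads
    1,               // maxAnalyzedPathsForOneInput
    sepLabel, payloadSep, endByte, holeCharacter); // byte labels chosen by the caller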
Project: Elasticsearch    File: XAnalyzingSuggester.java
/** Returns all completion paths to initialize the search. */
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
                                                                     Automaton lookupAutomaton,
                                                                     FST<Pair<Long,BytesRef>> fst)
  throws IOException {
  return prefixPaths;
}
Project: Elasticsearch    File: AnalyzingCompletionLookupProvider.java
public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, boolean hasPayloads, int maxAnalyzedPathsForOneInput, FST<Pair<Long, BytesRef>> fst, int sepLabel, int payloadSep, int endByte, int holeCharacter) {
    this.preserveSep = preserveSep;
    this.preservePositionIncrements = preservePositionIncrements;
    this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;
    this.maxGraphExpansions = maxGraphExpansions;
    this.hasPayloads = hasPayloads;
    this.maxAnalyzedPathsForOneInput = maxAnalyzedPathsForOneInput;
    this.fst = fst;
    this.sepLabel = sepLabel;
    this.payloadSep = payloadSep;
    this.endByte = endByte;
    this.holeCharacter = holeCharacter;
}
Project: search    File: AnalyzingSuggester.java
/** Returns all prefix paths to initialize the search. */
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
                                                                     Automaton lookupAutomaton,
                                                                     FST<Pair<Long,BytesRef>> fst)
  throws IOException {
  return prefixPaths;
}
Project: search    File: IDVersionSegmentTermsEnum.java
private FST.Arc<Pair<BytesRef,Long>> getArc(int ord) {
  if (ord >= arcs.length) {
    @SuppressWarnings({"rawtypes","unchecked"}) final FST.Arc<Pair<BytesRef,Long>>[] next =
    new FST.Arc[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
    System.arraycopy(arcs, 0, next, 0, arcs.length);
    for(int arcOrd=arcs.length;arcOrd<next.length;arcOrd++) {
      next[arcOrd] = new FST.Arc<>();
    }
    arcs = next;
  }
  return arcs[ord];
}
Project: search    File: IDVersionSegmentTermsEnum.java
IDVersionSegmentTermsEnumFrame pushFrame(FST.Arc<Pair<BytesRef,Long>> arc, long fp, int length) throws IOException {
  final IDVersionSegmentTermsEnumFrame f = getFrame(1+currentFrame.ord);
  f.arc = arc;
  if (f.fpOrig == fp && f.nextEnt != -1) {
    //if (DEBUG) System.out.println("      push reused frame ord=" + f.ord + " fp=" + f.fp + " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" + term.length + " vs prefix=" + f.prefix);
    if (f.prefix > targetBeforeCurrentLength) {
      f.rewind();
    } else {
      // if (DEBUG) {
      //   System.out.println("        skip rewind!");
      // }
    }
    assert length == f.prefix;
  } else {
    f.nextEnt = -1;
    f.prefix = length;
    f.state.termBlockOrd = 0;
    f.fpOrig = f.fp = fp;
    f.lastSubFP = -1;
    // if (DEBUG) {
    //   final int sav = term.length;
    //   term.length = length;
    //   System.out.println("      push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
    //   term.length = sav;
    // }
  }

  return f;
}
Project: search    File: VersionFieldReader.java
VersionFieldReader(VersionBlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, Pair<BytesRef,Long> rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount,
            long indexStartFP, int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
  assert numTerms > 0;
  this.fieldInfo = fieldInfo;
  //DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
  this.parent = parent;
  this.numTerms = numTerms;
  this.sumTotalTermFreq = sumTotalTermFreq; 
  this.sumDocFreq = sumDocFreq; 
  this.docCount = docCount;
  this.indexStartFP = indexStartFP;
  this.rootCode = rootCode;
  this.longsSize = longsSize;
  this.minTerm = minTerm;
  this.maxTerm = maxTerm;
  // if (DEBUG) {
  //   System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + rootCode + " divisor=" + indexDivisor);
  // }

  rootBlockFP = (new ByteArrayDataInput(rootCode.output1.bytes, rootCode.output1.offset, rootCode.output1.length)).readVLong() >>> VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;

  if (indexIn != null) {
    final IndexInput clone = indexIn.clone();
    //System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name);
    clone.seek(indexStartFP);
    index = new FST<>(clone, VersionBlockTreeTermsWriter.FST_OUTPUTS);

    /*
      if (false) {
      final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
      Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
      Util.toDot(index, w, false, false);
      System.out.println("FST INDEX: SAVED to " + dotFileName);
      w.close();
      }
    */
  } else {
    index = null;
  }
}
Project: search    File: VersionBlockTreeTermsWriter.java
public FieldMetaData(FieldInfo fieldInfo, Pair<BytesRef,Long> rootCode, long numTerms, long indexStartFP, int longsSize,
                     BytesRef minTerm, BytesRef maxTerm) {
  assert numTerms > 0;
  this.fieldInfo = fieldInfo;
  assert rootCode != null: "field=" + fieldInfo.name + " numTerms=" + numTerms;
  this.rootCode = rootCode;
  this.indexStartFP = indexStartFP;
  this.numTerms = numTerms;
  this.longsSize = longsSize;
  this.minTerm = minTerm;
  this.maxTerm = maxTerm;
}
Project: search    File: VersionBlockTreeTermsWriter.java
public PendingBlock(BytesRef prefix, long maxVersion, long fp, boolean hasTerms, boolean isFloor, int floorLeadByte, List<FST<Pair<BytesRef,Long>>> subIndices) {
  super(false);
  this.prefix = prefix;
  this.maxVersion = maxVersion;
  this.fp = fp;
  this.hasTerms = hasTerms;
  this.isFloor = isFloor;
  this.floorLeadByte = floorLeadByte;
  this.subIndices = subIndices;
}
Project: search    File: VersionBlockTreeTermsWriter.java
private void append(Builder<Pair<BytesRef,Long>> builder, FST<Pair<BytesRef,Long>> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<Pair<BytesRef,Long>> subIndexEnum = new BytesRefFSTEnum<>(subIndex);
  BytesRefFSTEnum.InputOutput<Pair<BytesRef,Long>> indexEnt;
  while((indexEnt = subIndexEnum.next()) != null) {
    //if (DEBUG) {
    //  System.out.println("      add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
    //}
    builder.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), indexEnt.output);
  }
}
Project: search    File: TestFSTs.java
/** like testShortestPaths, but uses pairoutputs so we have both a weight and an output */
public void testShortestPathsWFST() throws Exception {

  PairOutputs<Long,Long> outputs = new PairOutputs<>(
      PositiveIntOutputs.getSingleton(), // weight
      PositiveIntOutputs.getSingleton()  // output
  );

  final Builder<Pair<Long,Long>> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);

  final IntsRefBuilder scratch = new IntsRefBuilder();
  builder.add(Util.toIntsRef(new BytesRef("aab"), scratch), outputs.newPair(22L, 57L));
  builder.add(Util.toIntsRef(new BytesRef("aac"), scratch), outputs.newPair(7L, 36L));
  builder.add(Util.toIntsRef(new BytesRef("ax"), scratch), outputs.newPair(17L, 85L));
  final FST<Pair<Long,Long>> fst = builder.finish();
  //Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"));
  //Util.toDot(fst, w, false, false);
  //w.close();

  Util.TopResults<Pair<Long,Long>> res = Util.shortestPaths(fst,
                                                           fst.getFirstArc(new FST.Arc<Pair<Long,Long>>()),
                                                           outputs.getNoOutput(),
                                                           minPairWeightComparator,
                                                           3,
                                                           true);
  assertTrue(res.isComplete);
  assertEquals(3, res.topN.size());

  assertEquals(Util.toIntsRef(new BytesRef("aac"), scratch), res.topN.get(0).input);
  assertEquals(7L, res.topN.get(0).output.output1.longValue()); // weight
  assertEquals(36L, res.topN.get(0).output.output2.longValue()); // output

  assertEquals(Util.toIntsRef(new BytesRef("ax"), scratch), res.topN.get(1).input);
  assertEquals(17L, res.topN.get(1).output.output1.longValue()); // weight
  assertEquals(85L, res.topN.get(1).output.output2.longValue()); // output

  assertEquals(Util.toIntsRef(new BytesRef("aab"), scratch), res.topN.get(2).input);
  assertEquals(22L, res.topN.get(2).output.output1.longValue()); // weight
  assertEquals(57L, res.topN.get(2).output.output2.longValue()); // output
}
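The test above references a minPairWeightComparator declared elsewhere in TestFSTs; a comparator along these lines, ordering results by the weight half of each pair (output1), is what the Util.shortestPaths call assumes (a sketch, not copied verbatim from the test file):

import java.util.Comparator;

final Comparator<Pair<Long,Long>> minPairWeightComparator = new Comparator<Pair<Long,Long>>() {
  @Override
  public int compare(Pair<Long,Long> left, Pair<Long,Long> right) {
    // Rank paths by the weight half of the pair only; output2 plays no role in ordering.
    return left.output1.compareTo(right.output1);
  }
};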
Project: NYBC    File: AnalyzingSuggester.java
@Override
public boolean load(InputStream input) throws IOException {
  DataInput dataIn = new InputStreamDataInput(input);
  try {
    this.fst = new FST<Pair<Long,BytesRef>>(dataIn, new PairOutputs<Long,BytesRef>(PositiveIntOutputs.getSingleton(true), ByteSequenceOutputs.getSingleton()));
    maxAnalyzedPathsForOneInput = dataIn.readVInt();
  } finally {
    IOUtils.close(input);
  }
  return true;
}
Project: NYBC    File: AnalyzingSuggester.java
/** Returns all prefix paths to initialize the search. */
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
                                                                     Automaton lookupAutomaton,
                                                                     FST<Pair<Long,BytesRef>> fst)
  throws IOException {
  return prefixPaths;
}
Project: NYBC    File: TestFSTs.java
/** like testShortestPaths, but uses pairoutputs so we have both a weight and an output */
public void testShortestPathsWFST() throws Exception {

  PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(
      PositiveIntOutputs.getSingleton(true), // weight
      PositiveIntOutputs.getSingleton(true)  // output
  );

  final Builder<Pair<Long,Long>> builder = new Builder<Pair<Long,Long>>(FST.INPUT_TYPE.BYTE1, outputs);

  final IntsRef scratch = new IntsRef();
  builder.add(Util.toIntsRef(new BytesRef("aab"), scratch), outputs.newPair(22L, 57L));
  builder.add(Util.toIntsRef(new BytesRef("aac"), scratch), outputs.newPair(7L, 36L));
  builder.add(Util.toIntsRef(new BytesRef("ax"), scratch), outputs.newPair(17L, 85L));
  final FST<Pair<Long,Long>> fst = builder.finish();
  //Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"));
  //Util.toDot(fst, w, false, false);
  //w.close();

  Util.MinResult<Pair<Long,Long>>[] r = Util.shortestPaths(fst,
                                                           fst.getFirstArc(new FST.Arc<Pair<Long,Long>>()),
                                                           outputs.getNoOutput(),
                                                           minPairWeightComparator,
                                                           3,
                                                           true);
  assertEquals(3, r.length);

  assertEquals(Util.toIntsRef(new BytesRef("aac"), scratch), r[0].input);
  assertEquals(7L, r[0].output.output1.longValue()); // weight
  assertEquals(36L, r[0].output.output2.longValue()); // output

  assertEquals(Util.toIntsRef(new BytesRef("ax"), scratch), r[1].input);
  assertEquals(17L, r[1].output.output1.longValue()); // weight
  assertEquals(85L, r[1].output.output2.longValue()); // output

  assertEquals(Util.toIntsRef(new BytesRef("aab"), scratch), r[2].input);
  assertEquals(22L, r[2].output.output1.longValue()); // weight
  assertEquals(57L, r[2].output.output2.longValue()); // output
}
Project: read-open-source-code    File: AnalyzingSuggester.java
@Override
public boolean load(DataInput input) throws IOException {
  count = input.readVLong();
  this.fst = new FST<Pair<Long,BytesRef>>(input, new PairOutputs<Long,BytesRef>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
  maxAnalyzedPathsForOneInput = input.readVInt();
  hasPayloads = input.readByte() == 1;
  return true;
}
Project: read-open-source-code    File: AnalyzingSuggester.java
/** Returns all prefix paths to initialize the search. */
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
                                                                     Automaton lookupAutomaton,
                                                                     FST<Pair<Long,BytesRef>> fst)
  throws IOException {
  return prefixPaths;
}
Project: read-open-source-code    File: IDVersionSegmentTermsEnum.java
private FST.Arc<Pair<BytesRef,Long>> getArc(int ord) {
  if (ord >= arcs.length) {
    @SuppressWarnings({"rawtypes","unchecked"}) final FST.Arc<Pair<BytesRef,Long>>[] next =
    new FST.Arc[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
    System.arraycopy(arcs, 0, next, 0, arcs.length);
    for(int arcOrd=arcs.length;arcOrd<next.length;arcOrd++) {
      next[arcOrd] = new FST.Arc<>();
    }
    arcs = next;
  }
  return arcs[ord];
}
Project: read-open-source-code    File: IDVersionSegmentTermsEnum.java
IDVersionSegmentTermsEnumFrame pushFrame(FST.Arc<Pair<BytesRef,Long>> arc, long fp, int length) throws IOException {
  final IDVersionSegmentTermsEnumFrame f = getFrame(1+currentFrame.ord);
  f.arc = arc;
  if (f.fpOrig == fp && f.nextEnt != -1) {
    //if (DEBUG) System.out.println("      push reused frame ord=" + f.ord + " fp=" + f.fp + " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" + term.length + " vs prefix=" + f.prefix);
    if (f.prefix > targetBeforeCurrentLength) {
      f.rewind();
    } else {
      // if (DEBUG) {
      //   System.out.println("        skip rewind!");
      // }
    }
    assert length == f.prefix;
  } else {
    f.nextEnt = -1;
    f.prefix = length;
    f.state.termBlockOrd = 0;
    f.fpOrig = f.fp = fp;
    f.lastSubFP = -1;
    // if (DEBUG) {
    //   final int sav = term.length;
    //   term.length = length;
    //   System.out.println("      push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
    //   term.length = sav;
    // }
  }

  return f;
}
Project: read-open-source-code    File: VersionFieldReader.java
VersionFieldReader(VersionBlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, Pair<BytesRef,Long> rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount,
            long indexStartFP, int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
  assert numTerms > 0;
  this.fieldInfo = fieldInfo;
  //DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
  this.parent = parent;
  this.numTerms = numTerms;
  this.sumTotalTermFreq = sumTotalTermFreq; 
  this.sumDocFreq = sumDocFreq; 
  this.docCount = docCount;
  this.indexStartFP = indexStartFP;
  this.rootCode = rootCode;
  this.longsSize = longsSize;
  this.minTerm = minTerm;
  this.maxTerm = maxTerm;
  // if (DEBUG) {
  //   System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + rootCode + " divisor=" + indexDivisor);
  // }

  rootBlockFP = (new ByteArrayDataInput(rootCode.output1.bytes, rootCode.output1.offset, rootCode.output1.length)).readVLong() >>> VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;

  if (indexIn != null) {
    final IndexInput clone = indexIn.clone();
    //System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name);
    clone.seek(indexStartFP);
    index = new FST<>(clone, VersionBlockTreeTermsWriter.FST_OUTPUTS);

    /*
      if (false) {
      final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
      Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
      Util.toDot(index, w, false, false);
      System.out.println("FST INDEX: SAVED to " + dotFileName);
      w.close();
      }
    */
  } else {
    index = null;
  }
}
Project: read-open-source-code    File: VersionBlockTreeTermsWriter.java
public FieldMetaData(FieldInfo fieldInfo, Pair<BytesRef,Long> rootCode, long numTerms, long indexStartFP, int longsSize,
                     BytesRef minTerm, BytesRef maxTerm) {
  assert numTerms > 0;
  this.fieldInfo = fieldInfo;
  assert rootCode != null: "field=" + fieldInfo.name + " numTerms=" + numTerms;
  this.rootCode = rootCode;
  this.indexStartFP = indexStartFP;
  this.numTerms = numTerms;
  this.longsSize = longsSize;
  this.minTerm = minTerm;
  this.maxTerm = maxTerm;
}
Project: read-open-source-code    File: VersionBlockTreeTermsWriter.java
public PendingBlock(BytesRef prefix, long maxVersion, long fp, boolean hasTerms, boolean isFloor, int floorLeadByte, List<FST<Pair<BytesRef,Long>>> subIndices) {
  super(false);
  this.prefix = prefix;
  this.maxVersion = maxVersion;
  this.fp = fp;
  this.hasTerms = hasTerms;
  this.isFloor = isFloor;
  this.floorLeadByte = floorLeadByte;
  this.subIndices = subIndices;
}
Project: read-open-source-code    File: VersionBlockTreeTermsWriter.java
private void append(Builder<Pair<BytesRef,Long>> builder, FST<Pair<BytesRef,Long>> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<Pair<BytesRef,Long>> subIndexEnum = new BytesRefFSTEnum<>(subIndex);
  BytesRefFSTEnum.InputOutput<Pair<BytesRef,Long>> indexEnt;
  while((indexEnt = subIndexEnum.next()) != null) {
    //if (DEBUG) {
    //  System.out.println("      add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
    //}
    builder.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), indexEnt.output);
  }
}
Project: Maskana-Gestor-de-Conocimiento    File: AnalyzingSuggester.java
@Override
public boolean load(InputStream input) throws IOException {
  DataInput dataIn = new InputStreamDataInput(input);
  try {
    this.fst = new FST<Pair<Long,BytesRef>>(dataIn, new PairOutputs<Long,BytesRef>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
    maxAnalyzedPathsForOneInput = dataIn.readVInt();
    hasPayloads = dataIn.readByte() == 1;
  } finally {
    IOUtils.close(input);
  }
  return true;
}
Project: Maskana-Gestor-de-Conocimiento    File: AnalyzingSuggester.java
/** Returns all prefix paths to initialize the search. */
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
                                                                     Automaton lookupAutomaton,
                                                                     FST<Pair<Long,BytesRef>> fst)
  throws IOException {
  return prefixPaths;
}
Project: Maskana-Gestor-de-Conocimiento    File: TestFSTs.java
/** like testShortestPaths, but uses pairoutputs so we have both a weight and an output */
public void testShortestPathsWFST() throws Exception {

  PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(
      PositiveIntOutputs.getSingleton(), // weight
      PositiveIntOutputs.getSingleton()  // output
  );

  final Builder<Pair<Long,Long>> builder = new Builder<Pair<Long,Long>>(FST.INPUT_TYPE.BYTE1, outputs);

  final IntsRef scratch = new IntsRef();
  builder.add(Util.toIntsRef(new BytesRef("aab"), scratch), outputs.newPair(22L, 57L));
  builder.add(Util.toIntsRef(new BytesRef("aac"), scratch), outputs.newPair(7L, 36L));
  builder.add(Util.toIntsRef(new BytesRef("ax"), scratch), outputs.newPair(17L, 85L));
  final FST<Pair<Long,Long>> fst = builder.finish();
  //Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"));
  //Util.toDot(fst, w, false, false);
  //w.close();

  Util.MinResult<Pair<Long,Long>>[] r = Util.shortestPaths(fst,
                                                           fst.getFirstArc(new FST.Arc<Pair<Long,Long>>()),
                                                           outputs.getNoOutput(),
                                                           minPairWeightComparator,
                                                           3,
                                                           true);
  assertEquals(3, r.length);

  assertEquals(Util.toIntsRef(new BytesRef("aac"), scratch), r[0].input);
  assertEquals(7L, r[0].output.output1.longValue()); // weight
  assertEquals(36L, r[0].output.output2.longValue()); // output

  assertEquals(Util.toIntsRef(new BytesRef("ax"), scratch), r[1].input);
  assertEquals(17L, r[1].output.output1.longValue()); // weight
  assertEquals(85L, r[1].output.output2.longValue()); // output

  assertEquals(Util.toIntsRef(new BytesRef("aab"), scratch), r[2].input);
  assertEquals(22L, r[2].output.output1.longValue()); // weight
  assertEquals(57L, r[2].output.output2.longValue()); // output
}
Project: elasticsearch_my    File: XAnalyzingSuggester.java
/**
 * Creates a new suggester.
 *
 * @param indexAnalyzer Analyzer that will be used for
 *   analyzing suggestions while building the index.
 * @param queryAnalyzer Analyzer that will be used for
 *   analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *   surface forms to keep for a single analyzed form.
 *   When there are too many surface forms we discard the
 *   lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *   to expand from the analyzed form.  Set this to -1 for
 *   no limit.
 */
public XAnalyzingSuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer,
                           int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
                           boolean preservePositionIncrements, FST<Pair<Long, BytesRef>> fst,
                           boolean hasPayloads, int maxAnalyzedPathsForOneInput,
                           int sepLabel, int payloadSep, int endByte, int holeCharacter) {
    // SIMON EDIT: I added fst, hasPayloads and maxAnalyzedPathsForOneInput
  this.indexAnalyzer = indexAnalyzer;
  this.queryAnalyzer = queryAnalyzer;
  this.fst = fst;
  this.hasPayloads = hasPayloads;
  if ((options & ~(EXACT_FIRST | PRESERVE_SEP)) != 0) {
    throw new IllegalArgumentException("options should only contain EXACT_FIRST and PRESERVE_SEP; got " + options);
  }
  this.exactFirst = (options & EXACT_FIRST) != 0;
  this.preserveSep = (options & PRESERVE_SEP) != 0;

  // FLORIAN EDIT: I added <code>queryPrefix</code> for context dependent suggestions
  this.queryPrefix = queryPrefix;

  // NOTE: this is just an implementation limitation; if
  // somehow this is a problem we could fix it by using
  // more than one byte to disambiguate ... but 256 seems
  // like it should be way more than enough.
  if (maxSurfaceFormsPerAnalyzedForm <= 0 || maxSurfaceFormsPerAnalyzedForm > 256) {
    throw new IllegalArgumentException(
        "maxSurfaceFormsPerAnalyzedForm must be > 0 and < 256 (got: " + maxSurfaceFormsPerAnalyzedForm + ")");
  }
  this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;

  if (maxGraphExpansions < 1 && maxGraphExpansions != -1) {
    throw new IllegalArgumentException(
        "maxGraphExpansions must -1 (no limit) or > 0 (got: " + maxGraphExpansions + ")");
  }
  this.maxGraphExpansions = maxGraphExpansions;
  this.maxAnalyzedPathsForOneInput = maxAnalyzedPathsForOneInput;
  this.preservePositionIncrements = preservePositionIncrements;
  this.sepLabel = sepLabel;
  this.payloadSep = payloadSep;
  this.endByte = endByte;
  this.holeCharacter = holeCharacter;
}
Project: elasticsearch_my    File: XAnalyzingSuggester.java
@Override
public int compare(Pair<Long,BytesRef> left, Pair<Long,BytesRef> right) {
  return left.output1.compareTo(right.output1);
}
Project: elasticsearch_my    File: XAnalyzingSuggester.java
public FST<Pair<Long, BytesRef>> build() throws IOException {
    return builder.finish();
}