Java 类org.apache.lucene.util.IntsRefBuilder 实例源码

项目:lams    文件:NormalizeCharMap.java   
/** Builds the NormalizeCharMap; call this once you
 *  are done calling {@link #add}. */
public NormalizeCharMap build() {

  final FST<CharsRef> map;
  try {
    final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
    final IntsRefBuilder scratch = new IntsRefBuilder();
    for(Map.Entry<String,String> ent : pendingPairs.entrySet()) {
      builder.add(Util.toUTF16(ent.getKey(), scratch),
                  new CharsRef(ent.getValue()));
    }
    map = builder.finish();
    pendingPairs.clear();
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }

  return new NormalizeCharMap(map);
}
项目:lams    文件:Dictionary.java   
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  Map<String,String> mappings = new TreeMap<>();

  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    String parts[] = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }

  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(), scratchInts);
    builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }

  return builder.finish();
}
项目:lams    文件:StemmerOverrideFilter.java   
/**
 * Returns an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
 * @return an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
 * @throws IOException if an {@link IOException} occurs;
 */
public StemmerOverrideMap build() throws IOException {
  ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
  org.apache.lucene.util.fst.Builder<BytesRef> builder = new org.apache.lucene.util.fst.Builder<>(
      FST.INPUT_TYPE.BYTE4, outputs);
  final int[] sort = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
  IntsRefBuilder intsSpare = new IntsRefBuilder();
  final int size = hash.size();
  BytesRef spare = new BytesRef();
  for (int i = 0; i < size; i++) {
    int id = sort[i];
    BytesRef bytesRef = hash.get(id, spare);
    intsSpare.copyUTF8Bytes(bytesRef);
    builder.add(intsSpare.get(), new BytesRef(outputValues.get(id)));
  }
  return new StemmerOverrideMap(builder.finish(), ignoreCase);
}
项目:lams    文件:Util.java   
/** Adds all leaving arcs, including 'finished' arc, if
 *  the node is final, from this node into the queue.  */
public void addStartPaths(FST.Arc<T> node, T startOutput, boolean allowEmptyString, IntsRefBuilder input) throws IOException {

  // De-dup NO_OUTPUT since it must be a singleton:
  if (startOutput.equals(fst.outputs.getNoOutput())) {
    startOutput = fst.outputs.getNoOutput();
  }

  FSTPath<T> path = new FSTPath<>(startOutput, node, input);
  fst.readFirstTargetArc(node, path.arc, bytesReader);

  //System.out.println("add start paths");

  // Bootstrap: find the min starting arc
  while (true) {
    if (allowEmptyString || path.arc.label != FST.END_LABEL) {
      addIfCompetitive(path);
    }
    if (path.arc.isLast()) {
      break;
    }
    fst.readNextArc(path.arc, bytesReader);
  }
}
项目:search    文件:MemoryDocValuesConsumer.java   
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());
  PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
  Builder<Long> builder = new Builder<>(INPUT_TYPE.BYTE1, outputs);
  IntsRefBuilder scratch = new IntsRefBuilder();
  long ord = 0;
  for (BytesRef v : values) {
    builder.add(Util.toIntsRef(v, scratch), ord);
    ord++;
  }
  FST<Long> fst = builder.finish();
  if (fst != null) {
    fst.save(data);
  }
  meta.writeVLong(ord);
}
项目:search    文件:FSTCompletionBuilder.java   
/**
 * Builds the final automaton from a list of entries.
 */
private FST<Object> buildAutomaton(BytesRefSorter sorter) throws IOException {
  // Build the automaton.
  final Outputs<Object> outputs = NoOutputs.getSingleton();
  final Object empty = outputs.getNoOutput();
  final Builder<Object> builder = new Builder<>(
      FST.INPUT_TYPE.BYTE1, 0, 0, true, true, 
      shareMaxTailLength, outputs, false, 
      PackedInts.DEFAULT, true, 15);

  BytesRefBuilder scratch = new BytesRefBuilder();
  BytesRef entry;
  final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
  int count = 0;
  BytesRefIterator iter = sorter.iterator();
  while((entry = iter.next()) != null) {
    count++;
    if (scratch.get().compareTo(entry) != 0) {
      builder.add(Util.toIntsRef(entry, scratchIntsRef), empty);
      scratch.copyBytes(entry);
    }
  }

  return count == 0 ? null : builder.finish();
}
项目:search    文件:NormalizeCharMap.java   
/** Builds the NormalizeCharMap; call this once you
 *  are done calling {@link #add}. */
public NormalizeCharMap build() {

  final FST<CharsRef> map;
  try {
    final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
    final IntsRefBuilder scratch = new IntsRefBuilder();
    for(Map.Entry<String,String> ent : pendingPairs.entrySet()) {
      builder.add(Util.toUTF16(ent.getKey(), scratch),
                  new CharsRef(ent.getValue()));
    }
    map = builder.finish();
    pendingPairs.clear();
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }

  return new NormalizeCharMap(map);
}
项目:search    文件:Dictionary.java   
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  Map<String,String> mappings = new TreeMap<>();

  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    String parts[] = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }

  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(), scratchInts);
    builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }

  return builder.finish();
}
项目:search    文件:StemmerOverrideFilter.java   
/**
 * Returns an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
 * @return an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
 * @throws IOException if an {@link IOException} occurs;
 */
public StemmerOverrideMap build() throws IOException {
  ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
  org.apache.lucene.util.fst.Builder<BytesRef> builder = new org.apache.lucene.util.fst.Builder<>(
      FST.INPUT_TYPE.BYTE4, outputs);
  final int[] sort = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
  IntsRefBuilder intsSpare = new IntsRefBuilder();
  final int size = hash.size();
  BytesRef spare = new BytesRef();
  for (int i = 0; i < size; i++) {
    int id = sort[i];
    BytesRef bytesRef = hash.get(id, spare);
    intsSpare.copyUTF8Bytes(bytesRef);
    builder.add(intsSpare.get(), new BytesRef(outputValues.get(id)));
  }
  return new StemmerOverrideMap(builder.finish(), ignoreCase);
}
项目:search    文件:Lucene42DocValuesConsumer.java   
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());
  PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
  Builder<Long> builder = new Builder<>(INPUT_TYPE.BYTE1, outputs);
  IntsRefBuilder scratch = new IntsRefBuilder();
  long ord = 0;
  for (BytesRef v : values) {
    builder.add(Util.toIntsRef(v, scratch), ord);
    ord++;
  }
  FST<Long> fst = builder.finish();
  if (fst != null) {
    fst.save(data);
  }
  meta.writeVLong(ord);
}
项目:search    文件:Util.java   
/** Adds all leaving arcs, including 'finished' arc, if
 *  the node is final, from this node into the queue.  */
public void addStartPaths(FST.Arc<T> node, T startOutput, boolean allowEmptyString, IntsRefBuilder input) throws IOException {

  // De-dup NO_OUTPUT since it must be a singleton:
  if (startOutput.equals(fst.outputs.getNoOutput())) {
    startOutput = fst.outputs.getNoOutput();
  }

  FSTPath<T> path = new FSTPath<>(startOutput, node, input);
  fst.readFirstTargetArc(node, path.arc, bytesReader);

  //System.out.println("add start paths");

  // Bootstrap: find the min starting arc
  while (true) {
    if (allowEmptyString || path.arc.label != FST.END_LABEL) {
      addIfCompetitive(path);
    }
    if (path.arc.isLast()) {
      break;
    }
    fst.readNextArc(path.arc, bytesReader);
  }
}
项目:search    文件:TestFSTs.java   
public void testDuplicateFSAString() throws Exception {
  String str = "foobar";
  final Outputs<Object> outputs = NoOutputs.getSingleton();
  final Builder<Object> b = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
  IntsRefBuilder ints = new IntsRefBuilder();
  for(int i=0; i<10; i++) {
    b.add(Util.toIntsRef(new BytesRef(str), ints), outputs.getNoOutput());
  }
  FST<Object> fst = b.finish();

  // count the input paths
  int count = 0;
  final BytesRefFSTEnum<Object> fstEnum = new BytesRefFSTEnum<>(fst);
  while(fstEnum.next()!=null) {
    count++;
  }
  assertEquals(1, count);

  assertNotNull(Util.get(fst, new BytesRef(str)));
  assertNull(Util.get(fst, new BytesRef("foobaz")));
}
项目:search    文件:TestFSTs.java   
public void testInternalFinalState() throws Exception {
  final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
  final boolean willRewrite = random().nextBoolean();
  final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, willRewrite, PackedInts.DEFAULT, true, 15);
  builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRefBuilder()), outputs.getNoOutput());
  builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRefBuilder()), outputs.getNoOutput());
  final FST<Long> fst = builder.finish();
  StringWriter w = new StringWriter();
  //Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp/out.dot"));
  Util.toDot(fst, w, false, false);
  w.close();
  //System.out.println(w.toString());

  // check for accept state at label t
  assertTrue(w.toString().indexOf("[label=\"t\" style=\"bold\"") != -1);
  // check for accept state at label n
  assertTrue(w.toString().indexOf("[label=\"n\" style=\"bold\"") != -1);
}
项目:search    文件:TestFSTs.java   
public void testLargeOutputsOnArrayArcs() throws Exception {
  final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
  final Builder<BytesRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);

  final byte[] bytes = new byte[300];
  final IntsRefBuilder input = new IntsRefBuilder();
  input.append(0);
  final BytesRef output = new BytesRef(bytes);
  for(int arc=0;arc<6;arc++) {
    input.setIntAt(0, arc);
    output.bytes[0] = (byte) arc;
    builder.add(input.get(), BytesRef.deepCopyOf(output));
  }

  final FST<BytesRef> fst = builder.finish();
  for(int arc=0;arc<6;arc++) {
    input.setIntAt(0,  arc);
    final BytesRef result = Util.get(fst, input.get());
    assertNotNull(result);
    assertEquals(300, result.length);
    assertEquals(result.bytes[result.offset], arc);
    for(int byteIDX=1;byteIDX<result.length;byteIDX++) {
      assertEquals(0, result.bytes[result.offset+byteIDX]);
    }
  }
}
项目:elasticsearch-plugin-bundle    文件:FstDecompounder.java   
/**
 * Splits the input sequence of characters into separate words if this sequence is
 * potentially a compound word.
 *
 * @param word The word to be split.
 * @return Returns <code>null</code> if this word is not recognized at all. Returns a
 * character sequence with '.'-delimited compound chunks (if ambiguous
 * interpretations are possible, they are separated by a ',' character). The
 * returned buffer will change with each call to <code>split</code> so copy the
 * content if needed.
 */
public CharSequence split(CharSequence word) {
    try {
        StringBuilder builder = new StringBuilder();
        builder.append(word);
        builder.reverse();
        for (int i = builder.length(); --i > 0; ) {
            builder.setCharAt(i, Character.toLowerCase(builder.charAt(i)));
        }
        IntsRefBuilder utf32Builder = new IntsRefBuilder();
        IntsRef utf32 = fromUTF16ToUTF32(builder, utf32Builder).get();
        builder.setLength(0);
        Deque<Chunk> chunks = new ArrayDeque<>();
        matchWord(utf32, utf32.offset, builder, chunks);
        return builder.length() == 0 ? null : builder;
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
项目:read-open-source-code    文件:MemoryDocValuesConsumer.java   
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());
  PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
  Builder<Long> builder = new Builder<>(INPUT_TYPE.BYTE1, outputs);
  IntsRefBuilder scratch = new IntsRefBuilder();
  long ord = 0;
  for (BytesRef v : values) {
    builder.add(Util.toIntsRef(v, scratch), ord);
    ord++;
  }
  FST<Long> fst = builder.finish();
  if (fst != null) {
    fst.save(data);
  }
  meta.writeVLong(ord);
}
项目:read-open-source-code    文件:FSTCompletionBuilder.java   
/**
 * Builds the final automaton from a list of entries.
 */
private FST<Object> buildAutomaton(BytesRefSorter sorter) throws IOException {
  // Build the automaton.
  final Outputs<Object> outputs = NoOutputs.getSingleton();
  final Object empty = outputs.getNoOutput();
  final Builder<Object> builder = new Builder<>(
      FST.INPUT_TYPE.BYTE1, 0, 0, true, true, 
      shareMaxTailLength, outputs, false, 
      PackedInts.DEFAULT, true, 15);

  BytesRefBuilder scratch = new BytesRefBuilder();
  BytesRef entry;
  final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
  int count = 0;
  BytesRefIterator iter = sorter.iterator();
  while((entry = iter.next()) != null) {
    count++;
    if (scratch.get().compareTo(entry) != 0) {
      builder.add(Util.toIntsRef(entry, scratchIntsRef), empty);
      scratch.copyBytes(entry);
    }
  }

  return count == 0 ? null : builder.finish();
}
项目:read-open-source-code    文件:NormalizeCharMap.java   
/** Builds the NormalizeCharMap; call this once you
 *  are done calling {@link #add}. */
public NormalizeCharMap build() {

  final FST<CharsRef> map;
  try {
    final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
    final IntsRefBuilder scratch = new IntsRefBuilder();
    for(Map.Entry<String,String> ent : pendingPairs.entrySet()) {
      builder.add(Util.toUTF16(ent.getKey(), scratch),
                  new CharsRef(ent.getValue()));
    }
    map = builder.finish();
    pendingPairs.clear();
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }

  return new NormalizeCharMap(map);
}
项目:read-open-source-code    文件:Dictionary.java   
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  Map<String,String> mappings = new TreeMap<>();

  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    String parts[] = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }

  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(), scratchInts);
    builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }

  return builder.finish();
}
项目:read-open-source-code    文件:StemmerOverrideFilter.java   
/**
 * Returns an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
 * @return an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
 * @throws IOException if an {@link IOException} occurs;
 */
public StemmerOverrideMap build() throws IOException {
  ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
  org.apache.lucene.util.fst.Builder<BytesRef> builder = new org.apache.lucene.util.fst.Builder<>(
      FST.INPUT_TYPE.BYTE4, outputs);
  final int[] sort = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
  IntsRefBuilder intsSpare = new IntsRefBuilder();
  final int size = hash.size();
  BytesRef spare = new BytesRef();
  for (int i = 0; i < size; i++) {
    int id = sort[i];
    BytesRef bytesRef = hash.get(id, spare);
    intsSpare.copyUTF8Bytes(bytesRef);
    builder.add(intsSpare.get(), new BytesRef(outputValues.get(id)));
  }
  return new StemmerOverrideMap(builder.finish(), ignoreCase);
}
项目:read-open-source-code    文件:Util.java   
/** Adds all leaving arcs, including 'finished' arc, if
 *  the node is final, from this node into the queue.  */
public void addStartPaths(FST.Arc<T> node, T startOutput, boolean allowEmptyString, IntsRefBuilder input) throws IOException {

  // De-dup NO_OUTPUT since it must be a singleton:
  if (startOutput.equals(fst.outputs.getNoOutput())) {
    startOutput = fst.outputs.getNoOutput();
  }

  FSTPath<T> path = new FSTPath<>(startOutput, node, input);
  fst.readFirstTargetArc(node, path.arc, bytesReader);

  //System.out.println("add start paths");

  // Bootstrap: find the min starting arc
  while (true) {
    if (allowEmptyString || path.arc.label != FST.END_LABEL) {
      addIfCompetitive(path);
    }
    if (path.arc.isLast()) {
      break;
    }
    fst.readNextArc(path.arc, bytesReader);
  }
}
项目:lams    文件:BlockTreeTermsWriter.java   
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<BytesRef> subIndexEnum = new BytesRefFSTEnum<>(subIndex);
  BytesRefFSTEnum.InputOutput<BytesRef> indexEnt;
  while((indexEnt = subIndexEnum.next()) != null) {
    //if (DEBUG) {
    //  System.out.println("      add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
    //}
    builder.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), indexEnt.output);
  }
}
项目:lams    文件:Dictionary.java   
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
  IntSequenceOutputs outputs = IntSequenceOutputs.getSingleton();
  Builder<IntsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE4, outputs);
  IntsRefBuilder scratch = new IntsRefBuilder();
  for (Map.Entry<String,List<Integer>> entry : affixes.entrySet()) {
    Util.toUTF32(entry.getKey(), scratch);
    List<Integer> entries = entry.getValue();
    IntsRef output = new IntsRef(entries.size());
    for (Integer c : entries) {
      output.ints[output.length++] = c;
    }
    builder.add(scratch.get(), output);
  }
  return builder.finish();
}
项目:lams    文件:Util.java   
/** Starting from node, find the top N min cost 
 *  completions to a final node. */
public static <T> TopResults<T> shortestPaths(FST<T> fst, FST.Arc<T> fromNode, T startOutput, Comparator<T> comparator, int topN,
                                               boolean allowEmptyString) throws IOException {

  // All paths are kept, so we can pass topN for
  // maxQueueDepth and the pruning is admissible:
  TopNSearcher<T> searcher = new TopNSearcher<>(fst, topN, topN, comparator);

  // since this search is initialized with a single start node 
  // it is okay to start with an empty input path here
  searcher.addStartPaths(fromNode, startOutput, allowEmptyString, new IntsRefBuilder());
  return searcher.search();
}
项目:lams    文件:Util.java   
/** Just maps each UTF16 unit (char) to the ints in an
 *  IntsRef. */
public static IntsRef toUTF16(CharSequence s, IntsRefBuilder scratch) {
  final int charLimit = s.length();
  scratch.setLength(charLimit);
  scratch.grow(charLimit);
  for (int idx = 0; idx < charLimit; idx++) {
    scratch.setIntAt(idx, (int) s.charAt(idx));
  }
  return scratch.get();
}
项目:lams    文件:Util.java   
/** Decodes the Unicode codepoints from the provided
 *  CharSequence and places them in the provided scratch
 *  IntsRef, which must not be null, returning it. */
public static IntsRef toUTF32(CharSequence s, IntsRefBuilder scratch) {
  int charIdx = 0;
  int intIdx = 0;
  final int charLimit = s.length();
  while(charIdx < charLimit) {
    scratch.grow(intIdx+1);
    final int utf32 = Character.codePointAt(s, charIdx);
    scratch.setIntAt(intIdx, utf32);
    charIdx += Character.charCount(utf32);
    intIdx++;
  }
  scratch.setLength(intIdx);
  return scratch.get();
}
项目:lams    文件:Util.java   
/** Decodes the Unicode codepoints from the provided
 *  char[] and places them in the provided scratch
 *  IntsRef, which must not be null, returning it. */
public static IntsRef toUTF32(char[] s, int offset, int length, IntsRefBuilder scratch) {
  int charIdx = offset;
  int intIdx = 0;
  final int charLimit = offset + length;
  while(charIdx < charLimit) {
    scratch.grow(intIdx+1);
    final int utf32 = Character.codePointAt(s, charIdx, charLimit);
    scratch.setIntAt(intIdx, utf32);
    charIdx += Character.charCount(utf32);
    intIdx++;
  }
  scratch.setLength(intIdx);
  return scratch.get();
}
项目:lams    文件:Util.java   
/** Just takes unsigned byte values from the BytesRef and
 *  converts into an IntsRef. */
public static IntsRef toIntsRef(BytesRef input, IntsRefBuilder scratch) {
  scratch.clear();
  for(int i=0;i<input.length;i++) {
    scratch.append(input.bytes[i+input.offset] & 0xFF);
  }
  return scratch.get();
}
项目:search    文件:OrdsBlockTreeTermsWriter.java   
private void append(Builder<Output> builder, FST<Output> subIndex, long termOrdOffset, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<Output> subIndexEnum = new BytesRefFSTEnum<>(subIndex);
  BytesRefFSTEnum.InputOutput<Output> indexEnt;
  while ((indexEnt = subIndexEnum.next()) != null) {
    //if (DEBUG) {
    //  System.out.println("      add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
    //}
    Output output = indexEnt.output;
    long blockTermCount = output.endOrd - output.startOrd + 1;
    Output newOutput = FST_OUTPUTS.newOutput(output.bytes, termOrdOffset+output.startOrd, output.endOrd-termOrdOffset);
    //System.out.println("  append sub=" + indexEnt.input + " output=" + indexEnt.output + " termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount  + " newOutput=" + newOutput  + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd));
    builder.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), newOutput);
  }
}
项目:search    文件:VariableGapTermsIndexReader.java   
private void loadTermsIndex() throws IOException {
  if (fst == null) {
    IndexInput clone = in.clone();
    clone.seek(indexStart);
    fst = new FST<>(clone, fstOutputs);
    clone.close();

    /*
    final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
    Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
    Util.toDot(fst, w, false, false);
    System.out.println("FST INDEX: SAVED to " + dotFileName);
    w.close();
    */

    if (indexDivisor > 1) {
      // subsample
      final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
      final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
      final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
      BytesRefFSTEnum.InputOutput<Long> result;
      int count = indexDivisor;
      while((result = fstEnum.next()) != null) {
        if (count == indexDivisor) {
          builder.add(Util.toIntsRef(result.input, scratchIntsRef), result.output);
          count = 0;
        }
        count++;
      }
      fst = builder.finish();
    }
  }
}
项目:search    文件:FSTUtil.java   
/** Sole constructor. */
public Path(int state, FST.Arc<T> fstNode, T output, IntsRefBuilder input) {
  this.state = state;
  this.fstNode = fstNode;
  this.output = output;
  this.input = input;
}
项目:search    文件:TestFSTsMisc.java   
public void testListOfOutputs() throws Exception {
  PositiveIntOutputs _outputs = PositiveIntOutputs.getSingleton();
  ListOfOutputs<Long> outputs = new ListOfOutputs<>(_outputs);
  final Builder<Object> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);

  final IntsRefBuilder scratch = new IntsRefBuilder();
  // Add the same input more than once and the outputs
  // are merged:
  builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 1L);
  builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 3L);
  builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 0L);
  builder.add(Util.toIntsRef(new BytesRef("b"), scratch), 17L);
  final FST<Object> fst = builder.finish();

  Object output = Util.get(fst, new BytesRef("a"));
  assertNotNull(output);
  List<Long> outputList = outputs.asList(output);
  assertEquals(3, outputList.size());
  assertEquals(1L, outputList.get(0).longValue());
  assertEquals(3L, outputList.get(1).longValue());
  assertEquals(0L, outputList.get(2).longValue());

  output = Util.get(fst, new BytesRef("b"));
  assertNotNull(output);
  outputList = outputs.asList(output);
  assertEquals(1, outputList.size());
  assertEquals(17L, outputList.get(0).longValue());
}
项目:search    文件:VersionBlockTreeTermsWriter.java   
private void append(Builder<Pair<BytesRef,Long>> builder, FST<Pair<BytesRef,Long>> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<Pair<BytesRef,Long>> subIndexEnum = new BytesRefFSTEnum<>(subIndex);
  BytesRefFSTEnum.InputOutput<Pair<BytesRef,Long>> indexEnt;
  while((indexEnt = subIndexEnum.next()) != null) {
    //if (DEBUG) {
    //  System.out.println("      add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
    //}
    builder.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), indexEnt.output);
  }
}
项目:search    文件:Dictionary.java   
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
  IntSequenceOutputs outputs = IntSequenceOutputs.getSingleton();
  Builder<IntsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE4, outputs);
  IntsRefBuilder scratch = new IntsRefBuilder();
  for (Map.Entry<String,List<Integer>> entry : affixes.entrySet()) {
    Util.toUTF32(entry.getKey(), scratch);
    List<Integer> entries = entry.getValue();
    IntsRef output = new IntsRef(entries.size());
    for (Integer c : entries) {
      output.ints[output.length++] = c;
    }
    builder.add(scratch.get(), output);
  }
  return builder.finish();
}
项目:search    文件:AutomatonTestUtil.java   
/**
 * Returns the strings that can be produced from the given state, or
 * false if more than <code>limit</code> strings are found. 
 * <code>limit</code>&lt;0 means "infinite".
 */
private static boolean getFiniteStrings(Automaton a, int s, HashSet<Integer> pathstates, 
    HashSet<IntsRef> strings, IntsRefBuilder path, int limit) {
  pathstates.add(s);
  Transition t = new Transition();
  int count = a.initTransition(s, t);
  for (int i=0;i<count;i++) {
    a.getNextTransition(t);
    if (pathstates.contains(t.dest)) {
      return false;
    }
    for (int n = t.min; n <= t.max; n++) {
      path.append(n);
      if (a.isAccept(t.dest)) {
        strings.add(path.toIntsRef());
        if (limit >= 0 && strings.size() > limit) {
          return false;
        }
      }
      if (!getFiniteStrings(a, t.dest, pathstates, strings, path, limit)) {
        return false;
      }
      path.setLength(path.length() - 1);
    }
  }
  pathstates.remove(s);
  return true;
}
项目:search    文件:FSTTester.java   
static IntsRef toIntsRef(String s, int inputMode, IntsRefBuilder ir) {
  if (inputMode == 0) {
    // utf8
    return toIntsRef(new BytesRef(s), ir);
  } else {
    // utf32
    return toIntsRefUTF32(s, ir);
  }
}
项目:search    文件:FSTTester.java   
static IntsRef toIntsRefUTF32(String s, IntsRefBuilder ir) {
  final int charLength = s.length();
  int charIdx = 0;
  int intIdx = 0;
  ir.clear();
  while(charIdx < charLength) {
    ir.grow(intIdx+1);
    final int utf32 = s.codePointAt(charIdx);
    ir.append(utf32);
    charIdx += Character.charCount(utf32);
    intIdx++;
  }
  return ir.get();
}
项目:search    文件:FSTTester.java   
static IntsRef toIntsRef(BytesRef br, IntsRefBuilder ir) {
  ir.grow(br.length);
  ir.clear();
  for(int i=0;i<br.length;i++) {
    ir.append(br.bytes[br.offset+i]&0xFF);
  }
  return ir.get();
}
项目:search    文件:FSTTester.java   
private T randomAcceptedWord(FST<T> fst, IntsRefBuilder in) throws IOException {
  FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());

  final List<FST.Arc<T>> arcs = new ArrayList<>();
  in.clear();
  final T NO_OUTPUT = fst.outputs.getNoOutput();
  T output = NO_OUTPUT;
  final FST.BytesReader fstReader = fst.getBytesReader();

  while(true) {
    // read all arcs:
    fst.readFirstTargetArc(arc, arc, fstReader);
    arcs.add(new FST.Arc<T>().copyFrom(arc));
    while(!arc.isLast()) {
      fst.readNextArc(arc, fstReader);
      arcs.add(new FST.Arc<T>().copyFrom(arc));
    }

    // pick one
    arc = arcs.get(random.nextInt(arcs.size()));
    arcs.clear();

    // accumulate output
    output = fst.outputs.add(output, arc.output);

    // append label
    if (arc.label == FST.END_LABEL) {
      break;
    }

    in.append(arc.label);
  }

  return output;
}
项目:search    文件:BlockTreeTermsWriter.java   
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<BytesRef> subIndexEnum = new BytesRefFSTEnum<>(subIndex);
  BytesRefFSTEnum.InputOutput<BytesRef> indexEnt;
  while((indexEnt = subIndexEnum.next()) != null) {
    //if (DEBUG) {
    //  System.out.println("      add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
    //}
    builder.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), indexEnt.output);
  }
}