/**
 * Looks up the first {@code bufferLen} chars of {@code buffer} in the FST dictionary.
 * The key is consumed one Unicode code point at a time; when {@code ignoreCase} is set,
 * each code point is lower-cased before it is matched against the FST.
 *
 * @param buffer     chars holding the key
 * @param bufferLen  number of chars of {@code buffer} that make up the key
 * @param scratchArc reusable arc; overwritten with the arc reached at every step
 * @param fstReader  reader used to traverse the FST
 * @return the value mapped to the key, or <code>null</code> if the key is not in the FST dictionary
 * @throws IOException if traversing the FST fails
 */
public BytesRef get(char[] buffer, int bufferLen, Arc<BytesRef> scratchArc, BytesReader fstReader) throws IOException {
  BytesRef accumulated = fst.outputs.getNoOutput();
  fst.getFirstArc(scratchArc);

  for (int pos = 0; pos < bufferLen; ) {
    final int codePoint = Character.codePointAt(buffer, pos, bufferLen);
    final int label = ignoreCase ? Character.toLowerCase(codePoint) : codePoint;
    if (fst.findTargetArc(label, scratchArc, scratchArc, fstReader) == null) {
      // No arc for this code point: the key is not in the dictionary.
      return null;
    }
    accumulated = fst.outputs.add(accumulated, scratchArc.output);
    // Advance by one or two chars depending on whether this was a surrogate pair.
    pos += Character.charCount(codePoint);
  }

  if (!scratchArc.isFinal()) {
    // The whole key was consumed but it does not end on an accepting arc.
    return null;
  }
  return fst.outputs.add(accumulated, scratchArc.nextFinalOutput);
}
/** Looks up the output for this input, or null if the * input is not accepted. */ public static<T> T get(FST<T> fst, IntsRef input) throws IOException { // TODO: would be nice not to alloc this on every lookup final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>()); final BytesReader fstReader = fst.getBytesReader(); // Accumulate output as we go T output = fst.outputs.getNoOutput(); for(int i=0;i<input.length;i++) { if (fst.findTargetArc(input.ints[input.offset + i], arc, arc, fstReader) == null) { return null; } output = fst.outputs.add(output, arc.output); } if (arc.isFinal()) { return fst.outputs.add(output, arc.nextFinalOutput); } else { return null; } }
/** Adds all leaving arcs, including 'finished' arc, if * the node is final, from this node into the queue. */ public void addStartPaths(FST.Arc<T> node, T startOutput, boolean allowEmptyString, IntsRefBuilder input) throws IOException { // De-dup NO_OUTPUT since it must be a singleton: if (startOutput.equals(fst.outputs.getNoOutput())) { startOutput = fst.outputs.getNoOutput(); } FSTPath<T> path = new FSTPath<>(startOutput, node, input); fst.readFirstTargetArc(node, path.arc, bytesReader); //System.out.println("add start paths"); // Bootstrap: find the min starting arc while (true) { if (allowEmptyString || path.arc.label != FST.END_LABEL) { addIfCompetitive(path); } if (path.arc.isLast()) { break; } fst.readNextArc(path.arc, bytesReader); } }
/**
 * Follows the bytes of {@code scratch} through {@code fst}, summing the arc outputs on the way.
 * {@code arc} is reused as both the source and the destination of every step, so on a
 * successful return it holds the last arc that was followed.
 *
 * @param fst         the FST to traverse
 * @param bytesReader reader used to traverse {@code fst}
 * @param scratch     the bytes to follow ({@code offset}/{@code length} are honoured)
 * @param arc         reusable arc, overwritten during the traversal
 * @return the accumulated output, or null if some byte has no matching arc
 */
private Long lookupPrefix(FST<Long> fst, FST.BytesReader bytesReader, BytesRef scratch, Arc<Long> arc) throws /*Bogus*/IOException {
  Long sum = fst.outputs.getNoOutput();
  fst.getFirstArc(arc);

  final byte[] data = scratch.bytes;
  final int limit = scratch.offset + scratch.length;
  for (int i = scratch.offset; i < limit; i++) {
    // Mask to treat the byte as an unsigned label.
    final int label = data[i] & 0xff;
    if (fst.findTargetArc(label, arc, arc, bytesReader) == null) {
      return null;
    }
    sum = fst.outputs.add(sum, arc.output);
  }
  return sum;
}
/**
 * Follows the bytes of {@code scratch} through the FST, summing the arc outputs on the way.
 * {@code arc} is reused as both the source and the destination of every step, so on a
 * successful return it holds the last arc that was followed.
 *
 * @param scratch the bytes to follow ({@code offset}/{@code length} are honoured)
 * @param arc     reusable arc, overwritten during the traversal
 * @return the accumulated output, or null if some byte has no matching arc
 */
private Long lookupPrefix(BytesRef scratch, Arc<Long> arc) throws /*Bogus*/IOException {
  // Summing in a primitive starting from 0 is only valid if "no output" is 0.
  assert 0 == fst.outputs.getNoOutput().longValue();

  final BytesReader reader = fst.getBytesReader();
  fst.getFirstArc(arc);

  final byte[] data = scratch.bytes;
  final int limit = scratch.offset + scratch.length;
  long total = 0;
  for (int i = scratch.offset; i < limit; i++) {
    // Mask to treat the byte as an unsigned label.
    final int label = data[i] & 0xff;
    if (fst.findTargetArc(label, arc, arc, reader) == null) {
      return null;
    }
    total += arc.output.longValue();
  }
  return total;
}
/**
 * Returns the weight associated with an input string,
 * or null if it does not exist.
 *
 * @param key the input string to look up
 * @return the decoded weight as an {@code Integer}, or null when there is no FST,
 *         the key cannot be followed, or it does not end on a final arc
 */
public Object get(CharSequence key) {
  if (fst == null) {
    return null;
  }

  final Arc<Long> arc = new Arc<>();
  Long prefixOutput;
  try {
    prefixOutput = lookupPrefix(new BytesRef(key), arc);
  } catch (IOException bogus) {
    throw new RuntimeException(bogus);
  }

  if (prefixOutput != null && arc.isFinal()) {
    // The stored weight is the path output plus the final output of the last arc.
    return Integer.valueOf(decodeWeight(prefixOutput + arc.nextFinalOutput));
  }
  return null;
}
/** * Cache the root node's output arcs starting with completions with the * highest weights. */ @SuppressWarnings({"unchecked","rawtypes"}) private static Arc<Object>[] cacheRootArcs(FST<Object> automaton) { try { List<Arc<Object>> rootArcs = new ArrayList<>(); Arc<Object> arc = automaton.getFirstArc(new Arc<>()); FST.BytesReader fstReader = automaton.getBytesReader(); automaton.readFirstTargetArc(arc, arc, fstReader); while (true) { rootArcs.add(new Arc<>().copyFrom(arc)); if (arc.isLast()) break; automaton.readNextArc(arc, fstReader); } Collections.reverse(rootArcs); // we want highest weights first. return rootArcs.toArray(new Arc[rootArcs.size()]); } catch (IOException e) { throw new RuntimeException(e); } }
/**
 * Asserts the expected shape of the first two arcs leaving the root of {@code fst}:
 * the start arc carries no output, the first target arc is labelled 'a', is final and has
 * a final output of 17, and the next arc is labelled 'b', is not final and has an output of 42.
 *
 * @param fst     the FST under test
 * @param outputs the outputs implementation, used to obtain the "no output" value
 */
private void checkStopNodes(FST<Long> fst, PositiveIntOutputs outputs) throws Exception {
  final Long noOutput = outputs.getNoOutput();

  // The start arc itself must carry nothing.
  final FST.Arc<Long> root = fst.getFirstArc(new FST.Arc<Long>());
  assertEquals(noOutput, root.output);
  assertEquals(noOutput, root.nextFinalOutput);

  // First outgoing arc: 'a', accepting, final output 17.
  final FST.Arc<Long> arcA = fst.readFirstTargetArc(root, new FST.Arc<Long>(), fst.getBytesReader());
  assertEquals('a', arcA.label);
  assertEquals(17, arcA.nextFinalOutput.longValue());
  assertTrue(arcA.isFinal());

  // Second outgoing arc: 'b', not accepting, output 42.
  final FST.Arc<Long> arcB = fst.readNextArc(arcA, fst.getBytesReader());
  assertEquals('b', arcB.label);
  assertFalse(arcB.isFinal());
  assertEquals(42, arcB.output.longValue());
}
/**
 * Creates the filter over {@code input} and prepares the FST traversal state taken
 * from {@code args.wordSet}.
 *
 * @param input the upstream token stream
 * @param args  filter arguments; {@code args.wordSet} supplies the FST and the word list
 */
protected CcWordsFilter(TokenStream input, CcArgs args) {
  super(input);
  this.args = args;

  // FST, its reader and the word list all come from the configured word set.
  this.fst = args.wordSet.fst;
  this.fstReader = args.wordSet.fst.getBytesReader();
  this.fstWords = args.wordSet.words;

  // Start arc of the FST, read once here.
  this.fstFirstArc = new FST.Arc<>();
  this.fst.getFirstArc(fstFirstArc);

  // Reusable scratch objects.
  this.scratchWordBytesRef = new BytesRef();
  this.scratchArc = new FST.Arc<>();
  this.scratchArcOfSep = new FST.Arc<>();
  this.scatchArcOfEnd = new FST.Arc<>();

  this.pendingOutputs = new LinkedList<>();
}
/**
 * Returns the weight associated with an input string,
 * or null if it does not exist.
 *
 * @param key the input string to look up
 * @return the decoded weight as an {@code Integer}, or null when there is no FST,
 *         the key cannot be followed, or it does not end on a final arc
 */
public Object get(CharSequence key) {
  if (fst == null) {
    return null;
  }

  final Arc<Long> lastArc = new Arc<Long>();
  Long pathOutput;
  try {
    pathOutput = lookupPrefix(new BytesRef(key), lastArc);
  } catch (IOException bogus) {
    throw new RuntimeException(bogus);
  }

  final boolean accepted = pathOutput != null && lastArc.isFinal();
  if (!accepted) {
    return null;
  }
  // The stored weight is the path output plus the final output of the last arc.
  return Integer.valueOf(decodeWeight(pathOutput + lastArc.nextFinalOutput));
}
/** * Cache the root node's output arcs starting with completions with the * highest weights. */ @SuppressWarnings({"unchecked","rawtypes"}) private static Arc<Object>[] cacheRootArcs(FST<Object> automaton) { try { List<Arc<Object>> rootArcs = new ArrayList<Arc<Object>>(); Arc<Object> arc = automaton.getFirstArc(new Arc<Object>()); FST.BytesReader fstReader = automaton.getBytesReader(); automaton.readFirstTargetArc(arc, arc, fstReader); while (true) { rootArcs.add(new Arc<Object>().copyFrom(arc)); if (arc.isLast()) break; automaton.readNextArc(arc, fstReader); } Collections.reverse(rootArcs); // we want highest weights first. return rootArcs.toArray(new Arc[rootArcs.size()]); } catch (IOException e) { throw new RuntimeException(e); } }
/** Adds all leaving arcs, including 'finished' arc, if * the node is final, from this node into the queue. */ public void addStartPaths(FST.Arc<T> node, T startOutput, boolean allowEmptyString, IntsRef input) throws IOException { // De-dup NO_OUTPUT since it must be a singleton: if (startOutput.equals(fst.outputs.getNoOutput())) { startOutput = fst.outputs.getNoOutput(); } FSTPath<T> path = new FSTPath<T>(startOutput, node, input); fst.readFirstTargetArc(node, path.arc, bytesReader); //System.out.println("add start paths"); // Bootstrap: find the min starting arc while (true) { if (allowEmptyString || path.arc.label != FST.END_LABEL) { addIfCompetitive(path); } if (path.arc.isLast()) { break; } fst.readNextArc(path.arc, bytesReader); } }