private int[] topoSortStates(Automaton a) { int[] states = new int[a.getNumStates()]; final Set<Integer> visited = new HashSet<>(); final LinkedList<Integer> worklist = new LinkedList<>(); worklist.add(0); visited.add(0); int upto = 0; states[upto] = 0; upto++; Transition t = new Transition(); while (worklist.size() > 0) { int s = worklist.removeFirst(); int count = a.initTransition(s, t); for (int i=0;i<count;i++) { a.getNextTransition(t); if (!visited.contains(t.dest)) { visited.add(t.dest); worklist.add(t.dest); states[upto++] = t.dest; } } } return states; }
@Override public void setUp() throws Exception { super.setUp(); // build an automaton matching this jvm's letter definition State initial = new State(); State accept = new State(); accept.setAccept(true); for (int i = 0; i <= 0x10FFFF; i++) { if (Character.isLetter(i)) { initial.addTransition(new Transition(i, i, accept)); } } Automaton single = new Automaton(initial); single.reduce(); Automaton repeat = BasicOperations.repeat(single); jvmLetter = new CharacterRunAutomaton(repeat); }
private void copyDestTransitions(State from, State to, List<Transition> transitions) { if (to.isAccept()) { from.setAccept(true); } for(Transition t : to.getTransitions()) { transitions.add(t); } }
private static void addHoles(State startState, RollingBuffer<Position> positions, int pos) { Position posData = positions.get(pos); Position prevPosData = positions.get(pos-1); while(posData.arriving == null || prevPosData.leaving == null) { if (posData.arriving == null) { posData.arriving = new State(); posData.arriving.addTransition(new Transition(POS_SEP, posData.leaving)); } if (prevPosData.leaving == null) { if (pos == 1) { prevPosData.leaving = startState; } else { prevPosData.leaving = new State(); } if (prevPosData.arriving != null) { prevPosData.arriving.addTransition(new Transition(POS_SEP, prevPosData.leaving)); } } prevPosData.leaving.addTransition(new Transition(HOLE, posData.arriving)); pos--; if (pos <= 0) { break; } posData = prevPosData; prevPosData = positions.get(pos-1); } }
/** * Call this once you are done adding states/transitions. * @param maxDeterminizedStates Maximum number of states created when * determinizing the automaton. Higher numbers allow this operation to * consume more memory but allow more complex automatons. */ public void finish(int maxDeterminizedStates) { Automaton automaton = builder.finish(); // System.out.println("before det:\n" + automaton.toDot()); Transition t = new Transition(); // TODO: should we add "eps back to initial node" for all states, // and det that? then we don't need to revisit initial node at // every position? but automaton could blow up? And, this makes it // harder to skip useless positions at search time? if (anyTermID != -1) { // Make sure there are no leading or trailing ANY: int count = automaton.initTransition(0, t); for(int i=0;i<count;i++) { automaton.getNextTransition(t); if (anyTermID >= t.min && anyTermID <= t.max) { throw new IllegalStateException("automaton cannot lead with an ANY transition"); } } int numStates = automaton.getNumStates(); for(int i=0;i<numStates;i++) { count = automaton.initTransition(i, t); for(int j=0;j<count;j++) { automaton.getNextTransition(t); if (automaton.isAccept(t.dest) && anyTermID >= t.min && anyTermID <= t.max) { throw new IllegalStateException("automaton cannot end with an ANY transition"); } } } int termCount = termToID.size(); // We have to carefully translate these transitions so automaton // realizes they also match all other terms: Automaton newAutomaton = new Automaton(); for(int i=0;i<numStates;i++) { newAutomaton.createState(); newAutomaton.setAccept(i, automaton.isAccept(i)); } for(int i=0;i<numStates;i++) { count = automaton.initTransition(i, t); for(int j=0;j<count;j++) { automaton.getNextTransition(t); int min, max; if (t.min <= anyTermID && anyTermID <= t.max) { // Match any term min = 0; max = termCount-1; } else { min = t.min; max = t.max; } newAutomaton.addTransition(t.source, t.dest, min, max); } } newAutomaton.finishState(); automaton = newAutomaton; } det = Operations.removeDeadStates(Operations.determinize(automaton, maxDeterminizedStates)); }
/** * Returns the next String in lexicographic order that will not put * the machine into a reject state. * * This method traverses the DFA from the given position in the String, * starting at the given state. * * If this cannot satisfy the machine, returns false. This method will * walk the minimal path, in lexicographic order, as long as possible. * * If this method returns false, then there might still be more solutions, * it is necessary to backtrack to find out. * * @param state current non-reject state * @param position useful portion of the string * @return true if more possible solutions exist for the DFA from this * position */ private boolean nextString(int state, int position) { /* * the next lexicographic character must be greater than the existing * character, if it exists. */ int c = 0; if (position < seekBytesRef.length) { c = seekBytesRef.bytes[position] & 0xff; // if the next byte is 0xff and is not part of the useful portion, // then by definition it puts us in a reject state, and therefore this // path is dead. there cannot be any higher transitions. backtrack. if (c++ == 0xff) return false; } seekBytesRef.length = position; visited[state] = curGen; Transition transitions[] = allTransitions[state]; // find the minimal path (lexicographic order) that is >= c for (int i = 0; i < transitions.length; i++) { Transition transition = transitions[i]; if (transition.getMax() >= c) { int nextChar = Math.max(c, transition.getMin()); // append either the next sequential char, or the minimum transition seekBytesRef.grow(seekBytesRef.length + 1); seekBytesRef.length++; seekBytesRef.bytes[seekBytesRef.length - 1] = (byte) nextChar; state = transition.getDest().getNumber(); /* * as long as is possible, continue down the minimal path in * lexicographic order. if a loop or accept state is encountered, stop. */ while (visited[state] != curGen && !runAutomaton.isAccept(state)) { visited[state] = curGen; /* * Note: we work with a DFA with no transitions to dead states. * so the below is ok, if it is not an accept state, * then there MUST be at least one transition. */ transition = allTransitions[state][0]; state = transition.getDest().getNumber(); // append the minimum transition seekBytesRef.grow(seekBytesRef.length + 1); seekBytesRef.length++; seekBytesRef.bytes[seekBytesRef.length - 1] = (byte) transition.getMin(); // we found a loop, record it for faster enumeration if (!finite && !linear && visited[state] == curGen) { setLinear(seekBytesRef.length-1); } } return true; } } return false; }