Java 类org.apache.lucene.util.automaton.Transition 实例源码

项目:elasticsearch_my    文件:XAnalyzingSuggester.java   
private int[] topoSortStates(Automaton a) {
  int[] states = new int[a.getNumStates()];
  final Set<Integer> visited = new HashSet<>();
  final LinkedList<Integer> worklist = new LinkedList<>();
  worklist.add(0);
  visited.add(0);
  int upto = 0;
  states[upto] = 0;
  upto++;
  Transition t = new Transition();
  while (worklist.size() > 0) {
    int s = worklist.removeFirst();
    int count = a.initTransition(s, t);
    for (int i=0;i<count;i++) {
      a.getNextTransition(t);
      if (!visited.contains(t.dest)) {
        visited.add(t.dest);
        worklist.add(t.dest);
        states[upto++] = t.dest;
      }
    }
  }
  return states;
}
项目:Elasticsearch    文件:XAnalyzingSuggester.java   
private int[] topoSortStates(Automaton a) {
  int[] states = new int[a.getNumStates()];
  final Set<Integer> visited = new HashSet<>();
  final LinkedList<Integer> worklist = new LinkedList<>();
  worklist.add(0);
  visited.add(0);
  int upto = 0;
  states[upto] = 0;
  upto++;
  Transition t = new Transition();
  while (worklist.size() > 0) {
    int s = worklist.removeFirst();
    int count = a.initTransition(s, t);
    for (int i=0;i<count;i++) {
      a.getNextTransition(t);
      if (!visited.contains(t.dest)) {
        visited.add(t.dest);
        worklist.add(t.dest);
        states[upto++] = t.dest;
      }
    }
  }
  return states;
}
项目:search    文件:AnalyzingSuggester.java   
private int[] topoSortStates(Automaton a) {
  int[] states = new int[a.getNumStates()];
  final Set<Integer> visited = new HashSet<>();
  final LinkedList<Integer> worklist = new LinkedList<>();
  worklist.add(0);
  visited.add(0);
  int upto = 0;
  states[upto] = 0;
  upto++;
  Transition t = new Transition();
  while (worklist.size() > 0) {
    int s = worklist.removeFirst();
    int count = a.initTransition(s, t);
    for (int i=0;i<count;i++) {
      a.getNextTransition(t);
      if (!visited.contains(t.dest)) {
        visited.add(t.dest);
        worklist.add(t.dest);
        states[upto++] = t.dest;
      }
    }
  }
  return states;
}
项目:NYBC    文件:TestDuelingAnalyzers.java   
@Override
public void setUp() throws Exception {
  super.setUp();
  // build an automaton matching this jvm's letter definition
  State initial = new State();
  State accept = new State();
  accept.setAccept(true);
  for (int i = 0; i <= 0x10FFFF; i++) {
    if (Character.isLetter(i)) {
      initial.addTransition(new Transition(i, i, accept));
    }
  }
  Automaton single = new Automaton(initial);
  single.reduce();
  Automaton repeat = BasicOperations.repeat(single);
  jvmLetter = new CharacterRunAutomaton(repeat);
}
项目:read-open-source-code    文件:AnalyzingSuggester.java   
private int[] topoSortStates(Automaton a) {
  int[] states = new int[a.getNumStates()];
  final Set<Integer> visited = new HashSet<>();
  final LinkedList<Integer> worklist = new LinkedList<>();
  worklist.add(0);
  visited.add(0);
  int upto = 0;
  states[upto] = 0;
  upto++;
  Transition t = new Transition();
  while (worklist.size() > 0) {
    int s = worklist.removeFirst();
    int count = a.initTransition(s, t);
    for (int i=0;i<count;i++) {
      a.getNextTransition(t);
      if (!visited.contains(t.dest)) {
        visited.add(t.dest);
        worklist.add(t.dest);
        states[upto++] = t.dest;
      }
    }
  }
  return states;
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestDuelingAnalyzers.java   
@Override
public void setUp() throws Exception {
  super.setUp();
  // build an automaton matching this jvm's letter definition
  State initial = new State();
  State accept = new State();
  accept.setAccept(true);
  for (int i = 0; i <= 0x10FFFF; i++) {
    if (Character.isLetter(i)) {
      initial.addTransition(new Transition(i, i, accept));
    }
  }
  Automaton single = new Automaton(initial);
  single.reduce();
  Automaton repeat = BasicOperations.repeat(single);
  jvmLetter = new CharacterRunAutomaton(repeat);
}
项目:NYBC    文件:AnalyzingSuggester.java   
private void copyDestTransitions(State from, State to, List<Transition> transitions) {
  if (to.isAccept()) {
    from.setAccept(true);
  }
  for(Transition t : to.getTransitions()) {
    transitions.add(t);
  }
}
项目:NYBC    文件:TokenStreamToAutomaton.java   
private static void addHoles(State startState, RollingBuffer<Position> positions, int pos) {
  Position posData = positions.get(pos);
  Position prevPosData = positions.get(pos-1);

  while(posData.arriving == null || prevPosData.leaving == null) {
    if (posData.arriving == null) {
      posData.arriving = new State();
      posData.arriving.addTransition(new Transition(POS_SEP, posData.leaving));
    }
    if (prevPosData.leaving == null) {
      if (pos == 1) {
        prevPosData.leaving = startState;
      } else {
        prevPosData.leaving = new State();
      }
      if (prevPosData.arriving != null) {
        prevPosData.arriving.addTransition(new Transition(POS_SEP, prevPosData.leaving));
      }
    }
    prevPosData.leaving.addTransition(new Transition(HOLE, posData.arriving));
    pos--;
    if (pos <= 0) {
      break;
    }
    posData = prevPosData;
    prevPosData = positions.get(pos-1);
  }
}
项目:read-open-source-code    文件:AnalyzingSuggester.java   
private void copyDestTransitions(State from, State to, List<Transition> transitions) {
  if (to.isAccept()) {
    from.setAccept(true);
  }
  for(Transition t : to.getTransitions()) {
    transitions.add(t);
  }
}
项目:read-open-source-code    文件:TokenStreamToAutomaton.java   
private static void addHoles(State startState, RollingBuffer<Position> positions, int pos) {
  Position posData = positions.get(pos);
  Position prevPosData = positions.get(pos-1);

  while(posData.arriving == null || prevPosData.leaving == null) {
    if (posData.arriving == null) {
      posData.arriving = new State();
      posData.arriving.addTransition(new Transition(POS_SEP, posData.leaving));
    }
    if (prevPosData.leaving == null) {
      if (pos == 1) {
        prevPosData.leaving = startState;
      } else {
        prevPosData.leaving = new State();
      }
      if (prevPosData.arriving != null) {
        prevPosData.arriving.addTransition(new Transition(POS_SEP, prevPosData.leaving));
      }
    }
    prevPosData.leaving.addTransition(new Transition(HOLE, posData.arriving));
    pos--;
    if (pos <= 0) {
      break;
    }
    posData = prevPosData;
    prevPosData = positions.get(pos-1);
  }
}
项目:read-open-source-code    文件:AnalyzingSuggester.java   
private void copyDestTransitions(State from, State to, List<Transition> transitions) {
  if (to.isAccept()) {
    from.setAccept(true);
  }
  for(Transition t : to.getTransitions()) {
    transitions.add(t);
  }
}
项目:read-open-source-code    文件:TokenStreamToAutomaton.java   
private static void addHoles(State startState, RollingBuffer<Position> positions, int pos) {
  Position posData = positions.get(pos);
  Position prevPosData = positions.get(pos-1);

  while(posData.arriving == null || prevPosData.leaving == null) {
    if (posData.arriving == null) {
      posData.arriving = new State();
      posData.arriving.addTransition(new Transition(POS_SEP, posData.leaving));
    }
    if (prevPosData.leaving == null) {
      if (pos == 1) {
        prevPosData.leaving = startState;
      } else {
        prevPosData.leaving = new State();
      }
      if (prevPosData.arriving != null) {
        prevPosData.arriving.addTransition(new Transition(POS_SEP, prevPosData.leaving));
      }
    }
    prevPosData.leaving.addTransition(new Transition(HOLE, posData.arriving));
    pos--;
    if (pos <= 0) {
      break;
    }
    posData = prevPosData;
    prevPosData = positions.get(pos-1);
  }
}
项目:Maskana-Gestor-de-Conocimiento    文件:AnalyzingSuggester.java   
private void copyDestTransitions(State from, State to, List<Transition> transitions) {
  if (to.isAccept()) {
    from.setAccept(true);
  }
  for(Transition t : to.getTransitions()) {
    transitions.add(t);
  }
}
项目:Maskana-Gestor-de-Conocimiento    文件:TokenStreamToAutomaton.java   
private static void addHoles(State startState, RollingBuffer<Position> positions, int pos) {
  Position posData = positions.get(pos);
  Position prevPosData = positions.get(pos-1);

  while(posData.arriving == null || prevPosData.leaving == null) {
    if (posData.arriving == null) {
      posData.arriving = new State();
      posData.arriving.addTransition(new Transition(POS_SEP, posData.leaving));
    }
    if (prevPosData.leaving == null) {
      if (pos == 1) {
        prevPosData.leaving = startState;
      } else {
        prevPosData.leaving = new State();
      }
      if (prevPosData.arriving != null) {
        prevPosData.arriving.addTransition(new Transition(POS_SEP, prevPosData.leaving));
      }
    }
    prevPosData.leaving.addTransition(new Transition(HOLE, posData.arriving));
    pos--;
    if (pos <= 0) {
      break;
    }
    posData = prevPosData;
    prevPosData = positions.get(pos-1);
  }
}
项目:search    文件:TermAutomatonQuery.java   
/**
 * Call this once you are done adding states/transitions.
 * @param maxDeterminizedStates Maximum number of states created when
 *   determinizing the automaton.  Higher numbers allow this operation to
 *   consume more memory but allow more complex automatons.
 */
public void finish(int maxDeterminizedStates) {
  Automaton automaton = builder.finish();

  // System.out.println("before det:\n" + automaton.toDot());

  Transition t = new Transition();

  // TODO: should we add "eps back to initial node" for all states,
  // and det that?  then we don't need to revisit initial node at
  // every position?  but automaton could blow up?  And, this makes it
  // harder to skip useless positions at search time?

  if (anyTermID != -1) {

    // Make sure there are no leading or trailing ANY:
    int count = automaton.initTransition(0, t);
    for(int i=0;i<count;i++) {
      automaton.getNextTransition(t);
      if (anyTermID >= t.min && anyTermID <= t.max) {
        throw new IllegalStateException("automaton cannot lead with an ANY transition");
      }
    }

    int numStates = automaton.getNumStates();
    for(int i=0;i<numStates;i++) {
      count = automaton.initTransition(i, t);
      for(int j=0;j<count;j++) {
        automaton.getNextTransition(t);
        if (automaton.isAccept(t.dest) && anyTermID >= t.min && anyTermID <= t.max) {
          throw new IllegalStateException("automaton cannot end with an ANY transition");
        }
      }
    }

    int termCount = termToID.size();

    // We have to carefully translate these transitions so automaton
    // realizes they also match all other terms:
    Automaton newAutomaton = new Automaton();
    for(int i=0;i<numStates;i++) {
      newAutomaton.createState();
      newAutomaton.setAccept(i, automaton.isAccept(i));
    }

    for(int i=0;i<numStates;i++) {
      count = automaton.initTransition(i, t);
      for(int j=0;j<count;j++) {
        automaton.getNextTransition(t);
        int min, max;
        if (t.min <= anyTermID && anyTermID <= t.max) {
          // Match any term
          min = 0;
          max = termCount-1;
        } else {
          min = t.min;
          max = t.max;
        }
        newAutomaton.addTransition(t.source, t.dest, min, max);
      }
    }
    newAutomaton.finishState();
    automaton = newAutomaton;
  }

  det = Operations.removeDeadStates(Operations.determinize(automaton,
    maxDeterminizedStates));
}
项目:NYBC    文件:AutomatonTermsEnum.java   
/**
 * Returns the next String in lexicographic order that will not put
 * the machine into a reject state. 
 * 
 * This method traverses the DFA from the given position in the String,
 * starting at the given state.
 * 
 * If this cannot satisfy the machine, returns false. This method will
 * walk the minimal path, in lexicographic order, as long as possible.
 * 
 * If this method returns false, then there might still be more solutions,
 * it is necessary to backtrack to find out.
 * 
 * @param state current non-reject state
 * @param position useful portion of the string
 * @return true if more possible solutions exist for the DFA from this
 *         position
 */
private boolean nextString(int state, int position) {
  /* 
   * the next lexicographic character must be greater than the existing
   * character, if it exists.
   */
  int c = 0;
  if (position < seekBytesRef.length) {
    c = seekBytesRef.bytes[position] & 0xff;
    // if the next byte is 0xff and is not part of the useful portion,
    // then by definition it puts us in a reject state, and therefore this
    // path is dead. there cannot be any higher transitions. backtrack.
    if (c++ == 0xff)
      return false;
  }

  seekBytesRef.length = position;
  visited[state] = curGen;

  Transition transitions[] = allTransitions[state];

  // find the minimal path (lexicographic order) that is >= c

  for (int i = 0; i < transitions.length; i++) {
    Transition transition = transitions[i];
    if (transition.getMax() >= c) {
      int nextChar = Math.max(c, transition.getMin());
      // append either the next sequential char, or the minimum transition
      seekBytesRef.grow(seekBytesRef.length + 1);
      seekBytesRef.length++;
      seekBytesRef.bytes[seekBytesRef.length - 1] = (byte) nextChar;
      state = transition.getDest().getNumber();
      /* 
       * as long as is possible, continue down the minimal path in
       * lexicographic order. if a loop or accept state is encountered, stop.
       */
      while (visited[state] != curGen && !runAutomaton.isAccept(state)) {
        visited[state] = curGen;
        /* 
         * Note: we work with a DFA with no transitions to dead states.
         * so the below is ok, if it is not an accept state,
         * then there MUST be at least one transition.
         */
        transition = allTransitions[state][0];
        state = transition.getDest().getNumber();

        // append the minimum transition
        seekBytesRef.grow(seekBytesRef.length + 1);
        seekBytesRef.length++;
        seekBytesRef.bytes[seekBytesRef.length - 1] = (byte) transition.getMin();

        // we found a loop, record it for faster enumeration
        if (!finite && !linear && visited[state] == curGen) {
          setLinear(seekBytesRef.length-1);
        }
      }
      return true;
    }
  }
  return false;
}
项目:read-open-source-code    文件:AutomatonTermsEnum.java   
/**
 * Returns the next String in lexicographic order that will not put
 * the machine into a reject state. 
 * 
 * This method traverses the DFA from the given position in the String,
 * starting at the given state.
 * 
 * If this cannot satisfy the machine, returns false. This method will
 * walk the minimal path, in lexicographic order, as long as possible.
 * 
 * If this method returns false, then there might still be more solutions,
 * it is necessary to backtrack to find out.
 * 
 * @param state current non-reject state
 * @param position useful portion of the string
 * @return true if more possible solutions exist for the DFA from this
 *         position
 */
private boolean nextString(int state, int position) {
  /* 
   * the next lexicographic character must be greater than the existing
   * character, if it exists.
   */
  int c = 0;
  if (position < seekBytesRef.length) {
    c = seekBytesRef.bytes[position] & 0xff;
    // if the next byte is 0xff and is not part of the useful portion,
    // then by definition it puts us in a reject state, and therefore this
    // path is dead. there cannot be any higher transitions. backtrack.
    if (c++ == 0xff)
      return false;
  }

  seekBytesRef.length = position;
  visited[state] = curGen;

  Transition transitions[] = allTransitions[state];

  // find the minimal path (lexicographic order) that is >= c

  for (int i = 0; i < transitions.length; i++) {
    Transition transition = transitions[i];
    if (transition.getMax() >= c) {
      int nextChar = Math.max(c, transition.getMin());
      // append either the next sequential char, or the minimum transition
      seekBytesRef.grow(seekBytesRef.length + 1);
      seekBytesRef.length++;
      seekBytesRef.bytes[seekBytesRef.length - 1] = (byte) nextChar;
      state = transition.getDest().getNumber();
      /* 
       * as long as is possible, continue down the minimal path in
       * lexicographic order. if a loop or accept state is encountered, stop.
       */
      while (visited[state] != curGen && !runAutomaton.isAccept(state)) {
        visited[state] = curGen;
        /* 
         * Note: we work with a DFA with no transitions to dead states.
         * so the below is ok, if it is not an accept state,
         * then there MUST be at least one transition.
         */
        transition = allTransitions[state][0];
        state = transition.getDest().getNumber();

        // append the minimum transition
        seekBytesRef.grow(seekBytesRef.length + 1);
        seekBytesRef.length++;
        seekBytesRef.bytes[seekBytesRef.length - 1] = (byte) transition.getMin();

        // we found a loop, record it for faster enumeration
        if (!finite && !linear && visited[state] == curGen) {
          setLinear(seekBytesRef.length-1);
        }
      }
      return true;
    }
  }
  return false;
}
项目:read-open-source-code    文件:AutomatonTermsEnum.java   
/**
 * Returns the next String in lexicographic order that will not put
 * the machine into a reject state. 
 * 
 * This method traverses the DFA from the given position in the String,
 * starting at the given state.
 * 
 * If this cannot satisfy the machine, returns false. This method will
 * walk the minimal path, in lexicographic order, as long as possible.
 * 
 * If this method returns false, then there might still be more solutions,
 * it is necessary to backtrack to find out.
 * 
 * @param state current non-reject state
 * @param position useful portion of the string
 * @return true if more possible solutions exist for the DFA from this
 *         position
 */
private boolean nextString(int state, int position) {
  /* 
   * the next lexicographic character must be greater than the existing
   * character, if it exists.
   */
  int c = 0;
  if (position < seekBytesRef.length) {
    c = seekBytesRef.bytes[position] & 0xff;
    // if the next byte is 0xff and is not part of the useful portion,
    // then by definition it puts us in a reject state, and therefore this
    // path is dead. there cannot be any higher transitions. backtrack.
    if (c++ == 0xff)
      return false;
  }

  seekBytesRef.length = position;
  visited[state] = curGen;

  Transition transitions[] = allTransitions[state];

  // find the minimal path (lexicographic order) that is >= c

  for (int i = 0; i < transitions.length; i++) {
    Transition transition = transitions[i];
    if (transition.getMax() >= c) {
      int nextChar = Math.max(c, transition.getMin());
      // append either the next sequential char, or the minimum transition
      seekBytesRef.grow(seekBytesRef.length + 1);
      seekBytesRef.length++;
      seekBytesRef.bytes[seekBytesRef.length - 1] = (byte) nextChar;
      state = transition.getDest().getNumber();
      /* 
       * as long as is possible, continue down the minimal path in
       * lexicographic order. if a loop or accept state is encountered, stop.
       */
      while (visited[state] != curGen && !runAutomaton.isAccept(state)) {
        visited[state] = curGen;
        /* 
         * Note: we work with a DFA with no transitions to dead states.
         * so the below is ok, if it is not an accept state,
         * then there MUST be at least one transition.
         */
        transition = allTransitions[state][0];
        state = transition.getDest().getNumber();

        // append the minimum transition
        seekBytesRef.grow(seekBytesRef.length + 1);
        seekBytesRef.length++;
        seekBytesRef.bytes[seekBytesRef.length - 1] = (byte) transition.getMin();

        // we found a loop, record it for faster enumeration
        if (!finite && !linear && visited[state] == curGen) {
          setLinear(seekBytesRef.length-1);
        }
      }
      return true;
    }
  }
  return false;
}
项目:read-open-source-code    文件:TermAutomatonQuery.java   
/**
 * Call this once you are done adding states/transitions.
 * @param maxDeterminizedStates Maximum number of states created when
 *   determinizing the automaton.  Higher numbers allow this operation to
 *   consume more memory but allow more complex automatons.
 */
public void finish(int maxDeterminizedStates) {
  Automaton automaton = builder.finish();

  // System.out.println("before det:\n" + automaton.toDot());

  Transition t = new Transition();

  // TODO: should we add "eps back to initial node" for all states,
  // and det that?  then we don't need to revisit initial node at
  // every position?  but automaton could blow up?  And, this makes it
  // harder to skip useless positions at search time?

  if (anyTermID != -1) {

    // Make sure there are no leading or trailing ANY:
    int count = automaton.initTransition(0, t);
    for(int i=0;i<count;i++) {
      automaton.getNextTransition(t);
      if (anyTermID >= t.min && anyTermID <= t.max) {
        throw new IllegalStateException("automaton cannot lead with an ANY transition");
      }
    }

    int numStates = automaton.getNumStates();
    for(int i=0;i<numStates;i++) {
      count = automaton.initTransition(i, t);
      for(int j=0;j<count;j++) {
        automaton.getNextTransition(t);
        if (automaton.isAccept(t.dest) && anyTermID >= t.min && anyTermID <= t.max) {
          throw new IllegalStateException("automaton cannot end with an ANY transition");
        }
      }
    }

    int termCount = termToID.size();

    // We have to carefully translate these transitions so automaton
    // realizes they also match all other terms:
    Automaton newAutomaton = new Automaton();
    for(int i=0;i<numStates;i++) {
      newAutomaton.createState();
      newAutomaton.setAccept(i, automaton.isAccept(i));
    }

    for(int i=0;i<numStates;i++) {
      count = automaton.initTransition(i, t);
      for(int j=0;j<count;j++) {
        automaton.getNextTransition(t);
        int min, max;
        if (t.min <= anyTermID && anyTermID <= t.max) {
          // Match any term
          min = 0;
          max = termCount-1;
        } else {
          min = t.min;
          max = t.max;
        }
        newAutomaton.addTransition(t.source, t.dest, min, max);
      }
    }
    newAutomaton.finishState();
    automaton = newAutomaton;
  }

  det = Operations.removeDeadStates(Operations.determinize(automaton,
    maxDeterminizedStates));
}
项目:Maskana-Gestor-de-Conocimiento    文件:AutomatonTermsEnum.java   
/**
 * Returns the next String in lexicographic order that will not put
 * the machine into a reject state. 
 * 
 * This method traverses the DFA from the given position in the String,
 * starting at the given state.
 * 
 * If this cannot satisfy the machine, returns false. This method will
 * walk the minimal path, in lexicographic order, as long as possible.
 * 
 * If this method returns false, then there might still be more solutions,
 * it is necessary to backtrack to find out.
 * 
 * @param state current non-reject state
 * @param position useful portion of the string
 * @return true if more possible solutions exist for the DFA from this
 *         position
 */
private boolean nextString(int state, int position) {
  /* 
   * the next lexicographic character must be greater than the existing
   * character, if it exists.
   */
  int c = 0;
  if (position < seekBytesRef.length) {
    c = seekBytesRef.bytes[position] & 0xff;
    // if the next byte is 0xff and is not part of the useful portion,
    // then by definition it puts us in a reject state, and therefore this
    // path is dead. there cannot be any higher transitions. backtrack.
    if (c++ == 0xff)
      return false;
  }

  seekBytesRef.length = position;
  visited[state] = curGen;

  Transition transitions[] = allTransitions[state];

  // find the minimal path (lexicographic order) that is >= c

  for (int i = 0; i < transitions.length; i++) {
    Transition transition = transitions[i];
    if (transition.getMax() >= c) {
      int nextChar = Math.max(c, transition.getMin());
      // append either the next sequential char, or the minimum transition
      seekBytesRef.grow(seekBytesRef.length + 1);
      seekBytesRef.length++;
      seekBytesRef.bytes[seekBytesRef.length - 1] = (byte) nextChar;
      state = transition.getDest().getNumber();
      /* 
       * as long as is possible, continue down the minimal path in
       * lexicographic order. if a loop or accept state is encountered, stop.
       */
      while (visited[state] != curGen && !runAutomaton.isAccept(state)) {
        visited[state] = curGen;
        /* 
         * Note: we work with a DFA with no transitions to dead states.
         * so the below is ok, if it is not an accept state,
         * then there MUST be at least one transition.
         */
        transition = allTransitions[state][0];
        state = transition.getDest().getNumber();

        // append the minimum transition
        seekBytesRef.grow(seekBytesRef.length + 1);
        seekBytesRef.length++;
        seekBytesRef.bytes[seekBytesRef.length - 1] = (byte) transition.getMin();

        // we found a loop, record it for faster enumeration
        if (!finite && !linear && visited[state] == curGen) {
          setLinear(seekBytesRef.length-1);
        }
      }
      return true;
    }
  }
  return false;
}