Java 类org.apache.lucene.util.automaton.BasicOperations 实例源码

项目:NYBC    文件:AnalyzingSuggester.java   
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
  // TODO: is there a Reader from a CharSequence?
  // Turn tokenstream into automaton:
  TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()));
  Automaton automaton;
  try {
    automaton = (getTokenStreamToAutomaton()).toAutomaton(ts);
    ts.end();
  } finally {
    // FIX: previously the stream leaked when toAutomaton()/end() threw;
    // always close, matching the try/finally style of the other
    // toLookupAutomaton variants in this file.
    ts.close();
  }

  // TODO: we could use the end offset to "guess"
  // whether the final token was a partial token; this
  // would only be a heuristic ... but maybe an OK one.
  // This way we could eg differentiate "net" from "net ",
  // which we can't today...

  replaceSep(automaton);

  // TODO: we can optimize this somewhat by determinizing
  // while we convert
  BasicOperations.determinize(automaton);
  return automaton;
}
项目:NYBC    文件:TestDuelingAnalyzers.java   
@Override
public void setUp() throws Exception {
  super.setUp();
  // Build an automaton accepting exactly the code points this JVM's
  // Character.isLetter() reports, then repeat it so it matches runs of letters.
  final State start = new State();
  final State done = new State();
  done.setAccept(true);
  for (int cp = 0; cp <= 0x10FFFF; cp++) {
    if (Character.isLetter(cp)) {
      start.addTransition(new Transition(cp, cp, done));
    }
  }
  final Automaton oneLetter = new Automaton(start);
  oneLetter.reduce(); // collapse the per-code-point transitions into ranges
  jvmLetter = new CharacterRunAutomaton(BasicOperations.repeat(oneLetter));
}
项目:NYBC    文件:AutomatonQuery.java   
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  // Same checks, same order: superclass equality first, then exact class.
  if (!super.equals(obj) || getClass() != obj.getClass()) {
    return false;
  }
  AutomatonQuery that = (AutomatonQuery) obj;
  if (automaton == null) {
    if (that.automaton != null) {
      return false;
    }
  } else if (!BasicOperations.sameLanguage(automaton, that.automaton)) {
    // Automata are compared by accepted language, not identity.
    return false;
  }
  if (term == null) {
    return that.term == null;
  }
  return term.equals(that.term);
}
项目:NYBC    文件:FuzzyTermsEnum.java   
/** initialize levenshtein DFAs up to maxDistance, if possible */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
  // Build only if the cache does not yet reach maxDistance and the
  // requested distance is supported at all.
  if (runAutomata.size() <= maxDistance
      && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    // The DFA is built over the suffix that follows the constant prefix.
    final String suffix =
        UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
    final LevenshteinAutomata builder = new LevenshteinAutomata(suffix, transpositions);

    for (int n = runAutomata.size(); n <= maxDistance; n++) {
      Automaton a = builder.toAutomaton(n);
      if (realPrefixLength > 0) {
        // Prepend the unchanged prefix as a constant-string automaton.
        final Automaton prefix = BasicAutomata.makeString(
            UnicodeUtil.newString(termText, 0, realPrefixLength));
        a = BasicOperations.concatenate(prefix, a);
      }
      runAutomata.add(new CompiledAutomaton(a, true, false));
    }
  }
  return runAutomata;
}
项目:NYBC    文件:TestRegexpQuery.java   
public void testCustomProvider() throws IOException {
  // Provider that resolves the name "quickBrown" to quick|brown|bob.
  AutomatonProvider provider = new AutomatonProvider() {
    private final Automaton quickBrownAutomaton = BasicOperations.union(
        Arrays.asList(BasicAutomata.makeString("quick"),
                      BasicAutomata.makeString("brown"),
                      BasicAutomata.makeString("bob")));

    @Override
    public Automaton getAutomaton(String name) {
      return name.equals("quickBrown") ? quickBrownAutomaton : null;
    }
  };
  RegexpQuery query = new RegexpQuery(newTerm("<quickBrown>"), RegExp.ALL, provider);
  assertEquals(1, searcher.search(query, 5).totalHits);
}
项目:NYBC    文件:TestAutomatonQuery.java   
/**
 * Test some very simple automata.
 */
public void testBasicAutomata() throws IOException {
  assertAutomatonHits(0, BasicAutomata.makeEmpty());        // empty language
  assertAutomatonHits(0, BasicAutomata.makeEmptyString());  // only the empty string
  assertAutomatonHits(2, BasicAutomata.makeAnyChar());
  assertAutomatonHits(3, BasicAutomata.makeAnyString());
  assertAutomatonHits(2, BasicAutomata.makeString("doc"));
  assertAutomatonHits(1, BasicAutomata.makeChar('a'));
  assertAutomatonHits(2, BasicAutomata.makeCharRange('a', 'b'));
  assertAutomatonHits(2, BasicAutomata.makeInterval(1233, 2346, 0));
  assertAutomatonHits(1, BasicAutomata.makeInterval(0, 2000, 0));
  // union / intersection / difference of single-char automata
  assertAutomatonHits(2,
      BasicOperations.union(BasicAutomata.makeChar('a'), BasicAutomata.makeChar('b')));
  assertAutomatonHits(0,
      BasicOperations.intersection(BasicAutomata.makeChar('a'), BasicAutomata.makeChar('b')));
  assertAutomatonHits(1,
      BasicOperations.minus(BasicAutomata.makeCharRange('a', 'b'), BasicAutomata.makeChar('a')));
}
项目:NYBC    文件:TestGraphTokenizers.java   
public void testOverlappedTokensLattice() throws Exception {
  // "xyz" (posLen=2) overlaps the two positions covered by "abc" and "def".
  final TokenStream ts = new CannedTokenStream(new Token[] {
    token("abc", 1, 1),
    token("xyz", 0, 2),
    token("def", 1, 1),
  });
  final Automaton actual = new TokenStreamToAutomaton().toAutomaton(ts);
  // Expected language: either "xyz" alone, or "abc" SEP "def".
  final Automaton expected = BasicOperations.union(BasicAutomata.makeString("xyz"),
                                                   join("abc", "def"));
  //toDot(actual);
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
项目:NYBC    文件:TestGraphTokenizers.java   
public void testSynOverHole() throws Exception {
  // "X" (posLen=2) is a synonym spanning "a" plus the hole that follows it.
  final TokenStream ts = new CannedTokenStream(new Token[] {
    token("a", 1, 1),
    token("X", 0, 2),
    token("b", 2, 1),
  });
  final Automaton actual = new TokenStreamToAutomaton().toAutomaton(ts);
  final Automaton head = BasicOperations.union(join(s2a("a"), SEP_A, HOLE_A),
                                               BasicAutomata.makeString("X"));
  final Automaton expected = BasicOperations.concatenate(head, join(SEP_A, s2a("b")));
  //toDot(actual);
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
项目:NYBC    文件:TestGraphTokenizers.java   
public void testOverlappedTokensLattice2() throws Exception {
  // "xyz" (posLen=3) overlaps the three positions of "abc", "def", "ghi".
  final TokenStream ts = new CannedTokenStream(new Token[] {
    token("abc", 1, 1),
    token("xyz", 0, 3),
    token("def", 1, 1),
    token("ghi", 1, 1),
  });
  final Automaton actual = new TokenStreamToAutomaton().toAutomaton(ts);
  final Automaton expected = BasicOperations.union(BasicAutomata.makeString("xyz"),
                                                   join("abc", "def", "ghi"));
  //toDot(actual);
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
项目:read-open-source-code    文件:AnalyzingSuggester.java   
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
  // TODO: is there a Reader from a CharSequence?
  // Analyze the key and turn the resulting token stream into an automaton.
  Automaton automaton = null;
  final TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
  try {
    automaton = getTokenStreamToAutomaton().toAutomaton(ts);
  } finally {
    // Close even if conversion failed, suppressing any secondary close error.
    IOUtils.closeWhileHandlingException(ts);
  }

  // TODO: we could use the end offset to "guess"
  // whether the final token was a partial token; this
  // would only be a heuristic ... but maybe an OK one.
  // This way we could eg differentiate "net" from "net ",
  // which we can't today...

  replaceSep(automaton);

  // TODO: we can optimize this somewhat by determinizing
  // while we convert
  BasicOperations.determinize(automaton);
  return automaton;
}
项目:read-open-source-code    文件:FuzzyTermsEnum.java   
/** initialize levenshtein DFAs up to maxDistance, if possible */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
  // Build only if the cache does not yet reach maxDistance and the
  // requested distance is supported at all.
  if (runAutomata.size() <= maxDistance
      && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    // The DFA is built over the suffix that follows the constant prefix.
    final String suffix =
        UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
    final LevenshteinAutomata builder = new LevenshteinAutomata(suffix, transpositions);

    for (int n = runAutomata.size(); n <= maxDistance; n++) {
      Automaton a = builder.toAutomaton(n);
      if (realPrefixLength > 0) {
        // Prepend the unchanged prefix as a constant-string automaton.
        final Automaton prefix = BasicAutomata.makeString(
            UnicodeUtil.newString(termText, 0, realPrefixLength));
        a = BasicOperations.concatenate(prefix, a);
      }
      runAutomata.add(new CompiledAutomaton(a, true, false));
    }
  }
  return runAutomata;
}
项目:read-open-source-code    文件:AnalyzingSuggester.java   
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
  // TODO: is there a Reader from a CharSequence?
  // Analyze the key and turn the resulting token stream into an automaton.
  Automaton automaton = null;
  final TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
  try {
    automaton = getTokenStreamToAutomaton().toAutomaton(ts);
  } finally {
    // Close even if conversion failed, suppressing any secondary close error.
    IOUtils.closeWhileHandlingException(ts);
  }

  // TODO: we could use the end offset to "guess"
  // whether the final token was a partial token; this
  // would only be a heuristic ... but maybe an OK one.
  // This way we could eg differentiate "net" from "net ",
  // which we can't today...

  replaceSep(automaton);

  // TODO: we can optimize this somewhat by determinizing
  // while we convert
  BasicOperations.determinize(automaton);
  return automaton;
}
项目:read-open-source-code    文件:FuzzyTermsEnum.java   
/** initialize levenshtein DFAs up to maxDistance, if possible */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
  // Build only if the cache does not yet reach maxDistance and the
  // requested distance is supported at all.
  if (runAutomata.size() <= maxDistance
      && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    // The DFA is built over the suffix that follows the constant prefix.
    final String suffix =
        UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
    final LevenshteinAutomata builder = new LevenshteinAutomata(suffix, transpositions);

    for (int n = runAutomata.size(); n <= maxDistance; n++) {
      Automaton a = builder.toAutomaton(n);
      if (realPrefixLength > 0) {
        // Prepend the unchanged prefix as a constant-string automaton.
        final Automaton prefix = BasicAutomata.makeString(
            UnicodeUtil.newString(termText, 0, realPrefixLength));
        a = BasicOperations.concatenate(prefix, a);
      }
      runAutomata.add(new CompiledAutomaton(a, true, false));
    }
  }
  return runAutomata;
}
项目:Maskana-Gestor-de-Conocimiento    文件:AnalyzingSuggester.java   
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
  // TODO: is there a Reader from a CharSequence?
  // Analyze the key and turn the resulting token stream into an automaton.
  Automaton automaton = null;
  final TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
  try {
    automaton = getTokenStreamToAutomaton().toAutomaton(ts);
  } finally {
    // Close even if conversion failed, suppressing any secondary close error.
    IOUtils.closeWhileHandlingException(ts);
  }

  // TODO: we could use the end offset to "guess"
  // whether the final token was a partial token; this
  // would only be a heuristic ... but maybe an OK one.
  // This way we could eg differentiate "net" from "net ",
  // which we can't today...

  replaceSep(automaton);

  // TODO: we can optimize this somewhat by determinizing
  // while we convert
  BasicOperations.determinize(automaton);
  return automaton;
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestDuelingAnalyzers.java   
@Override
public void setUp() throws Exception {
  super.setUp();
  // Build an automaton accepting exactly the code points this JVM's
  // Character.isLetter() reports, then repeat it so it matches runs of letters.
  final State start = new State();
  final State done = new State();
  done.setAccept(true);
  for (int cp = 0; cp <= 0x10FFFF; cp++) {
    if (Character.isLetter(cp)) {
      start.addTransition(new Transition(cp, cp, done));
    }
  }
  final Automaton oneLetter = new Automaton(start);
  oneLetter.reduce(); // collapse the per-code-point transitions into ranges
  jvmLetter = new CharacterRunAutomaton(BasicOperations.repeat(oneLetter));
}
项目:Maskana-Gestor-de-Conocimiento    文件:AutomatonQuery.java   
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  // Same checks, same order: superclass equality first, then exact class.
  if (!super.equals(obj) || getClass() != obj.getClass()) {
    return false;
  }
  AutomatonQuery that = (AutomatonQuery) obj;
  if (automaton == null) {
    if (that.automaton != null) {
      return false;
    }
  } else if (!BasicOperations.sameLanguage(automaton, that.automaton)) {
    // Automata are compared by accepted language, not identity.
    return false;
  }
  if (term == null) {
    return that.term == null;
  }
  return term.equals(that.term);
}
项目:Maskana-Gestor-de-Conocimiento    文件:FuzzyTermsEnum.java   
/** initialize levenshtein DFAs up to maxDistance, if possible */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
  // Build only if the cache does not yet reach maxDistance and the
  // requested distance is supported at all.
  if (runAutomata.size() <= maxDistance
      && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    // The DFA is built over the suffix that follows the constant prefix.
    final String suffix =
        UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
    final LevenshteinAutomata builder = new LevenshteinAutomata(suffix, transpositions);

    for (int n = runAutomata.size(); n <= maxDistance; n++) {
      Automaton a = builder.toAutomaton(n);
      if (realPrefixLength > 0) {
        // Prepend the unchanged prefix as a constant-string automaton.
        final Automaton prefix = BasicAutomata.makeString(
            UnicodeUtil.newString(termText, 0, realPrefixLength));
        a = BasicOperations.concatenate(prefix, a);
      }
      runAutomata.add(new CompiledAutomaton(a, true, false));
    }
  }
  return runAutomata;
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestRegexpQuery.java   
public void testCustomProvider() throws IOException {
  // Provider that resolves the name "quickBrown" to quick|brown|bob.
  AutomatonProvider provider = new AutomatonProvider() {
    private final Automaton quickBrownAutomaton = BasicOperations.union(
        Arrays.asList(BasicAutomata.makeString("quick"),
                      BasicAutomata.makeString("brown"),
                      BasicAutomata.makeString("bob")));

    @Override
    public Automaton getAutomaton(String name) {
      return name.equals("quickBrown") ? quickBrownAutomaton : null;
    }
  };
  RegexpQuery query = new RegexpQuery(newTerm("<quickBrown>"), RegExp.ALL, provider);
  assertEquals(1, searcher.search(query, 5).totalHits);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestAutomatonQuery.java   
/**
 * Test some very simple automata.
 */
public void testBasicAutomata() throws IOException {
  assertAutomatonHits(0, BasicAutomata.makeEmpty());        // empty language
  assertAutomatonHits(0, BasicAutomata.makeEmptyString());  // only the empty string
  assertAutomatonHits(2, BasicAutomata.makeAnyChar());
  assertAutomatonHits(3, BasicAutomata.makeAnyString());
  assertAutomatonHits(2, BasicAutomata.makeString("doc"));
  assertAutomatonHits(1, BasicAutomata.makeChar('a'));
  assertAutomatonHits(2, BasicAutomata.makeCharRange('a', 'b'));
  assertAutomatonHits(2, BasicAutomata.makeInterval(1233, 2346, 0));
  assertAutomatonHits(1, BasicAutomata.makeInterval(0, 2000, 0));
  // union / intersection / difference of single-char automata
  assertAutomatonHits(2,
      BasicOperations.union(BasicAutomata.makeChar('a'), BasicAutomata.makeChar('b')));
  assertAutomatonHits(0,
      BasicOperations.intersection(BasicAutomata.makeChar('a'), BasicAutomata.makeChar('b')));
  assertAutomatonHits(1,
      BasicOperations.minus(BasicAutomata.makeCharRange('a', 'b'), BasicAutomata.makeChar('a')));
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestGraphTokenizers.java   
public void testOverlappedTokensLattice() throws Exception {
  // "xyz" (posLen=2) overlaps the two positions covered by "abc" and "def".
  final TokenStream ts = new CannedTokenStream(new Token[] {
    token("abc", 1, 1),
    token("xyz", 0, 2),
    token("def", 1, 1),
  });
  final Automaton actual = new TokenStreamToAutomaton().toAutomaton(ts);
  // Expected language: either "xyz" alone, or "abc" SEP "def".
  final Automaton expected = BasicOperations.union(BasicAutomata.makeString("xyz"),
                                                   join("abc", "def"));
  //toDot(actual);
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestGraphTokenizers.java   
public void testSynOverHole() throws Exception {
  // "X" (posLen=2) is a synonym spanning "a" plus the hole that follows it.
  final TokenStream ts = new CannedTokenStream(new Token[] {
    token("a", 1, 1),
    token("X", 0, 2),
    token("b", 2, 1),
  });
  final Automaton actual = new TokenStreamToAutomaton().toAutomaton(ts);
  final Automaton head = BasicOperations.union(join(s2a("a"), SEP_A, HOLE_A),
                                               BasicAutomata.makeString("X"));
  final Automaton expected = BasicOperations.concatenate(head, join(SEP_A, s2a("b")));
  //toDot(actual);
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestGraphTokenizers.java   
public void testOverlappedTokensLattice2() throws Exception {
  // "xyz" (posLen=3) overlaps the three positions of "abc", "def", "ghi".
  final TokenStream ts = new CannedTokenStream(new Token[] {
    token("abc", 1, 1),
    token("xyz", 0, 3),
    token("def", 1, 1),
    token("ghi", 1, 1),
  });
  final Automaton actual = new TokenStreamToAutomaton().toAutomaton(ts);
  final Automaton expected = BasicOperations.union(BasicAutomata.makeString("xyz"),
                                                   join("abc", "def", "ghi"));
  //toDot(actual);
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
项目:NYBC    文件:TestAutomatonQuery.java   
/**
 * Test that a nondeterministic automaton works correctly (it should be
 * determinized under the hood).
 */
public void testNFA() throws IOException {
  // "this" | "three" is an NFA: the start state carries two transitions on 't'.
  final Automaton thisAutomaton = BasicAutomata.makeString("this");
  final Automaton threeAutomaton = BasicAutomata.makeString("three");
  assertAutomatonHits(2, BasicOperations.union(thisAutomaton, threeAutomaton));
}
项目:NYBC    文件:TestAutomatonQuery.java   
public void testEquals() {
  AutomatonQuery a1 = new AutomatonQuery(newTerm("foobar"),
      BasicAutomata.makeString("foobar"));
  // a2 is the very same instance as a1
  AutomatonQuery a2 = a1;
  // a3: same term, and "foo"+"bar" accepts the same language as "foobar"
  AutomatonQuery a3 = new AutomatonQuery(newTerm("foobar"),
      BasicOperations.concatenate(BasicAutomata.makeString("foo"),
                                  BasicAutomata.makeString("bar")));
  // a4: same term but a different language
  AutomatonQuery a4 = new AutomatonQuery(newTerm("foobar"),
      BasicAutomata.makeString("different"));
  // a5: same language but a different term
  AutomatonQuery a5 = new AutomatonQuery(newTerm("blah"),
      BasicAutomata.makeString("foobar"));

  assertEquals(a1, a2);
  assertEquals(a1, a3);

  // Different concrete classes must never compare equal.
  AutomatonQuery w1 = new WildcardQuery(newTerm("foobar"));
  AutomatonQuery w2 = new RegexpQuery(newTerm("foobar"));

  assertFalse(a1.equals(w1));
  assertFalse(a1.equals(w2));
  assertFalse(w1.equals(w2));
  assertFalse(a1.equals(a4));
  assertFalse(a1.equals(a5));
  assertFalse(a1.equals(null));
}
项目:NYBC    文件:TestAutomatonQuery.java   
/**
 * Test that rewriting to a prefix query works as expected, preserves
 * MultiTermQuery semantics.
 */
public void testRewritePrefix() throws IOException {
  Automaton pfx = BasicAutomata.makeString("do");
  pfx.expandSingleton(); // expand singleton representation for testing
  // "do" followed by any string == the prefix automaton for "do".
  Automaton prefixAutomaton =
      BasicOperations.concatenate(pfx, BasicAutomata.makeAnyString());
  AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), prefixAutomaton);
  Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
  // The query must be recognized as a plain prefix enumeration.
  assertTrue(aq.getTermsEnum(terms) instanceof PrefixTermsEnum);
  assertEquals(3, automatonQueryNrHits(aq));
}
项目:NYBC    文件:TestMockAnalyzer.java   
/** Test a configuration that behaves a lot like KeepWordFilter */
public void testKeep() throws Exception {
  // keepWords accepts every string EXCEPT "foo" and "bar"
  // (complement of their union).
  CharacterRunAutomaton keepWords = new CharacterRunAutomaton(
      BasicOperations.complement(
          Automaton.union(Arrays.asList(BasicAutomata.makeString("foo"),
                                        BasicAutomata.makeString("bar")))));
  Analyzer a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, keepWords, true);
  assertAnalyzesTo(a, "quick foo brown bar bar fox foo",
      new String[] { "foo", "bar", "bar", "foo" },
      new int[] { 2, 2, 1, 2 });
}
项目:NYBC    文件:TestGraphTokenizers.java   
public void testSingleToken() throws Exception {
  // A single token must map to the string automaton for that token.
  final TokenStream ts = new CannedTokenStream(new Token[] {
    token("abc", 1, 1),
  });
  final Automaton actual = new TokenStreamToAutomaton().toAutomaton(ts);
  final Automaton expected = BasicAutomata.makeString("abc");
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
项目:NYBC    文件:TestGraphTokenizers.java   
public void testMultipleHoles() throws Exception {
  // "b" arrives with posInc=3, leaving two holes after "a".
  final TokenStream ts = new CannedTokenStream(new Token[] {
    token("a", 1, 1),
    token("b", 3, 1),
  });
  final Automaton actual = new TokenStreamToAutomaton().toAutomaton(ts);
  final Automaton expected =
      join(s2a("a"), SEP_A, HOLE_A, SEP_A, HOLE_A, SEP_A, s2a("b"));
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
项目:NYBC    文件:TestGraphTokenizers.java   
public void testSynOverMultipleHoles() throws Exception {
  // "x" (posLen=3) spans "a" and the two holes before "b".
  final TokenStream ts = new CannedTokenStream(new Token[] {
    token("a", 1, 1),
    token("x", 0, 3),
    token("b", 3, 1),
  });
  final Automaton actual = new TokenStreamToAutomaton().toAutomaton(ts);
  final Automaton viaHoles = join(s2a("a"), SEP_A, HOLE_A, SEP_A, HOLE_A, SEP_A, s2a("b"));
  final Automaton viaSynonym = join(s2a("x"), SEP_A, s2a("b"));
  final Automaton expected = BasicOperations.union(viaHoles, viaSynonym);
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
项目:NYBC    文件:TestGraphTokenizers.java   
private Automaton join(String ... strings) {
  // Concatenate the strings' automata with SEP_A between each adjacent pair.
  final List<Automaton> parts = new ArrayList<Automaton>();
  for (final String s : strings) {
    parts.add(BasicAutomata.makeString(s));
    parts.add(SEP_A);
  }
  parts.remove(parts.size() - 1); // drop the trailing separator
  return BasicOperations.concatenate(parts);
}
项目:NYBC    文件:TestGraphTokenizers.java   
public void testTwoTokens() throws Exception {
  // Two adjacent tokens become "abc" SEP "def".
  final TokenStream ts = new CannedTokenStream(new Token[] {
    token("abc", 1, 1),
    token("def", 1, 1),
  });
  final Automaton actual = new TokenStreamToAutomaton().toAutomaton(ts);
  final Automaton expected = join("abc", "def");
  //toDot(actual);
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
项目:NYBC    文件:TestGraphTokenizers.java   
public void testHole() throws Exception {
  // posInc=2 on "def" leaves one hole between the two tokens.
  final TokenStream ts = new CannedTokenStream(new Token[] {
    token("abc", 1, 1),
    token("def", 2, 1),
  });
  final Automaton actual = new TokenStreamToAutomaton().toAutomaton(ts);
  final Automaton expected = join(s2a("abc"), SEP_A, HOLE_A, SEP_A, s2a("def"));
  //toDot(actual);
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
项目:NYBC    文件:TestGraphTokenizers.java   
public void testOverlappedTokensSausage() throws Exception {
  // Two tokens on top of each other (sausage):
  final TokenStream ts = new CannedTokenStream(new Token[] {
    token("abc", 1, 1),
    token("xyz", 0, 1)
  });
  final Automaton actual = new TokenStreamToAutomaton().toAutomaton(ts);
  final Automaton expected = BasicOperations.union(BasicAutomata.makeString("abc"),
                                                   BasicAutomata.makeString("xyz"));
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
项目:NYBC    文件:TestGraphTokenizers.java   
public void testSynOverHole2() throws Exception {
  // "abc" (posLen=3) spans "xyz", the following hole, and "def".
  final TokenStream ts = new CannedTokenStream(new Token[] {
    token("xyz", 1, 1),
    token("abc", 0, 3),
    token("def", 2, 1),
  });
  final Automaton actual = new TokenStreamToAutomaton().toAutomaton(ts);
  final Automaton expected = BasicOperations.union(
      join(s2a("xyz"), SEP_A, HOLE_A, SEP_A, s2a("def")),
      BasicAutomata.makeString("abc"));
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
项目:NYBC    文件:TestGraphTokenizers.java   
public void testStartsWithHole() throws Exception {
  // The first token has posInc=2, so the stream begins with a hole.
  final TokenStream ts = new CannedTokenStream(new Token[] {
    token("abc", 2, 1),
  });
  final Automaton actual = new TokenStreamToAutomaton().toAutomaton(ts);
  final Automaton expected = join(HOLE_A, SEP_A, s2a("abc"));
  //toDot(actual);
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
项目:NYBC    文件:TestGraphTokenizers.java   
public void testSynHangingOverEnd() throws Exception {
  // "X" claims posLen=10, far past the end of the stream; both single-token
  // paths must still be accepted.
  final TokenStream ts = new CannedTokenStream(new Token[] {
    token("a", 1, 1),
    token("X", 0, 10),
  });
  final Automaton actual = new TokenStreamToAutomaton().toAutomaton(ts);
  final Automaton expected = BasicOperations.union(BasicAutomata.makeString("a"),
                                                   BasicAutomata.makeString("X"));
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
项目:read-open-source-code    文件:FuzzySuggester.java   
@Override
protected Automaton convertAutomaton(Automaton a) {
  if (!unicodeAware) {
    return a;
  }
  // Rewrite code-point transitions as UTF-8 byte transitions, then
  // determinize the converted automaton in place.
  final Automaton utf8automaton = new UTF32ToUTF8().convert(a);
  BasicOperations.determinize(utf8automaton);
  return utf8automaton;
}
项目:read-open-source-code    文件:FuzzySuggester.java   
@Override
protected Automaton convertAutomaton(Automaton a) {
  if (!unicodeAware) {
    return a;
  }
  // Rewrite code-point transitions as UTF-8 byte transitions, then
  // determinize the converted automaton in place.
  final Automaton utf8automaton = new UTF32ToUTF8().convert(a);
  BasicOperations.determinize(utf8automaton);
  return utf8automaton;
}
项目:Maskana-Gestor-de-Conocimiento    文件:FuzzySuggester.java   
@Override
protected Automaton convertAutomaton(Automaton a) {
  if (!unicodeAware) {
    return a;
  }
  // Rewrite code-point transitions as UTF-8 byte transitions, then
  // determinize the converted automaton in place.
  final Automaton utf8automaton = new UTF32ToUTF8().convert(a);
  BasicOperations.determinize(utf8automaton);
  return utf8automaton;
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestAutomatonQuery.java   
/**
 * Test that a nondeterministic automaton works correctly (it should be
 * determinized under the hood).
 */
public void testNFA() throws IOException {
  // "this" | "three" is an NFA: the start state carries two transitions on 't'.
  final Automaton thisAutomaton = BasicAutomata.makeString("this");
  final Automaton threeAutomaton = BasicAutomata.makeString("three");
  assertAutomatonHits(2, BasicOperations.union(thisAutomaton, threeAutomaton));
}