Java 类org.apache.lucene.util.automaton.BasicAutomata 实例源码

项目:NYBC    文件:TestPrecedenceQueryParser.java   
/**
 * Parses boosted terms and phrases with a {@link PrecedenceQueryParser} and checks that
 * every parse yields a non-null query, and that a {@code ^2.0} boost is reported by the
 * resulting query (within a tolerance of 0.5).
 */
public void testBoost() throws Exception {
  // analyzer configured with an automaton that accepts only the string "on"
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet, true);

  PrecedenceQueryParser qp = new PrecedenceQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);
  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  // expected value goes first so a failure message labels expected/actual correctly
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  // same check against the English stop set: only non-nullness is asserted here
  q = getParser(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)).parse("the^3",
      "field");
  assertNotNull(q);
}
项目:NYBC    文件:TestQPHelper.java   
/**
 * Parses boosted terms and phrases with a {@link StandardQueryParser} and checks that every
 * parse yields a non-null query, that a {@code ^2.0} boost is reported (within 0.5), and
 * that a boosted stop word collapses to an empty query with boost 1.0.
 */
public void testBoost() throws Exception {
  // analyzer configured with an automaton that accepts only the string "on"
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet, true);
  StandardQueryParser qp = new StandardQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  // expected value goes first, matching the final assertion of this method
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  StandardQueryParser qp2 = new StandardQueryParser();
  qp2.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true));

  q = qp2.parse("the^3", "field");
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
项目:NYBC    文件:QueryParserTestBase.java   
/**
 * Parses boosted terms and phrases through the parser configuration returned by
 * {@code getParserConfig} and checks that every parse yields a non-null query, that a
 * {@code ^2.0} boost is reported (within 0.5), and that a boosted stop word collapses to an
 * empty query with boost 1.0.
 */
public void testBoost()
  throws Exception {
  // analyzer configured with an automaton that accepts only the string "on"
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords, true);
  CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer);
  Query q = getQuery("on^1.0",qp);
  assertNotNull(q);
  q = getQuery("\"hello\"^2.0",qp);
  assertNotNull(q);
  // expected value goes first, matching the final assertion of this method
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = getQuery("hello^2.0",qp);
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = getQuery("\"on\"^1.0",qp);
  assertNotNull(q);

  Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true); 
  CommonQueryParserConfiguration qp2 = getParserConfig(a2);
  q = getQuery("the^3", qp2);
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
项目:NYBC    文件:FuzzyTermsEnum.java   
/**
 * Extends the cached automata list with one compiled Levenshtein automaton per distance,
 * starting at the list's current size and ending at {@code maxDistance} inclusive.
 * Nothing is added when the list already has more than {@code maxDistance} entries or when
 * {@code maxDistance} exceeds {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param maxDistance highest distance for which an automaton is requested
 * @return the list obtained from {@code dfaAtt.automata()}, possibly extended
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> cached = dfaAtt.automata();
  if (cached.size() > maxDistance
      || maxDistance > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    return cached;
  }

  // the builder only sees the part of the term that follows the constant prefix
  final String suffixText =
      UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
  final LevenshteinAutomata builder = new LevenshteinAutomata(suffixText, transpositions);

  int distance = cached.size();
  while (distance <= maxDistance) {
    Automaton dfa = builder.toAutomaton(distance);
    if (realPrefixLength > 0) {
      // put the constant prefix back in front of the fuzzy part
      final String prefixText = UnicodeUtil.newString(termText, 0, realPrefixLength);
      dfa = BasicOperations.concatenate(BasicAutomata.makeString(prefixText), dfa);
    }
    cached.add(new CompiledAutomaton(dfa, true, false));
    distance++;
  }
  return cached;
}
项目:NYBC    文件:TestRegexpQuery.java   
/**
 * Checks that a named automaton supplied through a custom {@link AutomatonProvider} is used
 * when the regular expression references it as {@code <quickBrown>}; exactly one hit is
 * expected.
 */
public void testCustomProvider() throws IOException {
  // automaton that matches "quick", "brown" or "bob"
  final Automaton quickBrownOrBob = BasicOperations.union(Arrays.asList(
      BasicAutomata.makeString("quick"),
      BasicAutomata.makeString("brown"),
      BasicAutomata.makeString("bob")));

  final AutomatonProvider provider = new AutomatonProvider() {
    @Override
    public Automaton getAutomaton(String name) {
      // "quickBrown" is the only name this provider resolves
      return name.equals("quickBrown") ? quickBrownOrBob : null;
    }
  };

  final RegexpQuery regexp = new RegexpQuery(newTerm("<quickBrown>"), RegExp.ALL, provider);
  assertEquals(1, searcher.search(regexp, 5).totalHits);
}
项目:NYBC    文件:TestAutomatonQuery.java   
/**
 * Runs each of the elementary {@link BasicAutomata} factories, plus union, intersection and
 * minus of single-character automata, through {@code assertAutomatonHits} with the expected
 * hit count.
 */
public void testBasicAutomata() throws IOException {
  // factories used on their own
  assertAutomatonHits(0, BasicAutomata.makeEmpty());
  assertAutomatonHits(0, BasicAutomata.makeEmptyString());
  assertAutomatonHits(2, BasicAutomata.makeAnyChar());
  assertAutomatonHits(3, BasicAutomata.makeAnyString());
  assertAutomatonHits(2, BasicAutomata.makeString("doc"));
  assertAutomatonHits(1, BasicAutomata.makeChar('a'));
  assertAutomatonHits(2, BasicAutomata.makeCharRange('a', 'b'));
  assertAutomatonHits(2, BasicAutomata.makeInterval(1233, 2346, 0));
  assertAutomatonHits(1, BasicAutomata.makeInterval(0, 2000, 0));

  // set operations over 'a' and 'b'
  final Automaton aOrB =
      BasicOperations.union(BasicAutomata.makeChar('a'), BasicAutomata.makeChar('b'));
  assertAutomatonHits(2, aOrB);

  final Automaton aAndB =
      BasicOperations.intersection(BasicAutomata.makeChar('a'), BasicAutomata.makeChar('b'));
  assertAutomatonHits(0, aAndB);

  final Automaton rangeWithoutA =
      BasicOperations.minus(BasicAutomata.makeCharRange('a', 'b'), BasicAutomata.makeChar('a'));
  assertAutomatonHits(1, rangeWithoutA);
}
项目:NYBC    文件:TestGraphTokenizers.java   
/**
 * Feeds three canned tokens ("abc", "xyz", "def") to {@link TokenStreamToAutomaton} and
 * checks that the result accepts the same language as the union of "xyz" and
 * {@code join("abc", "def")}.
 */
public void testOverlappedTokensLattice() throws Exception {
  final Token[] tokens = new Token[] {
    token("abc", 1, 1),
    token("xyz", 0, 2),
    token("def", 1, 1),
  };
  final TokenStream stream = new CannedTokenStream(tokens);
  final Automaton built = new TokenStreamToAutomaton().toAutomaton(stream);

  // expected language: either the single token "xyz" or the joined path abc -> def
  final Automaton wanted =
      BasicOperations.union(BasicAutomata.makeString("xyz"), join("abc", "def"));
  assertTrue(BasicOperations.sameLanguage(wanted, built));
}
项目:NYBC    文件:TestGraphTokenizers.java   
/**
 * Feeds the canned tokens "a", "X" and "b" to {@link TokenStreamToAutomaton} and checks the
 * result against an automaton built from {@code SEP_A}, {@code HOLE_A} and the token
 * strings.
 */
public void testSynOverHole() throws Exception {
  final Token[] tokens = new Token[] {
    token("a", 1, 1),
    token("X", 0, 2),
    token("b", 2, 1),
  };
  final TokenStream stream = new CannedTokenStream(tokens);
  final Automaton built = new TokenStreamToAutomaton().toAutomaton(stream);

  // first part: either "a", separator, hole -- or just "X"
  final Automaton head = BasicOperations.union(
      join(s2a("a"), SEP_A, HOLE_A),
      BasicAutomata.makeString("X"));
  // second part: separator followed by "b"
  final Automaton tail = join(SEP_A, s2a("b"));
  final Automaton wanted = BasicOperations.concatenate(head, tail);
  assertTrue(BasicOperations.sameLanguage(wanted, built));
}
项目:NYBC    文件:TestGraphTokenizers.java   
/**
 * Feeds four canned tokens ("abc", "xyz", "def", "ghi") to {@link TokenStreamToAutomaton}
 * and checks that the result accepts the same language as the union of "xyz" and
 * {@code join("abc", "def", "ghi")}.
 */
public void testOverlappedTokensLattice2() throws Exception {
  final Token[] tokens = new Token[] {
    token("abc", 1, 1),
    token("xyz", 0, 3),
    token("def", 1, 1),
    token("ghi", 1, 1),
  };
  final TokenStream stream = new CannedTokenStream(tokens);
  final Automaton built = new TokenStreamToAutomaton().toAutomaton(stream);

  // expected language: either the single token "xyz" or the joined path abc -> def -> ghi
  final Automaton wanted =
      BasicOperations.union(BasicAutomata.makeString("xyz"), join("abc", "def", "ghi"));
  assertTrue(BasicOperations.sameLanguage(wanted, built));
}
项目:read-open-source-code    文件:FuzzyTermsEnum.java   
/**
 * Extends the cached automata list with one compiled Levenshtein automaton per distance,
 * starting at the list's current size and ending at {@code maxDistance} inclusive.
 * Nothing is added when the list already has more than {@code maxDistance} entries or when
 * {@code maxDistance} exceeds {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param maxDistance highest distance for which an automaton is requested
 * @return the list obtained from {@code dfaAtt.automata()}, possibly extended
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> cached = dfaAtt.automata();
  if (cached.size() > maxDistance
      || maxDistance > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    return cached;
  }

  // the builder only sees the part of the term that follows the constant prefix
  final String suffixText =
      UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
  final LevenshteinAutomata builder = new LevenshteinAutomata(suffixText, transpositions);

  int distance = cached.size();
  while (distance <= maxDistance) {
    Automaton dfa = builder.toAutomaton(distance);
    if (realPrefixLength > 0) {
      // put the constant prefix back in front of the fuzzy part
      final String prefixText = UnicodeUtil.newString(termText, 0, realPrefixLength);
      dfa = BasicOperations.concatenate(BasicAutomata.makeString(prefixText), dfa);
    }
    cached.add(new CompiledAutomaton(dfa, true, false));
    distance++;
  }
  return cached;
}
项目:read-open-source-code    文件:FuzzyTermsEnum.java   
/**
 * Extends the cached automata list with one compiled Levenshtein automaton per distance,
 * starting at the list's current size and ending at {@code maxDistance} inclusive.
 * Nothing is added when the list already has more than {@code maxDistance} entries or when
 * {@code maxDistance} exceeds {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param maxDistance highest distance for which an automaton is requested
 * @return the list obtained from {@code dfaAtt.automata()}, possibly extended
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> cached = dfaAtt.automata();
  if (cached.size() > maxDistance
      || maxDistance > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    return cached;
  }

  // the builder only sees the part of the term that follows the constant prefix
  final String suffixText =
      UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
  final LevenshteinAutomata builder = new LevenshteinAutomata(suffixText, transpositions);

  int distance = cached.size();
  while (distance <= maxDistance) {
    Automaton dfa = builder.toAutomaton(distance);
    if (realPrefixLength > 0) {
      // put the constant prefix back in front of the fuzzy part
      final String prefixText = UnicodeUtil.newString(termText, 0, realPrefixLength);
      dfa = BasicOperations.concatenate(BasicAutomata.makeString(prefixText), dfa);
    }
    cached.add(new CompiledAutomaton(dfa, true, false));
    distance++;
  }
  return cached;
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestPrecedenceQueryParser.java   
/**
 * Parses boosted terms and phrases with a {@link PrecedenceQueryParser} and checks that
 * every parse yields a non-null query, and that a {@code ^2.0} boost is reported by the
 * resulting query (within a tolerance of 0.5).
 */
public void testBoost() throws Exception {
  // analyzer configured with an automaton that accepts only the string "on"
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);

  PrecedenceQueryParser qp = new PrecedenceQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);
  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  // expected value goes first so a failure message labels expected/actual correctly
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  // same check against the English stop set: only non-nullness is asserted here
  q = getParser(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)).parse("the^3",
      "field");
  assertNotNull(q);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestQPHelper.java   
/**
 * Parses boosted terms and phrases with a {@link StandardQueryParser} and checks that every
 * parse yields a non-null query, that a {@code ^2.0} boost is reported (within 0.5), and
 * that a boosted stop word collapses to an empty query with boost 1.0.
 */
public void testBoost() throws Exception {
  // analyzer configured with an automaton that accepts only the string "on"
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
  StandardQueryParser qp = new StandardQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  // expected value goes first, matching the final assertion of this method
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  StandardQueryParser qp2 = new StandardQueryParser();
  qp2.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));

  q = qp2.parse("the^3", "field");
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
项目:Maskana-Gestor-de-Conocimiento    文件:QueryParserTestBase.java   
/**
 * Parses boosted terms and phrases through the parser configuration returned by
 * {@code getParserConfig} and checks that every parse yields a non-null query, that a
 * {@code ^2.0} boost is reported (within 0.5), and that a boosted stop word collapses to an
 * empty query with boost 1.0.
 */
public void testBoost()
  throws Exception {
  // analyzer configured with an automaton that accepts only the string "on"
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
  CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer);
  Query q = getQuery("on^1.0",qp);
  assertNotNull(q);
  q = getQuery("\"hello\"^2.0",qp);
  assertNotNull(q);
  // expected value goes first, matching the final assertion of this method
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = getQuery("hello^2.0",qp);
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = getQuery("\"on\"^1.0",qp);
  assertNotNull(q);

  Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET); 
  CommonQueryParserConfiguration qp2 = getParserConfig(a2);
  q = getQuery("the^3", qp2);
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
项目:Maskana-Gestor-de-Conocimiento    文件:FuzzyTermsEnum.java   
/**
 * Extends the cached automata list with one compiled Levenshtein automaton per distance,
 * starting at the list's current size and ending at {@code maxDistance} inclusive.
 * Nothing is added when the list already has more than {@code maxDistance} entries or when
 * {@code maxDistance} exceeds {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param maxDistance highest distance for which an automaton is requested
 * @return the list obtained from {@code dfaAtt.automata()}, possibly extended
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> cached = dfaAtt.automata();
  if (cached.size() > maxDistance
      || maxDistance > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    return cached;
  }

  // the builder only sees the part of the term that follows the constant prefix
  final String suffixText =
      UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
  final LevenshteinAutomata builder = new LevenshteinAutomata(suffixText, transpositions);

  int distance = cached.size();
  while (distance <= maxDistance) {
    Automaton dfa = builder.toAutomaton(distance);
    if (realPrefixLength > 0) {
      // put the constant prefix back in front of the fuzzy part
      final String prefixText = UnicodeUtil.newString(termText, 0, realPrefixLength);
      dfa = BasicOperations.concatenate(BasicAutomata.makeString(prefixText), dfa);
    }
    cached.add(new CompiledAutomaton(dfa, true, false));
    distance++;
  }
  return cached;
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestRegexpQuery.java   
/**
 * Checks that a named automaton supplied through a custom {@link AutomatonProvider} is used
 * when the regular expression references it as {@code <quickBrown>}; exactly one hit is
 * expected.
 */
public void testCustomProvider() throws IOException {
  // automaton that matches "quick", "brown" or "bob"
  final Automaton quickBrownOrBob = BasicOperations.union(Arrays.asList(
      BasicAutomata.makeString("quick"),
      BasicAutomata.makeString("brown"),
      BasicAutomata.makeString("bob")));

  final AutomatonProvider provider = new AutomatonProvider() {
    @Override
    public Automaton getAutomaton(String name) {
      // "quickBrown" is the only name this provider resolves
      return name.equals("quickBrown") ? quickBrownOrBob : null;
    }
  };

  final RegexpQuery regexp = new RegexpQuery(newTerm("<quickBrown>"), RegExp.ALL, provider);
  assertEquals(1, searcher.search(regexp, 5).totalHits);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestAutomatonQuery.java   
/**
 * Runs each of the elementary {@link BasicAutomata} factories, plus union, intersection and
 * minus of single-character automata, through {@code assertAutomatonHits} with the expected
 * hit count.
 */
public void testBasicAutomata() throws IOException {
  // factories used on their own
  assertAutomatonHits(0, BasicAutomata.makeEmpty());
  assertAutomatonHits(0, BasicAutomata.makeEmptyString());
  assertAutomatonHits(2, BasicAutomata.makeAnyChar());
  assertAutomatonHits(3, BasicAutomata.makeAnyString());
  assertAutomatonHits(2, BasicAutomata.makeString("doc"));
  assertAutomatonHits(1, BasicAutomata.makeChar('a'));
  assertAutomatonHits(2, BasicAutomata.makeCharRange('a', 'b'));
  assertAutomatonHits(2, BasicAutomata.makeInterval(1233, 2346, 0));
  assertAutomatonHits(1, BasicAutomata.makeInterval(0, 2000, 0));

  // set operations over 'a' and 'b'
  final Automaton aOrB =
      BasicOperations.union(BasicAutomata.makeChar('a'), BasicAutomata.makeChar('b'));
  assertAutomatonHits(2, aOrB);

  final Automaton aAndB =
      BasicOperations.intersection(BasicAutomata.makeChar('a'), BasicAutomata.makeChar('b'));
  assertAutomatonHits(0, aAndB);

  final Automaton rangeWithoutA =
      BasicOperations.minus(BasicAutomata.makeCharRange('a', 'b'), BasicAutomata.makeChar('a'));
  assertAutomatonHits(1, rangeWithoutA);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestGraphTokenizers.java   
/**
 * Feeds three canned tokens ("abc", "xyz", "def") to {@link TokenStreamToAutomaton} and
 * checks that the result accepts the same language as the union of "xyz" and
 * {@code join("abc", "def")}.
 */
public void testOverlappedTokensLattice() throws Exception {
  final Token[] tokens = new Token[] {
    token("abc", 1, 1),
    token("xyz", 0, 2),
    token("def", 1, 1),
  };
  final TokenStream stream = new CannedTokenStream(tokens);
  final Automaton built = new TokenStreamToAutomaton().toAutomaton(stream);

  // expected language: either the single token "xyz" or the joined path abc -> def
  final Automaton wanted =
      BasicOperations.union(BasicAutomata.makeString("xyz"), join("abc", "def"));
  assertTrue(BasicOperations.sameLanguage(wanted, built));
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestGraphTokenizers.java   
/**
 * Feeds the canned tokens "a", "X" and "b" to {@link TokenStreamToAutomaton} and checks the
 * result against an automaton built from {@code SEP_A}, {@code HOLE_A} and the token
 * strings.
 */
public void testSynOverHole() throws Exception {
  final Token[] tokens = new Token[] {
    token("a", 1, 1),
    token("X", 0, 2),
    token("b", 2, 1),
  };
  final TokenStream stream = new CannedTokenStream(tokens);
  final Automaton built = new TokenStreamToAutomaton().toAutomaton(stream);

  // first part: either "a", separator, hole -- or just "X"
  final Automaton head = BasicOperations.union(
      join(s2a("a"), SEP_A, HOLE_A),
      BasicAutomata.makeString("X"));
  // second part: separator followed by "b"
  final Automaton tail = join(SEP_A, s2a("b"));
  final Automaton wanted = BasicOperations.concatenate(head, tail);
  assertTrue(BasicOperations.sameLanguage(wanted, built));
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestGraphTokenizers.java   
/**
 * Feeds four canned tokens ("abc", "xyz", "def", "ghi") to {@link TokenStreamToAutomaton}
 * and checks that the result accepts the same language as the union of "xyz" and
 * {@code join("abc", "def", "ghi")}.
 */
public void testOverlappedTokensLattice2() throws Exception {
  final Token[] tokens = new Token[] {
    token("abc", 1, 1),
    token("xyz", 0, 3),
    token("def", 1, 1),
    token("ghi", 1, 1),
  };
  final TokenStream stream = new CannedTokenStream(tokens);
  final Automaton built = new TokenStreamToAutomaton().toAutomaton(stream);

  // expected language: either the single token "xyz" or the joined path abc -> def -> ghi
  final Automaton wanted =
      BasicOperations.union(BasicAutomata.makeString("xyz"), join("abc", "def", "ghi"));
  assertTrue(BasicOperations.sameLanguage(wanted, built));
}
项目:NYBC    文件:SearchEquivalenceTestBase.java   
/**
 * Builds the shared fixture: picks a one-character stop word, indexes at least 1000
 * documents with random field contents, deletes roughly a twentieth of them by id, and
 * opens two searchers over the resulting reader.
 */
@BeforeClass
public static void beforeClass() throws Exception {
  final Random rnd = random();
  directory = newDirectory();
  stopword = "" + randomChar();
  analyzer = new MockAnalyzer(rnd, MockTokenizer.WHITESPACE, false,
      new CharacterRunAutomaton(BasicAutomata.makeString(stopword)), true);
  final RandomIndexWriter writer = new RandomIndexWriter(rnd, directory, analyzer);

  // a single Document instance is reused; only the field values change per iteration
  final Document document = new Document();
  final Field idField = new StringField("id", "", Field.Store.NO);
  final Field bodyField = new TextField("field", "", Field.Store.NO);
  document.add(idField);
  document.add(bodyField);

  // index some docs
  final int numDocs = atLeast(1000);
  int docId = 0;
  while (docId < numDocs) {
    idField.setStringValue(Integer.toString(docId));
    bodyField.setStringValue(randomFieldContents());
    writer.addDocument(document);
    docId++;
  }

  // delete some docs, randomly choosing between delete-by-term and delete-by-query
  final int numDeletes = numDocs/20;
  for (int remaining = numDeletes; remaining > 0; remaining--) {
    final Term victim = new Term("id", Integer.toString(rnd.nextInt(numDocs)));
    final boolean byTerm = rnd.nextBoolean();
    if (!byTerm) {
      writer.deleteDocuments(new TermQuery(victim));
    } else {
      writer.deleteDocuments(victim);
    }
  }

  reader = writer.getReader();
  s1 = newSearcher(reader);
  s2 = newSearcher(reader);
  writer.close();
}
项目:NYBC    文件:TestAutomatonQuery.java   
/**
 * Test that a nondeterministic automaton works correctly. (It is expected to be
 * determinized.)
 */
public void testNFA() throws IOException {
  // "this" and "three" both start with 't', so their union has two 't' transitions
  // leaving the initial state, i.e. it is an NFA
  final Automaton acceptsThis = BasicAutomata.makeString("this");
  final Automaton acceptsThree = BasicAutomata.makeString("three");
  final Automaton thisOrThree = BasicOperations.union(acceptsThis, acceptsThree);
  assertAutomatonHits(2, thisOrThree);
}
项目:NYBC    文件:TestAutomatonQuery.java   
/**
 * Exercises {@code equals} on {@link AutomatonQuery}: equal for the same reference and for
 * the same term with the same language; unequal for a different language, a different
 * term, a different query class, or {@code null}.
 */
public void testEquals() {
  final AutomatonQuery base =
      new AutomatonQuery(newTerm("foobar"), BasicAutomata.makeString("foobar"));
  // second reference to the very same object
  final AutomatonQuery alias = base;
  // same term, same language, but built by concatenating "foo" and "bar"
  final AutomatonQuery sameLanguage = new AutomatonQuery(newTerm("foobar"),
      BasicOperations.concatenate(BasicAutomata.makeString("foo"), BasicAutomata.makeString("bar")));
  // same term, different language
  final AutomatonQuery otherLanguage =
      new AutomatonQuery(newTerm("foobar"), BasicAutomata.makeString("different"));
  // different term, same language
  final AutomatonQuery otherTerm =
      new AutomatonQuery(newTerm("blah"), BasicAutomata.makeString("foobar"));

  assertEquals(base, alias);

  assertEquals(base, sameLanguage);

  // subclasses of AutomatonQuery: a different class must never compare equal
  final AutomatonQuery wildcard = new WildcardQuery(newTerm("foobar"));
  final AutomatonQuery regexp = new RegexpQuery(newTerm("foobar"));

  assertFalse(base.equals(wildcard));
  assertFalse(base.equals(regexp));
  assertFalse(wildcard.equals(regexp));
  assertFalse(base.equals(otherLanguage));
  assertFalse(base.equals(otherTerm));
  assertFalse(base.equals(null));
}
项目:NYBC    文件:TestAutomatonQuery.java   
/**
 * Checks that an automaton accepting the single string "piece" yields a
 * {@link SingleTermsEnum} from {@code getTermsEnum} and still produces exactly one hit.
 */
public void testRewriteSingleTerm() throws IOException {
  final Automaton onlyPiece = BasicAutomata.makeString("piece");
  final AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), onlyPiece);
  final Terms fieldTerms = MultiFields.getTerms(searcher.getIndexReader(), FN);
  assertTrue(query.getTermsEnum(fieldTerms) instanceof SingleTermsEnum);
  assertEquals(1, automatonQueryNrHits(query));
}
项目:NYBC    文件:TestAutomatonQuery.java   
/**
 * Checks that the automaton "do" followed by any string yields a {@link PrefixTermsEnum}
 * from {@code getTermsEnum} and produces three hits.
 */
public void testRewritePrefix() throws IOException {
  final Automaton doPrefix = BasicAutomata.makeString("do");
  // expand singleton representation for testing
  doPrefix.expandSingleton();
  final Automaton anySuffix = BasicAutomata.makeAnyString();
  final Automaton doThenAnything = BasicOperations.concatenate(doPrefix, anySuffix);

  final AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), doThenAnything);
  final Terms fieldTerms = MultiFields.getTerms(searcher.getIndexReader(), FN);
  assertTrue(query.getTermsEnum(fieldTerms) instanceof PrefixTermsEnum);
  assertEquals(3, automatonQueryNrHits(query));
}
项目:NYBC    文件:TestAutomatonQuery.java   
/**
 * Checks handling of the empty language: the query must hand back
 * {@code TermsEnum.EMPTY} and produce no hits.
 */
public void testEmptyOptimization() throws IOException {
  final Automaton nothing = BasicAutomata.makeEmpty();
  final AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), nothing);
  // not yet available: assertTrue(aq.getEnum(searcher.getIndexReader())
  // instanceof EmptyTermEnum);
  final Terms fieldTerms = MultiFields.getTerms(searcher.getIndexReader(), FN);
  assertSame(TermsEnum.EMPTY, query.getTermsEnum(fieldTerms));
  assertEquals(0, automatonQueryNrHits(query));
}
项目:NYBC    文件:TestMockAnalyzer.java   
/**
 * Test a configuration that behaves a lot like KeepWordFilter: the automaton handed to the
 * analyzer is the complement of {"foo", "bar"}, and only "foo" and "bar" are expected in
 * the analyzed output.
 */
public void testKeep() throws Exception {
  final Automaton fooOrBar = Automaton.union(
      Arrays.asList(BasicAutomata.makeString("foo"), BasicAutomata.makeString("bar")));
  final CharacterRunAutomaton keepWords =
      new CharacterRunAutomaton(BasicOperations.complement(fooOrBar));
  final Analyzer analyzerUnderTest =
      new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, keepWords, true);

  final String[] expectedTerms = new String[] { "foo", "bar", "bar", "foo" };
  final int[] expectedPosIncrements = new int[] { 2, 2, 1, 2 };
  assertAnalyzesTo(analyzerUnderTest, "quick foo brown bar bar fox foo",
      expectedTerms,
      expectedPosIncrements);
}
项目:NYBC    文件:TestGraphTokenizers.java   
/**
 * Feeds a single canned token "abc" to {@link TokenStreamToAutomaton} and checks that the
 * result accepts the same language as the string automaton for "abc".
 */
public void testSingleToken() throws Exception {
  final Token[] tokens = new Token[] {
    token("abc", 1, 1),
  };
  final TokenStream stream = new CannedTokenStream(tokens);
  final Automaton built = new TokenStreamToAutomaton().toAutomaton(stream);
  final Automaton wanted = BasicAutomata.makeString("abc");
  assertTrue(BasicOperations.sameLanguage(wanted, built));
}
项目:NYBC    文件:TestGraphTokenizers.java   
/**
 * Concatenates the string automata for {@code strings}, placing {@code SEP_A} between
 * consecutive entries.
 *
 * @param strings the strings to join, in order
 * @return the concatenation of the per-string automata and separators
 */
private Automaton join(String ... strings) {
  final List<Automaton> parts = new ArrayList<Automaton>();
  for (final String piece : strings) {
    parts.add(BasicAutomata.makeString(piece));
    parts.add(SEP_A);
  }
  // every piece was followed by a separator; drop the one after the last piece
  final int lastIndex = parts.size() - 1;
  parts.remove(lastIndex);
  return BasicOperations.concatenate(parts);
}
项目:NYBC    文件:TestGraphTokenizers.java   
/**
 * Feeds two canned tokens ("abc" and "xyz") to {@link TokenStreamToAutomaton} and checks
 * that the result accepts the same language as the union of the two string automata.
 */
public void testOverlappedTokensSausage() throws Exception {
  // Two tokens on top of each other (sausage):
  final Token[] tokens = new Token[] {
    token("abc", 1, 1),
    token("xyz", 0, 1)
  };
  final TokenStream stream = new CannedTokenStream(tokens);
  final Automaton built = new TokenStreamToAutomaton().toAutomaton(stream);

  final Automaton wanted = BasicOperations.union(
      BasicAutomata.makeString("abc"),
      BasicAutomata.makeString("xyz"));
  assertTrue(BasicOperations.sameLanguage(wanted, built));
}
项目:NYBC    文件:TestGraphTokenizers.java   
/**
 * Feeds the canned tokens "xyz", "abc" and "def" to {@link TokenStreamToAutomaton} and
 * checks the result against the union of "abc" with the path
 * xyz, {@code SEP_A}, {@code HOLE_A}, {@code SEP_A}, def.
 */
public void testSynOverHole2() throws Exception {
  final Token[] tokens = new Token[] {
    token("xyz", 1, 1),
    token("abc", 0, 3),
    token("def", 2, 1),
  };
  final TokenStream stream = new CannedTokenStream(tokens);
  final Automaton built = new TokenStreamToAutomaton().toAutomaton(stream);

  final Automaton pathThroughHole = join(s2a("xyz"), SEP_A, HOLE_A, SEP_A, s2a("def"));
  final Automaton wanted =
      BasicOperations.union(pathThroughHole, BasicAutomata.makeString("abc"));
  assertTrue(BasicOperations.sameLanguage(wanted, built));
}
项目:NYBC    文件:TestGraphTokenizers.java   
/**
 * Feeds the canned tokens "a" and "X" (the latter created with a length argument of 10) to
 * {@link TokenStreamToAutomaton} and checks that the result accepts the same language as
 * the union of "a" and "X".
 */
public void testSynHangingOverEnd() throws Exception {
  final Token[] tokens = new Token[] {
    token("a", 1, 1),
    token("X", 0, 10),
  };
  final TokenStream stream = new CannedTokenStream(tokens);
  final Automaton built = new TokenStreamToAutomaton().toAutomaton(stream);

  final Automaton acceptsA = BasicAutomata.makeString("a");
  final Automaton acceptsX = BasicAutomata.makeString("X");
  final Automaton wanted = BasicOperations.union(acceptsA, acceptsX);
  assertTrue(BasicOperations.sameLanguage(wanted, built));
}
项目:Maskana-Gestor-de-Conocimiento    文件:SearchEquivalenceTestBase.java   
/**
 * Builds the shared fixture: picks a one-character stop word, indexes at least 1000
 * documents with random field contents, deletes roughly a twentieth of them by id, and
 * opens two searchers over the resulting reader.
 */
@BeforeClass
public static void beforeClass() throws Exception {
  final Random rnd = random();
  directory = newDirectory();
  stopword = "" + randomChar();
  analyzer = new MockAnalyzer(rnd, MockTokenizer.WHITESPACE, false,
      new CharacterRunAutomaton(BasicAutomata.makeString(stopword)));
  final RandomIndexWriter writer = new RandomIndexWriter(rnd, directory, analyzer);

  // a single Document instance is reused; only the field values change per iteration
  final Document document = new Document();
  final Field idField = new StringField("id", "", Field.Store.NO);
  final Field bodyField = new TextField("field", "", Field.Store.NO);
  document.add(idField);
  document.add(bodyField);

  // index some docs
  final int numDocs = atLeast(1000);
  int docId = 0;
  while (docId < numDocs) {
    idField.setStringValue(Integer.toString(docId));
    bodyField.setStringValue(randomFieldContents());
    writer.addDocument(document);
    docId++;
  }

  // delete some docs, randomly choosing between delete-by-term and delete-by-query
  final int numDeletes = numDocs/20;
  for (int remaining = numDeletes; remaining > 0; remaining--) {
    final Term victim = new Term("id", Integer.toString(rnd.nextInt(numDocs)));
    final boolean byTerm = rnd.nextBoolean();
    if (!byTerm) {
      writer.deleteDocuments(new TermQuery(victim));
    } else {
      writer.deleteDocuments(victim);
    }
  }

  reader = writer.getReader();
  s1 = newSearcher(reader);
  s2 = newSearcher(reader);
  writer.close();
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestAutomatonQuery.java   
/**
 * Test that a nondeterministic automaton works correctly. (It is expected to be
 * determinized.)
 */
public void testNFA() throws IOException {
  // "this" and "three" both start with 't', so their union has two 't' transitions
  // leaving the initial state, i.e. it is an NFA
  final Automaton acceptsThis = BasicAutomata.makeString("this");
  final Automaton acceptsThree = BasicAutomata.makeString("three");
  final Automaton thisOrThree = BasicOperations.union(acceptsThis, acceptsThree);
  assertAutomatonHits(2, thisOrThree);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestAutomatonQuery.java   
/**
 * Exercises {@code equals} on {@link AutomatonQuery}: equal for the same reference and for
 * the same term with the same language; unequal for a different language, a different
 * term, a different query class, or {@code null}.
 */
public void testEquals() {
  final AutomatonQuery base =
      new AutomatonQuery(newTerm("foobar"), BasicAutomata.makeString("foobar"));
  // second reference to the very same object
  final AutomatonQuery alias = base;
  // same term, same language, but built by concatenating "foo" and "bar"
  final AutomatonQuery sameLanguage = new AutomatonQuery(newTerm("foobar"),
      BasicOperations.concatenate(BasicAutomata.makeString("foo"), BasicAutomata.makeString("bar")));
  // same term, different language
  final AutomatonQuery otherLanguage =
      new AutomatonQuery(newTerm("foobar"), BasicAutomata.makeString("different"));
  // different term, same language
  final AutomatonQuery otherTerm =
      new AutomatonQuery(newTerm("blah"), BasicAutomata.makeString("foobar"));

  assertEquals(base, alias);

  assertEquals(base, sameLanguage);

  // subclasses of AutomatonQuery: a different class must never compare equal
  final AutomatonQuery wildcard = new WildcardQuery(newTerm("foobar"));
  final AutomatonQuery regexp = new RegexpQuery(newTerm("foobar"));

  assertFalse(base.equals(wildcard));
  assertFalse(base.equals(regexp));
  assertFalse(wildcard.equals(regexp));
  assertFalse(base.equals(otherLanguage));
  assertFalse(base.equals(otherTerm));
  assertFalse(base.equals(null));
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestAutomatonQuery.java   
/**
 * Checks that an automaton accepting the single string "piece" yields a
 * {@link SingleTermsEnum} from {@code getTermsEnum} and still produces exactly one hit.
 */
public void testRewriteSingleTerm() throws IOException {
  final Automaton onlyPiece = BasicAutomata.makeString("piece");
  final AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), onlyPiece);
  final Terms fieldTerms = MultiFields.getTerms(searcher.getIndexReader(), FN);
  assertTrue(query.getTermsEnum(fieldTerms) instanceof SingleTermsEnum);
  assertEquals(1, automatonQueryNrHits(query));
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestAutomatonQuery.java   
/**
 * Checks that the automaton "do" followed by any string yields a {@link PrefixTermsEnum}
 * from {@code getTermsEnum} and produces three hits.
 */
public void testRewritePrefix() throws IOException {
  final Automaton doPrefix = BasicAutomata.makeString("do");
  // expand singleton representation for testing
  doPrefix.expandSingleton();
  final Automaton anySuffix = BasicAutomata.makeAnyString();
  final Automaton doThenAnything = BasicOperations.concatenate(doPrefix, anySuffix);

  final AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), doThenAnything);
  final Terms fieldTerms = MultiFields.getTerms(searcher.getIndexReader(), FN);
  assertTrue(query.getTermsEnum(fieldTerms) instanceof PrefixTermsEnum);
  assertEquals(3, automatonQueryNrHits(query));
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestAutomatonQuery.java   
/**
 * Checks handling of the empty language: the query must hand back
 * {@code TermsEnum.EMPTY} and produce no hits.
 */
public void testEmptyOptimization() throws IOException {
  final Automaton nothing = BasicAutomata.makeEmpty();
  final AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), nothing);
  // not yet available: assertTrue(aq.getEnum(searcher.getIndexReader())
  // instanceof EmptyTermEnum);
  final Terms fieldTerms = MultiFields.getTerms(searcher.getIndexReader(), FN);
  assertSame(TermsEnum.EMPTY, query.getTermsEnum(fieldTerms));
  assertEquals(0, automatonQueryNrHits(query));
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestIndexWriter.java   
/**
 * Indexes one document with two "body" values through an analyzer that chains two
 * {@link MockTokenFilter}s (the English stop set, then an automaton accepting "foobar"),
 * and checks that a phrase query placing "just" at position 0 and "test" at position 3
 * finds exactly one hit.
 */
public void testStopwordsPosIncHole2() throws Exception {
  // use two stopfilters for testing here
  final Directory indexDir = newDirectory();
  final Automaton secondSet = BasicAutomata.makeString("foobar");
  final Analyzer twoFilterAnalyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      final Tokenizer source = new MockTokenizer(reader);
      // first filter: English stop set; second filter: the "foobar" automaton
      final TokenStream afterEnglish = new MockTokenFilter(source, MockTokenFilter.ENGLISH_STOPSET);
      final TokenStream afterBoth =
          new MockTokenFilter(afterEnglish, new CharacterRunAutomaton(secondSet));
      return new TokenStreamComponents(source, afterBoth);
    }
  };

  final RandomIndexWriter writer = new RandomIndexWriter(random(), indexDir, twoFilterAnalyzer);
  final Document document = new Document();
  document.add(new TextField("body", "just a foobar", Field.Store.NO));
  document.add(new TextField("body", "test of gaps", Field.Store.NO));
  writer.addDocument(document);
  final IndexReader indexReader = writer.getReader();
  writer.close();

  final IndexSearcher indexSearcher = newSearcher(indexReader);
  final PhraseQuery phrase = new PhraseQuery();
  phrase.add(new Term("body", "just"), 0);
  phrase.add(new Term("body", "test"), 3);
  // body:"just ? ? test"
  assertEquals(1, indexSearcher.search(phrase, 5).totalHits);

  indexReader.close();
  indexDir.close();
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestMockAnalyzer.java   
/**
 * Test a configuration that behaves a lot like KeepWordFilter: the automaton handed to the
 * analyzer is the complement of {"foo", "bar"}, and only "foo" and "bar" are expected in
 * the analyzed output.
 */
public void testKeep() throws Exception {
  final Automaton fooOrBar = Automaton.union(
      Arrays.asList(BasicAutomata.makeString("foo"), BasicAutomata.makeString("bar")));
  final CharacterRunAutomaton keepWords =
      new CharacterRunAutomaton(BasicOperations.complement(fooOrBar));
  final Analyzer analyzerUnderTest =
      new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, keepWords);

  final String[] expectedTerms = new String[] { "foo", "bar", "bar", "foo" };
  final int[] expectedPosIncrements = new int[] { 2, 2, 1, 2 };
  assertAnalyzesTo(analyzerUnderTest, "quick foo brown bar bar fox foo",
      expectedTerms,
      expectedPosIncrements);
}