Java 类org.apache.lucene.util.automaton.CharacterRunAutomaton 实例源码

项目:lams    文件:Stemmer.java   
/**
 * Checks a condition against the concatenation of two character ranges.
 *
 * <p>A {@code condition} of 0 always passes. Otherwise the automaton stored under that index in
 * {@code dictionary.patterns} is stepped through the {@code c1} range and then the {@code c2}
 * range; the result is whether the state reached at the end is an accept state.
 */
// note: it would be nicer to subtract the strip from the condition up front and check only the
// stem, but that is a little bit more complicated.
private boolean checkCondition(int condition, char c1[], int c1off, int c1len, char c2[], int c2off, int c2len) {
  if (condition == 0) {
    return true;
  }

  final CharacterRunAutomaton automaton = dictionary.patterns.get(condition);
  int current = automaton.getInitialState();

  // first range; a state of -1 is treated as a dead end
  final int firstEnd = c1off + c1len;
  for (int pos = c1off; pos < firstEnd; pos++) {
    current = automaton.step(current, c1[pos]);
    if (current == -1) {
      return false;
    }
  }

  // second range continues from wherever the first range left the automaton
  final int secondEnd = c2off + c2len;
  for (int pos = c2off; pos < secondEnd; pos++) {
    current = automaton.step(current, c2[pos]);
    if (current == -1) {
      return false;
    }
  }

  return automaton.isAccept(current);
}
项目:search    文件:HighlighterTest.java   
/**
 * Highlights "searchterm" in a 36-character text that ends in the stop words "in it", with the
 * analysis limit set to 36 characters, and expects the fragment to still end with "in it".
 */
public void testMaxSizeEndHighlight() throws Exception {
  TestHighlightRunner helper = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      final String fieldName = "text";
      final String content = "this is a text with searchterm in it";

      // stop words are whatever matches i[nt], i.e. "in" and "it"
      CharacterRunAutomaton stopAutomaton = new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());
      TermQuery termQuery = new TermQuery(new Term(fieldName, "searchterm"));

      Highlighter highlighter = getHighlighter(termQuery, fieldName, new SimpleHTMLFormatter());
      highlighter.setTextFragmenter(new NullFragmenter());
      highlighter.setMaxDocCharsToAnalyze(36);

      MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopAutomaton);
      String fragment = highlighter.getBestFragment(analyzer, fieldName, content);
      assertTrue(
          "Matched text should contain remainder of text after highlighted query ",
          fragment.endsWith("in it"));
    }
  };
  helper.start();
}
项目:search    文件:Stemmer.java   
/**
 * Checks a condition against the concatenation of two character ranges.
 *
 * <p>A {@code condition} of 0 always passes. Otherwise the automaton stored under that index in
 * {@code dictionary.patterns} is stepped through the {@code c1} range and then the {@code c2}
 * range; the result is whether the state reached at the end is an accept state.
 */
// note: it would be nicer to subtract the strip from the condition up front and check only the
// stem, but that is a little bit more complicated.
private boolean checkCondition(int condition, char c1[], int c1off, int c1len, char c2[], int c2off, int c2len) {
  if (condition == 0) {
    return true;
  }

  final CharacterRunAutomaton automaton = dictionary.patterns.get(condition);
  int current = automaton.getInitialState();

  // first range; a state of -1 is treated as a dead end
  final int firstEnd = c1off + c1len;
  for (int pos = c1off; pos < firstEnd; pos++) {
    current = automaton.step(current, c1[pos]);
    if (current == -1) {
      return false;
    }
  }

  // second range continues from wherever the first range left the automaton
  final int secondEnd = c2off + c2len;
  for (int pos = c2off; pos < secondEnd; pos++) {
    current = automaton.step(current, c2[pos]);
    if (current == -1) {
      return false;
    }
  }

  return automaton.isAccept(current);
}
项目:search    文件:TestDuelingAnalyzers.java   
/**
 * Builds {@code jvmLetter} from an automaton that has one transition for every code point the
 * running JVM's {@link Character#isLetter(int)} accepts, passed through
 * {@code Operations.repeat}.
 */
@Override
public void setUp() throws Exception {
  super.setUp();

  Automaton letter = new Automaton();
  final int start = letter.createState();
  final int end = letter.createState();
  letter.setAccept(end, true);

  // build an automaton matching this jvm's letter definition
  for (int codePoint = Character.MIN_CODE_POINT; codePoint <= Character.MAX_CODE_POINT; codePoint++) {
    if (!Character.isLetter(codePoint)) {
      continue;
    }
    letter.addTransition(start, end, codePoint);
  }

  jvmLetter = new CharacterRunAutomaton(Operations.repeat(letter));
}
项目:search    文件:TestPrecedenceQueryParser.java   
/**
 * Parses boosted terms and phrases with an analyzer whose only stop word is "on", and checks
 * that the boost given to "hello" is reported by the parsed query.
 */
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);

  PrecedenceQueryParser qp = new PrecedenceQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);
  // a boosted stop word must still parse to a non-null query
  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  // argument order follows JUnit's contract: expected, actual, delta
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  // same check against the English stop set, using the stop word "the"
  q = getParser(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)).parse("the^3",
      "field");
  assertNotNull(q);
}
项目:search    文件:TestQPHelper.java   
/**
 * Parses boosted terms and phrases with an analyzer whose only stop word is "on", checks that
 * the boost given to "hello" is reported, and checks that a boosted English stop word parses
 * to an empty query with the default boost.
 */
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
  StandardQueryParser qp = new StandardQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  // a boosted stop word must still parse to a non-null query
  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  // argument order follows JUnit's contract: expected, actual, delta
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  StandardQueryParser qp2 = new StandardQueryParser();
  qp2.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));

  q = qp2.parse("the^3", "field");
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
项目:search    文件:TestQPHelper.java   
/**
 * Parses queries in which some or all terms are the stop words "the" and "foo", and checks the
 * type and clause count of what the parser returns.
 */
public void testStopwords() throws Exception {
  StandardQueryParser parser = new StandardQueryParser();
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
  parser.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords));

  // both terms are stop words: expect a BooleanQuery without clauses
  Query parsed = parser.parse("a:the OR a:foo", "a");
  assertNotNull("result is null and it shouldn't be", parsed);
  assertTrue("result is not a BooleanQuery", parsed instanceof BooleanQuery);
  int clauseCount = ((BooleanQuery) parsed).clauses().size();
  assertTrue(clauseCount + " does not equal: " + 0, clauseCount == 0);

  // one stop word, one real term: expect a TermQuery
  parsed = parser.parse("a:woo OR a:the", "a");
  assertNotNull("result is null and it shouldn't be", parsed);
  assertTrue("result is not a TermQuery", parsed instanceof TermQuery);

  // stop words in the second group only: expect a BooleanQuery with two clauses
  parsed = parser.parse("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", "a");
  assertNotNull("result is null and it shouldn't be", parsed);
  assertTrue("result is not a BooleanQuery", parsed instanceof BooleanQuery);
  if (VERBOSE) {
    System.out.println("Result: " + parsed);
  }
  clauseCount = ((BooleanQuery) parsed).clauses().size();
  assertTrue(clauseCount + " does not equal: " + 2, clauseCount == 2);
}
项目:search    文件:QueryParserTestBase.java   
/**
 * Parses boosted terms and phrases with an analyzer whose only stop word is "on", checks that
 * the boost given to "hello" is reported, and checks that a boosted English stop word parses
 * to an empty query with the default boost.
 */
public void testBoost()
  throws Exception {
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
  CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer);
  // a boosted stop word must still parse to a non-null query
  Query q = getQuery("on^1.0",qp);
  assertNotNull(q);
  q = getQuery("\"hello\"^2.0",qp);
  assertNotNull(q);
  // argument order follows JUnit's contract: expected, actual, delta
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = getQuery("hello^2.0",qp);
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = getQuery("\"on\"^1.0",qp);
  assertNotNull(q);

  Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
  CommonQueryParserConfiguration qp2 = getParserConfig(a2);
  q = getQuery("the^3", qp2);
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
项目:search    文件:QueryParserTestBase.java   
/**
 * Parses queries in which some or all terms are the stop words "the" and "foo", and checks the
 * type and clause count of what the parser returns.
 */
public void testStopwords() throws Exception {
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
  CommonQueryParserConfiguration config = getParserConfig(analyzer);

  // both terms are stop words: expect a BooleanQuery without clauses
  Query parsed = getQuery("field:the OR field:foo", config);
  assertNotNull("result is null and it shouldn't be", parsed);
  assertTrue("result is not a BooleanQuery", parsed instanceof BooleanQuery);
  int clauseCount = ((BooleanQuery) parsed).clauses().size();
  assertTrue(clauseCount + " does not equal: " + 0, clauseCount == 0);

  // one stop word, one real term: expect a TermQuery
  parsed = getQuery("field:woo OR field:the", config);
  assertNotNull("result is null and it shouldn't be", parsed);
  assertTrue("result is not a TermQuery", parsed instanceof TermQuery);

  // stop words in the second group only: expect a BooleanQuery with two clauses
  parsed = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", config);
  assertNotNull("result is null and it shouldn't be", parsed);
  assertTrue("result is not a BooleanQuery", parsed instanceof BooleanQuery);
  if (VERBOSE) {
    System.out.println("Result: " + parsed);
  }
  clauseCount = ((BooleanQuery) parsed).clauses().size();
  assertTrue(clauseCount + " does not equal: " + 2, clauseCount == 2);
}
项目:search    文件:TestMockAnalyzer.java   
/** Tests a configuration where every two characters make a term (pattern {@code ..}). */
public void testTwoChars() throws Exception {
  CharacterRunAutomaton single =
      new CharacterRunAutomaton(new RegExp("..").toAutomaton());
  Analyzer a = new MockAnalyzer(random(), single, false);
  assertAnalyzesTo(a, "foobar",
      new String[] { "fo", "ob", "ar"},
      new int[] { 0, 2, 4 },
      new int[] { 2, 4, 6 }
  );
  // make sure when last term is a "partial" match that end() is correct:
  // the trailing "a" produces no token, and 5 is the length of the input "fooba"
  assertTokenStreamContents(a.tokenStream("bogus", "fooba"),
      new String[] { "fo", "ob" },
      new int[] { 0, 2 },
      new int[] { 2, 4 },
      new int[] { 1, 1 },
      // Integer.valueOf replaces the deprecated Integer(int) constructor
      Integer.valueOf(5)
  );
  checkRandomData(random(), a, 100);
}
项目:search    文件:TestMockAnalyzer.java   
/** Tests a configuration where every three characters make a term (pattern {@code ...}). */
public void testThreeChars() throws Exception {
  CharacterRunAutomaton single =
      new CharacterRunAutomaton(new RegExp("...").toAutomaton());
  Analyzer a = new MockAnalyzer(random(), single, false);
  assertAnalyzesTo(a, "foobar",
      new String[] { "foo", "bar"},
      new int[] { 0, 3 },
      new int[] { 3, 6 }
  );
  // make sure when last term is a "partial" match that end() is correct:
  // the trailing "ba" produces no token, and 5 is the length of the input "fooba"
  assertTokenStreamContents(a.tokenStream("bogus", "fooba"),
      new String[] { "foo" },
      new int[] { 0 },
      new int[] { 3 },
      new int[] { 1 },
      // Integer.valueOf replaces the deprecated Integer(int) constructor
      Integer.valueOf(5)
  );
  checkRandomData(random(), a, 100);
}
项目:search    文件:TestMockAnalyzer.java   
/**
 * Tests tokenization with the pattern {@code [A-Z][a-z]*}: one uppercase letter followed by any
 * number of lowercase letters.
 */
public void testUppercase() throws Exception {
  CharacterRunAutomaton capitalized = new CharacterRunAutomaton(new RegExp("[A-Z][a-z]*").toAutomaton());
  Analyzer analyzer = new MockAnalyzer(random(), capitalized, false);

  // consecutive capitals each become their own token
  String[] expectedTerms = { "Foo", "Bar", "B", "A", "Z" };
  int[] expectedStarts = { 0, 3, 6, 7, 8 };
  int[] expectedEnds = { 3, 6, 7, 8, 9 };
  assertAnalyzesTo(analyzer, "FooBarBAZ", expectedTerms, expectedStarts, expectedEnds);

  // the leading lowercase 'a' is not part of any expected token
  assertAnalyzesTo(analyzer, "aFooBar",
      new String[] { "Foo", "Bar" },
      new int[] { 1, 4 },
      new int[] { 4, 7 });

  checkRandomData(random(), analyzer, 100);
}
项目:search    文件:TestMockAnalyzer.java   
/**
 * Blasts random strings through differently configured tokenizers: each iteration uses a random
 * automaton, a random lowercase flag and a random limit between 0 and 500.
 */
public void testRandomRegexps() throws Exception {
  int iters = atLeast(30);
  for (int i = 0; i < iters; i++) {
    final CharacterRunAutomaton dfa = new CharacterRunAutomaton(AutomatonTestUtil.randomAutomaton(random()), Integer.MAX_VALUE);
    final boolean lowercase = random().nextBoolean();
    final int limit = TestUtil.nextInt(random(), 0, 500);
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer t = new MockTokenizer(reader, dfa, lowercase, limit);
        return new TokenStreamComponents(t, t);
      }
    };
    try {
      checkRandomData(random(), a, 100);
    } finally {
      // close the analyzer even when the random check fails
      a.close();
    }
  }
}
项目:resource-query-parser    文件:QueryParserTestBase.java   
/**
 * Parses boosted terms and phrases with an analyzer whose only stop word is "on", checks that
 * the boost given to "hello" is carried by the resulting {@code BoostQuery}, and checks that a
 * boosted English stop word yields a match-no-docs query that is not wrapped in a boost.
 */
@Test
public void testBoost() throws Exception {
    CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("on"));
    Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
    QueryParser qp = getParserConfig(oneStopAnalyzer);
    // a boosted stop word must still parse to a non-null query
    Query q = getQuery("on^1.0", qp);
    Assert.assertNotNull(q);
    q = getQuery("\"hello\"^2.0", qp);
    Assert.assertNotNull(q);
    // argument order follows JUnit's contract: expected, actual, delta
    Assert.assertEquals(2.0f, ((BoostQuery) q).getBoost(), 0.5f);
    q = getQuery("hello^2.0", qp);
    Assert.assertNotNull(q);
    Assert.assertEquals(2.0f, ((BoostQuery) q).getBoost(), 0.5f);
    q = getQuery("\"on\"^1.0", qp);
    Assert.assertNotNull(q);

    Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
    QueryParser qp2 = getParserConfig(a2);
    q = getQuery("the^3", qp2);
    // "the" is a stop word so the result is an empty query:
    Assert.assertNotNull(q);
    assertMatchNoDocsQuery(q);
    Assert.assertFalse(q instanceof BoostQuery);
}
项目:resource-query-parser    文件:QueryParserTestBase.java   
/**
 * Parses queries in which some or all terms are the stop words "the" and "foo", and checks the
 * type and clause count of what the parser returns.
 */
@Test
public void testStopwords() throws Exception {
    CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
    QueryParser qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));

    // both terms are stop words: either an empty BooleanQuery or a MatchNoDocsQuery is accepted
    Query result = getQuery("field:the OR field:foo", qp);
    Assert.assertNotNull("result is null and it shouldn't be", result);
    // the message names both accepted types so a failure is not misreported
    Assert.assertTrue("result is neither a BooleanQuery nor a MatchNoDocsQuery",
            result instanceof BooleanQuery || result instanceof MatchNoDocsQuery);
    if (result instanceof BooleanQuery) {
        Assert.assertEquals(0, ((BooleanQuery) result).clauses().size());
    }

    // one stop word, one real term: expect a TermQuery
    result = getQuery("field:woo OR field:the", qp);
    Assert.assertNotNull("result is null and it shouldn't be", result);
    Assert.assertTrue("result is not a TermQuery", result instanceof TermQuery);

    // the outer result is a BoostQuery; the query it wraps must be a two-clause BooleanQuery
    result = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", qp);
    Assert.assertNotNull("result is null and it shouldn't be", result);
    Assert.assertTrue("result is not a BoostQuery", result instanceof BoostQuery);
    result = ((BoostQuery) result).getQuery();
    Assert.assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
    if (VERBOSE)
        System.out.println("Result: " + result);
    Assert.assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2,
            ((BooleanQuery) result).clauses().size() == 2);
}
项目:NYBC    文件:HighlighterTest.java   
/**
 * Highlights "searchterm" in a 36-character text that ends in the stop words "in it", with the
 * analysis limit set to 36 characters, and expects the fragment to still end with "in it".
 */
public void testMaxSizeEndHighlight() throws Exception {
  TestHighlightRunner helper = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      final String fieldName = "text";
      final String content = "this is a text with searchterm in it";

      // stop words are whatever matches i[nt], i.e. "in" and "it"
      CharacterRunAutomaton stopAutomaton = new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());
      TermQuery termQuery = new TermQuery(new Term(fieldName, "searchterm"));

      Highlighter highlighter = getHighlighter(termQuery, fieldName, new SimpleHTMLFormatter());
      highlighter.setTextFragmenter(new NullFragmenter());
      highlighter.setMaxDocCharsToAnalyze(36);

      MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopAutomaton, true);
      String fragment = highlighter.getBestFragment(analyzer, fieldName, content);
      assertTrue(
          "Matched text should contain remainder of text after highlighted query ",
          fragment.endsWith("in it"));
    }
  };
  helper.start();
}
项目:NYBC    文件:TestDuelingAnalyzers.java   
/**
 * Builds {@code jvmLetter} from an automaton that has one transition for every code point the
 * running JVM's {@link Character#isLetter(int)} accepts, reduced and then passed through
 * {@code BasicOperations.repeat}.
 */
@Override
public void setUp() throws Exception {
  super.setUp();

  // build an automaton matching this jvm's letter definition
  State start = new State();
  State end = new State();
  end.setAccept(true);
  for (int codePoint = Character.MIN_CODE_POINT; codePoint <= Character.MAX_CODE_POINT; codePoint++) {
    if (!Character.isLetter(codePoint)) {
      continue;
    }
    start.addTransition(new Transition(codePoint, codePoint, end));
  }

  Automaton letter = new Automaton(start);
  letter.reduce();
  jvmLetter = new CharacterRunAutomaton(BasicOperations.repeat(letter));
}
项目:NYBC    文件:TestPrecedenceQueryParser.java   
/**
 * Parses boosted terms and phrases with an analyzer whose only stop word is "on", and checks
 * that the boost given to "hello" is reported by the parsed query.
 */
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet, true);

  PrecedenceQueryParser qp = new PrecedenceQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);
  // a boosted stop word must still parse to a non-null query
  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  // argument order follows JUnit's contract: expected, actual, delta
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  // same check against the English stop set, using the stop word "the"
  q = getParser(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)).parse("the^3",
      "field");
  assertNotNull(q);
}
项目:NYBC    文件:TestQPHelper.java   
/**
 * Parses boosted terms and phrases with an analyzer whose only stop word is "on", checks that
 * the boost given to "hello" is reported, and checks that a boosted English stop word parses
 * to an empty query with the default boost.
 */
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet, true);
  StandardQueryParser qp = new StandardQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  // a boosted stop word must still parse to a non-null query
  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  // argument order follows JUnit's contract: expected, actual, delta
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  StandardQueryParser qp2 = new StandardQueryParser();
  qp2.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true));

  q = qp2.parse("the^3", "field");
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
项目:NYBC    文件:TestQPHelper.java   
/**
 * Parses queries in which some or all terms are the stop words "the" and "foo", and checks the
 * type and clause count of what the parser returns.
 */
public void testStopwords() throws Exception {
  StandardQueryParser parser = new StandardQueryParser();
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
  parser.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords, true));

  // both terms are stop words: expect a BooleanQuery without clauses
  Query parsed = parser.parse("a:the OR a:foo", "a");
  assertNotNull("result is null and it shouldn't be", parsed);
  assertTrue("result is not a BooleanQuery", parsed instanceof BooleanQuery);
  int clauseCount = ((BooleanQuery) parsed).clauses().size();
  assertTrue(clauseCount + " does not equal: " + 0, clauseCount == 0);

  // one stop word, one real term: expect a TermQuery
  parsed = parser.parse("a:woo OR a:the", "a");
  assertNotNull("result is null and it shouldn't be", parsed);
  assertTrue("result is not a TermQuery", parsed instanceof TermQuery);

  // stop words in the second group only: expect a BooleanQuery with two clauses
  parsed = parser.parse("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", "a");
  assertNotNull("result is null and it shouldn't be", parsed);
  assertTrue("result is not a BooleanQuery", parsed instanceof BooleanQuery);
  if (VERBOSE) {
    System.out.println("Result: " + parsed);
  }
  clauseCount = ((BooleanQuery) parsed).clauses().size();
  assertTrue(clauseCount + " does not equal: " + 2, clauseCount == 2);
}
项目:NYBC    文件:QueryParserTestBase.java   
/**
 * Parses boosted terms and phrases with an analyzer whose only stop word is "on", checks that
 * the boost given to "hello" is reported, and checks that a boosted English stop word parses
 * to an empty query with the default boost.
 */
public void testBoost()
  throws Exception {
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords, true);
  CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer);
  // a boosted stop word must still parse to a non-null query
  Query q = getQuery("on^1.0",qp);
  assertNotNull(q);
  q = getQuery("\"hello\"^2.0",qp);
  assertNotNull(q);
  // argument order follows JUnit's contract: expected, actual, delta
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = getQuery("hello^2.0",qp);
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = getQuery("\"on\"^1.0",qp);
  assertNotNull(q);

  Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
  CommonQueryParserConfiguration qp2 = getParserConfig(a2);
  q = getQuery("the^3", qp2);
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
项目:NYBC    文件:QueryParserTestBase.java   
/**
 * Parses queries in which some or all terms are the stop words "the" and "foo", and checks the
 * type and clause count of what the parser returns.
 */
public void testStopwords() throws Exception {
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords, true);
  CommonQueryParserConfiguration config = getParserConfig(analyzer);

  // both terms are stop words: expect a BooleanQuery without clauses
  Query parsed = getQuery("field:the OR field:foo", config);
  assertNotNull("result is null and it shouldn't be", parsed);
  assertTrue("result is not a BooleanQuery", parsed instanceof BooleanQuery);
  int clauseCount = ((BooleanQuery) parsed).clauses().size();
  assertTrue(clauseCount + " does not equal: " + 0, clauseCount == 0);

  // one stop word, one real term: expect a TermQuery
  parsed = getQuery("field:woo OR field:the", config);
  assertNotNull("result is null and it shouldn't be", parsed);
  assertTrue("result is not a TermQuery", parsed instanceof TermQuery);

  // stop words in the second group only: expect a BooleanQuery with two clauses
  parsed = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", config);
  assertNotNull("result is null and it shouldn't be", parsed);
  assertTrue("result is not a BooleanQuery", parsed instanceof BooleanQuery);
  if (VERBOSE) {
    System.out.println("Result: " + parsed);
  }
  clauseCount = ((BooleanQuery) parsed).clauses().size();
  assertTrue(clauseCount + " does not equal: " + 2, clauseCount == 2);
}
项目:read-open-source-code    文件:Stemmer.java   
/**
 * Checks a condition against the concatenation of two character ranges.
 *
 * <p>A {@code condition} of 0 always passes. Otherwise the automaton stored under that index in
 * {@code dictionary.patterns} is stepped through the {@code c1} range and then the {@code c2}
 * range; the result is whether the state reached at the end is an accept state.
 */
// note: it would be nicer to subtract the strip from the condition up front and check only the
// stem, but that is a little bit more complicated.
private boolean checkCondition(int condition, char c1[], int c1off, int c1len, char c2[], int c2off, int c2len) {
  if (condition == 0) {
    return true;
  }

  final CharacterRunAutomaton automaton = dictionary.patterns.get(condition);
  int current = automaton.getInitialState();

  // first range; a state of -1 is treated as a dead end
  final int firstEnd = c1off + c1len;
  for (int pos = c1off; pos < firstEnd; pos++) {
    current = automaton.step(current, c1[pos]);
    if (current == -1) {
      return false;
    }
  }

  // second range continues from wherever the first range left the automaton
  final int secondEnd = c2off + c2len;
  for (int pos = c2off; pos < secondEnd; pos++) {
    current = automaton.step(current, c2[pos]);
    if (current == -1) {
      return false;
    }
  }

  return automaton.isAccept(current);
}
项目:lucene-addons    文件:QueryParserTestBase.java   
/**
 * Parses boosted terms and phrases with an analyzer whose only stop word is "on", checks that
 * the boost given to "hello" is reported, and checks that a boosted English stop word parses
 * to an empty query with a boost of 1.
 */
public void testBoost()
    throws Exception {
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
  CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer);
  // a boosted stop word must still parse to a non-null query
  Query q = getQuery("on^1.0",qp);
  assertNotNull(q);
  q = getQuery("\"hello\"^2.0",qp);
  assertNotNull(q);

  // argument order follows JUnit's contract: expected, actual, delta
  assertEquals(2.0f, getBoost(q), 0.5f);
  q = getQuery("hello^2.0",qp);
  assertNotNull(q);
  assertEquals(2.0f, ((BoostQuery)q).getBoost(), 0.5f);
  q = getQuery("\"on\"^1.0",qp);
  assertNotNull(q);

  Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
  CommonQueryParserConfiguration qp2 = getParserConfig(a2);
  q = getQuery("the^3", qp2);
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEmpty(q);
  assertEquals(1.0f, getBoost(q), 0.01f);
}
项目:lucene-addons    文件:QueryParserTestBase.java   
/**
 * Parses queries in which some or all terms are the stop words "the" and "foo", and checks the
 * type and clause count of what the parser returns.
 */
public void testStopwords() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
  CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));

  // both terms are stop words: several "empty" query types are accepted
  Query result = getQuery("field:the OR field:foo",qp);
  assertNotNull("result is null and it shouldn't be", result);
  // diagnostic output only when verbose, like the other print in this test
  if (VERBOSE) System.out.println(result.getClass());
  // the message names every accepted type so a failure is not misreported
  assertTrue("result is not a SpanOrQuery, BooleanQuery or MatchNoDocsQuery",
      result instanceof SpanOrQuery || result instanceof BooleanQuery || result instanceof MatchNoDocsQuery);
  if (result instanceof BooleanQuery) {
    assertEquals(0, ((BooleanQuery) result).clauses().size());
  }

  // one stop word, one real term: expect a TermQuery
  result = getQuery("field:woo OR field:the",qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a TermQuery", result instanceof TermQuery);

  // the outer result is a BoostQuery; the query it wraps must be a two-clause BooleanQuery
  result = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)",qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a BoostQuery", result instanceof BoostQuery);
  result = ((BoostQuery) result).getQuery();
  assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
  if (VERBOSE) System.out.println("Result: " + result);
  assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2, ((BooleanQuery) result).clauses().size() == 2);
}
项目:Maskana-Gestor-de-Conocimiento    文件:HighlighterTest.java   
/**
 * Highlights "searchterm" in a 36-character text that ends in the stop words "in it", with the
 * analysis limit set to 36 characters, and expects the fragment to still end with "in it".
 */
public void testMaxSizeEndHighlight() throws Exception {
  TestHighlightRunner helper = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      final String fieldName = "text";
      final String content = "this is a text with searchterm in it";

      // stop words are whatever matches i[nt], i.e. "in" and "it"
      CharacterRunAutomaton stopAutomaton = new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());
      TermQuery termQuery = new TermQuery(new Term(fieldName, "searchterm"));

      Highlighter highlighter = getHighlighter(termQuery, fieldName, new SimpleHTMLFormatter());
      highlighter.setTextFragmenter(new NullFragmenter());
      highlighter.setMaxDocCharsToAnalyze(36);

      MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopAutomaton);
      String fragment = highlighter.getBestFragment(analyzer, fieldName, content);
      assertTrue(
          "Matched text should contain remainder of text after highlighted query ",
          fragment.endsWith("in it"));
    }
  };
  helper.start();
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestDuelingAnalyzers.java   
/**
 * Builds {@code jvmLetter} from an automaton that has one transition for every code point the
 * running JVM's {@link Character#isLetter(int)} accepts, reduced and then passed through
 * {@code BasicOperations.repeat}.
 */
@Override
public void setUp() throws Exception {
  super.setUp();

  // build an automaton matching this jvm's letter definition
  State start = new State();
  State end = new State();
  end.setAccept(true);
  for (int codePoint = Character.MIN_CODE_POINT; codePoint <= Character.MAX_CODE_POINT; codePoint++) {
    if (!Character.isLetter(codePoint)) {
      continue;
    }
    start.addTransition(new Transition(codePoint, codePoint, end));
  }

  Automaton letter = new Automaton(start);
  letter.reduce();
  jvmLetter = new CharacterRunAutomaton(BasicOperations.repeat(letter));
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestPrecedenceQueryParser.java   
/**
 * Parses boosted terms and phrases with an analyzer whose only stop word is "on", and checks
 * that the boost given to "hello" is reported by the parsed query.
 */
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);

  PrecedenceQueryParser qp = new PrecedenceQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);
  // a boosted stop word must still parse to a non-null query
  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  // argument order follows JUnit's contract: expected, actual, delta
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  // same check against the English stop set, using the stop word "the"
  q = getParser(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)).parse("the^3",
      "field");
  assertNotNull(q);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestQPHelper.java   
/**
 * Parses boosted terms and phrases with an analyzer whose only stop word is "on", checks that
 * the boost given to "hello" is reported, and checks that a boosted English stop word parses
 * to an empty query with the default boost.
 */
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
  StandardQueryParser qp = new StandardQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  // a boosted stop word must still parse to a non-null query
  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  // argument order follows JUnit's contract: expected, actual, delta
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  StandardQueryParser qp2 = new StandardQueryParser();
  qp2.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));

  q = qp2.parse("the^3", "field");
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestQPHelper.java   
/**
 * Parses queries in which some or all terms are the stop words "the" and "foo", and checks the
 * type and clause count of what the parser returns.
 */
public void testStopwords() throws Exception {
  StandardQueryParser parser = new StandardQueryParser();
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
  parser.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords));

  // both terms are stop words: expect a BooleanQuery without clauses
  Query parsed = parser.parse("a:the OR a:foo", "a");
  assertNotNull("result is null and it shouldn't be", parsed);
  assertTrue("result is not a BooleanQuery", parsed instanceof BooleanQuery);
  int clauseCount = ((BooleanQuery) parsed).clauses().size();
  assertTrue(clauseCount + " does not equal: " + 0, clauseCount == 0);

  // one stop word, one real term: expect a TermQuery
  parsed = parser.parse("a:woo OR a:the", "a");
  assertNotNull("result is null and it shouldn't be", parsed);
  assertTrue("result is not a TermQuery", parsed instanceof TermQuery);

  // stop words in the second group only: expect a BooleanQuery with two clauses
  parsed = parser.parse("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", "a");
  assertNotNull("result is null and it shouldn't be", parsed);
  assertTrue("result is not a BooleanQuery", parsed instanceof BooleanQuery);
  if (VERBOSE) {
    System.out.println("Result: " + parsed);
  }
  clauseCount = ((BooleanQuery) parsed).clauses().size();
  assertTrue(clauseCount + " does not equal: " + 2, clauseCount == 2);
}
项目:Maskana-Gestor-de-Conocimiento    文件:QueryParserTestBase.java   
/**
 * Parses boosted terms and phrases with an analyzer whose only stop word is "on", checks that
 * the boost given to "hello" is reported, and checks that a boosted English stop word parses
 * to an empty query with the default boost.
 */
public void testBoost()
  throws Exception {
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
  CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer);
  // a boosted stop word must still parse to a non-null query
  Query q = getQuery("on^1.0",qp);
  assertNotNull(q);
  q = getQuery("\"hello\"^2.0",qp);
  assertNotNull(q);
  // argument order follows JUnit's contract: expected, actual, delta
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = getQuery("hello^2.0",qp);
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);
  q = getQuery("\"on\"^1.0",qp);
  assertNotNull(q);

  Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
  CommonQueryParserConfiguration qp2 = getParserConfig(a2);
  q = getQuery("the^3", qp2);
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
项目:Maskana-Gestor-de-Conocimiento    文件:QueryParserTestBase.java   
/**
 * Parses queries in which some or all terms are the stop words "the" and "foo", and checks the
 * type and clause count of what the parser returns.
 */
public void testStopwords() throws Exception {
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
  CommonQueryParserConfiguration config = getParserConfig(analyzer);

  // both terms are stop words: expect a BooleanQuery without clauses
  Query parsed = getQuery("field:the OR field:foo", config);
  assertNotNull("result is null and it shouldn't be", parsed);
  assertTrue("result is not a BooleanQuery", parsed instanceof BooleanQuery);
  int clauseCount = ((BooleanQuery) parsed).clauses().size();
  assertTrue(clauseCount + " does not equal: " + 0, clauseCount == 0);

  // one stop word, one real term: expect a TermQuery
  parsed = getQuery("field:woo OR field:the", config);
  assertNotNull("result is null and it shouldn't be", parsed);
  assertTrue("result is not a TermQuery", parsed instanceof TermQuery);

  // stop words in the second group only: expect a BooleanQuery with two clauses
  parsed = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", config);
  assertNotNull("result is null and it shouldn't be", parsed);
  assertTrue("result is not a BooleanQuery", parsed instanceof BooleanQuery);
  if (VERBOSE) {
    System.out.println("Result: " + parsed);
  }
  clauseCount = ((BooleanQuery) parsed).clauses().size();
  assertTrue(clauseCount + " does not equal: " + 2, clauseCount == 2);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestMockAnalyzer.java   
/** Tests a configuration where every two characters make a term (pattern {@code ..}). */
public void testTwoChars() throws Exception {
  CharacterRunAutomaton single =
      new CharacterRunAutomaton(new RegExp("..").toAutomaton());
  Analyzer a = new MockAnalyzer(random(), single, false);
  assertAnalyzesTo(a, "foobar",
      new String[] { "fo", "ob", "ar"},
      new int[] { 0, 2, 4 },
      new int[] { 2, 4, 6 }
  );
  // make sure when last term is a "partial" match that end() is correct:
  // the trailing "a" produces no token, and 5 is the length of the input "fooba"
  assertTokenStreamContents(a.tokenStream("bogus", "fooba"),
      new String[] { "fo", "ob" },
      new int[] { 0, 2 },
      new int[] { 2, 4 },
      new int[] { 1, 1 },
      // Integer.valueOf replaces the deprecated Integer(int) constructor
      Integer.valueOf(5)
  );
  checkRandomData(random(), a, 100);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestMockAnalyzer.java   
/**
 * Test a configuration where every three characters make a term.
 * The tokenizing automaton is built from the regexp {@code ...}.
 */
public void testThreeChars() throws Exception {
  CharacterRunAutomaton threeChars =
      new CharacterRunAutomaton(new RegExp("...").toAutomaton());
  Analyzer a = new MockAnalyzer(random(), threeChars, false);
  assertAnalyzesTo(a, "foobar",
      new String[] { "foo", "bar"},
      new int[] { 0, 3 },
      new int[] { 3, 6 }
  );
  // make sure when last term is a "partial" match that end() is correct
  assertTokenStreamContents(a.tokenStream("bogus", "fooba"),
      new String[] { "foo" },
      new int[] { 0 },
      new int[] { 3 },
      new int[] { 1 },
      // Integer.valueOf replaces the deprecated Integer(int) constructor.
      // NOTE(review): 5 is presumably the expected final offset (length of "fooba") - confirm.
      Integer.valueOf(5)
  );
  checkRandomData(random(), a, 100);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestMockAnalyzer.java   
/**
 * Test a configuration where a term is one uppercase letter followed by any number of
 * lowercase letters (regexp {@code [A-Z][a-z]*}).
 */
public void testUppercase() throws Exception {
  RegExp capitalizedWord = new RegExp("[A-Z][a-z]*");
  CharacterRunAutomaton capitalized = new CharacterRunAutomaton(capitalizedWord.toAutomaton());
  Analyzer analyzer = new MockAnalyzer(random(), capitalized, false);

  // Consecutive uppercase letters each become a term of their own.
  String[] camelTerms = { "Foo", "Bar", "B", "A", "Z" };
  int[] camelStarts = { 0, 3, 6, 7, 8 };
  int[] camelEnds = { 3, 6, 7, 8, 9 };
  assertAnalyzesTo(analyzer, "FooBarBAZ", camelTerms, camelStarts, camelEnds);

  // The leading lowercase 'a' is not part of any expected term.
  String[] prefixedTerms = { "Foo", "Bar" };
  int[] prefixedStarts = { 1, 4 };
  int[] prefixedEnds = { 4, 7 };
  assertAnalyzesTo(analyzer, "aFooBar", prefixedTerms, prefixedStarts, prefixedEnds);

  checkRandomData(random(), analyzer, 100);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestMockAnalyzer.java   
/**
 * Blast some random strings through differently configured tokenizers.
 * Each iteration builds a {@link MockTokenizer} from a random automaton, a random lowercase flag
 * and a random limit in [0, 500], then runs {@code checkRandomData} against it.
 */
public void testRandomRegexps() throws Exception {
  int iters = atLeast(30);
  for (int i = 0; i < iters; i++) {
    final CharacterRunAutomaton dfa = new CharacterRunAutomaton(AutomatonTestUtil.randomAutomaton(random()));
    final boolean lowercase = random().nextBoolean();
    final int limit = _TestUtil.nextInt(random(), 0, 500);
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer t = new MockTokenizer(reader, dfa, lowercase, limit);
        return new TokenStreamComponents(t, t);
      }
    };
    try {
      checkRandomData(random(), a, 100);
    } finally {
      // Close the analyzer even when the random check fails, so a failing iteration does not leak it.
      a.close();
    }
  }
}
项目:elasticsearch_my    文件:TransportReindexAction.java   
/**
 * Rejects a remote whose {@code host:port} string is not accepted by the whitelist automaton.
 * A {@code null} remote is accepted without consulting the whitelist.
 *
 * @param whitelist automaton run against the {@code host:port} string
 * @param remoteInfo remote to check, may be {@code null}
 * @throws IllegalArgumentException if the whitelist does not accept the remote's {@code host:port}
 */
static void checkRemoteWhitelist(CharacterRunAutomaton whitelist, RemoteInfo remoteInfo) {
    if (remoteInfo != null) {
        final String address = remoteInfo.getHost() + ':' + remoteInfo.getPort();
        if (false == whitelist.run(address)) {
            throw new IllegalArgumentException('[' + address + "] not whitelisted in " + REMOTE_CLUSTER_WHITELIST.getKey());
        }
    }
}
项目:elasticsearch_my    文件:TransportReindexAction.java   
/**
 * Compile the reindex-from-remote whitelist into a {@link CharacterRunAutomaton}. An empty list yields an
 * automaton built from {@code Automata.makeEmpty()}. A whitelist whose minimized automaton is total is
 * refused, so the result can never whitelist the world.
 *
 * @param whitelist simple-match patterns to combine
 * @return the run automaton for the whitelist
 * @throws IllegalArgumentException if the combined whitelist accepts all addresses
 */
static CharacterRunAutomaton buildRemoteWhitelist(List<String> whitelist) {
    if (false == whitelist.isEmpty()) {
        final Automaton minimized = MinimizationOperations.minimize(
                Regex.simpleMatchToAutomaton(whitelist.toArray(Strings.EMPTY_ARRAY)),
                Operations.DEFAULT_MAX_DETERMINIZED_STATES);
        if (false == Operations.isTotal(minimized)) {
            return new CharacterRunAutomaton(minimized);
        }
        throw new IllegalArgumentException("Refusing to start because whitelist " + whitelist + " accepts all addresses. "
                + "This would allow users to reindex-from-remote any URL they like effectively having Elasticsearch make HTTP GETs "
                + "for them.");
    }
    return new CharacterRunAutomaton(Automata.makeEmpty());
}
项目:elasticsearch-analysis-opennlp    文件:MockTokenizer.java   
/**
 * Creates a tokenizer driven by the given run automaton.
 *
 * @param factory attribute factory handed to the superclass constructor
 * @param runAutomaton automaton stored by this tokenizer; its initial state seeds {@code state}
 * @param lowerCase flag stored in {@code lowerCase}
 * @param maxTokenLength value stored in {@code maxTokenLength}
 */
public MockTokenizer(AttributeFactory factory, CharacterRunAutomaton runAutomaton, boolean lowerCase, int maxTokenLength) {
    super(factory);
    // Plain configuration first.
    this.lowerCase = lowerCase;
    this.maxTokenLength = maxTokenLength;
    // Then the automaton and the state it starts in.
    this.runAutomaton = runAutomaton;
    this.state = runAutomaton.getInitialState();
}
项目:elasticsearch-icu    文件:MockTokenizer.java   
/**
 * Creates a tokenizer driven by the given run automaton and registers its term and offset attributes.
 *
 * @param factory attribute factory handed to the superclass constructor
 * @param runAutomaton automaton stored by this tokenizer; its initial state seeds {@code state}
 * @param lowerCase flag stored in {@code lowerCase}
 * @param maxTokenLength value stored in {@code maxTokenLength}
 */
public MockTokenizer(AttributeFactory factory, CharacterRunAutomaton runAutomaton, boolean lowerCase, int maxTokenLength) {
    super(factory);
    // Plain configuration first.
    this.lowerCase = lowerCase;
    this.maxTokenLength = maxTokenLength;
    // Then the automaton and the state it starts in.
    this.runAutomaton = runAutomaton;
    this.state = runAutomaton.getInitialState();
    // Attributes are registered in the same order as before: term, then offset.
    this.termAtt = addAttribute(CharTermAttribute.class);
    this.offsetAtt = addAttribute(OffsetAttribute.class);
}