Java class org.apache.lucene.analysis.MockTokenizer example source code

Project: elasticsearch_my    File: UniqueTokenFilterTests.java
public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new UniqueTokenFilter(t));
        }
    };

    TokenStream test = analyzer.tokenStream("test", "this test with test");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("this"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("test"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("with"));

    assertThat(test.incrementToken(), equalTo(false));
}
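MockTokenizer differs from an ordinary tokenizer in that it uses an internal state machine to verify that consumers follow the full TokenStream contract (reset, then incrementToken until exhausted, then end, then close). The test above stops after the last incrementToken(); a minimal sketch of the complete consume loop, not part of the original test and reusing the analyzer defined above, would be:

// Complete TokenStream consumer workflow, which MockTokenizer's
// state machine is designed to check: reset -> incrementToken -> end -> close.
try (TokenStream ts = analyzer.tokenStream("test", "this test with test")) {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();                    // required before the first incrementToken()
    while (ts.incrementToken()) {
        System.out.println(term);  // consume each token in order
    }
    ts.end();                      // finalizes offsets for the stream
}                                  // try-with-resources calls close()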
Project: elasticsearch_my    File: NGramTokenizerFactoryTests.java
public void testBackwardsCompatibilityEdgeNgramTokenFilter() throws Exception {
    int iters = scaledRandomIntBetween(20, 100);
    for (int i = 0; i < iters; i++) {
        final Index index = new Index("test", "_na_");
        final String name = "ngr";
        Version v = randomVersion(random());
        Builder builder = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3);
        boolean reverse = random().nextBoolean();
        if (reverse) {
            builder.put("side", "back");
        }
        Settings settings = builder.build();
        Settings indexSettings = newAnalysisSettingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, v.id).build();
        Tokenizer tokenizer = new MockTokenizer();
        tokenizer.setReader(new StringReader("foo bar"));
        TokenStream edgeNGramTokenFilter = new EdgeNGramTokenFilterFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create(tokenizer);
        if (reverse) {
            assertThat(edgeNGramTokenFilter, instanceOf(ReverseStringFilter.class));
        } else {
            assertThat(edgeNGramTokenFilter, instanceOf(EdgeNGramTokenFilter.class));
        }
    }
}
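A detail worth noting in this snippet: the no-argument MockTokenizer() constructor defaults to whitespace tokenization with lowercasing, so the tokenizer above behaves as if built with the two-argument form used in most other examples on this page. A sketch of the equivalence, assuming the test framework's documented defaults:

// These two tokenizers are configured identically (assumed defaults:
// MockTokenizer.WHITESPACE pattern, lowerCase = true).
Tokenizer implicitDefaults = new MockTokenizer();
Tokenizer explicitDefaults = new MockTokenizer(MockTokenizer.WHITESPACE, true);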
Project: elasticsearch_my    File: TokenCountFieldMapperTests.java
public void testCountPositions() throws IOException {
    // We're looking to make sure that we:
    Token t1 = new Token();      // Don't count tokens without an increment
    t1.setPositionIncrement(0);
    Token t2 = new Token();
    t2.setPositionIncrement(1);  // Count normal tokens with one increment
    Token t3 = new Token();
    t3.setPositionIncrement(2);  // Count funny tokens with more than one increment
    int finalTokenIncrement = 4; // Count the final token increment on the rare token streams that have them
    Token[] tokens = new Token[] {t1, t2, t3};
    Collections.shuffle(Arrays.asList(tokens), random());
    final TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens);
    // TODO: we have no CannedAnalyzer?
    Analyzer analyzer = new Analyzer() {
            @Override
            public TokenStreamComponents createComponents(String fieldName) {
                return new TokenStreamComponents(new MockTokenizer(), tokenStream);
            }
        };
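    // Expected positions: t1 (0) + t2 (1) + t3 (2) + finalTokenIncrement (4) = 7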
    assertThat(TokenCountFieldMapper.countPositions(analyzer, "", ""), equalTo(7));
}
Project: search    File: TestSynonymMapFilter.java
public void testRecursion3() throws Exception {
  b = new SynonymMap.Builder(true);
  final boolean keepOrig = true;
  add("zoo zoo", "zoo", keepOrig);
  final SynonymMap map = b.build();
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
    }
  };

  assertAnalyzesTo(a, "zoo zoo $ zoo",
      new String[] { "zoo", "zoo", "zoo", "$", "zoo" },
      new int[] { 1, 0, 1, 1, 1 });
}
Project: search    File: TestCapitalizationFilterFactory.java
/**
 * Test that invalid arguments result in an exception
 */
public void testInvalidArguments() throws Exception {
  for (final String arg : new String[]{"minWordLength", "maxTokenLength", "maxWordCount"}) {
    try {
      Reader reader = new StringReader("foo foobar super-duper-trooper");
      TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

      tokenFilterFactory("Capitalization",
          "keep", "and the it BIG",
          "onlyFirstWord", "false",
          arg, "-3",
          "okPrefix", "McK",
          "forceFirstLetter", "true").create(stream);
      fail();
    } catch (IllegalArgumentException expected) {
      assertTrue(expected.getMessage().contains(arg + " must be greater than or equal to zero")
          || expected.getMessage().contains(arg + " must be greater than zero"));
    }
  }
}
Project: search    File: TestPatternCaptureGroupTokenFilter.java
public void testRandomString() throws Exception {
  Analyzer a = new Analyzer() {

    @Override
    protected TokenStreamComponents createComponents(String fieldName,
        Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader,
          MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer,
          new PatternCaptureGroupTokenFilter(tokenizer, false,
              Pattern.compile("((..)(..))")));
    }
  };

  checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
}
Project: search    File: TestSpansAdvanced.java
/**
 * Initializes the tests by adding 4 identical documents to the index.
 */
@Override
public void setUp() throws Exception {
  super.setUp();
  // create test index
  mDirectory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), mDirectory, 
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET))
          .setMergePolicy(newLogMergePolicy()).setSimilarity(new DefaultSimilarity()));
  addDocument(writer, "1", "I think it should work.");
  addDocument(writer, "2", "I think it should work.");
  addDocument(writer, "3", "I think it should work.");
  addDocument(writer, "4", "I think it should work.");
  reader = writer.getReader();
  writer.close();
  searcher = newSearcher(reader);
  searcher.setSimilarity(new DefaultSimilarity());
}
Project: search    File: TestCompoundWordTokenFilter.java
public void testHyphenationCompoundWordsDELongestMatch() throws Exception {
  CharArraySet dict = makeDictionary("basketball", "basket", "ball", "kurv");

  InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
  HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
      .getHyphenationTree(is);

  // the word basket will not be added due to the longest match option
  HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
      new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), 
      hyphenator, dict,
      CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true);
  assertTokenStreamContents(tf, 
      new String[] { "basketballkurv", "basketball", "ball", "kurv" },
      new int[] { 1, 0, 0, 0 }
  );

}
Project: search    File: TestPrefixAwareTokenFilter.java
public void test() throws IOException {

    PrefixAwareTokenFilter ts;

    ts = new PrefixAwareTokenFilter(
        new SingleTokenTokenStream(createToken("a", 0, 1)),
        new SingleTokenTokenStream(createToken("b", 0, 1)));
    assertTokenStreamContents(ts, 
        new String[] { "a", "b" },
        new int[] { 0, 1 },
        new int[] { 1, 2 });

    // prefix and suffix using 2x prefix

    ts = new PrefixAwareTokenFilter(new SingleTokenTokenStream(createToken("^", 0, 0)),
        new MockTokenizer(new StringReader("hello world"), MockTokenizer.WHITESPACE, false));
    ts = new PrefixAwareTokenFilter(ts, new SingleTokenTokenStream(createToken("$", 0, 0)));

    assertTokenStreamContents(ts,
        new String[] { "^", "hello", "world", "$" },
        new int[] { 0, 0, 6, 11 },
        new int[] { 0, 5, 11, 11 });
  }
Project: search    File: TestSloppyPhraseQuery.java
private float  checkPhraseQuery(Document doc, PhraseQuery query, int slop, int expectedNumResults) throws Exception {
  query.setSlop(slop);

  Directory ramDir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), ramDir, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
  writer.addDocument(doc);

  IndexReader reader = writer.getReader();

  IndexSearcher searcher = newSearcher(reader);
  MaxFreqCollector c = new MaxFreqCollector();
  searcher.search(query, c);
  assertEquals("slop: "+slop+"  query: "+query+"  doc: "+doc+"  Wrong number of hits", expectedNumResults, c.totalHits);

  //QueryUtils.check(query,searcher);
  writer.close();
  reader.close();
  ramDir.close();

  // returns the max Scorer.freq() found; even though norms are omitted, many index stats differ
  // with these different tokens/distributions/lengths, so otherwise this test is very fragile.
  return c.max; 
}
Project: search    File: TestCompoundWordTokenFilter.java
public void testTokenEndingWithWordComponentOfMinimumLength() throws Exception {
  CharArraySet dict = makeDictionary("ab", "cd", "ef");

  Tokenizer tokenizer = new MockTokenizer(new StringReader("abcdef"), MockTokenizer.WHITESPACE, false);
  DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(
    tokenizer,
    dict,
    CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
    CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
    CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);

  assertTokenStreamContents(tf,
    new String[] { "abcdef", "ab", "cd", "ef" },
    new int[] { 0, 0, 0, 0},
    new int[] { 6, 6, 6, 6},
    new int[] { 1, 0, 0, 0}
    );
}
Project: search    File: TestMultiTermConstantScore.java
@BeforeClass
public static void beforeClass() throws Exception {
  String[] data = new String[] { "A 1 2 3 4 5 6", "Z       4 5 6", null,
      "B   2   4 5 6", "Y     3   5 6", null, "C     3     6",
      "X       4 5 6" };

  small = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), small, 
      newIndexWriterConfig(
          new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)).setMergePolicy(newLogMergePolicy()));

  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setTokenized(false);
  for (int i = 0; i < data.length; i++) {
    Document doc = new Document();
    doc.add(newField("id", String.valueOf(i), customType));// Field.Keyword("id",String.valueOf(i)));
    doc.add(newField("all", "all", customType));// Field.Keyword("all","all"));
    if (null != data[i]) {
      doc.add(newTextField("data", data[i], Field.Store.YES));// Field.Text("data",data[i]));
    }
    writer.addDocument(doc);
  }

  reader = writer.getReader();
  writer.close();
}
Project: search    File: TestMoreLikeThis.java
private Map<String,Float> getOriginalValues() throws IOException {
  Map<String,Float> originalValues = new HashMap<>();
  MoreLikeThis mlt = new MoreLikeThis(reader);
  mlt.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
  mlt.setMinDocFreq(1);
  mlt.setMinTermFreq(1);
  mlt.setMinWordLen(1);
  mlt.setFieldNames(new String[] {"text"});
  mlt.setBoost(true);
  BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(
      "lucene release"));
  List<BooleanClause> clauses = query.clauses();

  for (BooleanClause clause : clauses) {
    TermQuery tq = (TermQuery) clause.getQuery();
    originalValues.put(tq.getTerm().text(), tq.getBoost());
  }
  return originalValues;
}
Project: search    File: TestMoreLikeThis.java
public void testMultiValues() throws Exception {
  MoreLikeThis mlt = new MoreLikeThis(reader);
  mlt.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
  mlt.setMinDocFreq(1);
  mlt.setMinTermFreq(1);
  mlt.setMinWordLen(1);
  mlt.setFieldNames(new String[] {"text"});

  BooleanQuery query = (BooleanQuery) mlt.like("text",
      new StringReader("lucene"), new StringReader("lucene release"),
      new StringReader("apache"), new StringReader("apache lucene"));
  List<BooleanClause> clauses = query.clauses();
  assertEquals("Expected 2 clauses only!", 2, clauses.size());
  for (BooleanClause clause : clauses) {
    Term term = ((TermQuery) clause.getQuery()).getTerm();
    assertTrue(Arrays.asList(new Term("text", "lucene"), new Term("text", "apache")).contains(term));
  }
}
Project: search    File: TestSynonymMapFilter.java
public void testRandom2GraphAfter() throws Exception {
  final int numIters = atLeast(3);
  Random random = random();
  for (int i = 0; i < numIters; i++) {
    b = new SynonymMap.Builder(random.nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random.nextBoolean();

    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        TokenStream syns = new SynonymFilter(tokenizer, map, ignoreCase);
        TokenStream graph = new MockGraphTokenFilter(random(), syns);
        return new TokenStreamComponents(tokenizer, graph);
      }
    };

    checkRandomData(random, analyzer, 100);
  }
}
Project: search    File: TestRemoveDuplicatesTokenFilter.java
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  final int numIters = atLeast(10);
  for (int i = 0; i < numIters; i++) {
    SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random().nextBoolean();

    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        TokenStream stream = new SynonymFilter(tokenizer, map, ignoreCase);
        return new TokenStreamComponents(tokenizer, new RemoveDuplicatesTokenFilter(stream));
      }
    };

    checkRandomData(random(), analyzer, 200);
  }
}
Project: search    File: TestSuggestStopFilter.java
public void testMultipleStopWordsEnd() throws Exception {

  CharArraySet stopWords = StopFilter.makeStopSet("to", "the", "a");
  TokenStream stream = new MockTokenizer(new StringReader("go to a the"));
  TokenStream filter = new SuggestStopFilter(stream, stopWords);
  assertTokenStreamContents(filter,
                            new String[] { "go", "the"},
                            new int[] {0, 8},
                            new int[] {2, 11},
                            null,
                            new int[] {1, 3},
                            null,
                            11,
                            new boolean[] {false, true},
                            true);
}
Project: search    File: TestQueryParser.java
public void testFuzzySlopeExtendability() throws ParseException {
  QueryParser qp = new QueryParser("a",  new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {

    @Override
    Query handleBareFuzzy(String qfield, Token fuzzySlop, String termImage)
        throws ParseException {

      if(fuzzySlop.image.endsWith("€")) {
        float fms = fuzzyMinSim;
        try {
          fms = Float.valueOf(fuzzySlop.image.substring(1, fuzzySlop.image.length()-1)).floatValue();
        } catch (Exception ignored) { }
        float value = Float.parseFloat(termImage);
        return getRangeQuery(qfield, Float.toString(value-fms/2.f), Float.toString(value+fms/2.f), true, true);
      }
      return super.handleBareFuzzy(qfield, fuzzySlop, termImage);
    }

  };
  assertEquals(qp.parse("a:[11.95 TO 12.95]"), qp.parse("12.45~1€"));
}
Project: search    File: TestDirectSpellChecker.java
public void testBogusField() throws Exception {
  DirectSpellChecker spellChecker = new DirectSpellChecker();
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, 
      new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));

  for (int i = 0; i < 20; i++) {
    Document doc = new Document();
    doc.add(newTextField("numbers", English.intToEnglish(i), Field.Store.NO));
    writer.addDocument(doc);
  }

  IndexReader ir = writer.getReader();

  SuggestWord[] similar = spellChecker.suggestSimilar(new Term(
      "bogusFieldBogusField", "fvie"), 2, ir,
      SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
  assertEquals(0, similar.length);
  ir.close();
  writer.close();
  dir.close();
}
Project: search    File: TestSmartChineseAnalyzer.java
public void testInvalidOffset() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
      filters = new WordTokenFilter(filters);
      return new TokenStreamComponents(tokenizer, filters);
    }
  };

  assertAnalyzesTo(analyzer, "mosfellsbær", 
      new String[] { "mosfellsbaer" },
      new int[]    { 0 },
      new int[]    { 11 });
}
Project: elasticsearch_my    File: TruncateTokenFilterTests.java
public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new TruncateTokenFilter(t, 3));
        }
    };

    TokenStream test = analyzer.tokenStream("test", "a bb ccc dddd eeeee");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("a"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("bb"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("ccc"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("ddd"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("eee"));

    assertThat(test.incrementToken(), equalTo(false));
}
Project: elasticsearch_my    File: AnalysisPolishFactoryTests.java
private void testThreadSafety(TokenFilterFactory factory) throws IOException {
    final Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer();
            return new TokenStreamComponents(tokenizer, factory.create(tokenizer));
        }
    };
    BaseTokenStreamTestCase.checkRandomData(random(), analyzer, 100);
}
Project: solr-lemmatizer    File: LemmatizerFilterFactoryTest.java
@Test
public void testUsingPackagedWordNetReader() throws IOException {
    Map<String, String> args = new HashMap<>();
    LemmatizerFilterFactory factory = new LemmatizerFilterFactory(args);
    StringReader reader = new StringReader("it better works");
    final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    in.setReader(reader);
    TokenStream stream = factory.create(in);
    assertTokenStreamContents(stream, new String[] { "it", "good", "work" });
}
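This snippet wires the Reader into the tokenizer via setReader() rather than through the constructor. The examples on this page mix the two styles: the Lucene 4.x snippets pass a Reader to the MockTokenizer constructor, while the newer ones construct the tokenizer first and attach the Reader afterwards. A minimal sketch contrasting both, using the same input text (illustrative only):

// Lucene 4.x style: Reader supplied at construction time.
Tokenizer oldStyle = new MockTokenizer(new StringReader("it better works"),
    MockTokenizer.WHITESPACE, false);

// Lucene 5+ style: construct first, then attach the Reader.
Tokenizer newStyle = new MockTokenizer(MockTokenizer.WHITESPACE, false);
newStyle.setReader(new StringReader("it better works"));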
Project: solr-lemmatizer    File: LemmatizerFilterTest.java
@Test
public void testWithSamplePhrase() throws IOException {
    StringReader reader = new StringReader("it better works");
    final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    in.setReader(reader);
    TokenStream stream = new LemmatizerFilter(in, new WordNetLemmatizer(new PackagedWordNetReader("wordnet.zip"), new RTrie()));
    assertTokenStreamContents(stream, new String[] { "it", "good", "work" });
}
Project: solr-lemmatizer    File: LemmatizerFilterTest.java
@Test
public void testUsingPackagedWordNetReaderFromFilterFactory() throws IOException {
    Map<String, String> args = new HashMap<>();
    LemmatizerFilterFactory factory = new LemmatizerFilterFactory(args);

    StringReader reader = new StringReader("it better works");
    final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    in.setReader(reader);
    TokenStream stream = factory.create(in);
    assertTokenStreamContents(stream, new String[] { "it", "good", "work" });
}
Project: solr-lemmatizer    File: LemmatizerFilterTest.java
@Test
public void testUsingDirectoryWordNetReaderWithDummyPathShouldFailSilently() throws IOException {
    Map<String, String> args = new HashMap<>();
    args.put("dictPath", "/tmp");
    LemmatizerFilterFactory factory = new LemmatizerFilterFactory(args);

    StringReader reader = new StringReader("it better works");
    final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    in.setReader(reader);
    TokenStream stream = factory.create(in);
    assertTokenStreamContents(stream, new String[] { "it", "better", "works" });
}
Project: search    File: TestSolrSynonymParser.java
/** parse a syn file with some escaped syntax chars */
public void testEscapedStuff() throws Exception {
  String testFile = 
    "a\\=>a => b\\=>b\n" +
    "a\\,a => b\\,b";
  SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
  parser.parse(new StringReader(testFile));
  final SynonymMap map = parser.build();
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
      return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
    }
  };

  assertAnalyzesTo(analyzer, "ball", 
      new String[] { "ball" },
      new int[] { 1 });

  assertAnalyzesTo(analyzer, "a=>a",
      new String[] { "b=>b" },
      new int[] { 1 });

  assertAnalyzesTo(analyzer, "a,a",
      new String[] { "b,b" },
      new int[] { 1 });
}
Project: search    File: TestNGramFilters.java
/**
 * Test EdgeNGramFilterFactory with min and max gram size
 */
public void testEdgeNGramFilter2() throws Exception {
  Reader reader = new StringReader("test");
  TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
  stream = tokenFilterFactory("EdgeNGram",
      "minGramSize", "1",
      "maxGramSize", "2").create(stream);
  assertTokenStreamContents(stream, 
      new String[] { "t", "te" });
}
Project: search    File: TestNGramFilters.java
/**
 * Test EdgeNGramFilterFactory
 */
public void testEdgeNGramFilter() throws Exception {
  Reader reader = new StringReader("test");
  TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
  stream = tokenFilterFactory("EdgeNGram").create(stream);
  assertTokenStreamContents(stream, 
      new String[] { "t" });
}
Project: search    File: TestPatternReplaceFilter.java
public void testStripAll() throws Exception {
  String input = "aabfooaabfooabfoob ab caaaaaaaaab";
  TokenStream ts = new PatternReplaceFilter(
          new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
          Pattern.compile("a*b"),
          null, true);
  assertTokenStreamContents(ts,
      new String[] { "foofoofoo", "", "c" });
}
Project: search    File: TestGermanStemFilter.java
public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet( asSet("sängerinnen"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new GermanStemFilter(sink));
    }
  };
  checkOneTerm(a, "sängerinnen", "sängerinnen");
}
Project: search    File: TestElisionFilterFactory.java
/**
 * Test setting ignoreCase=true
 */
public void testCaseInsensitive() throws Exception {
  Reader reader = new StringReader("L'avion");
  TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
  stream = tokenFilterFactory("Elision",
      "articles", "frenchArticles.txt",
      "ignoreCase", "true").create(stream);
  assertTokenStreamContents(stream, new String[] { "avion" });
}
Project: search    File: TestStandardQP.java
@Override
public void testEscapedVsQuestionMarkAsWildcard() throws Exception {
  Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  assertQueryEquals("a:b\\-?c", a, "a:b-?c");
  assertQueryEquals("a:b\\+?c", a, "a:b+?c");
  assertQueryEquals("a:b\\:?c", a, "a:b:?c");

  assertQueryEquals("a:b\\\\?c", a, "a:b\\?c");
}
Project: search    File: TestMultiTermHighlighting.java
public void testWildcardInConstantScore() throws Exception {
  Directory dir = newDirectory();
  // use simpleanalyzer for more natural tokenization (else "test." is a token)
  final Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
  offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  Field body = new Field("body", "", offsetsType);
  Document doc = new Document();
  doc.add(body);

  body.setStringValue("This is a test.");
  iw.addDocument(doc);
  body.setStringValue("Test a one sentence document.");
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();

  IndexSearcher searcher = newSearcher(ir);
  PostingsHighlighter highlighter = new PostingsHighlighter() {
    @Override
    protected Analyzer getIndexAnalyzer(String field) {
      return analyzer;
    }
  };
  ConstantScoreQuery query = new ConstantScoreQuery(new WildcardQuery(new Term("body", "te*")));
  TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits);
  String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

  ir.close();
  dir.close();
}
Project: search    File: TestRussianLightStemFilter.java
public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet( asSet("энергии"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new RussianLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "энергии", "энергии");
}
Project: search    File: TestMultiTermHighlighting.java
public void testSpanWildcard() throws Exception {
  Directory dir = newDirectory();
  // use simpleanalyzer for more natural tokenization (else "test." is a token)
  final Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
  offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  Field body = new Field("body", "", offsetsType);
  Document doc = new Document();
  doc.add(body);

  body.setStringValue("This is a test.");
  iw.addDocument(doc);
  body.setStringValue("Test a one sentence document.");
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();

  IndexSearcher searcher = newSearcher(ir);
  PostingsHighlighter highlighter = new PostingsHighlighter() {
    @Override
    protected Analyzer getIndexAnalyzer(String field) {
      return analyzer;
    }
  };
  Query query = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
  TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits);
  String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

  ir.close();
  dir.close();
}
Project: search    File: TestWordnetSynonymParser.java
public void testSynonyms() throws Exception {
  WordnetSynonymParser parser = new WordnetSynonymParser(true, true, new MockAnalyzer(random()));
  parser.parse(new StringReader(synonymsFile));
  final SynonymMap map = parser.build();

  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
    }
  };

  /* all expansions */
  assertAnalyzesTo(analyzer, "Lost in the woods",
      new String[] { "Lost", "in", "the", "woods", "wood", "forest" },
      new int[] { 0, 5, 8, 12, 12, 12 },
      new int[] { 4, 7, 11, 17, 17, 17 },
      new int[] { 1, 1, 1, 1, 0, 0 });

  /* single quote */
  assertAnalyzesTo(analyzer, "king",
      new String[] { "king", "baron" });

  /* multi words */
  assertAnalyzesTo(analyzer, "king's evil",
      new String[] { "king's", "king's", "evil", "meany" });
}
Project: search    File: TestCodepointCountFilterFactory.java
public void testPositionIncrements() throws Exception {
  Reader reader = new StringReader("foo foobar super-duper-trooper");
  TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
  stream = tokenFilterFactory("CodepointCount",
      "min", "4",
      "max", "10").create(stream);
  assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 2 });
}
Project: search    File: TestLengthFilterFactory.java
/** Test that invalid arguments result in an exception */
public void testInvalidArguments() throws Exception {
  try {
    Reader reader = new StringReader("foo foobar super-duper-trooper");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    tokenFilterFactory("Length",
        LengthFilterFactory.MIN_KEY, "5",
        LengthFilterFactory.MAX_KEY, "4").create(stream);
    fail();
  } catch (IllegalArgumentException expected) {
    assertTrue(expected.getMessage().contains("maximum length must not be greater than minimum length"));
  }
}
Project: search    File: TestPortugueseStemFilter.java
public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet( asSet("quilométricas"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new PortugueseStemFilter(sink));
    }
  };
  checkOneTerm(a, "quilométricas", "quilométricas");
}