/** Verifies that the {@code longestOnly} option keeps only the longest stem for each token. */
public void testLongestOnly() throws IOException {
  MockTokenizer source = new MockTokenizer(new StringReader("lucene is awesome"));
  source.setEnableChecks(true);
  // dedup=true, longestOnly=true
  HunspellStemFilter stemmer = new HunspellStemFilter(source, dictionary, true, true);
  assertTokenStreamContents(stemmer,
      new String[] {"lucene", "is", "awesome"},
      new int[] {1, 1, 1});
}
/** Feeds a large batch of random strings through the analyzer to flush out unexpected failures. */
public void testRandomStrings() throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(source, new HunspellStemFilter(source, dictionary));
    }
  };
  checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
}
/** Checks that an empty term passes through the filter unchanged (no exception, no alteration). */
public void testEmptyTerm() throws IOException {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new KeywordTokenizer(reader);
      return new TokenStreamComponents(source, new HunspellStemFilter(source, dictionary));
    }
  };
  checkOneTerm(analyzer, "", "");
}
/**
 * Wraps the given {@link TokenStream} in a {@link HunspellStemFilter} configured with
 * this factory's dictionary and its {@code dedup} and {@code longestOnly} settings.
 *
 * @param tokenStream TokenStream to be stemmed
 * @return the stemming filter wrapping {@code tokenStream}
 */
@Override
public TokenStream create(TokenStream tokenStream) {
  HunspellStemFilter stemmer = new HunspellStemFilter(tokenStream, dictionary, dedup, longestOnly);
  return stemmer;
}
/**
 * Creates a {@link org.apache.lucene.analysis.hunspell.HunspellStemFilter} that will
 * stem the given TokenStream using this factory's dictionary.
 *
 * @param tokenStream TokenStream that will be filtered
 * @return HunspellStemFilter that filters the TokenStream
 */
@Override
public TokenStream create(TokenStream tokenStream) {
  TokenStream stemmed = new HunspellStemFilter(tokenStream, dictionary);
  return stemmed;
}