public void testKeyword() throws IOException { final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("あり"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.DEFAULT_MODE); TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new JapaneseBaseFormFilter(sink)); } }; assertAnalyzesTo(a, "それはまだ実験段階にあります", new String[] { "それ", "は", "まだ", "実験", "段階", "に", "あり", "ます" } ); }
public void testKeyword() throws IOException { final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("コーヒー"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new JapaneseKatakanaStemFilter(sink)); } }; checkOneTerm(a, "コーヒー", "コーヒー"); }
public void testKeyword() throws IOException { final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new GermanStemFilter(sink)); } }; checkOneTerm(a, "sängerinnen", "sängerinnen"); }
public void testKeyword() throws IOException { final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new GermanMinimalStemFilter(sink)); } }; checkOneTerm(a, "sängerinnen", "sängerinnen"); }
public void testWithKeywordAttribute() throws IOException { CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("fischen"); GermanStemFilter filter = new GermanStemFilter( new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader( "Fischen Trinken")), set)); assertTokenStreamContents(filter, new String[] { "fischen", "trink" }); }
public void testKeyword() throws IOException { final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new GermanLightStemFilter(sink)); } }; checkOneTerm(a, "sängerinnen", "sängerinnen"); }
public void testKeyword() throws IOException { final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sekretæren"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new NorwegianMinimalStemFilter(sink)); } }; checkOneTerm(a, "sekretæren", "sekretæren"); }
public void testKeyword() throws IOException { final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sekretæren"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new NorwegianLightStemFilter(sink)); } }; checkOneTerm(a, "sekretæren", "sekretæren"); }
public void testKeyword() throws IOException { final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("edeltäjistään"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new FinnishLightStemFilter(sink)); } }; checkOneTerm(a, "edeltäjistään", "edeltäjistään"); }
public void testWithKeywordAttribute() throws IOException { CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("ساهدهات"); ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader("ساهدهات")); ArabicStemFilter filter = new ArabicStemFilter(new KeywordMarkerFilter(tokenStream, set)); assertTokenStreamContents(filter, new String[]{"ساهدهات"}); }
public void testKeyword() throws IOException { final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("энергии"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new RussianLightStemFilter(sink)); } }; checkOneTerm(a, "энергии", "энергии"); }
public void testKeyword() throws IOException { final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("jaktkarlens"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new SwedishLightStemFilter(sink)); } }; checkOneTerm(a, "jaktkarlens", "jaktkarlens"); }
public void testKeyword() throws IOException { final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("babakocsi"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new HungarianLightStemFilter(sink)); } }; checkOneTerm(a, "babakocsi", "babakocsi"); }
public void testWithKeywordAttribute() throws IOException { CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("строеве"); MockTokenizer tokenStream = new MockTokenizer(new StringReader("строевете строеве"), MockTokenizer.WHITESPACE, false); BulgarianStemFilter filter = new BulgarianStemFilter( new KeywordMarkerFilter(tokenStream, set)); assertTokenStreamContents(filter, new String[] { "строй", "строеве" }); }
public void testKeyword() throws IOException { final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("elefantes"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new GalicianMinimalStemFilter(sink)); } }; checkOneTerm(a, "elefantes", "elefantes"); }
public void testWithKeywordAttribute() throws IOException { CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("Brasília"); BrazilianStemFilter filter = new BrazilianStemFilter( new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader( "Brasília Brasilia")), set)); assertTokenStreamContents(filter, new String[] { "brasília", "brasil" }); }
public void testKeyword() throws IOException { final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("chevaux"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new FrenchLightStemFilter(sink)); } }; checkOneTerm(a, "chevaux", "chevaux"); }
public void testKeyword() throws IOException { final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("chevaux"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new FrenchMinimalStemFilter(sink)); } }; checkOneTerm(a, "chevaux", "chevaux"); }
/** Simple test for KeywordAttribute */
public void testKeywordAttribute() throws IOException {
  MockTokenizer tokenizer = new MockTokenizer(new StringReader("lucene is awesome"), MockTokenizer.WHITESPACE, true);
  tokenizer.setEnableChecks(true);
  HunspellStemFilter filter = new HunspellStemFilter(tokenizer, DICTIONARY);
  assertTokenStreamContents(filter, new String[] { "lucene", "lucen", "is", "awesome" }, new int[] { 1, 0, 1, 1 });

  // assert with keyword marker
  tokenizer = new MockTokenizer(new StringReader("lucene is awesome"), MockTokenizer.WHITESPACE, true);
  CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList("Lucene"), true);
  filter = new HunspellStemFilter(new KeywordMarkerFilter(tokenizer, set), DICTIONARY);
  assertTokenStreamContents(filter, new String[] { "lucene", "is", "awesome" }, new int[] { 1, 1, 1 });
}
public void testWithKeywordAttribute() throws IOException { CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("yourselves"); Tokenizer tokenizer = new MockTokenizer(new StringReader("yourselves yours"), MockTokenizer.WHITESPACE, false); TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set)); assertTokenStreamContents(filter, new String[] {"yourselves", "your"}); }
public void testKeyword() throws IOException { final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new PortugueseLightStemFilter(sink)); } }; checkOneTerm(a, "quilométricas", "quilométricas"); }
public void testKeyword() throws IOException { final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new PortugueseMinimalStemFilter(sink)); } }; checkOneTerm(a, "quilométricas", "quilométricas"); }
public void testKeyword() throws IOException { final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false); Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new PortugueseStemFilter(sink)); } }; checkOneTerm(a, "quilométricas", "quilométricas"); }
public void testWithKeywordAttribute() throws IOException { CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("hole"); CzechStemFilter filter = new CzechStemFilter(new KeywordMarkerFilter( new MockTokenizer(new StringReader("hole desek"), MockTokenizer.WHITESPACE, false), set)); assertTokenStreamContents(filter, new String[] { "hole", "desk" }); }
@Override
public TokenStream create(TokenStream input) {
  return protectedWords == null ? input : new KeywordMarkerFilter(input, protectedWords);
}
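// A minimal sketch of the pattern shared by the tests above, not taken from any of
// them: the protected term "jumping" and the input text are illustrative assumptions.
// KeywordMarkerFilter sets KeywordAttribute on terms found in the set, and a
// keyword-aware stemmer such as PorterStemFilter leaves those terms unchanged while
// stemming everything else.
public void testProtectedTermSkipsStemming() throws IOException {
  CharArraySet protectedTerms = new CharArraySet(TEST_VERSION_CURRENT, asSet("jumping"), true);
  Tokenizer tokenizer = new MockTokenizer(new StringReader("jumping running"), MockTokenizer.WHITESPACE, false);
  TokenStream stream = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, protectedTerms));
  // "jumping" is marked as a keyword and survives intact; "running" stems to "run"
  assertTokenStreamContents(stream, new String[] { "jumping", "run" });
}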