public void testKeyword() throws IOException {
  // "あり" is marked as a keyword, so JapaneseBaseFormFilter leaves it in its
  // surface form instead of restoring the base form "ある".
  final CharArraySet exclusionSet = new CharArraySet(asSet("あり"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new JapaneseTokenizer(newAttributeFactory(), reader, null, true, JapaneseTokenizer.DEFAULT_MODE);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new JapaneseBaseFormFilter(sink));
    }
  };
  assertAnalyzesTo(a, "それはまだ実験段階にあります",
      new String[] { "それ", "は", "まだ", "実験", "段階", "に", "あり", "ます" }
  );
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("コーヒー"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new JapaneseKatakanaStemFilter(sink));
    }
  };
  checkOneTerm(a, "コーヒー", "コーヒー");
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("sängerinnen"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new GermanStemFilter(sink));
    }
  };
  checkOneTerm(a, "sängerinnen", "sängerinnen");
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("sängerinnen"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new GermanMinimalStemFilter(sink));
    }
  };
  checkOneTerm(a, "sängerinnen", "sängerinnen");
}

public void testWithKeywordAttribute() throws IOException {
  CharArraySet set = new CharArraySet(1, true);
  set.add("fischen");
  // "fischen" carries the keyword attribute and passes through unstemmed;
  // "trinken" is lowercased and stemmed to "trink".
  GermanStemFilter filter = new GermanStemFilter(
      new SetKeywordMarkerFilter(new LowerCaseTokenizer(new StringReader("Fischen Trinken")), set));
  assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("sängerinnen"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new GermanLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "sängerinnen", "sängerinnen");
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("sekretæren"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new NorwegianMinimalStemFilter(sink));
    }
  };
  checkOneTerm(a, "sekretæren", "sekretæren");
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("sekretæren"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new NorwegianLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "sekretæren", "sekretæren");
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("edeltäjistään"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new FinnishLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "edeltäjistään", "edeltäjistään");
}

public void testWithKeywordAttribute() throws IOException {
  CharArraySet set = new CharArraySet(1, true);
  set.add("ساهدهات");
  ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader("ساهدهات"));
  ArabicStemFilter filter = new ArabicStemFilter(new SetKeywordMarkerFilter(tokenStream, set));
  assertTokenStreamContents(filter, new String[] { "ساهدهات" });
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("энергии"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new RussianLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "энергии", "энергии");
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("jaktkarlens"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new SwedishLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "jaktkarlens", "jaktkarlens");
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("babakocsi"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new HungarianLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "babakocsi", "babakocsi");
}

public void testWithKeywordAttribute() throws IOException {
  CharArraySet set = new CharArraySet(1, true);
  set.add("строеве");
  MockTokenizer tokenStream = new MockTokenizer(new StringReader("строевете строеве"), MockTokenizer.WHITESPACE, false);
  BulgarianStemFilter filter = new BulgarianStemFilter(new SetKeywordMarkerFilter(tokenStream, set));
  assertTokenStreamContents(filter, new String[] { "строй", "строеве" });
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("elefantes"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new GalicianMinimalStemFilter(sink));
    }
  };
  checkOneTerm(a, "elefantes", "elefantes");
}

public void testWithKeywordAttribute() throws IOException {
  CharArraySet set = new CharArraySet(1, true);
  set.add("Brasília");
  BrazilianStemFilter filter = new BrazilianStemFilter(
      new SetKeywordMarkerFilter(new LowerCaseTokenizer(new StringReader("Brasília Brasilia")), set));
  assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("chevaux"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new FrenchLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "chevaux", "chevaux");
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("chevaux"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new FrenchMinimalStemFilter(sink));
    }
  };
  checkOneTerm(a, "chevaux", "chevaux");
}

public void testWithKeywordAttribute() throws IOException {
  CharArraySet set = new CharArraySet(1, true);
  set.add("yourselves");
  Tokenizer tokenizer = new MockTokenizer(new StringReader("yourselves yours"), MockTokenizer.WHITESPACE, false);
  TokenStream filter = new PorterStemFilter(new SetKeywordMarkerFilter(tokenizer, set));
  assertTokenStreamContents(filter, new String[] { "yourselves", "your" });
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("quilométricas"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new PortugueseLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "quilométricas", "quilométricas");
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("quilométricas"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new PortugueseMinimalStemFilter(sink));
    }
  };
  checkOneTerm(a, "quilométricas", "quilométricas");
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("quilométricas"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new PortugueseStemFilter(sink));
    }
  };
  checkOneTerm(a, "quilométricas", "quilométricas");
}

public void testWithKeywordAttribute() throws IOException {
  CharArraySet set = new CharArraySet(1, true);
  set.add("hole");
  CzechStemFilter filter = new CzechStemFilter(new SetKeywordMarkerFilter(
      new MockTokenizer(new StringReader("hole desek"), MockTokenizer.WHITESPACE, false), set));
  assertTokenStreamContents(filter, new String[] { "hole", "desk" });
}

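// Legacy-API variants: in the methods below, CharArraySet, LowerCaseTokenizer,
// and the other analysis classes still take a Version argument
// (TEST_VERSION_CURRENT). Those Version-taking constructors were deprecated
// during the Lucene 4.x line and later removed; the behavior under test is the
// same as in the methods above.
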
public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("あり"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.DEFAULT_MODE);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new JapaneseBaseFormFilter(sink));
    }
  };
  assertAnalyzesTo(a, "それはまだ実験段階にあります",
      new String[] { "それ", "は", "まだ", "実験", "段階", "に", "あり", "ます" }
  );
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("コーヒー"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new JapaneseKatakanaStemFilter(sink));
    }
  };
  checkOneTerm(a, "コーヒー", "コーヒー");
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new GermanStemFilter(sink));
    }
  };
  checkOneTerm(a, "sängerinnen", "sängerinnen");
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new GermanMinimalStemFilter(sink));
    }
  };
  checkOneTerm(a, "sängerinnen", "sängerinnen");
}

public void testWithKeywordAttribute() throws IOException {
  CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
  set.add("fischen");
  GermanStemFilter filter = new GermanStemFilter(
      new SetKeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Fischen Trinken")), set));
  assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new GermanLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "sängerinnen", "sängerinnen");
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sekretæren"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new NorwegianMinimalStemFilter(sink));
    }
  };
  checkOneTerm(a, "sekretæren", "sekretæren");
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sekretæren"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new NorwegianLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "sekretæren", "sekretæren");
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("edeltäjistään"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new FinnishLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "edeltäjistään", "edeltäjistään");
}

public void testWithKeywordAttribute() throws IOException {
  CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
  set.add("ساهدهات");
  ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader("ساهدهات"));
  ArabicStemFilter filter = new ArabicStemFilter(new SetKeywordMarkerFilter(tokenStream, set));
  assertTokenStreamContents(filter, new String[] { "ساهدهات" });
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("энергии"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new RussianLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "энергии", "энергии");
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("jaktkarlens"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new SwedishLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "jaktkarlens", "jaktkarlens");
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("babakocsi"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new HungarianLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "babakocsi", "babakocsi");
}

public void testWithKeywordAttribute() throws IOException {
  CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
  set.add("строеве");
  MockTokenizer tokenStream = new MockTokenizer(new StringReader("строевете строеве"), MockTokenizer.WHITESPACE, false);
  BulgarianStemFilter filter = new BulgarianStemFilter(new SetKeywordMarkerFilter(tokenStream, set));
  assertTokenStreamContents(filter, new String[] { "строй", "строеве" });
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("elefantes"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new GalicianMinimalStemFilter(sink));
    }
  };
  checkOneTerm(a, "elefantes", "elefantes");
}

public void testWithKeywordAttribute() throws IOException {
  CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
  set.add("Brasília");
  BrazilianStemFilter filter = new BrazilianStemFilter(
      new SetKeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Brasília Brasilia")), set));
  assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
}

public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("chevaux"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new FrenchLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "chevaux", "chevaux");
}
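
// A minimal sketch (not from any of the test classes above) of the pattern all
// of these tests share: SetKeywordMarkerFilter sets KeywordAttribute on every
// token found in the exclusion set, and each stemmer above checks that attribute
// and passes marked tokens through unchanged. Class names and imports assume the
// Lucene 4.x analysis API used by the first group of tests; the analyzer name
// here is made up for illustration.
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;

public final class KeywordProtectingAnalyzer extends Analyzer {
  private final CharArraySet exclusionSet;

  public KeywordProtectingAnalyzer(CharArraySet exclusionSet) {
    this.exclusionSet = exclusionSet;
  }

  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer source = new WhitespaceTokenizer(reader);
    // Tokens present in exclusionSet get KeywordAttribute#isKeyword() == true.
    TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
    // PorterStemFilter skips keyword-marked tokens; everything else is stemmed.
    return new TokenStreamComponents(source, new PorterStemFilter(sink));
  }
}

// Usage: new KeywordProtectingAnalyzer(new CharArraySet(Arrays.asList("yourselves"), false))
// analyzes "yourselves yours" to the terms "yourselves", "your", mirroring the
// PorterStemFilter test above.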