/**
 * Test ArabicStemFilterFactory
 */
public void testStemmer() throws Exception {
  Reader reader = new StringReader("الذين مَلكت أيمانكم");
  StandardTokenizerFactory factory = new StandardTokenizerFactory();
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  ArabicNormalizationFilterFactory normFactory = new ArabicNormalizationFilterFactory();
  normFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  ArabicStemFilterFactory stemFactory = new ArabicStemFilterFactory();
  Map<String, String> args = Collections.emptyMap();
  factory.init(args);
  normFactory.init(args);
  Tokenizer tokenizer = factory.create(reader);
  TokenStream stream = normFactory.create(tokenizer);
  stream = stemFactory.create(stream);
  assertTokenStreamContents(stream, new String[] {"ذين", "ملكت", "ايمانكم"});
}
/**
 * Test HindiNormalizationFilterFactory
 */
public void testHindiNormalizer() throws Exception {
  Reader reader = new StringReader("क़िताब");
  StandardTokenizerFactory factory = new StandardTokenizerFactory();
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  IndicNormalizationFilterFactory indicFilterFactory = new IndicNormalizationFilterFactory();
  HindiNormalizationFilterFactory hindiFilterFactory = new HindiNormalizationFilterFactory();
  hindiFilterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  Map<String, String> args = Collections.emptyMap();
  factory.init(args);
  hindiFilterFactory.init(args);
  Tokenizer tokenizer = factory.create(reader);
  TokenStream stream = indicFilterFactory.create(tokenizer);
  stream = hindiFilterFactory.create(stream);
  assertTokenStreamContents(stream, new String[] {"किताब"});
}
/**
 * Test HindiStemFilterFactory
 */
public void testStemmer() throws Exception {
  Reader reader = new StringReader("किताबें");
  StandardTokenizerFactory factory = new StandardTokenizerFactory();
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  IndicNormalizationFilterFactory indicFilterFactory = new IndicNormalizationFilterFactory();
  HindiNormalizationFilterFactory hindiFilterFactory = new HindiNormalizationFilterFactory();
  HindiStemFilterFactory stemFactory = new HindiStemFilterFactory();
  stemFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  Map<String, String> args = Collections.emptyMap();
  factory.init(args);
  stemFactory.init(args);
  Tokenizer tokenizer = factory.create(reader);
  TokenStream stream = indicFilterFactory.create(tokenizer);
  stream = hindiFilterFactory.create(stream);
  stream = stemFactory.create(stream);
  assertTokenStreamContents(stream, new String[] {"किताब"});
}
@Factory public SearchMapping getSearchMapping() { final SearchMapping mapping = new SearchMapping(); mapping .analyzerDef("english", StandardTokenizerFactory.class) .filter(LowerCaseFilterFactory.class) .filter(SnowballPorterFilterFactory.class) .analyzerDef("german", StandardTokenizerFactory.class) .filter(LowerCaseFilterFactory.class) .filter(GermanStemFilterFactory.class); return mapping; }
/**
 * Gets the search mapping.
 *
 * @return the search mapping
 */
@Factory
public SearchMapping getSearchMapping() {
  final SearchMapping mapping = new SearchMapping();
  mapping
      .analyzerDef("ngram", StandardTokenizerFactory.class)
          .filter(LowerCaseFilterFactory.class)
          .filter(NGramFilterFactory.class)
              .param("minGramSize", "3")
              .param("maxGramSize", "3")
      .analyzerDef("se", StandardTokenizerFactory.class)
          .filter(LowerCaseFilterFactory.class)
          .filter(SwedishLightStemFilterFactory.class)
      .analyzerDef("en", StandardTokenizerFactory.class)
          .filter(LowerCaseFilterFactory.class)
          .filter(PorterStemFilterFactory.class)
      .entity(DocumentContentData.class).indexed()
          .property("hjid", ElementType.FIELD).documentId()
          .property("content", ElementType.METHOD).field().analyzer("se").store(Store.NO).analyze(Analyze.YES)
          .property("id", ElementType.METHOD).field()
      .entity(DocumentElement.class).indexed()
          .property("id", ElementType.FIELD).documentId()
          .property("title", ElementType.METHOD).field().analyzer("se").store(Store.NO).analyze(Analyze.YES)
          .property("subTitle", ElementType.METHOD).field().analyzer("se").store(Store.NO).analyze(Analyze.YES)
      .entity(DocumentStatusContainer.class).indexed()
          .property("hjid", ElementType.FIELD).documentId()
          .property("documentCategory", ElementType.METHOD).field().analyzer("se").store(Store.NO).analyze(Analyze.YES);
  return mapping;
}
@Factory
public SearchMapping getSearchMapping() {
  SearchMapping mapping = new SearchMapping();
  mapping
      .analyzerDef("autocompleteEdgeAnalyzer", PatternTokenizerFactory.class)
          .tokenizerParam("pattern", "(.*)")
          .tokenizerParam("group", "1")
          .filter(LowerCaseFilterFactory.class)
          .filter(StopFilterFactory.class)
          .filter(EdgeNGramFilterFactory.class)
              .param("minGramSize", "3")
              .param("maxGramSize", "50")
      .analyzerDef("autocompletePhoneticAnalyzer", StandardTokenizerFactory.class)
          .filter(StandardFilterFactory.class)
          .filter(StopFilterFactory.class)
          .filter(PhoneticFilterFactory.class)
              .param("encoder", "DoubleMetaphone")
          .filter(SnowballPorterFilterFactory.class)
              .param("language", "English")
      .analyzerDef("autocompleteNGramAnalyzer", StandardTokenizerFactory.class)
          .filter(WordDelimiterFilterFactory.class)
          .filter(LowerCaseFilterFactory.class)
          .filter(NGramFilterFactory.class)
              .param("minGramSize", "3")
              .param("maxGramSize", "20")
      .analyzerDef("standardAnalyzer", StandardTokenizerFactory.class)
          .filter(LowerCaseFilterFactory.class)
      .analyzerDef("exactAnalyzer", StandardTokenizerFactory.class)
      .analyzerDef("conceptParentPidsAnalyzer", WhitespaceTokenizerFactory.class);
  return mapping;
}
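// A hedged sketch (an assumption, not taken from the original source) of how an entity
// field could opt into the analyzer definitions above using Hibernate Search's
// annotation API. The Concept class and its fields are hypothetical names.
@Indexed
public class Concept {

  @DocumentId
  private Long id;

  // Indexed with the "autocompleteNGramAnalyzer" defined in the mapping above.
  @Field(analyzer = @Analyzer(definition = "autocompleteNGramAnalyzer"))
  private String display;

  // Unfiltered copy of the same value for exact matching, using "exactAnalyzer".
  @Field(name = "displayExact", analyzer = @Analyzer(definition = "exactAnalyzer"))
  private String displayExact;
}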
/**
 * Test ArabicNormalizationFilterFactory
 */
public void testNormalizer() throws Exception {
  Reader reader = new StringReader("الذين مَلكت أيمانكم");
  StandardTokenizerFactory factory = new StandardTokenizerFactory();
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  ArabicNormalizationFilterFactory filterFactory = new ArabicNormalizationFilterFactory();
  filterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  Map<String, String> args = Collections.emptyMap();
  factory.init(args);
  filterFactory.init(args);
  Tokenizer tokenizer = factory.create(reader);
  TokenStream stream = filterFactory.create(tokenizer);
  assertTokenStreamContents(stream, new String[] {"الذين", "ملكت", "ايمانكم"});
}
/**
 * Test PersianCharFilterFactory
 */
public void testPersianCharFilter() throws Exception {
  // The input contains a zero-width non-joiner (U+200C) between "می" and "خورد";
  // PersianCharFilter replaces it with a space, so the tokenizer splits the compound.
  Reader reader = new StringReader("میخورد");
  PersianCharFilterFactory charfilterFactory = new PersianCharFilterFactory();
  StandardTokenizerFactory tokenizerFactory = new StandardTokenizerFactory();
  tokenizerFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  Map<String, String> args = Collections.emptyMap();
  tokenizerFactory.init(args);
  TokenStream stream = tokenizerFactory.create(charfilterFactory.create(reader));
  assertTokenStreamContents(stream, new String[] { "می", "خورد" });
}
/**
 * Test IndicNormalizationFilterFactory
 */
public void testIndicNormalizer() throws Exception {
  // "ত্" (ta + virama) should normalize to the Bengali khanda ta "ৎ", and the
  // decomposed Devanagari "अाैर" to the composed form "और".
  Reader reader = new StringReader("ত্ अाैर");
  StandardTokenizerFactory factory = new StandardTokenizerFactory();
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  IndicNormalizationFilterFactory filterFactory = new IndicNormalizationFilterFactory();
  filterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  Map<String, String> args = Collections.emptyMap();
  factory.init(args);
  filterFactory.init(args);
  Tokenizer tokenizer = factory.create(reader);
  TokenStream stream = filterFactory.create(tokenizer);
  assertTokenStreamContents(stream, new String[] { "ৎ", "और" });
}
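// A minimal sketch, assuming Lucene 4.4 or later: the tests above use the older
// setLuceneMatchVersion(...) + init(Map) pattern, which later Lucene versions removed
// in favor of passing the argument map directly to the factory constructor. The
// version string and map contents here are illustrative, not from the original source.
public void testNormalizerConstructorArgs() throws Exception {
  Reader reader = new StringReader("الذين مَلكت أيمانكم");
  // luceneMatchVersion is consumed by the factory's base class during construction.
  Map<String, String> tokenizerArgs = new HashMap<String, String>();
  tokenizerArgs.put("luceneMatchVersion", "4.4");
  Tokenizer tokenizer = new StandardTokenizerFactory(tokenizerArgs).create(reader);
  Map<String, String> filterArgs = new HashMap<String, String>();
  filterArgs.put("luceneMatchVersion", "4.4");
  TokenStream stream = new ArabicNormalizationFilterFactory(filterArgs).create(tokenizer);
  assertTokenStreamContents(stream, new String[] {"الذين", "ملكت", "ايمانكم"});
}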