/**
 * Builds the analyzer components: a pattern tokenizer (emitting the whole
 * match, group 0) and, optionally, a lower-case filter.
 *
 * @param lowerCase whether tokens should additionally be lower-cased
 */
public SimpleAnalyzer(boolean lowerCase) {
    // Tokenizer configuration: match PATTERN, keep group 0 (the entire match).
    Map<String, String> tokenizerParams = new HashMap<>();
    tokenizerParams.put(PatternTokenizerFactory.PATTERN, PATTERN);
    tokenizerParams.put(PatternTokenizerFactory.GROUP, "0");
    tokenizerParams.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, version.name());
    tokenizerFactory = new PatternTokenizerFactory(tokenizerParams);

    if (!lowerCase) {
        lowerCaseFilterFactory = null;
    } else {
        // The filter factory needs its own parameter map: factories may
        // consume/validate the map they are handed.
        Map<String, String> filterParams = new HashMap<>();
        filterParams.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, version.name());
        lowerCaseFilterFactory = new LowerCaseFilterFactory(filterParams);
    }
}
/**
 * Programmatic Hibernate Search mapping defining two analyzers:
 * "english" (standard tokenizer, lower-case, Snowball/Porter stemming) and
 * "german" (standard tokenizer, lower-case, German stemming).
 *
 * @return the configured {@link SearchMapping}
 */
@Factory
public SearchMapping getSearchMapping() {
    SearchMapping mapping = new SearchMapping();
    mapping.analyzerDef("english", StandardTokenizerFactory.class)
            .filter(LowerCaseFilterFactory.class)
            .filter(SnowballPorterFilterFactory.class)
            .analyzerDef("german", StandardTokenizerFactory.class)
            .filter(LowerCaseFilterFactory.class)
            .filter(GermanStemFilterFactory.class);
    return mapping;
}
/** SPI lookup of token filter factories by name must be case-insensitive. */
public void testLookupTokenFilter() {
    for (String spiName : new String[] { "Lowercase", "LOWERCASE", "lowercase" }) {
        assertSame(LowerCaseFilterFactory.class,
                TokenFilterFactory.forName(spiName, versionArgOnly()).getClass());
    }
    for (String spiName : new String[] { "RemoveDuplicates", "REMOVEDUPLICATES", "removeduplicates" }) {
        assertSame(RemoveDuplicatesTokenFilterFactory.class,
                TokenFilterFactory.forName(spiName, versionArgOnly()).getClass());
    }
}
/** Class lookup of token filter factories by name must be case-insensitive. */
public void testLookupTokenFilterClass() {
    for (String spiName : new String[] { "Lowercase", "LOWERCASE", "lowercase" }) {
        assertSame(LowerCaseFilterFactory.class, TokenFilterFactory.lookupClass(spiName));
    }
    for (String spiName : new String[] { "RemoveDuplicates", "REMOVEDUPLICATES", "removeduplicates" }) {
        assertSame(RemoveDuplicatesTokenFilterFactory.class, TokenFilterFactory.lookupClass(spiName));
    }
}
/**
 * Verifies that the explicit multiterm analyzer of "content_charfilter"
 * is a TokenizerChain with a keyword tokenizer, only lower-case token
 * filters, and exactly one mapping char filter.
 */
@Test
public void testQueryCopiedToMulti() {
    SchemaField field = h.getCore().getLatestSchema().getField("content_charfilter");
    Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();
    assertTrue(analyzer instanceof TokenizerChain);
    // Cast once and reuse instead of casting the analyzer twice.
    TokenizerChain tc = (TokenizerChain) analyzer;
    assertTrue(tc.getTokenizerFactory() instanceof KeywordTokenizerFactory);
    for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
        assertTrue(factory instanceof LowerCaseFilterFactory);
    }
    // assertEquals reports expected vs actual on failure, unlike
    // assertTrue(length == 1) which only says "false".
    assertEquals(1, tc.getCharFilterFactories().length);
    assertTrue(tc.getCharFilterFactories()[0] instanceof MappingCharFilterFactory);
}
/**
 * Verifies that the derived multiterm analyzer of "content_ws" is a
 * TokenizerChain with a keyword tokenizer, only ASCII-folding or
 * lower-case filters, and no char filters.
 */
@Test
public void testDefaultCopiedToMulti() {
    SchemaField field = h.getCore().getLatestSchema().getField("content_ws");
    Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();
    assertTrue(analyzer instanceof TokenizerChain);
    // Cast once and reuse instead of casting the analyzer twice.
    TokenizerChain tc = (TokenizerChain) analyzer;
    assertTrue(tc.getTokenizerFactory() instanceof KeywordTokenizerFactory);
    for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
        assertTrue((factory instanceof ASCIIFoldingFilterFactory)
                || (factory instanceof LowerCaseFilterFactory));
    }
    // assertNull gives a clearer failure message than assertTrue(x == null).
    assertNull(tc.getCharFilterFactories());
}
/**
 * Gets the search mapping.
 *
 * <p>Defines three analyzers — "ngram" (trigram), "se" (Swedish light
 * stemming) and "en" (Porter stemming) — and maps the indexed entities
 * {@code DocumentContentData}, {@code DocumentElement} and
 * {@code DocumentStatusContainer} onto them.
 *
 * @return the search mapping
 */
@Factory
public SearchMapping getSearchMapping() {
    final SearchMapping mapping = new SearchMapping();

    // Analyzer definitions.
    mapping.analyzerDef("ngram", StandardTokenizerFactory.class)
            .filter(LowerCaseFilterFactory.class)
            .filter(NGramFilterFactory.class)
            .param("minGramSize", "3")
            .param("maxGramSize", "3")
            .analyzerDef("se", StandardTokenizerFactory.class)
            .filter(LowerCaseFilterFactory.class)
            .filter(SwedishLightStemFilterFactory.class)
            .analyzerDef("en", StandardTokenizerFactory.class)
            .filter(LowerCaseFilterFactory.class)
            .filter(PorterStemFilterFactory.class)
            // Entity: DocumentContentData — content analyzed with "se".
            .entity(DocumentContentData.class).indexed()
            .property("hjid", ElementType.FIELD).documentId()
            .property("content", ElementType.METHOD).field()
            .analyzer("se").store(Store.NO).analyze(Analyze.YES)
            .property("id", ElementType.METHOD).field()
            // Entity: DocumentElement — title/subTitle analyzed with "se".
            .entity(DocumentElement.class).indexed()
            .property("id", ElementType.FIELD).documentId()
            .property("title", ElementType.METHOD).field()
            .analyzer("se").store(Store.NO).analyze(Analyze.YES)
            .property("subTitle", ElementType.METHOD).field()
            .analyzer("se").store(Store.NO).analyze(Analyze.YES)
            // Entity: DocumentStatusContainer — category analyzed with "se".
            .entity(DocumentStatusContainer.class).indexed()
            .property("hjid", ElementType.FIELD).documentId()
            .property("documentCategory", ElementType.METHOD).field()
            .analyzer("se").store(Store.NO).analyze(Analyze.YES);

    return mapping;
}
/**
 * Programmatic search mapping defining the analyzers used for
 * autocompletion (edge n-gram, phonetic, n-gram) plus a standard,
 * an exact and a whitespace-based analyzer.
 *
 * @return the configured {@link SearchMapping}
 */
@Factory
public SearchMapping getSearchMapping() {
    SearchMapping searchMapping = new SearchMapping();

    // Edge n-gram autocompletion: whole-input token, lower-cased,
    // stopword-filtered, 3..50-char leading grams.
    searchMapping.analyzerDef("autocompleteEdgeAnalyzer", PatternTokenizerFactory.class)
            .tokenizerParam("pattern", "(.*)")
            .tokenizerParam("group", "1")
            .filter(LowerCaseFilterFactory.class)
            .filter(StopFilterFactory.class)
            .filter(EdgeNGramFilterFactory.class)
            .param("minGramSize", "3")
            .param("maxGramSize", "50")
            // Phonetic autocompletion: DoubleMetaphone encoding + English stemming.
            .analyzerDef("autocompletePhoneticAnalyzer", StandardTokenizerFactory.class)
            .filter(StandardFilterFactory.class)
            .filter(StopFilterFactory.class)
            .filter(PhoneticFilterFactory.class)
            .param("encoder", "DoubleMetaphone")
            .filter(SnowballPorterFilterFactory.class)
            .param("language", "English")
            // N-gram autocompletion: word-delimited, lower-cased, 3..20-char grams.
            .analyzerDef("autocompleteNGramAnalyzer", StandardTokenizerFactory.class)
            .filter(WordDelimiterFilterFactory.class)
            .filter(LowerCaseFilterFactory.class)
            .filter(NGramFilterFactory.class)
            .param("minGramSize", "3")
            .param("maxGramSize", "20")
            // Plain analyzers.
            .analyzerDef("standardAnalyzer", StandardTokenizerFactory.class)
            .filter(LowerCaseFilterFactory.class)
            .analyzerDef("exactAnalyzer", StandardTokenizerFactory.class)
            .analyzerDef("conceptParentPidsAnalyzer", WhitespaceTokenizerFactory.class);

    return searchMapping;
}
/** Factory lookup by SPI name must ignore case. */
public void testLookupTokenFilter() {
    for (String spiName : new String[] { "Lowercase", "LOWERCASE", "lowercase" }) {
        assertSame(LowerCaseFilterFactory.class, TokenFilterFactory.forName(spiName).getClass());
    }
    for (String spiName : new String[] { "RemoveDuplicates", "REMOVEDUPLICATES", "removeduplicates" }) {
        assertSame(RemoveDuplicatesTokenFilterFactory.class,
                TokenFilterFactory.forName(spiName).getClass());
    }
}
/**
 * Verifies that the explicit multiterm analyzer of "content_charfilter"
 * is a TokenizerChain with a keyword tokenizer, only lower-case token
 * filters, and exactly one mapping char filter.
 */
@Test
public void testQueryCopiedToMulti() {
    SchemaField field = h.getCore().getSchema().getField("content_charfilter");
    Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();
    assertTrue(analyzer instanceof TokenizerChain);
    // Cast once and reuse instead of casting the analyzer twice.
    TokenizerChain tc = (TokenizerChain) analyzer;
    assertTrue(tc.getTokenizerFactory() instanceof KeywordTokenizerFactory);
    for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
        assertTrue(factory instanceof LowerCaseFilterFactory);
    }
    // assertEquals reports expected vs actual on failure, unlike
    // assertTrue(length == 1) which only says "false".
    assertEquals(1, tc.getCharFilterFactories().length);
    assertTrue(tc.getCharFilterFactories()[0] instanceof MappingCharFilterFactory);
}
/**
 * Verifies that the derived multiterm analyzer of "content_ws" is a
 * TokenizerChain with a keyword tokenizer, only ASCII-folding or
 * lower-case filters, and no char filters.
 */
@Test
public void testDefaultCopiedToMulti() {
    SchemaField field = h.getCore().getSchema().getField("content_ws");
    Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();
    assertTrue(analyzer instanceof TokenizerChain);
    // Cast once and reuse instead of casting the analyzer twice.
    TokenizerChain tc = (TokenizerChain) analyzer;
    assertTrue(tc.getTokenizerFactory() instanceof KeywordTokenizerFactory);
    for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
        assertTrue((factory instanceof ASCIIFoldingFilterFactory)
                || (factory instanceof LowerCaseFilterFactory));
    }
    // assertNull gives a clearer failure message than assertTrue(x == null).
    assertNull(tc.getCharFilterFactories());
}
/**
 * Registers the standard analyzer set under the given prefix:
 * keyword (raw), keyword-clean (folded + lower-cased), text
 * (word-delimited), text-stemming (plus French minimal stemming) and
 * text-sort (normalized for sorting).
 *
 * @param prefix  prefix prepended to every analyzer name
 * @param builder registry builder receiving the definitions
 */
protected void registerWithPrefix(String prefix, LuceneAnalyzerDefinitionRegistryBuilder builder) {
    // Raw keyword analyzer: input kept as a single, untouched token.
    builder.analyzer(prefix + HibernateSearchAnalyzer.KEYWORD)
            .tokenizer(KeywordTokenizerFactory.class);

    // Keyword, normalized: ASCII-folded then lower-cased.
    builder.analyzer(prefix + HibernateSearchAnalyzer.KEYWORD_CLEAN)
            .tokenizer(KeywordTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(LowerCaseFilterFactory.class);

    // General text: whitespace-split, folded, word-delimited (original
    // preserved), lower-cased.
    builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT)
            .tokenizer(WhitespaceTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(WordDelimiterFilterFactory.class)
            .param("generateWordParts", "1")
            .param("generateNumberParts", "1")
            .param("catenateWords", "0")
            .param("catenateNumbers", "0")
            .param("catenateAll", "0")
            .param("splitOnCaseChange", "0")
            .param("splitOnNumerics", "0")
            .param("preserveOriginal", "1")
            .tokenFilter(LowerCaseFilterFactory.class);

    // Same as TEXT, with French minimal stemming appended.
    builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT_STEMMING)
            .tokenizer(WhitespaceTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(WordDelimiterFilterFactory.class)
            .param("generateWordParts", "1")
            .param("generateNumberParts", "1")
            .param("catenateWords", "0")
            .param("catenateNumbers", "0")
            .param("catenateAll", "0")
            .param("splitOnCaseChange", "0")
            .param("splitOnNumerics", "0")
            .param("preserveOriginal", "1")
            .tokenFilter(LowerCaseFilterFactory.class)
            .tokenFilter(CoreFrenchMinimalStemFilterFactory.class);

    // Sort key: single token, folded, lower-cased, punctuation mapped to
    // spaces, non-alphanumerics stripped, then trimmed.
    builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT_SORT)
            .tokenizer(KeywordTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(LowerCaseFilterFactory.class)
            .tokenFilter(PatternReplaceFilterFactory.class)
            .param("pattern", "('-&\\.,\\(\\))")
            .param("replacement", " ")
            .param("replace", "all")
            .tokenFilter(PatternReplaceFilterFactory.class)
            .param("pattern", "([^0-9\\p{L} ])")
            .param("replacement", "")
            .param("replace", "all")
            .tokenFilter(TrimFilterFactory.class);
}
/**
 * Builds a minimal custom analyzer: whitespace tokenization followed by
 * lower-casing.
 *
 * @return the assembled {@link Analyzer}
 * @throws IOException if the analyzer components cannot be loaded
 */
private Analyzer makeAnalyzer() throws IOException {
    CustomAnalyzer.Builder builder = CustomAnalyzer.builder();
    builder.withTokenizer(WhitespaceTokenizerFactory.class);
    builder.addTokenFilter(LowerCaseFilterFactory.class);
    return builder.build();
}