@Test
public void testQueryCopiedToMulti() {
  // Field whose query analyzer declares a char filter; the derived multi-term
  // analyzer must carry it across.
  SchemaField field = h.getCore().getLatestSchema().getField("content_charfilter");
  Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();

  // The multi-term analyzer is always a TokenizerChain built around a keyword
  // tokenizer (multi-term queries must not split the input). Cast once.
  assertTrue(analyzer instanceof TokenizerChain);
  TokenizerChain tc = (TokenizerChain) analyzer;
  assertTrue(tc.getTokenizerFactory() instanceof KeywordTokenizerFactory);

  // Only multi-term-safe filters are copied from the query analyzer.
  for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
    assertTrue(factory instanceof LowerCaseFilterFactory);
  }

  // assertEquals gives an informative failure message, unlike assertTrue(x == 1).
  assertEquals(1, tc.getCharFilterFactories().length);
  assertTrue(tc.getCharFilterFactories()[0] instanceof MappingCharFilterFactory);
}
@Test
public void testDefaultCopiedToMulti() {
  // Field with a plain whitespace-based analyzer and no explicit multi-term
  // section; the multi-term analyzer is derived from the defaults.
  SchemaField field = h.getCore().getLatestSchema().getField("content_ws");
  Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();

  // Derived multi-term analyzer is a TokenizerChain around a keyword
  // tokenizer. Cast once instead of twice.
  assertTrue(analyzer instanceof TokenizerChain);
  TokenizerChain tc = (TokenizerChain) analyzer;
  assertTrue(tc.getTokenizerFactory() instanceof KeywordTokenizerFactory);

  // Only multi-term-safe filters (folding / lower-casing) are copied over.
  for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
    assertTrue((factory instanceof ASCIIFoldingFilterFactory)
        || (factory instanceof LowerCaseFilterFactory));
  }

  // assertNull reports the unexpected value on failure; assertTrue(x == null) does not.
  assertNull(tc.getCharFilterFactories());
}
/** Test KeywordTokenizerFactory: the whole input must come out as one token. */
public void testKeywordTokenizer() throws Exception {
  final String input = "What's this thing do?";
  KeywordTokenizerFactory factory = new KeywordTokenizerFactory();
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  // No configuration parameters are needed for the keyword tokenizer.
  factory.init(Collections.<String, String>emptyMap());
  Tokenizer stream = factory.create(new StringReader(input));
  // A keyword tokenizer emits the entire input, punctuation included, as a single token.
  assertTokenStreamContents(stream, new String[] { input });
}
@Test
public void testQueryCopiedToMulti() {
  // Field whose query analyzer declares a char filter; the derived multi-term
  // analyzer must carry it across.
  SchemaField field = h.getCore().getSchema().getField("content_charfilter");
  Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();

  // The multi-term analyzer is always a TokenizerChain built around a keyword
  // tokenizer (multi-term queries must not split the input). Cast once.
  assertTrue(analyzer instanceof TokenizerChain);
  TokenizerChain tc = (TokenizerChain) analyzer;
  assertTrue(tc.getTokenizerFactory() instanceof KeywordTokenizerFactory);

  // Only multi-term-safe filters are copied from the query analyzer.
  for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
    assertTrue(factory instanceof LowerCaseFilterFactory);
  }

  // assertEquals gives an informative failure message, unlike assertTrue(x == 1).
  assertEquals(1, tc.getCharFilterFactories().length);
  assertTrue(tc.getCharFilterFactories()[0] instanceof MappingCharFilterFactory);
}
@Test
public void testDefaultCopiedToMulti() {
  // Field with a plain whitespace-based analyzer and no explicit multi-term
  // section; the multi-term analyzer is derived from the defaults.
  SchemaField field = h.getCore().getSchema().getField("content_ws");
  Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();

  // Derived multi-term analyzer is a TokenizerChain around a keyword
  // tokenizer. Cast once instead of twice.
  assertTrue(analyzer instanceof TokenizerChain);
  TokenizerChain tc = (TokenizerChain) analyzer;
  assertTrue(tc.getTokenizerFactory() instanceof KeywordTokenizerFactory);

  // Only multi-term-safe filters (folding / lower-casing) are copied over.
  for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
    assertTrue((factory instanceof ASCIIFoldingFilterFactory)
        || (factory instanceof LowerCaseFilterFactory));
  }

  // assertNull reports the unexpected value on failure; assertTrue(x == null) does not.
  assertNull(tc.getCharFilterFactories());
}
/**
 * Registers the standard set of Hibernate Search analyzer definitions, each
 * named with the given prefix prepended to the {@code HibernateSearchAnalyzer}
 * constant.
 *
 * @param prefix  string prepended to every analyzer name being registered
 * @param builder registry builder the analyzer definitions are added to
 */
protected void registerWithPrefix(String prefix, LuceneAnalyzerDefinitionRegistryBuilder builder) {
    // KEYWORD: entire input kept as a single, untouched token.
    builder.analyzer(prefix + HibernateSearchAnalyzer.KEYWORD).tokenizer(KeywordTokenizerFactory.class);
    // KEYWORD_CLEAN: single token, ASCII-folded then lower-cased.
    builder.analyzer(prefix + HibernateSearchAnalyzer.KEYWORD_CLEAN).tokenizer(KeywordTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(LowerCaseFilterFactory.class);
    // TEXT: whitespace tokenization, ASCII folding, word-delimiter splitting
    // (word/number parts generated, no catenation, original preserved),
    // then lower-casing.
    builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT).tokenizer(WhitespaceTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(WordDelimiterFilterFactory.class)
                    .param("generateWordParts", "1")
                    .param("generateNumberParts", "1")
                    .param("catenateWords", "0")
                    .param("catenateNumbers", "0")
                    .param("catenateAll", "0")
                    .param("splitOnCaseChange", "0")
                    .param("splitOnNumerics", "0")
                    .param("preserveOriginal", "1")
            .tokenFilter(LowerCaseFilterFactory.class);
    // TEXT_STEMMING: same chain as TEXT, with French minimal stemming appended.
    builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT_STEMMING).tokenizer(WhitespaceTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(WordDelimiterFilterFactory.class)
                    .param("generateWordParts", "1")
                    .param("generateNumberParts", "1")
                    .param("catenateWords", "0")
                    .param("catenateNumbers", "0")
                    .param("catenateAll", "0")
                    .param("splitOnCaseChange", "0")
                    .param("splitOnNumerics", "0")
                    .param("preserveOriginal", "1")
            .tokenFilter(LowerCaseFilterFactory.class)
            .tokenFilter(CoreFrenchMinimalStemFilterFactory.class);
    // TEXT_SORT: single normalized token for sorting — folded, lower-cased,
    // listed punctuation replaced by spaces, any remaining non-alphanumeric
    // characters stripped, then trimmed.
    builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT_SORT).tokenizer(KeywordTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(LowerCaseFilterFactory.class)
            .tokenFilter(PatternReplaceFilterFactory.class)
                    .param("pattern", "('-&\\.,\\(\\))")
                    .param("replacement", " ")
                    .param("replace", "all")
            .tokenFilter(PatternReplaceFilterFactory.class)
                    .param("pattern", "([^0-9\\p{L} ])")
                    .param("replacement", "")
                    .param("replace", "all")
            .tokenFilter(TrimFilterFactory.class);
}