Java 类org.apache.lucene.analysis.core.KeywordTokenizerFactory 实例源码

项目:search    文件:MultiTermTest.java   
@Test
public void testQueryCopiedToMulti() {
  // Multi-term analyzer derived from an explicitly declared query analyzer:
  // expect a keyword tokenizer, only lowercase token filters, and exactly
  // one mapping char filter.
  SchemaField field = h.getCore().getLatestSchema().getField("content_charfilter");
  Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();
  assertTrue(analyzer instanceof TokenizerChain);

  TokenizerChain chain = (TokenizerChain) analyzer;
  assertTrue(chain.getTokenizerFactory() instanceof KeywordTokenizerFactory);
  for (TokenFilterFactory filterFactory : chain.getTokenFilterFactories()) {
    assertTrue(filterFactory instanceof LowerCaseFilterFactory);
  }

  assertTrue(chain.getCharFilterFactories().length == 1);
  assertTrue(chain.getCharFilterFactories()[0] instanceof MappingCharFilterFactory);
}
项目:search    文件:MultiTermTest.java   
@Test
public void testDefaultCopiedToMulti() {
  // Multi-term analyzer synthesized from a whitespace-tokenized field's
  // default analyzer: keyword tokenizer, only ASCII-folding/lowercase
  // filters, and no char filters at all.
  SchemaField field = h.getCore().getLatestSchema().getField("content_ws");
  Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();
  assertTrue(analyzer instanceof TokenizerChain);

  TokenizerChain chain = (TokenizerChain) analyzer;
  assertTrue(chain.getTokenizerFactory() instanceof KeywordTokenizerFactory);
  for (TokenFilterFactory filterFactory : chain.getTokenFilterFactories()) {
    boolean allowed = filterFactory instanceof ASCIIFoldingFilterFactory
        || filterFactory instanceof LowerCaseFilterFactory;
    assertTrue(allowed);
  }

  assertTrue(chain.getCharFilterFactories() == null);
}
项目:NYBC    文件:TestStandardFactories.java   
/**
 * KeywordTokenizerFactory must emit the entire input as a single token,
 * leaving whitespace and punctuation untouched.
 */
public void testKeywordTokenizer() throws Exception {
  KeywordTokenizerFactory factory = new KeywordTokenizerFactory();
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  factory.init(Collections.<String, String>emptyMap());

  Reader input = new StringReader("What's this thing do?");
  Tokenizer stream = factory.create(input);
  // One token spanning the whole input string.
  assertTokenStreamContents(stream,
      new String[] {"What's this thing do?"});
}
项目:NYBC    文件:MultiTermTest.java   
@Test
public void testQueryCopiedToMulti() {
  // For a field with an explicit query analyzer, the derived multi-term
  // analyzer keeps the keyword tokenizer, the lowercase filter, and the
  // single mapping char filter.
  SchemaField field = h.getCore().getSchema().getField("content_charfilter");
  Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();
  assertTrue(analyzer instanceof TokenizerChain);

  TokenizerChain multiTermChain = (TokenizerChain) analyzer;
  assertTrue(multiTermChain.getTokenizerFactory() instanceof KeywordTokenizerFactory);
  for (TokenFilterFactory tokenFilterFactory : multiTermChain.getTokenFilterFactories()) {
    assertTrue(tokenFilterFactory instanceof LowerCaseFilterFactory);
  }

  assertTrue(multiTermChain.getCharFilterFactories().length == 1);
  assertTrue(multiTermChain.getCharFilterFactories()[0] instanceof MappingCharFilterFactory);
}
项目:NYBC    文件:MultiTermTest.java   
@Test
public void testDefaultCopiedToMulti() {
  // Multi-term analyzer built from the field's default analyzer: keyword
  // tokenizer, no char filters, and every token filter is either
  // ASCII-folding or lowercase.
  SchemaField field = h.getCore().getSchema().getField("content_ws");
  Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();
  assertTrue(analyzer instanceof TokenizerChain);

  TokenizerChain multiTermChain = (TokenizerChain) analyzer;
  assertTrue(multiTermChain.getTokenizerFactory() instanceof KeywordTokenizerFactory);
  for (TokenFilterFactory tokenFilterFactory : multiTermChain.getTokenFilterFactories()) {
    boolean allowed = tokenFilterFactory instanceof ASCIIFoldingFilterFactory
        || tokenFilterFactory instanceof LowerCaseFilterFactory;
    assertTrue(allowed);
  }

  assertTrue(multiTermChain.getCharFilterFactories() == null);
}
项目:search-core    文件:MultiTermTest.java   
@Test
public void testQueryCopiedToMulti() {
  // The multi-term analyzer must mirror the explicit query analyzer:
  // keyword tokenizer, lowercase filters only, one mapping char filter.
  SchemaField field = h.getCore().getSchema().getField("content_charfilter");
  Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();
  assertTrue(analyzer instanceof TokenizerChain);

  TokenizerChain tokenizerChain = (TokenizerChain) analyzer;
  assertTrue(tokenizerChain.getTokenizerFactory() instanceof KeywordTokenizerFactory);
  for (TokenFilterFactory tff : tokenizerChain.getTokenFilterFactories()) {
    assertTrue(tff instanceof LowerCaseFilterFactory);
  }

  assertTrue(tokenizerChain.getCharFilterFactories().length == 1);
  assertTrue(tokenizerChain.getCharFilterFactories()[0] instanceof MappingCharFilterFactory);
}
项目:search-core    文件:MultiTermTest.java   
@Test
public void testDefaultCopiedToMulti() {
  // Multi-term analyzer derived from the default analyzer: keyword
  // tokenizer, no char filters, filters restricted to ASCII-folding
  // and lowercase.
  SchemaField field = h.getCore().getSchema().getField("content_ws");
  Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();
  assertTrue(analyzer instanceof TokenizerChain);

  TokenizerChain tokenizerChain = (TokenizerChain) analyzer;
  assertTrue(tokenizerChain.getTokenizerFactory() instanceof KeywordTokenizerFactory);
  for (TokenFilterFactory tff : tokenizerChain.getTokenFilterFactories()) {
    boolean allowed = (tff instanceof ASCIIFoldingFilterFactory)
        || (tff instanceof LowerCaseFilterFactory);
    assertTrue(allowed);
  }

  assertTrue(tokenizerChain.getCharFilterFactories() == null);
}
项目:owsi-core-parent    文件:CoreLuceneAnalyzersDefinitionProvider.java   
/**
 * Registers the core set of Lucene analyzer definitions on the given builder,
 * naming each analyzer with the supplied prefix prepended to the
 * corresponding {@code HibernateSearchAnalyzer} constant.
 *
 * <p>Note: the fluent {@code .param(name, value)} calls configure the
 * immediately preceding {@code .tokenFilter(...)} in the chain, so call
 * order below is significant.
 *
 * @param prefix prefix prepended to every registered analyzer name
 * @param builder registry builder to populate; mutated by this call
 */
protected void registerWithPrefix(String prefix, LuceneAnalyzerDefinitionRegistryBuilder builder) {
    // KEYWORD: whole input emitted as a single, unmodified token.
    builder.analyzer(prefix + HibernateSearchAnalyzer.KEYWORD).tokenizer(KeywordTokenizerFactory.class);

    // KEYWORD_CLEAN: single token, accent-folded to ASCII and lowercased.
    builder.analyzer(prefix + HibernateSearchAnalyzer.KEYWORD_CLEAN).tokenizer(KeywordTokenizerFactory.class)
        .tokenFilter(ASCIIFoldingFilterFactory.class)
        .tokenFilter(LowerCaseFilterFactory.class);

    // TEXT: whitespace tokenization, ASCII folding, then word-delimiter
    // splitting (word/number parts generated, no catenation, originals
    // preserved), finally lowercased.
    builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT).tokenizer(WhitespaceTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(WordDelimiterFilterFactory.class)
                    .param("generateWordParts", "1")
                    .param("generateNumberParts", "1")
                    .param("catenateWords", "0")
                    .param("catenateNumbers", "0")
                    .param("catenateAll", "0")
                    .param("splitOnCaseChange", "0")
                    .param("splitOnNumerics", "0")
                    .param("preserveOriginal", "1")
            .tokenFilter(LowerCaseFilterFactory.class);

    // TEXT_STEMMING: same chain as TEXT, plus a project-specific filter
    // (CoreFrenchMinimalStemFilterFactory — presumably French light
    // stemming; semantics defined elsewhere in the project).
    builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT_STEMMING).tokenizer(WhitespaceTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(WordDelimiterFilterFactory.class)
                    .param("generateWordParts", "1")
                    .param("generateNumberParts", "1")
                    .param("catenateWords", "0")
                    .param("catenateNumbers", "0")
                    .param("catenateAll", "0")
                    .param("splitOnCaseChange", "0")
                    .param("splitOnNumerics", "0")
                    .param("preserveOriginal", "1")
            .tokenFilter(LowerCaseFilterFactory.class)
            .tokenFilter(CoreFrenchMinimalStemFilterFactory.class);

    // TEXT_SORT: single normalized token for sorting — folded, lowercased,
    // punctuation replaced/stripped by two pattern-replace passes, trimmed.
    builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT_SORT).tokenizer(KeywordTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(LowerCaseFilterFactory.class)
            .tokenFilter(PatternReplaceFilterFactory.class)
                    // NOTE(review): as written this regex is a group matching
                    // the literal 7-char sequence '-&.,() — it looks like a
                    // character class ['-&.,()] was intended. TODO confirm
                    // before changing; existing indexes depend on it.
                    .param("pattern", "('-&\\.,\\(\\))")
                    .param("replacement", " ")
                    .param("replace", "all")
            .tokenFilter(PatternReplaceFilterFactory.class)
                    // Strip everything that is not a digit, a letter
                    // (any Unicode script), or a space.
                    .param("pattern", "([^0-9\\p{L} ])")
                    .param("replacement", "")
                    .param("replace", "all")
            .tokenFilter(TrimFilterFactory.class);

}