Java 类org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory 实例源码

项目:search    文件:MultiTermTest.java   
/**
 * Checks the multi-term analyzer of the "content_ws" field: it must be a
 * TokenizerChain built on a keyword tokenizer, every token filter factory in it
 * must be an ASCII-folding or lower-case factory, and it must report no char
 * filter factories.
 */
@Test
public void testDefaultCopiedToMulti() {
  TextField fieldType = (TextField) h.getCore().getLatestSchema().getField("content_ws").getType();
  Analyzer multiTermAnalyzer = fieldType.getMultiTermAnalyzer();
  assertTrue(multiTermAnalyzer instanceof TokenizerChain);

  // Safe to cast only after the instanceof assertion above has passed.
  TokenizerChain chain = (TokenizerChain) multiTermAnalyzer;
  assertTrue(chain.getTokenizerFactory() instanceof KeywordTokenizerFactory);

  for (TokenFilterFactory filterFactory : chain.getTokenFilterFactories()) {
    boolean foldsAscii = filterFactory instanceof ASCIIFoldingFilterFactory;
    boolean lowerCases = filterFactory instanceof LowerCaseFilterFactory;
    assertTrue(foldsAscii || lowerCases);
  }

  assertTrue(null == chain.getCharFilterFactories());
}
项目:NYBC    文件:TestStandardFactories.java   
/**
 * Checks that a filter created by an ASCIIFoldingFilterFactory, initialised with
 * an empty argument map, turns the whitespace-tokenized input "Česká" into the
 * single token "Ceska".
 */
public void testASCIIFolding() throws Exception {
  ASCIIFoldingFilterFactory foldingFactory = new ASCIIFoldingFilterFactory();
  foldingFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  foldingFactory.init(Collections.<String, String>emptyMap());

  Tokenizer source = new MockTokenizer(new StringReader("Česká"), MockTokenizer.WHITESPACE, false);
  TokenStream folded = foldingFactory.create(source);

  assertTokenStreamContents(folded, new String[] { "Ceska" });
}
项目:NYBC    文件:MultiTermTest.java   
/**
 * Checks the multi-term analyzer of the "content_ws" field: it must be a
 * TokenizerChain built on a keyword tokenizer, every token filter factory in it
 * must be an ASCII-folding or lower-case factory, and it must report no char
 * filter factories.
 */
@Test
public void testDefaultCopiedToMulti() {
  SchemaField contentField = h.getCore().getSchema().getField("content_ws");
  TextField textType = (TextField) contentField.getType();
  Analyzer multiTerm = textType.getMultiTermAnalyzer();
  assertTrue(multiTerm instanceof TokenizerChain);

  // Safe to cast only after the instanceof assertion above has passed.
  TokenizerChain tokenizerChain = (TokenizerChain) multiTerm;
  assertTrue(tokenizerChain.getTokenizerFactory() instanceof KeywordTokenizerFactory);

  for (TokenFilterFactory candidate : tokenizerChain.getTokenFilterFactories()) {
    boolean isAsciiFolding = candidate instanceof ASCIIFoldingFilterFactory;
    boolean isLowerCase = candidate instanceof LowerCaseFilterFactory;
    assertTrue(isAsciiFolding || isLowerCase);
  }

  assertTrue(null == tokenizerChain.getCharFilterFactories());
}
项目:search-core    文件:MultiTermTest.java   
/**
 * Checks the multi-term analyzer of the "content_ws" field: it must be a
 * TokenizerChain built on a keyword tokenizer, every token filter factory in it
 * must be an ASCII-folding or lower-case factory, and it must report no char
 * filter factories.
 */
@Test
public void testDefaultCopiedToMulti() {
  SchemaField wsField = h.getCore().getSchema().getField("content_ws");
  Analyzer candidateAnalyzer = ((TextField) wsField.getType()).getMultiTermAnalyzer();
  assertTrue(candidateAnalyzer instanceof TokenizerChain);

  // Safe to cast only after the instanceof assertion above has passed.
  TokenizerChain analysisChain = (TokenizerChain) candidateAnalyzer;
  assertTrue(analysisChain.getTokenizerFactory() instanceof KeywordTokenizerFactory);

  for (TokenFilterFactory tokenFilter : analysisChain.getTokenFilterFactories()) {
    boolean allowedFilter = tokenFilter instanceof ASCIIFoldingFilterFactory
        || tokenFilter instanceof LowerCaseFilterFactory;
    assertTrue(allowedFilter);
  }

  assertTrue(null == analysisChain.getCharFilterFactories());
}
项目:owsi-core-parent    文件:CoreLuceneAnalyzersDefinitionProvider.java   
/**
 * Registers five analyzer definitions on the given builder. Each analyzer is named
 * by concatenating {@code prefix} with a {@code HibernateSearchAnalyzer} constant
 * (KEYWORD, KEYWORD_CLEAN, TEXT, TEXT_STEMMING, TEXT_SORT).
 *
 * @param prefix  string prepended to every analyzer name
 * @param builder registry builder that receives the analyzer definitions
 */
protected void registerWithPrefix(String prefix, LuceneAnalyzerDefinitionRegistryBuilder builder) {
    // KEYWORD: keyword tokenizer only, no token filters.
    final String keywordName = prefix + HibernateSearchAnalyzer.KEYWORD;
    builder.analyzer(keywordName)
            .tokenizer(KeywordTokenizerFactory.class);

    // KEYWORD_CLEAN: keyword tokenizer, then ASCII folding and lower-casing.
    final String keywordCleanName = prefix + HibernateSearchAnalyzer.KEYWORD_CLEAN;
    builder.analyzer(keywordCleanName)
            .tokenizer(KeywordTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(LowerCaseFilterFactory.class);

    // TEXT: whitespace tokenizer, ASCII folding, word-delimiter filter, lower-casing.
    final String textName = prefix + HibernateSearchAnalyzer.TEXT;
    builder.analyzer(textName)
            .tokenizer(WhitespaceTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(WordDelimiterFilterFactory.class)
                .param("generateWordParts", "1")
                .param("generateNumberParts", "1")
                .param("catenateWords", "0")
                .param("catenateNumbers", "0")
                .param("catenateAll", "0")
                .param("splitOnCaseChange", "0")
                .param("splitOnNumerics", "0")
                .param("preserveOriginal", "1")
            .tokenFilter(LowerCaseFilterFactory.class);

    // TEXT_STEMMING: same chain as TEXT, followed by the French minimal stem filter.
    final String textStemmingName = prefix + HibernateSearchAnalyzer.TEXT_STEMMING;
    builder.analyzer(textStemmingName)
            .tokenizer(WhitespaceTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(WordDelimiterFilterFactory.class)
                .param("generateWordParts", "1")
                .param("generateNumberParts", "1")
                .param("catenateWords", "0")
                .param("catenateNumbers", "0")
                .param("catenateAll", "0")
                .param("splitOnCaseChange", "0")
                .param("splitOnNumerics", "0")
                .param("preserveOriginal", "1")
            .tokenFilter(LowerCaseFilterFactory.class)
            .tokenFilter(CoreFrenchMinimalStemFilterFactory.class);

    // TEXT_SORT: keyword tokenizer, ASCII folding, lower-casing, two pattern
    // replacements, then trimming.
    final String textSortName = prefix + HibernateSearchAnalyzer.TEXT_SORT;
    builder.analyzer(textSortName)
            .tokenizer(KeywordTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(LowerCaseFilterFactory.class)
            // NOTE(review): the parentheses below form a capturing group, not a
            // character class, so this pattern only matches the literal sequence
            // '-&.,() -- confirm whether a character class was intended.
            .tokenFilter(PatternReplaceFilterFactory.class)
                .param("pattern", "('-&\\.,\\(\\))")
                .param("replacement", " ")
                .param("replace", "all")
            // Removes every character that is not a digit, a Unicode letter or a space.
            .tokenFilter(PatternReplaceFilterFactory.class)
                .param("pattern", "([^0-9\\p{L} ])")
                .param("replacement", "")
                .param("replace", "all")
            .tokenFilter(TrimFilterFactory.class);
}