Java class org.apache.lucene.analysis.icu.segmentation.ICUTokenizer — example source snippets

Project: elasticsearch_my    File: IcuTokenizerFactory.java
@Override
public Tokenizer create() {
    // No explicit ICU config means the tokenizer falls back to its defaults.
    return (config == null) ? new ICUTokenizer() : new ICUTokenizer(config);
}
Project: emoji-search    File: EmojiTokenizerFactory.java
@Override
public Tokenizer create() {
    // Always build the tokenizer from the factory's ICU configuration.
    ICUTokenizer tokenizer = new ICUTokenizer(config);
    return tokenizer;
}
Project: emoji-search    File: EmojiIT.java
/**
 * Verifies that the emoji-aware ICU tokenizer splits ordinary text on word
 * boundaries while keeping multi-code-point emoji sequences (skin-tone
 * modifier, ZWJ composition, regional-indicator flag) as single tokens.
 */
public void testSimpleIcuTokenizer() throws IOException {
    // Minimal index settings: only the index-created version is set here.
    Settings indexSettings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .build();

    // The node environment requires a home path; a temp dir suffices for tests.
    Settings nodeSettings = Settings.builder()
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
            .build();

    Environment env = new Environment(nodeSettings);

    // Build the Tokenizer
    TokenizerFactory tokenizerFactory = new EmojiTokenizerFactory(
            IndexSettingsModule.newIndexSettings(new Index("test", "_na_"), indexSettings),
            env,
            "emoji_tokenizer",
            Settings.EMPTY
    );
    // The factory is expected to produce an ICUTokenizer instance.
    ICUTokenizer tokenizer = (ICUTokenizer) tokenizerFactory.create();

    // Real tests
    // CJK text stays one token; the hyphen splits "one-two" into two tokens.
    Reader reader = new StringReader("向日葵, one-two");
    tokenizer.setReader(reader);
    assertTokenStreamContents(tokenizer, new String[]{"向日葵", "one", "two"});

    // Surrogate pairs below: U+1F602 (face with tears of joy),
    // U+1F466 U+1F3FD (boy + medium skin tone), the rainbow flag
    // (U+1F3F3 + VS16 + ZWJ + U+1F308 — note the literal U+FE0F in source),
    // and the Sweden flag (regional indicators U+1F1F8 U+1F1EA).
    Reader reader2 = new StringReader("Simple: \uD83D\uDE02, Modified: \uD83D\uDC66\uD83C\uDFFD " +
            "and composed rainbow: \uD83C\uDFF3️\u200D\uD83C\uDF08 and \uD83C\uDDF8\uD83C\uDDEA Sweden flag.");
    tokenizer.setReader(reader2);

    // Each emoji sequence must survive tokenization as exactly one token.
    assertTokenStreamContents(tokenizer, new String[]{
        "Simple",
        "\uD83D\uDE02",
        "Modified",
        "\uD83D\uDC66\uD83C\uDFFD",
        "and",
        "composed",
        "rainbow",
        "\uD83C\uDFF3️\u200D\uD83C\uDF08",
        "and",
        "\uD83C\uDDF8\uD83C\uDDEA",
        "Sweden",
        "flag",
    });
}
Project: elasticsearch-analysis-german    File: IcuTokenizerFactory.java
@Override
public Tokenizer create(Reader reader) {
    // Wrap the caller-supplied reader using the factory's ICU configuration.
    ICUTokenizer tokenizer = new ICUTokenizer(reader, config);
    return tokenizer;
}