/**
 * Creates a new {@link ICUTokenizer}.
 *
 * <p>When no {@code config} is set, the tokenizer is built with its no-argument
 * constructor; otherwise {@code config} is passed to the constructor.
 *
 * @return a freshly constructed tokenizer
 */
@Override
public Tokenizer create() {
    return config == null ? new ICUTokenizer() : new ICUTokenizer(config);
}
/**
 * Creates a new {@link ICUTokenizer} built from this factory's {@code config}.
 *
 * @return a freshly constructed tokenizer
 */
@Override
public Tokenizer create() {
    final Tokenizer tokenizer = new ICUTokenizer(config);
    return tokenizer;
}
public void testSimpleIcuTokenizer() throws IOException { Settings indexSettings = Settings.builder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .build(); Settings nodeSettings = Settings.builder() .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) .build(); Environment env = new Environment(nodeSettings); // Build the Tokenizer TokenizerFactory tokenizerFactory = new EmojiTokenizerFactory( IndexSettingsModule.newIndexSettings(new Index("test", "_na_"), indexSettings), env, "emoji_tokenizer", Settings.EMPTY ); ICUTokenizer tokenizer = (ICUTokenizer) tokenizerFactory.create(); // Real tests Reader reader = new StringReader("向日葵, one-two"); tokenizer.setReader(reader); assertTokenStreamContents(tokenizer, new String[]{"向日葵", "one", "two"}); Reader reader2 = new StringReader("Simple: \uD83D\uDE02, Modified: \uD83D\uDC66\uD83C\uDFFD " + "and composed rainbow: \uD83C\uDFF3️\u200D\uD83C\uDF08 and \uD83C\uDDF8\uD83C\uDDEA Sweden flag."); tokenizer.setReader(reader2); assertTokenStreamContents(tokenizer, new String[]{ "Simple", "\uD83D\uDE02", "Modified", "\uD83D\uDC66\uD83C\uDFFD", "and", "composed", "rainbow", "\uD83C\uDFF3️\u200D\uD83C\uDF08", "and", "\uD83C\uDDF8\uD83C\uDDEA", "Sweden", "flag", }); }
/**
 * Creates a new {@link ICUTokenizer} over the given input, built from this
 * factory's {@code config}.
 *
 * @param reader the character source handed to the tokenizer's constructor
 * @return a freshly constructed tokenizer
 */
@Override
public Tokenizer create(Reader reader) {
    final Tokenizer tokenizer = new ICUTokenizer(reader, config);
    return tokenizer;
}