EdgeNGramTokenFilterFactory(Index index, Settings indexSettings, String name, Settings settings) {
    super(index, indexSettings, name, settings);
    this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
    this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
    this.side = parseSide(settings.get("side", "front"));
    this.esVersion = org.elasticsearch.Version.indexCreated(indexSettings);
}
@SuppressWarnings("deprecation") @Override public TokenStream create(TokenStream tokenStream) { final Version version = this.version == Version.LUCENE_4_3 ? Version.LUCENE_4_4 : this.version; // we supported it since 4.3 if (version.onOrAfter(Version.LUCENE_4_3)) { return new NGramTokenFilter(tokenStream, minGram, maxGram); } else { return new Lucene43NGramTokenFilter(tokenStream, minGram, maxGram); } }
/** Initialize the n-gram min and max gram sizes from the factory arguments. */
@Override
public void init(Map<String, String> args) {
    super.init(args);
    String maxArg = args.get("maxGramSize");
    maxGramSize = (maxArg != null ? Integer.parseInt(maxArg) : NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
    String minArg = args.get("minGramSize");
    minGramSize = (minArg != null ? Integer.parseInt(minArg) : NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
}
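// These arguments would typically come from a Solr schema filter definition
// along the lines of
//   <filter class="solr.NGramFilterFactory" minGramSize="2" maxGramSize="4"/>
// (the values here are illustrative, not taken from any particular schema).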
/**
 * @param fieldName ignored param
 * @param reader contains data to parse
 * @return TokenStream of ngrams
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    return new NGramTokenFilter(
            new LowerCaseFilter(
                    new StandardFilter(
                            new StandardTokenizer(reader))),
            min_ngram, max_ngram);
}
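// A minimal consumption sketch for the method above, assuming the enclosing
// class is instantiated as `analyzer` (an illustrative name) and StringReader
// and CharTermAttribute are imported. This is the standard Lucene loop:
TokenStream stream = analyzer.tokenStream("body", new StringReader("Hello World"));
CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
stream.reset();                          // required before the first incrementToken()
while (stream.incrementToken()) {
    System.out.println(term.toString()); // one n-gram per iteration
}
stream.end();
stream.close();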
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new StandardTokenizer();
    TokenStream src = new LowerCaseFilter(source);
    src = new AddWordBoundaryFilter(src);
    NGramTokenFilter filter = new NGramTokenFilter(src, this.minShingleSize, this.maxShingleSize);
    return new TokenStreamComponents(source, filter);
}
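// AddWordBoundaryFilter is not a stock Lucene class. A minimal sketch of what
// such a filter might look like — purely an assumption, mirroring the "^"/"$"
// boundary markers used in the tokenize(...) example further down:
import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

final class AddWordBoundaryFilter extends TokenFilter {
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

    AddWordBoundaryFilter(TokenStream input) {
        super(input);
    }

    @Override
    public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) {
            return false;
        }
        // Wrap each term in boundary markers so the n-grams encode word edges.
        String wrapped = "^" + termAtt.toString() + "$";
        termAtt.setEmpty().append(wrapped);
        return true;
    }
}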
@Override
protected void initProperties() throws SearchLibException {
    super.initProperties();
    addProperty(ClassPropertyEnum.MIN_GRAM,
            Integer.toString(NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE), null, 10, 1);
    addProperty(ClassPropertyEnum.MAX_GRAM,
            Integer.toString(NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE), null, 10, 1);
}
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer tokenizer = new LowerCaseTokenizer();
    TokenStream filter = new NGramTokenFilter(tokenizer, 1, 5);
    return new TokenStreamComponents(tokenizer, filter);
}
public EdgeNGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
    super(indexSettings, name, settings);
    this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
    this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
    this.side = parseSide(settings.get("side", "front"));
}
public NGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
    super(indexSettings, name, settings);
    this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
    this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
}
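// In the Elasticsearch factories above, min_gram/max_gram are read from the
// index analysis settings, i.e. keys of the form
//   index.analysis.filter.<filter_name>.min_gram
// where <filter_name> is whatever the filter was registered as.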
@Override
public TokenStream create(TokenStream tokenStream) {
    return new NGramTokenFilter(tokenStream, minGram, maxGram);
}
@Inject
public NGramTokenFilterFactory(Index index, IndexSettingsService indexSettingsService,
        @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
    this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
}
private String[] tokenize(String input) throws Exception {
    input = input.toLowerCase();
    // Wrap the whole input in boundary markers, then emit its 1..4-grams.
    NGramTokenFilter filter = new NGramTokenFilter(
            new KeywordTokenizer(new StringReader("^" + input + "$")), 1, 4);
    CharTermAttribute charTermAttrib = filter.getAttribute(CharTermAttribute.class);
    filter.reset();
    Set<String> tokens = new LinkedHashSet<String>();
    while (filter.incrementToken()) {
        tokens.add(charTermAttrib.toString());
    }
    filter.end(); // complete the TokenStream contract before closing
    filter.close();
    return tokens.toArray(new String[tokens.size()]);
}
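// For illustration: with input "hi" the keyword token is "^hi$", and the
// 1..4-grams collected (deduplicated by the LinkedHashSet) would be roughly:
//   ^, ^h, ^hi, ^hi$, h, hi, hi$, i, i$, $
// The exact emission order depends on the Lucene version (4.4+ groups grams
// by start offset; earlier versions grouped them by gram size).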
@Override
public NGramTokenFilter create(TokenStream input) {
    return new NGramTokenFilter(input, minGramSize, maxGramSize);
}
@Override
public TokenStream create(TokenStream input) {
    return new NGramTokenFilter(input, min, max);
}
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
    return new NGramTokenFilter(new KeywordTokenizer(reader), 2, 4);
}