/**
 * Builds an {@link EdgeNGramTokenizer} using this factory's {@code minGram} and
 * {@code maxGram} settings.
 *
 * <p>When a {@code matcher} is present, the returned tokenizer overrides
 * {@code isTokenChar} and defers the decision to that matcher; otherwise a plain
 * {@link EdgeNGramTokenizer} is returned.
 *
 * @return a newly constructed tokenizer
 */
@Override
public Tokenizer create() {
  if (matcher != null) {
    // Anonymous subclass: token-character classification is delegated to the matcher.
    return new EdgeNGramTokenizer(minGram, maxGram) {
      @Override
      protected boolean isTokenChar(int ch) {
        return matcher.isTokenChar(ch);
      }
    };
  }
  // No matcher: use the tokenizer's own isTokenChar implementation.
  return new EdgeNGramTokenizer(minGram, maxGram);
}
/**
 * Initializes this factory from its argument map.
 *
 * <p>Recognized keys:
 * <ul>
 *   <li>{@code maxGramSize} - parsed with {@link Integer#parseInt(String)}; falls back to
 *       {@code EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE} when absent</li>
 *   <li>{@code minGramSize} - parsed with {@link Integer#parseInt(String)}; falls back to
 *       {@code EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE} when absent</li>
 *   <li>{@code side} - used as given; falls back to the label of
 *       {@code EdgeNGramTokenizer.Side.FRONT} when absent</li>
 * </ul>
 *
 * @param args the configuration arguments, also forwarded to {@code super.init}
 * @throws NumberFormatException if a supplied gram size is not a parsable integer
 */
@Override
public void init(Map<String, String> args) {
  super.init(args);

  // The maximum is read before the minimum, so a malformed maximum is reported first.
  final String maxValue = args.get("maxGramSize");
  if (maxValue == null) {
    maxGramSize = EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE;
  } else {
    maxGramSize = Integer.parseInt(maxValue);
  }

  final String minValue = args.get("minGramSize");
  if (minValue == null) {
    minGramSize = EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE;
  } else {
    minGramSize = Integer.parseInt(minValue);
  }

  final String sideValue = args.get("side");
  side = (sideValue != null) ? sideValue : EdgeNGramTokenizer.Side.FRONT.getLabel();
}
/**
 * Creates an {@link EdgeNGramTokenizer} over the given reader, passing along this
 * factory's {@code side}, {@code minGramSize} and {@code maxGramSize} values.
 *
 * @param input the reader handed to the tokenizer's constructor
 * @return a newly constructed tokenizer
 */
@Override
public EdgeNGramTokenizer create(Reader input) {
  final EdgeNGramTokenizer tokenizer =
      new EdgeNGramTokenizer(input, side, minGramSize, maxGramSize);
  return tokenizer;
}
/**
 * Picks one of the two {@code EdgeNGramTokenizer.Side} constants based on a single
 * {@link Random#nextBoolean()} draw.
 *
 * @param random the source of randomness; exactly one boolean is consumed
 * @return {@code Side.FRONT} when the draw is {@code true}, {@code Side.BACK} otherwise
 */
@Override
public Object create(Random random) {
  if (random.nextBoolean()) {
    return EdgeNGramTokenizer.Side.FRONT;
  }
  return EdgeNGramTokenizer.Side.BACK;
}
/**
 * Builds the analysis components for a field: a single {@link EdgeNGramTokenizer}
 * reading from {@code reader}, configured with {@code Side.BACK}, a minimum gram
 * size of 10 and a maximum gram size of 20. No further filters are added.
 *
 * @param fieldName the field name; not used by this implementation
 * @param reader the reader handed to the tokenizer's constructor
 * @return components wrapping the newly created tokenizer
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  final EdgeNGramTokenizer tokenizer =
      new EdgeNGramTokenizer(reader, EdgeNGramTokenizer.Side.BACK, 10, 20);
  return new TokenStreamComponents(tokenizer);
}