@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    // Per-thread holder for the tokenizer and the head of its filter chain.
    class SavedStreams {
        StandardTokenizer tokenStream;
        TokenStream filteredTokenStream;
    }
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
        // First call on this thread: build the full chain and cache it.
        streams = new SavedStreams();
        setPreviousTokenStream(streams);
        streams.tokenStream = new StandardTokenizer(LUCENE_VERSION, reader);
        streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
        streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
        streams.filteredTokenStream = new StopFilter(true, streams.filteredTokenStream, STOP_WORDS_SET);
        streams.filteredTokenStream = new ASCIIFoldingFilter(streams.filteredTokenStream);
    } else {
        // Subsequent calls: reuse the chain, just point the tokenizer at the new input.
        streams.tokenStream.reset(reader);
    }
    streams.tokenStream.setMaxTokenLength(DEFAULT_MAX_TOKEN_LENGTH);
    return streams.filteredTokenStream;
}
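A minimal consumption sketch for the chain above, assuming the Lucene 2.9/3.x-era API that reusableTokenStream comes from; the analyzer instance, field name, and input text are illustrative:

TokenStream ts = analyzer.reusableTokenStream("body", new StringReader("Héllo World"));
TermAttribute term = ts.addAttribute(TermAttribute.class);
while (ts.incrementToken()) {
    // After lowercasing and ASCII folding: "hello", then "world".
    System.out.println(term.term());
}
ts.end(); // finalize the stream state; it is reused on the next call rather than closed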
protected Query doToQuery(QueryShardContext context) throws IOException {
    // Analyzer analyzer = context.getMapperService().searchAnalyzer();
    Analyzer analyzer = new WhitespaceAnalyzer();
    try (TokenStream source = analyzer.tokenStream(fieldName, value.toString())) {
        // Lowercase the query-time tokens before building span-term clauses.
        CachingTokenFilter stream = new CachingTokenFilter(new LowerCaseFilter(source));
        TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
        if (termAtt == null) {
            // The stream exposes no term attribute, so there is nothing to match.
            return null;
        }
        List<CustomSpanTermQuery> clauses = new ArrayList<>();
        stream.reset();
        while (stream.incrementToken()) {
            // One span-term clause per analyzed token.
            Term term = new Term(fieldName, termAtt.getBytesRef());
            clauses.add(new CustomSpanTermQuery(term));
        }
        return new PhraseCountQuery(clauses.toArray(new CustomSpanTermQuery[clauses.size()]),
                slop, inOrder, weightedCount);
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing query text", e);
    }
}
@Override
public TokenStream create(TokenStream tokenStream) {
    if (lang == null) {
        return new LowerCaseFilter(tokenStream);
    } else if (lang.equalsIgnoreCase("greek")) {
        return new GreekLowerCaseFilter(tokenStream);
    } else if (lang.equalsIgnoreCase("irish")) {
        return new IrishLowerCaseFilter(tokenStream);
    } else if (lang.equalsIgnoreCase("turkish")) {
        return new TurkishLowerCaseFilter(tokenStream);
    } else {
        throw new IllegalArgumentException("language [" + lang + "] not supported for lower case");
    }
}
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final Tokenizer src = new WhitespaceTokenizer(matchVersion, reader);
    TokenStream tok = new LowerCaseFilter(matchVersion, src);
    return new TokenStreamComponents(src, tok);
}
/**
 * @param fieldName ignored parameter
 * @param reader contains the data to parse
 * @return TokenStream of n-grams
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    return new NGramTokenFilter(
            new LowerCaseFilter(
                    new StandardFilter(
                            new StandardTokenizer(reader))),
            min_ngram, max_ngram);
}
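A hypothetical usage sketch, assuming a Lucene 2.9-era release where the version-less constructors above coexist with the attribute-based consumption API; the enclosing class name MyNGramAnalyzer is illustrative:

TokenStream ts = new MyNGramAnalyzer().tokenStream("f", new StringReader("Foo"));
TermAttribute term = ts.addAttribute(TermAttribute.class);
while (ts.incrementToken()) {
    // With min_ngram = 2 and max_ngram = 3 this prints "fo", "oo", "foo"
    // (the grouping/order of the grams varies across Lucene versions).
    System.out.println(term.term());
}
ts.close();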
public TokenStream tokenStream(String fieldName, Reader reader) {
    // Each filter takes the same Version as the tokenizer, as required by the
    // Lucene 4.x constructors that match Version.LUCENE_41.
    TokenStream result = new SynonymFilter(
            new StopFilter(Version.LUCENE_41,
                    new LowerCaseFilter(Version.LUCENE_41,
                            new StandardFilter(Version.LUCENE_41,
                                    new StandardTokenizer(Version.LUCENE_41, reader))),
                    StopAnalyzer.ENGLISH_STOP_WORDS_SET),
            engine);
    return result;
}
public TokenStream tokenStream(String fieldname, Reader reader) {
    return new LatCyrFilter(
            new LowerCaseFilter(
                    new WhitespaceTokenizer(reader)));
    // Alternative: new StandardTokenizer(reader) instead of WhitespaceTokenizer
}
@NotNull
private static TokenFilter defaultTokenFilter(@NotNull final Tokenizer source) {
    final TokenFilter filteredSource = new LowerCaseFilter(source);
    // Split on letter/number transitions and emit both the word parts and the number parts.
    return new WordDelimiterGraphFilter(filteredSource,
            SPLIT_ON_NUMERICS | GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS,
            null);
}
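A short demo of what those flags do, sketched against a Lucene 7+-style API from inside the same class; the input string and tokenizer choice are illustrative:

Tokenizer src = new WhitespaceTokenizer();
src.setReader(new StringReader("POWERshot500"));
TokenStream ts = defaultTokenFilter(src);
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
ts.reset();
while (ts.incrementToken()) {
    // LowerCaseFilter yields "powershot500"; SPLIT_ON_NUMERICS then breaks it
    // into the word part "powershot" and the number part "500".
    System.out.println(term);
}
ts.end();
ts.close();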
/** Construct the rewriter */
public StdTermFilter() {
    dribble = new DribbleStream();
    filter = new StandardFilter(new LowerCaseFilter(dribble));
}
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
    return new LowerCaseFilter(LUCENE_VERSION,
            new WhitespaceTokenizer(LUCENE_VERSION, reader));
}
public TokenStream tokenStream(String fieldName, Reader reader) {
    return new StopFilter(true,
            new LowerCaseFilter(new LetterTokenizer(reader)),
            stopWords);
}
/**
 * Ordering mistake here: the StopFilter runs before the LowerCaseFilter,
 * so an input stop word with uppercase letters (e.g. "The") never matches
 * a lowercase stop word set. Compare the previous example, which
 * lowercases first.
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    return new LowerCaseFilter(
            new StopFilter(true, new LetterTokenizer(reader), stopWords));
}
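A hypothetical demonstration of the bug, assuming the same 2.9/3.x-era API as above; the input text and stop word set are illustrative:

TokenStream broken = new LowerCaseFilter(
        new StopFilter(true,
                new LetterTokenizer(new StringReader("The Cat")),
                StopFilter.makeStopSet(new String[] {"the"})));
TermAttribute term = broken.addAttribute(TermAttribute.class);
while (broken.incrementToken()) {
    // Prints "the" and then "cat": the StopFilter saw "The", which does not
    // match "the", so the stop word slipped through. With the corrected
    // ordering of the previous example, only "cat" would be emitted.
    System.out.println(term.term());
}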