Example source code for the Java class org.apache.lucene.analysis.standard.StandardFilter
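
StandardFilter normalizes the token stream produced by StandardTokenizer. In Lucene releases before 3.1 it stripped trailing possessive "'s" and dots inside acronyms; from 3.1 on it is effectively a pass-through kept for API compatibility, and it was removed entirely in Lucene 8. The snippets below, collected from various open-source projects, show the two constructor forms found in the wild: StandardFilter(TokenStream) and the older StandardFilter(Version, TokenStream).

Before the project listings, a minimal self-contained sketch of direct usage; this assumes a Lucene 5.x or 6.x dependency, and the class name and sample text are illustrative:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class StandardFilterDemo {
    public static void main(String[] args) throws IOException {
        StandardTokenizer tokenizer = new StandardTokenizer();
        tokenizer.setReader(new StringReader("StandardFilter passes tokens through"));

        // Since Lucene 3.1 StandardFilter performs no changes of its own;
        // it survives in analysis chains mostly for backward compatibility.
        TokenStream stream = new StandardFilter(tokenizer);
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);

        stream.reset();                     // required before incrementToken()
        while (stream.incrementToken()) {
            System.out.println(term.toString());
        }
        stream.end();
        stream.close();
    }
}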

Project: airsonic    File: SearchService.java
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    class SavedStreams {
        StandardTokenizer tokenStream;
        TokenStream filteredTokenStream;
    }

    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
        streams = new SavedStreams();
        setPreviousTokenStream(streams);
        streams.tokenStream = new StandardTokenizer(LUCENE_VERSION, reader);
        streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
        streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
        streams.filteredTokenStream = new StopFilter(true, streams.filteredTokenStream, STOP_WORDS_SET);
        streams.filteredTokenStream = new ASCIIFoldingFilter(streams.filteredTokenStream);
    } else {
        streams.tokenStream.reset(reader);
    }
    streams.tokenStream.setMaxTokenLength(DEFAULT_MAX_TOKEN_LENGTH);

    return streams.filteredTokenStream;
}
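
The SavedStreams pattern above is the Lucene 3.x way of reusing an analysis chain: Analyzer's protected getPreviousTokenStream()/setPreviousTokenStream() cache one chain per thread, so later calls only reset(reader) the existing tokenizer instead of rebuilding the whole filter stack. Several forks below repeat this method verbatim. For context, a hedged sketch of the kind of Analyzer subclass that hosts such a method; the class name and constant values are illustrative, not taken from the airsonic source:

import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.ASCIIFoldingFilter;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;

public class CustomSearchAnalyzer extends Analyzer {
    private static final Version LUCENE_VERSION = Version.LUCENE_30;
    private static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
    private static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;

    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
        // Non-reusable fallback: build a fresh chain on every call.
        StandardTokenizer tokenizer = new StandardTokenizer(LUCENE_VERSION, reader);
        tokenizer.setMaxTokenLength(DEFAULT_MAX_TOKEN_LENGTH);
        TokenStream result = new StandardFilter(tokenizer);
        result = new LowerCaseFilter(result);
        result = new StopFilter(true, result, STOP_WORDS_SET);
        return new ASCIIFoldingFilter(result);
    }

    // The reusableTokenStream(...) shown above would be added here, using the
    // protected getPreviousTokenStream()/setPreviousTokenStream() helpers.
}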
Project: subsonic    File: SearchService.java
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    class SavedStreams {
        StandardTokenizer tokenStream;
        TokenStream filteredTokenStream;
    }

    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
        streams = new SavedStreams();
        setPreviousTokenStream(streams);
        streams.tokenStream = new StandardTokenizer(LUCENE_VERSION, reader);
        streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
        streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
        streams.filteredTokenStream = new StopFilter(true, streams.filteredTokenStream, STOP_WORDS_SET);
        streams.filteredTokenStream = new ASCIIFoldingFilter(streams.filteredTokenStream);
    } else {
        streams.tokenStream.reset(reader);
    }
    streams.tokenStream.setMaxTokenLength(DEFAULT_MAX_TOKEN_LENGTH);

    return streams.filteredTokenStream;
}
Project: FutureSonic-Server    File: SearchService.java
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    class SavedStreams {
        StandardTokenizer tokenStream;
        TokenStream filteredTokenStream;
    }

    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
        streams = new SavedStreams();
        setPreviousTokenStream(streams);
        streams.tokenStream = new StandardTokenizer(LUCENE_VERSION, reader);
        streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
        streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
        streams.filteredTokenStream = new StopFilter(true, streams.filteredTokenStream, STOP_WORDS_SET);
        streams.filteredTokenStream = new ASCIIFoldingFilter(streams.filteredTokenStream);
    } else {
        streams.tokenStream.reset(reader);
    }
    streams.tokenStream.setMaxTokenLength(DEFAULT_MAX_TOKEN_LENGTH);

    return streams.filteredTokenStream;
}
Project: madsonic-server-5.1    File: SearchService.java
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    class SavedStreams {
        StandardTokenizer tokenStream;
        TokenStream filteredTokenStream;
    }

    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
        streams = new SavedStreams();
        setPreviousTokenStream(streams);
        streams.tokenStream = new StandardTokenizer(LUCENE_VERSION, reader);
        streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
        streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
        streams.filteredTokenStream = new StopFilter(true, streams.filteredTokenStream, STOP_WORDS_SET);
        streams.filteredTokenStream = new ASCIIFoldingFilter(streams.filteredTokenStream);
    } else {
        streams.tokenStream.reset(reader);
    }
    streams.tokenStream.setMaxTokenLength(DEFAULT_MAX_TOKEN_LENGTH);

    return streams.filteredTokenStream;
}
Project: lucene-korean    File: KoreanAnalyzerTest.java
public void testStandardTokenizer() throws Exception {

    // The first sample sentence is dead code in the original (immediately
    // overwritten below), so it is kept here as a comment.
    // String source = "우리나라라면에서부터 일본라면이 파생되었잖니?";
    String source = "너는 너는 다시 내게 돌아 올거야. school is a good place 呵呵大笑 呵呵大笑";

    long start = System.currentTimeMillis();

    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    TokenStream stream = analyzer.tokenStream("s", new StringReader(source));
    TokenStream tok = new StandardFilter(Version.LUCENE_36, stream);

    // Attribute instances are shared along the filter chain, so they can be
    // fetched once before the loop; only the term attribute is printed here.
    CharTermAttribute termAttr = tok.getAttribute(CharTermAttribute.class);
    OffsetAttribute offAttr = tok.getAttribute(OffsetAttribute.class);
    PositionIncrementAttribute posAttr = tok.getAttribute(PositionIncrementAttribute.class);
    TypeAttribute typeAttr = tok.getAttribute(TypeAttribute.class);

    tok.reset();
    while (tok.incrementToken()) {
        System.out.println(new String(termAttr.buffer(), 0, termAttr.length()));
    }
    tok.end();
    tok.close();

    System.out.println((System.currentTimeMillis() - start) + "ms");
}
Project: madsonic-server-5.0    File: SearchService.java
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    class SavedStreams {
        StandardTokenizer tokenStream;
        TokenStream filteredTokenStream;
    }

    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
        streams = new SavedStreams();
        setPreviousTokenStream(streams);
        streams.tokenStream = new StandardTokenizer(LUCENE_VERSION, reader);
        streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
        streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
        streams.filteredTokenStream = new StopFilter(true, streams.filteredTokenStream, STOP_WORDS_SET);
        streams.filteredTokenStream = new ASCIIFoldingFilter(streams.filteredTokenStream);
    } else {
        streams.tokenStream.reset(reader);
    }
    streams.tokenStream.setMaxTokenLength(DEFAULT_MAX_TOKEN_LENGTH);

    return streams.filteredTokenStream;
}
Project: miru    File: TermTokenizer.java
public List<String> tokenize(Analyzer analyzer, String data) {
    List<String> terms = Lists.newArrayList();
    // try-with-resources closes the stream even if tokenization throws.
    try (TokenStream tokens = new StandardFilter(analyzer.tokenStream(null, new StringReader(data)))) {
        tokens.reset();
        CharTermAttribute termAttribute = tokens.getAttribute(CharTermAttribute.class);

        while (tokens.incrementToken()) {
            String term = trimToNull(termAttribute.toString());
            if (term != null) {
                terms.add(term);
            }
        }

        tokens.end();
    } catch (IOException ioe) {
        LOG.warn("Unable to tokenize data. cause: {}", new Object[] { ioe.getMessage() }, ioe);
    }
    return terms;
}
Project: debop4j    File: KoreanAnalyzerTest.java
public void testStandardTokenizer() throws Exception {

    // The first sample sentence is dead code in the original (immediately
    // overwritten below), so it is kept here as a comment.
    // String source = "우리나라라면에서부터 일본라면이 파생되었잖니?";
    String source = "너는 너는 다시 내게 돌아 올거야. school is a good place 呵呵大笑 呵呵大笑";

    long start = System.currentTimeMillis();

    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    TokenStream stream = analyzer.tokenStream("s", new StringReader(source));
    TokenStream tok = new StandardFilter(Version.LUCENE_36, stream);

    // Attribute instances are shared along the filter chain, so they can be
    // fetched once before the loop; only the term attribute is printed here.
    CharTermAttribute termAttr = tok.getAttribute(CharTermAttribute.class);
    OffsetAttribute offAttr = tok.getAttribute(OffsetAttribute.class);
    PositionIncrementAttribute posAttr = tok.getAttribute(PositionIncrementAttribute.class);
    TypeAttribute typeAttr = tok.getAttribute(TypeAttribute.class);

    tok.reset();
    while (tok.incrementToken()) {
        System.out.println(new String(termAttr.buffer(), 0, termAttr.length()));
    }
    tok.end();
    tok.close();

    System.out.println((System.currentTimeMillis() - start) + "ms");
}
Project: lucene-analyzer-ik    File: IKAnalyzer.java
@Override
protected TokenStreamComponents createComponents(String fieldName) {

//      Tokenizer _IKTokenizer = new IKTokenizer(in , this.useSmart());
    final Tokenizer src = new IKTokenizer(new StringReader(""), this.useSmart());
    TokenStream tok = new StandardFilter(src);
    // The setReader override below only delegates to super, so a plain
    // TokenStreamComponents(src, tok) would behave identically.
    return new TokenStreamComponents(src, tok) {
        @Override
        protected void setReader(final Reader reader) {
            super.setReader(reader);
        }
    };
}
Project: fastcatsearch3    File: AutocompleteAnalyzer.java
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final AutocompleteTokenizer tokenizer = new AutocompleteTokenizer(reader);

    TokenFilter filter = new StandardFilter(tokenizer);

    return new TokenStreamComponents(tokenizer, filter);
}
Project: spacewalk    File: NGramAnalyzer.java
/**
 * @param fieldName ignored parameter
 * @param reader contains the data to parse
 * @return a TokenStream of n-grams
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    return new NGramTokenFilter(
            new LowerCaseFilter(
                new StandardFilter(
                    new StandardTokenizer(reader))), min_ngram, max_ngram);
}
Project: solarie    File: DiarienummerAnalyzer.java
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {

  Version matchVersion = Version.LUCENE_45;

  final Tokenizer source = new WhitespaceTokenizer(matchVersion, reader);
  TokenStream result = new StandardFilter(matchVersion, source);
  result = new DiarienummerTokenFilter(result);
  return new TokenStreamComponents(source, result);
}
Project: fastcatsearch    File: AutocompleteAnalyzer.java
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final AutocompleteTokenizer tokenizer = new AutocompleteTokenizer(reader);

    TokenFilter filter = new StandardFilter(tokenizer);

    return new TokenStreamComponents(tokenizer, filter);
}
Project: metka    File: DefaultAnalyzer.java
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer source = new StandardTokenizer(LuceneConfig.USED_VERSION, reader);
    TokenStream result = new StandardFilter(LuceneConfig.USED_VERSION, source);
    TokenStreamComponents components = new TokenStreamComponents(source, result);
    return components;
}
Project: t4f-data    File: SynonymAnalyzer.java
public TokenStream tokenStream(String fieldName, Reader reader) {
  TokenStream result = new SynonymFilter(
                        new StopFilter(true,
                          new LowerCaseFilter(
                            new StandardFilter(
                              new StandardTokenizer(
                               Version.LUCENE_41, reader))),
                          StopAnalyzer.ENGLISH_STOP_WORDS_SET),
                        engine
                       );
  return result;
}
Project: elasticsearch_my    File: StandardTokenFilterFactory.java
@Override
public TokenStream create(TokenStream tokenStream) {
    return new StandardFilter(tokenStream);
}
Project: lams    File: StandardFilterFactory.java
@Override
public StandardFilter create(TokenStream input) {
  return new StandardFilter(input);
}
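
Factory classes such as this exist so that the filter can be referenced by name from configuration (for example a Solr schema or Elasticsearch analysis settings). With Lucene's CustomAnalyzer builder, available from Lucene 5 and usable in versions where StandardFilter still exists, the factory is resolved through SPI under the name "standard". A hedged sketch; the class name is illustrative:

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.custom.CustomAnalyzer;

public class FactoryLookupDemo {
    public static Analyzer build() throws IOException {
        // Each name is resolved via SPI to a factory class, e.g. the
        // "standard" token filter maps to StandardFilterFactory.
        return CustomAnalyzer.builder()
                .withTokenizer("standard")
                .addTokenFilter("standard")
                .addTokenFilter("lowercase")
                .build();
    }
}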
Project: Elasticsearch    File: StandardTokenFilterFactory.java
@Override
public TokenStream create(TokenStream tokenStream) {
    return new StandardFilter(tokenStream);
}
Project: LuceneDB    File: NgramAnalyzer.java
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new NGramTokenizer(n, n);
    TokenStream result = new StandardFilter(source);
    return new TokenStreamComponents(source, result);
}
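
Note the single-argument createComponents(String) in the snippet above: from Lucene 5 onward the analyzer no longer receives a Reader, and the framework supplies one to the tokenizer via setReader(...) when the analyzer is used. A hedged sketch of a complete analyzer in that style; the class name and the gram size 3 are illustrative:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;

public class TrigramAnalyzer extends Analyzer {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        // The tokenizer is created without a Reader; Lucene hands it one
        // through setReader(...) at analysis time.
        Tokenizer source = new NGramTokenizer(3, 3);
        TokenStream result = new StandardFilter(source);
        return new TokenStreamComponents(source, result);
    }
}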
Project: search    File: StandardFilterFactory.java
@Override
public StandardFilter create(TokenStream input) {
  return new StandardFilter(input);
}
Project: NYBC    File: StandardFilterFactory.java
@Override
public StandardFilter create(TokenStream input) {
  return new StandardFilter(luceneMatchVersion, input);
}
Project: dash-xtf    File: StdTermFilter.java
/** Construct the rewriter */
public StdTermFilter() {
  dribble = new DribbleStream();
  filter = new StandardFilter(new LowerCaseFilter(dribble));
}
Project: read-open-source-code    File: StandardFilterFactory.java
@Override
public StandardFilter create(TokenStream input) {
  return new StandardFilter(luceneMatchVersion, input);
}
Project: read-open-source-code    File: StandardFilterFactory.java
@Override
public StandardFilter create(TokenStream input) {
  return new StandardFilter(input);
}
Project: Maskana-Gestor-de-Conocimiento    File: StandardFilterFactory.java
@Override
public StandardFilter create(TokenStream input) {
  return new StandardFilter(luceneMatchVersion, input);
}
Project: fastcatsearch3    File: NGramWordAnalyzer.java
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {

    final NGramWordTokenizer tokenizer = new NGramWordTokenizer(reader, 2, 3);

    TokenFilter filter = new StandardFilter(tokenizer);

    return new TokenStreamComponents(tokenizer, filter);
}
Project: search    File: MorfologikAnalyzer.java
/**
 * Creates a
 * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 * which tokenizes all the text in the provided {@link Reader}.
 * 
 * @param field ignored field name
 * @param reader source of tokens
 * @return A {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 *         built from a {@link StandardTokenizer} filtered with
 *         {@link StandardFilter} and {@link MorfologikFilter}.
 */
@Override
protected TokenStreamComponents createComponents(final String field, final Reader reader) {
  final Tokenizer src = new StandardTokenizer(getVersion(), reader);

  return new TokenStreamComponents(
      src, 
      new MorfologikFilter(new StandardFilter(getVersion(), src), dictionary, getVersion()));
}
Project: NYBC    File: MorfologikAnalyzer.java
/**
 * Creates a
 * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 * which tokenizes all the text in the provided {@link Reader}.
 * 
 * @param field ignored field name
 * @param reader source of tokens
 * 
 * @return A
 *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 *         built from a {@link StandardTokenizer} filtered with
 *         {@link StandardFilter} and {@link MorfologikFilter}.
 */
@Override
protected TokenStreamComponents createComponents(final String field, final Reader reader) {
  final Tokenizer src = new StandardTokenizer(this.version, reader);

  return new TokenStreamComponents(
    src,
    new MorfologikFilter(new StandardFilter(this.version, src), this.dictionary, this.version));
}
Project: read-open-source-code    File: MorfologikAnalyzer.java
/**
 * Creates a
 * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 * which tokenizes all the text in the provided {@link Reader}.
 * 
 * @param field ignored field name
 * @param reader source of tokens
 * 
 * @return A
 *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 *         built from a {@link StandardTokenizer} filtered with
 *         {@link StandardFilter} and {@link MorfologikFilter}.
 */
@Override
protected TokenStreamComponents createComponents(final String field, final Reader reader) {
  final Tokenizer src = new StandardTokenizer(this.version, reader);

  return new TokenStreamComponents(
      src, 
      new MorfologikFilter(new StandardFilter(this.version, src), this.version));
}
Project: fastcatsearch    File: NGram15WordAnalyzer.java
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {

    final NGramWordTokenizer tokenizer = new NGramWordTokenizer(reader, 1, 5, true);

    TokenFilter filter = new StandardFilter(tokenizer);

    return new TokenStreamComponents(tokenizer, filter);
}
Project: fastcatsearch    File: ByGramWordAnalyzer.java
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {

    final NGramWordTokenizer tokenizer = new NGramWordTokenizer(reader, 2, 2, true);

    TokenFilter filter = new StandardFilter(tokenizer);

    return new TokenStreamComponents(tokenizer, filter);
}
Project: fastcatsearch    File: TriGramWordAnalyzer.java
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {

    final NGramWordTokenizer tokenizer = new NGramWordTokenizer(reader, 3, 3, true);

    TokenFilter filter = new StandardFilter(tokenizer);

    return new TokenStreamComponents(tokenizer, filter);
}
Project: fastcatsearch    File: NGramWordAnalyzer.java
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {

    final NGramWordTokenizer tokenizer = new NGramWordTokenizer(reader, 2, 3);

    TokenFilter filter = new StandardFilter(tokenizer);

    return new TokenStreamComponents(tokenizer, filter);
}
Project: fastcatsearch    File: FiveGramWordAnalyzer.java
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {

    final NGramWordTokenizer tokenizer = new NGramWordTokenizer(reader, 5, 5, true);

    TokenFilter filter = new StandardFilter(tokenizer);

    return new TokenStreamComponents(tokenizer, filter);
}
Project: fastcatsearch    File: FourGramWordAnalyzer.java
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {

    final NGramWordTokenizer tokenizer = new NGramWordTokenizer(reader, 4, 4, true);

    TokenFilter filter = new StandardFilter(tokenizer);

    return new TokenStreamComponents(tokenizer, filter);
}
Project: Maskana-Gestor-de-Conocimiento    File: MorfologikAnalyzer.java
/**
 * Creates a
 * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 * which tokenizes all the text in the provided {@link Reader}.
 * 
 * @param field ignored field name
 * @param reader source of tokens
 * 
 * @return A
 *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 *         built from a {@link StandardTokenizer} filtered with
 *         {@link StandardFilter} and {@link MorfologikFilter}.
 */
@Override
protected TokenStreamComponents createComponents(final String field, final Reader reader) {
  final Tokenizer src = new StandardTokenizer(this.version, reader);

  return new TokenStreamComponents(
      src, 
      new MorfologikFilter(new StandardFilter(this.version, src), this.version));
}