Java class org.apache.lucene.analysis.ngram.NGramTokenFilter: example source code
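NGramTokenFilter splits every incoming token into all character n-grams whose length lies between a configurable minimum and maximum (the Lucene defaults, DEFAULT_MIN_NGRAM_SIZE and DEFAULT_MAX_NGRAM_SIZE, are 1 and 2). The snippets below show how various open-source projects wire the filter into analyzers and token-filter factories. As a minimal, self-contained sketch of the same pattern (the class name NGramTokenFilterDemo is made up for this sketch; it assumes a Lucene 5.x to 7.x style API, where tokenizers no longer take a Reader in the constructor and the three-argument NGramTokenFilter constructor used throughout these snippets is still available):

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class NGramTokenFilterDemo {
    public static void main(String[] args) throws Exception {
        // Emit the whole input as a single token, then split it into 2- to 3-character grams.
        KeywordTokenizer tokenizer = new KeywordTokenizer();
        tokenizer.setReader(new StringReader("lucene"));
        TokenStream stream = new NGramTokenFilter(tokenizer, 2, 3);

        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            // Prints lu, luc, uc, uce, ce, cen, en, ene, ne (emission order varies by Lucene version).
            System.out.println(term.toString());
        }
        stream.end();
        stream.close();
    }
}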

Project: Elasticsearch    File: EdgeNGramTokenFilterFactory.java
EdgeNGramTokenFilterFactory(Index index, Settings indexSettings, String name, Settings settings) {
    super(index, indexSettings, name, settings);
    this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
    this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
    this.side = parseSide(settings.get("side", "front"));
    this.esVersion = org.elasticsearch.Version.indexCreated(indexSettings);
}
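The "side" setting ("front" or "back", defaulting to "front" here) controls whether the edge n-grams are taken from the beginning or the end of each token, while min_gram and max_gram bound the gram lengths just as in the plain NGramTokenFilter.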
Project: Elasticsearch    File: NGramTokenFilterFactory.java
@SuppressWarnings("deprecation")
@Override
public TokenStream create(TokenStream tokenStream) {
    final Version version = this.version == Version.LUCENE_4_3 ? Version.LUCENE_4_4 : this.version; // we supported it since 4.3
    if (version.onOrAfter(Version.LUCENE_4_3)) {
        return new NGramTokenFilter(tokenStream, minGram, maxGram);
    } else {
        return new Lucene43NGramTokenFilter(tokenStream, minGram, maxGram);
    }
}
Project: NYBC    File: NGramFilterFactory.java
/** Initialize the n-gram min and max sizes from the factory arguments. */
@Override
public void init(Map<String, String> args) {
  super.init(args);
  String maxArg = args.get("maxGramSize");
  maxGramSize = (maxArg != null ? Integer.parseInt(maxArg)
      : NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);

  String minArg = args.get("minGramSize");
  minGramSize = (minArg != null ? Integer.parseInt(minArg)
      : NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
}
Project: spacewalk    File: NGramAnalyzer.java
/**
 * @param fieldName ignored
 * @param reader contains the data to tokenize
 * @return a TokenStream of n-grams
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    return new NGramTokenFilter(
            new LowerCaseFilter(
                new StandardFilter(
                    new StandardTokenizer(reader))), min_ngram, max_ngram);
}
Project: meresco-lucene    File: NGramAnalyzer.java
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new StandardTokenizer();
    TokenStream src = new LowerCaseFilter(source);
    src = new AddWordBoundaryFilter(src);
    NGramTokenFilter filter = new NGramTokenFilter(src, this.minShingleSize, this.maxShingleSize);
    return new TokenStreamComponents(source, filter);
}
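Judging by its name, the project's own AddWordBoundaryFilter inserts word-boundary markers before the n-gramming step, so grams anchored at token edges remain distinguishable (compare the explicit "^"/"$" wrapping in the Classifier example further down).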
Project: opensearchserver    File: NGramFilter.java
@Override
protected void initProperties() throws SearchLibException {
    super.initProperties();
    addProperty(ClassPropertyEnum.MIN_GRAM,
            Integer.toString(NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE),
            null, 10, 1);
    addProperty(ClassPropertyEnum.MAX_GRAM,
            Integer.toString(NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE),
            null, 10, 1);
}
Project: anycook-api    File: NGramAnalyzer.java
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer tokenizer = new LowerCaseTokenizer();
    TokenStream filter = new NGramTokenFilter(tokenizer, 1, 5);

    return new TokenStreamComponents(tokenizer, filter);
}
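With minGram 1 and maxGram 5, every lowercased token is expanded into all of its 1- to 5-character substrings, which supports partial and substring matching at the cost of a substantially larger index.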
Project: elasticsearch_my    File: EdgeNGramTokenFilterFactory.java
public EdgeNGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
    super(indexSettings, name, settings);
    this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
    this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
    this.side = parseSide(settings.get("side", "front"));
}
Project: elasticsearch_my    File: NGramTokenFilterFactory.java
public NGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
    super(indexSettings, name, settings);
    this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
    this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
}
Project: elasticsearch_my    File: NGramTokenFilterFactory.java
@Override
public TokenStream create(TokenStream tokenStream) {
    return new NGramTokenFilter(tokenStream, minGram, maxGram);
}
Project: Elasticsearch    File: NGramTokenFilterFactory.java
@Inject
public NGramTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
    this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
}
Project: fornamnsnamnklassificerare    File: Classifier.java
private String[] tokenize(String input) throws Exception {

    input = input.toLowerCase();

    NGramTokenFilter filter = new NGramTokenFilter(
        new KeywordTokenizer(new StringReader("^" + input + "$")),
        1, 4);

    CharTermAttribute charTermAttrib = filter.getAttribute(CharTermAttribute.class);

    filter.reset();

    Set<String> tokens = new LinkedHashSet<String>();

    while (filter.incrementToken()) {
      tokens.add(charTermAttrib.toString());
    }

    filter.end();   // signal end-of-stream before closing, per the TokenStream contract
    filter.close();

    return tokens.toArray(new String[tokens.size()]);
}
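For example, tokenize("Bob") lowercases the input to "bob", wraps it as "^bob$" so the caret and dollar sign act as word-boundary markers, and returns every distinct 1- to 4-character gram of that string ("^", "b", "o", "$", "^b", "bo", "ob", "b$", "^bo", "bob", "ob$", "^bob", "bob$"); the emission order depends on the Lucene version, and the LinkedHashSet removes duplicate grams either way.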
Project: NYBC    File: NGramFilterFactory.java
@Override
public NGramTokenFilter create(TokenStream input) {
  return new NGramTokenFilter(input, minGramSize, maxGramSize);
}
Project: opensearchserver    File: NGramFilter.java
@Override
public TokenStream create(TokenStream input) {
    return new NGramTokenFilter(input, min, max);
}
Project: t4f-data    File: NGramTest.java
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
    return new NGramTokenFilter(new KeywordTokenizer(reader), 2, 4);
}