Java class org.apache.lucene.analysis.pattern.PatternTokenizer — example usages from open-source projects

Project: elasticsearch-analysis-url    File: URLTokenizer.java
/**
 * Produces the tokens for the query portion of a URL.
 *
 * <p>When query tokenization is enabled, the query string is split on the
 * configured separator pattern; otherwise the entire query is emitted as a
 * single {@code Token}. Offsets are computed relative to the full URL.
 *
 * @param url the complete URL the query was extracted from
 * @param partStringRaw the raw (un-decoded) query text, used for offset lookup
 * @param partString the query text to tokenize
 * @return the list of query tokens
 * @throws IOException if tokenization fails
 */
private List<Token> getQueryTokens(String url, String partStringRaw, String partString) throws IOException {
    // Where the query part begins inside the full URL.
    final int startOffset = getStartIndex(url, partStringRaw);
    if (tokenizeQuery) {
        // Split the query on the separator pattern and tokenize each piece.
        final PatternTokenizer splitter = new PatternTokenizer(QUERY_SEPARATOR, -1);
        return tokenize(URLPart.QUERY, addReader(splitter, new StringReader(partString)), startOffset);
    }
    // Tokenization disabled: the whole query becomes one token.
    final int endOffset = getEndIndex(startOffset, partStringRaw);
    return Collections.singletonList(new Token(partString, URLPart.QUERY, startOffset, endOffset));
}
Project: NYBC    File: PatternTokenizerFactory.java
/**
 * Creates a tokenizer that splits the given reader's input using the
 * configured pattern and capture group.
 *
 * @param in the character stream to tokenize
 * @return a new {@link PatternTokenizer} over {@code in}
 */
@Override
public Tokenizer create(final Reader in) {
  try {
    return new PatternTokenizer(in, pattern, group);
  } catch (final IOException cause) {
    // The factory contract does not declare IOException, so surface it unchecked.
    throw new RuntimeException("IOException thrown creating PatternTokenizer instance", cause);
  }
}
Project: information-retrieval-adventure    File: CommaTokenizer.java
/**
 * Demo: indexes a comma-separated string with a {@link PatternTokenizer}
 * splitting on commas, then prints every indexed term to stdout.
 *
 * <p>Fixes over the original: the {@code IndexWriter} is now closed via
 * try-with-resources (the manual {@code close()} leaked on exception), and
 * the {@code IndexReader} — previously never closed at all — is closed the
 * same way.
 *
 * @param args unused
 * @throws IOException if indexing or reading the index fails
 */
public static void main(String[] args) throws IOException {

    // In-memory index for the demonstration.
    Directory dir = new RAMDirectory();
    Analyzer analyzer =
        new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName) {
            // Split field text on commas; everything between commas is one token.
            Tokenizer source = new PatternTokenizer(Pattern.compile("\\,"), -1);
            return new TokenStreamComponents(source);
          }
        };
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // try-with-resources guarantees the writer commits/closes even if addDocument throws.
    try (IndexWriter writer = new IndexWriter(dir, iwc)) {
      Document doc = new Document();
      doc.add(new TextField("text", "Age 6, Age 7, Age 8", Field.Store.YES));
      writer.addDocument(doc);
    }

    // The reader was previously leaked; close it deterministically as well.
    try (IndexReader reader = DirectoryReader.open(dir)) {
      final Fields fields = MultiFields.getFields(reader);
      final Iterator<String> iterator = fields.iterator();

      // Walk every field and dump each of its terms.
      while (iterator.hasNext()) {
        final String field = iterator.next();
        final Terms terms = MultiFields.getTerms(reader, field);
        final TermsEnum it = terms.iterator();
        BytesRef term = it.next();
        while (term != null) {
          System.out.println(term.utf8ToString());
          term = it.next();
        }
      }
    }
  }
Project: elasticsearch_my    File: PatternTokenizerFactory.java
/**
 * Builds a tokenizer driven by this factory's configured pattern and group.
 *
 * @return a freshly constructed {@link PatternTokenizer}
 */
@Override
public Tokenizer create() {
    final Tokenizer tokenizer = new PatternTokenizer(pattern, group);
    return tokenizer;
}
Project: Elasticsearch    File: PatternTokenizerFactory.java
/**
 * Creates a new pattern-splitting tokenizer from the configured
 * {@code pattern} and capture {@code group}.
 *
 * @return a new {@link PatternTokenizer} instance
 */
@Override
public Tokenizer create() {
    // Delegate straight to the PatternTokenizer constructor.
    return new PatternTokenizer(this.pattern, this.group);
}