/**
 * Produces the token(s) for the query part of the URL.
 *
 * <p>When query tokenization is enabled the query string is split on the
 * configured separator pattern; otherwise the entire query is emitted as a
 * single token spanning its position in the raw URL.
 */
private List<Token> getQueryTokens(String url, String partStringRaw, String partString) throws IOException {
    // Offset of the query within the full URL, used for token start/end offsets.
    final int start = getStartIndex(url, partStringRaw);
    if (tokenizeQuery) {
        // Split the query on the separator pattern and emit one token per piece.
        return tokenize(URLPart.QUERY, addReader(new PatternTokenizer(QUERY_SEPARATOR, -1), new StringReader(partString)), start);
    }
    // Tokenization disabled: the whole query becomes one token.
    final int end = getEndIndex(start, partStringRaw);
    return Collections.singletonList(new Token(partString, URLPart.QUERY, start, end));
}
/**
 * Creates a {@link Tokenizer} that splits the given input using the
 * configured pattern and capture group.
 *
 * @param in the character stream to tokenize
 * @return a new {@code PatternTokenizer} over {@code in}
 */
@Override
public Tokenizer create(final Reader in) {
    final Tokenizer tokenizer;
    try {
        tokenizer = new PatternTokenizer(in, pattern, group);
    } catch (IOException e) {
        // Factory contract does not allow checked exceptions; wrap and preserve the cause.
        throw new RuntimeException("IOException thrown creating PatternTokenizer instance", e);
    }
    return tokenizer;
}
/**
 * Demo: indexes a single comma-separated document with a pattern-splitting
 * analyzer, then dumps every indexed term to stdout.
 *
 * <p>Fix: the original closed only the {@code IndexWriter}; the
 * {@code IndexReader} and {@code Directory} were leaked. All three are now
 * managed with try-with-resources so they are closed even on exceptions.
 *
 * @param args unused
 * @throws IOException if indexing or reading the index fails
 */
public static void main(String[] args) throws IOException {
    // Analyzer that splits input on commas (group -1 = use pattern as delimiter).
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new PatternTokenizer(Pattern.compile("\\,"), -1);
            return new TokenStreamComponents(source);
        }
    };
    try (Directory dir = new RAMDirectory()) {
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        // Writer must be closed (committing the index) before the reader opens.
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            Document doc = new Document();
            doc.add(new TextField("text", "Age 6, Age 7, Age 8", Field.Store.YES));
            writer.addDocument(doc);
        }
        try (IndexReader reader = DirectoryReader.open(dir)) {
            final Fields fields = MultiFields.getFields(reader);
            final Iterator<String> iterator = fields.iterator();
            while (iterator.hasNext()) {
                final String field = iterator.next();
                final Terms terms = MultiFields.getTerms(reader, field);
                final TermsEnum it = terms.iterator();
                BytesRef term = it.next();
                while (term != null) {
                    System.out.println(term.utf8ToString());
                    term = it.next();
                }
            }
        }
    }
}
/**
 * Creates a {@link Tokenizer} backed by the configured pattern and
 * capture group.
 *
 * @return a new {@code PatternTokenizer}
 */
@Override
public Tokenizer create() {
    final PatternTokenizer tokenizer = new PatternTokenizer(pattern, group);
    return tokenizer;
}