/**
 * Assembles the analysis chain: a {@link StandardTokenizer} whose output is expanded by a
 * {@link SynonymGraphFilter} over {@code synonymMap} and then passed through a
 * {@link FlattenGraphFilter}.
 *
 * @param s the name handed in by the analyzer framework; not consulted by this implementation
 * @return components pairing the tokenizer (source) with the flattened synonym stream (sink)
 */
@Override
protected Analyzer.TokenStreamComponents createComponents(String s) {
  final Tokenizer source = new StandardTokenizer();
  // The literal `true` is the filter's third constructor argument (ignoreCase).
  final TokenStream flattened =
      new FlattenGraphFilter(new SynonymGraphFilter(source, synonymMap, true));
  return new Analyzer.TokenStreamComponents(source, flattened);
}
@Override public TokenStream create(TokenStream tokenStream) { // fst is null means no synonyms return synonymMap.fst == null ? tokenStream : new SynonymGraphFilter(tokenStream, synonymMap, ignoreCase); }
/**
 * Creates a synonym-expanding token stream over {@code input}.
 *
 * <p>The text is split by a {@link WhitespaceTokenizer} and fed to a {@link SynonymGraphFilter}
 * built from {@code builder.build()}, with {@code true} as the filter's third (ignoreCase)
 * argument. The returned stream has not been reset; the caller is responsible for consuming and
 * closing it.
 *
 * @param input the text to tokenize
 * @return the synonym graph filter wrapping the tokenizer
 * @throws IOException declared by this method; the visible call that can raise it is
 *     {@code builder.build()}
 */
private static TokenStream getTokenStream(String input) throws IOException {
  final Tokenizer whitespaceSource = new WhitespaceTokenizer();
  final StringReader text = new StringReader(input);
  whitespaceSource.setReader(text);
  return new SynonymGraphFilter(whitespaceSource, builder.build(), true);
}
/** * Now the graph is more interesting! For each token (arc), the PositionIncrementAttribute tells * us how many positions (nodes) ahead this arc starts from, while the new (as of 3.6.0) * PositionLengthAttribute tells us how many positions (nodes) ahead the arc arrives to. */ private static String getGraph(String input) throws IOException { final Tokenizer inputStream = new WhitespaceTokenizer(); inputStream.setReader(new StringReader(input)); // final TokenStream inputStream = new LowerCaseFilter(in); TokenStream tokenStream = new SynonymGraphFilter(inputStream, builder.build(), false); PositionIncrementAttribute posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class); PositionLengthAttribute posLenAtt = tokenStream.addAttribute(PositionLengthAttribute.class); CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class); tokenStream.reset(); int srcNode = -1; int destNode; StringBuilder b = new StringBuilder(); b.append("digraph Automaton {\n"); b.append(" initial [shape=plaintext,label=\"\"]\n"); b.append(" initial -> 0\n"); while (tokenStream.incrementToken()) { int posInc = posIncAtt.getPositionIncrement(); if (posInc != 0) { srcNode += posInc; b.append(" "); b.append(srcNode); b.append(" [shape=circle,label=\"" + srcNode + "\"]\n"); } destNode = srcNode + posLenAtt.getPositionLength(); b.append(" "); b.append(srcNode); b.append(" -> "); b.append(destNode); b.append(" [label=\""); b.append(termAtt); b.append("\""); b.append("]\n"); } tokenStream.end(); tokenStream.close(); b.append('}'); return b.toString(); }