Java 类org.apache.lucene.analysis.synonym.SynonymGraphFilter 实例源码

项目:information-retrieval-adventure    文件:EntradaSalidaTest.java   
/**
 * Assembles the analysis chain: a {@link StandardTokenizer} whose output is expanded by a
 * {@link SynonymGraphFilter} and then passed through a {@link FlattenGraphFilter}.
 *
 * @param s argument supplied by the analyzer framework; not used by this implementation
 * @return components pairing the tokenizer with the end of the filter chain
 */
@Override
protected Analyzer.TokenStreamComponents createComponents(String s) {
    final Tokenizer source = new StandardTokenizer();
    // NOTE(review): the third argument is presumably ignoreCase — confirm against the Lucene API.
    final TokenStream withSynonyms = new SynonymGraphFilter(source, synonymMap, true);
    final TokenStream flattened = new FlattenGraphFilter(withSynonyms);
    return new Analyzer.TokenStreamComponents(source, flattened);
}
项目:elasticsearch_my    文件:SynonymGraphTokenFilterFactory.java   
/**
 * Wraps the given stream in a {@link SynonymGraphFilter}, or returns it untouched when the
 * synonym map has no FST.
 *
 * @param tokenStream the upstream token stream
 * @return the synonym-expanding stream, or {@code tokenStream} itself when there are no synonyms
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    // A null FST means no synonyms were configured, so there is nothing to apply.
    if (synonymMap.fst == null) {
        return tokenStream;
    }
    return new SynonymGraphFilter(tokenStream, synonymMap, ignoreCase);
}
项目:information-retrieval-adventure    文件:SynonymGraphFilterTest.java   
/**
 * Tokenizes {@code input} on whitespace and wraps the result in a {@link SynonymGraphFilter}
 * backed by the map produced by {@code builder}.
 *
 * @param input the text to analyze
 * @return the synonym-expanding token stream; the caller must reset, consume and close it
 * @throws IOException if building the synonym map fails
 */
private static TokenStream getTokenStream(String input) throws IOException {
  final Tokenizer whitespaceTokens = new WhitespaceTokenizer();
  whitespaceTokens.setReader(new StringReader(input));

  final SynonymMap synonyms = builder.build();
  final TokenStream expanded = new SynonymGraphFilter(whitespaceTokens, synonyms, true);
  return expanded;
}
项目:information-retrieval-adventure    文件:EntradaSalida.java   
/**
 * Renders the token graph produced for {@code input} as a Graphviz {@code digraph}.
 *
 * <p>The input is split on whitespace and passed through a {@link SynonymGraphFilter} built from
 * {@code builder}. Each token is an arc: the PositionIncrementAttribute tells us how many
 * positions (nodes) ahead this arc starts from, while the PositionLengthAttribute (new as of
 * 3.6.0) tells us how many positions (nodes) ahead the arc arrives to.
 *
 * @param input the text to analyze
 * @return the DOT source describing the token graph
 * @throws IOException if building the synonym map or consuming the token stream fails
 */
private static String getGraph(String input) throws IOException {
  final Tokenizer inputStream = new WhitespaceTokenizer();
  inputStream.setReader(new StringReader(input));

  final StringBuilder b = new StringBuilder();
  b.append("digraph Automaton {\n");
  b.append("  initial [shape=plaintext,label=\"\"]\n");
  b.append("  initial -> 0\n");

  // try-with-resources guarantees the stream (and the tokenizer it wraps) is closed even when
  // reset()/incrementToken() throws; previously close() was skipped on failure.
  try (TokenStream tokenStream = new SynonymGraphFilter(inputStream, builder.build(), false)) {
    final PositionIncrementAttribute posIncAtt =
        tokenStream.addAttribute(PositionIncrementAttribute.class);
    final PositionLengthAttribute posLenAtt =
        tokenStream.addAttribute(PositionLengthAttribute.class);
    final CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
    tokenStream.reset();

    // Starts at -1 so that a first token with position increment 1 lands on node 0.
    int srcNode = -1;
    while (tokenStream.incrementToken()) {
      final int posInc = posIncAtt.getPositionIncrement();
      if (posInc != 0) {
        // A non-zero increment moves to a new source node; declare it once.
        srcNode += posInc;
        b.append("  ")
            .append(srcNode)
            .append(" [shape=circle,label=\"")
            .append(srcNode)
            .append("\"]\n");
      }
      final int destNode = srcNode + posLenAtt.getPositionLength();
      b.append("  ")
          .append(srcNode)
          .append(" -> ")
          .append(destNode)
          .append(" [label=\"")
          .append(termAtt)
          .append("\"")
          .append("]\n");
    }
    tokenStream.end();
  }

  b.append('}');
  return b.toString();
}