Example source code for the Java class org.apache.lucene.analysis.synonym.SynonymFilter
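
The snippets below are collected from several open-source projects and all follow the same recurring pattern: build a SynonymMap with SynonymMap.Builder, then wrap a TokenStream in a SynonymFilter. As a minimal quick-start, here is a hedged sketch that is not taken from any of the projects below; the class name, input text and synonym rule are illustrative, and the no-arg WhitespaceTokenizer constructor assumes Lucene 5.x or later (where SynonymFilter is still available):

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;

public class SynonymFilterQuickStart {
    public static void main(String[] args) throws Exception {
        // Map "uk" to the multi-word synonym "united kingdom", keeping the original token.
        SynonymMap.Builder builder = new SynonymMap.Builder(true);   // true = dedup duplicate rules
        CharsRefBuilder multiWord = new CharsRefBuilder();
        SynonymMap.Builder.join(new String[]{"united", "kingdom"}, multiWord);
        builder.add(new CharsRef("uk"), multiWord.get(), true);      // true = also emit the original token
        SynonymMap map = builder.build();

        Tokenizer tok = new WhitespaceTokenizer();
        tok.setReader(new StringReader("visit the UK"));
        TokenStream ts = new SynonymFilter(tok, map, true);          // true = ignore case when matching

        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term);                                // prints each emitted term
        }
        ts.end();
        ts.close();
    }
}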

Project: search    File: TestLimitTokenPositionFilter.java
public void testMaxPosition3WithSynomyms() throws IOException {
  for (final boolean consumeAll : new boolean[]{true, false}) {
    MockTokenizer tokenizer = new MockTokenizer(new StringReader("one two three four five"), MockTokenizer.WHITESPACE, false);
    // if we are consuming all tokens, we can use the checks, otherwise we can't
    tokenizer.setEnableChecks(consumeAll);

    SynonymMap.Builder builder = new SynonymMap.Builder(true);
    builder.add(new CharsRef("one"), new CharsRef("first"), true);
    builder.add(new CharsRef("one"), new CharsRef("alpha"), true);
    builder.add(new CharsRef("one"), new CharsRef("beguine"), true);
    CharsRefBuilder multiWordCharsRef = new CharsRefBuilder();
    SynonymMap.Builder.join(new String[]{"and", "indubitably", "single", "only"}, multiWordCharsRef);
    builder.add(new CharsRef("one"), multiWordCharsRef.get(), true);
    SynonymMap.Builder.join(new String[]{"dopple", "ganger"}, multiWordCharsRef);
    builder.add(new CharsRef("two"), multiWordCharsRef.get(), true);
    SynonymMap synonymMap = builder.build();
    TokenStream stream = new SynonymFilter(tokenizer, synonymMap, true);
    stream = new LimitTokenPositionFilter(stream, 3, consumeAll);

    // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3.
    assertTokenStreamContents(stream,
        new String[]{"one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger"},
        new int[]{1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0});
  }
}
Project: lucenelab    File: SynonymFilterExample.java
@SuppressWarnings("resource")
public static void main(String[] args) throws Exception {
    final Tokenizer tok = new WhitespaceTokenizer();
    tok.setReader(new StringReader("dark sea green sea green"));

    final SynonymMap.Builder builder = new SynonymMap.Builder(true);
    addSynonym("dark sea green", "color", builder);
    addSynonym("green", "color", builder);
    addSynonym("dark sea", "color", builder);
    addSynonym("sea green", "color", builder);
    final SynonymMap synMap = builder.build();
    final TokenStream ts = new SynonymFilter(tok, synMap, true);

    final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    final PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
    final PositionLengthAttribute posLengthAtt = ts.addAttribute(PositionLengthAttribute.class);

    ts.reset();
    int pos = -1;
    while (ts.incrementToken()) {
        pos += posIncrAtt.getPositionIncrement();
        System.out.println("term=" + termAtt + ", pos=" + pos + ", posLen=" + posLengthAtt.getPositionLength());
    }
    ts.end();
    ts.close();
}
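
A short note on what this demo illustrates: each emitted term is printed together with its position and position length. Multi-word matches injected by SynonymFilter (here the single token "color" standing in for phrases such as "dark sea green") are reported with a position length greater than 1 via PositionLengthAttribute. Because SynonymFilter matches greedily (the longest rule starting at a position wins) and flattens the output, not all of the overlapping matches in this input can be represented as a proper token graph, which is exactly what this example is usually cited to show.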
Project: search    File: TestRemoveDuplicatesTokenFilter.java
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  final int numIters = atLeast(10);
  for (int i = 0; i < numIters; i++) {
    SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random().nextBoolean();

    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        TokenStream stream = new SynonymFilter(tokenizer, map, ignoreCase);
        return new TokenStreamComponents(tokenizer, new RemoveDuplicatesTokenFilter(stream));
      }
    };

    checkRandomData(random(), analyzer, 200);
  }
}
Project: NYBC    File: TestRemoveDuplicatesTokenFilter.java
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  final int numIters = atLeast(10);
  for (int i = 0; i < numIters; i++) {
    SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random().nextBoolean();

    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        TokenStream stream = new SynonymFilter(tokenizer, map, ignoreCase);
        return new TokenStreamComponents(tokenizer, new RemoveDuplicatesTokenFilter(stream));
      }
    };

    checkRandomData(random(), analyzer, 200);
  }
}
Project: Maskana-Gestor-de-Conocimiento    File: TestRemoveDuplicatesTokenFilter.java
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  final int numIters = atLeast(10);
  for (int i = 0; i < numIters; i++) {
    SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random().nextBoolean();

    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        TokenStream stream = new SynonymFilter(tokenizer, map, ignoreCase);
        return new TokenStreamComponents(tokenizer, new RemoveDuplicatesTokenFilter(stream));
      }
    };

    checkRandomData(random(), analyzer, 200);
  }
}
Project: Maskana-Gestor-de-Conocimiento    File: TestLimitTokenPositionFilter.java
public void testMaxPosition3WithSynomyms() throws IOException {
  MockTokenizer tokenizer = new MockTokenizer(new StringReader("one two three four five"), MockTokenizer.WHITESPACE, false);
  tokenizer.setEnableChecks(false); // LimitTokenPositionFilter doesn't consume the entire stream that it wraps

  SynonymMap.Builder builder = new SynonymMap.Builder(true);
  builder.add(new CharsRef("one"), new CharsRef("first"), true);
  builder.add(new CharsRef("one"), new CharsRef("alpha"), true);
  builder.add(new CharsRef("one"), new CharsRef("beguine"), true);
  CharsRef multiWordCharsRef = new CharsRef();
  SynonymMap.Builder.join(new String[] { "and", "indubitably", "single", "only" }, multiWordCharsRef);
  builder.add(new CharsRef("one"), multiWordCharsRef, true);
  SynonymMap.Builder.join(new String[]{"dopple", "ganger"}, multiWordCharsRef);
  builder.add(new CharsRef("two"), multiWordCharsRef, true);
  SynonymMap synonymMap = builder.build();
  TokenStream stream = new SynonymFilter(tokenizer, synonymMap, true);
  stream = new LimitTokenPositionFilter(stream, 3); // consumeAllTokens defaults to false

  // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3.
  assertTokenStreamContents(stream, 
      new String[] { "one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger" },
      new int[]    {     1,       0,       0,         0,    0,     1,              0,        0,       1,       0,         0 });

}
Project: querqy    File: AnalyzingQuerqyParserTest.java
@Before
public void createAnalyzers() throws Exception {
   queryAnalyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
         // Whitespace tokenizer that also lower-cases tokens.
         return new TokenStreamComponents(new MockTokenizer());
      }
   };

   SynonymMap.Builder builder = new SynonymMap.Builder(true);
   builder.add(new CharsRef("test"), new CharsRef("synonym1"), false);
   builder.add(new CharsRef("test"), new CharsRef("synonym2"), false);
   final SynonymMap synonyms = builder.build();

   synonymAnalyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
         // Whitespace tokenizer that also lower-cases tokens.
         MockTokenizer tokenizer = new MockTokenizer();
         // Filter for adding synonyms
         TokenStream result = new SynonymFilter(tokenizer, synonyms, true);
         // Remove all non-synonym tokens, because the synonym filter also emits the
         // original tokens.
         result = new TypeTokenFilter(result, Collections.singleton(SynonymFilter.TYPE_SYNONYM),
               true);
         return new TokenStreamComponents(tokenizer, result);
      }
   };
}
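
A hedged sketch (not part of the querqy test) of how such a synonym-only analyzer could be consumed to collect the injected synonym terms for an input string. The field name "f" is arbitrary, and the usual java.io/java.util/Lucene imports are assumed:

static List<String> collectSynonyms(Analyzer synonymAnalyzer, String input) throws IOException {
   List<String> result = new ArrayList<>();
   // TokenStream is Closeable, so try-with-resources releases it even on failure.
   try (TokenStream ts = synonymAnalyzer.tokenStream("f", input)) {
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
         // Only synonym-typed tokens survive the TypeTokenFilter above;
         // for input "test" this should yield "synonym1" and "synonym2".
         result.add(termAtt.toString());
      }
      ts.end();
   }
   return result;
}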
Project: NYBC    File: TestSynonymFilterFactory.java
/** test that we can parse and use the solr syn file */
public void testSynonyms() throws Exception {
  SynonymFilterFactory factory = new SynonymFilterFactory();
  Map<String,String> args = new HashMap<String,String>();
  args.put("synonyms", "synonyms.txt");
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  factory.init(args);
  factory.inform(new ClasspathResourceLoader(getClass()));
  TokenStream ts = factory.create(new MockTokenizer(new StringReader("GB"), MockTokenizer.WHITESPACE, false));
  assertTrue(ts instanceof SynonymFilter);
  assertTokenStreamContents(ts, 
      new String[] { "GB", "gib", "gigabyte", "gigabytes" },
      new int[] { 1, 0, 0, 0 });
}
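
The test resolves synonyms.txt from the classpath via ClasspathResourceLoader. For reference, the expected expansion above corresponds to a Solr-format synonym line; the following is a hedged reconstruction of the relevant part of such a file, not the exact file shipped with the Lucene tests:

# comma-separated terms on one line are treated as equivalent (expanded by default)
GB,gib,gigabyte,gigabytes
# an explicit one-way mapping uses "=>", for example:
# i-pod, i pod => ipod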
Project: elasticsearch_my    File: SynonymTokenFilterFactory.java
@Override
public TokenStream create(TokenStream tokenStream) {
    // a null FST means there are no synonyms
    return synonymMap.fst == null ? tokenStream : new SynonymFilter(tokenStream, synonymMap, ignoreCase);
}
Project: lams    File: FSTSynonymFilterFactory.java
@Override
public TokenStream create(TokenStream input) {
  // If the FST is null, there are no synonyms; just return the original stream,
  // as there is nothing to do here.
  return map.fst == null ? input : new SynonymFilter(input, map, ignoreCase);
}
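
The same empty-map guard is easy to reproduce when wiring SynonymFilter by hand. A hedged sketch, not taken from any of the projects here; "synonymMap" is assumed to be a prebuilt, effectively final SynonymMap in scope, and the no-arg WhitespaceTokenizer assumes Lucene 5.x or later:

Analyzer analyzer = new Analyzer() {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer tokenizer = new WhitespaceTokenizer();
    // Skip the filter entirely when the map carries no rules (null FST).
    TokenStream stream = (synonymMap == null || synonymMap.fst == null)
        ? tokenizer
        : new SynonymFilter(tokenizer, synonymMap, true); // true = ignore case
    return new TokenStreamComponents(tokenizer, stream);
  }
};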
Project: Elasticsearch    File: SynonymTokenFilterFactory.java
@Override
public TokenStream create(TokenStream tokenStream) {
    // a null FST means there are no synonyms
    return synonymMap.fst == null ? tokenStream : new SynonymFilter(tokenStream, synonymMap, ignoreCase);
}
Project: search    File: FSTSynonymFilterFactory.java
@Override
public TokenStream create(TokenStream input) {
  // If the FST is null, there are no synonyms; just return the original stream,
  // as there is nothing to do here.
  return map.fst == null ? input : new SynonymFilter(input, map, ignoreCase);
}
Project: elasticsearch-synonym-remote    File: FileRemoteSynonymTokenFilterFactory.java
@Override
public TokenStream create(TokenStream tokenStream) {
    return synonymMap.fst == null ? tokenStream : new SynonymFilter(tokenStream, synonymMap, true);
}
Project: NYBC    File: FSTSynonymFilterFactory.java
@Override
public TokenStream create(TokenStream input) {
  // If the FST is null, there are no synonyms; just return the original stream,
  // as there is nothing to do here.
  return map.fst == null ? input : new SynonymFilter(input, map, ignoreCase);
}
Project: read-open-source-code    File: FSTSynonymFilterFactory.java
@Override
public TokenStream create(TokenStream input) {
  // If the FST is null, there are no synonyms; just return the original stream,
  // as there is nothing to do here.
  return map.fst == null ? input : new SynonymFilter(input, map, ignoreCase);
}
Project: Maskana-Gestor-de-Conocimiento    File: FSTSynonymFilterFactory.java
@Override
public TokenStream create(TokenStream input) {
  // If the FST is null, there are no synonyms; just return the original stream,
  // as there is nothing to do here.
  return map.fst == null ? input : new SynonymFilter(input, map, ignoreCase);
}
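
A closing note: starting with Lucene 6.4, SynonymFilter and SynonymFilterFactory are deprecated in favor of SynonymGraphFilter / SynonymGraphFilterFactory, which produce a correct token graph for multi-word synonyms. As a rough, hedged sketch of the replacement in the snippets above (same prebuilt SynonymMap assumed):

// Sketch only: SynonymGraphFilter keeps the same (TokenStream, SynonymMap, boolean ignoreCase) shape.
TokenStream ts = new SynonymGraphFilter(tokenizer, synonymMap, true);
ts = new FlattenGraphFilter(ts); // required at index time; omit at query time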