Java 类org.apache.lucene.analysis.synonym.SynonymMap 实例源码

项目:search    文件:TestLimitTokenPositionFilter.java   
/**
 * Verifies that a {@link LimitTokenPositionFilter} limited to 3 positions truncates the output
 * of a {@link SynonymFilter} that injects both single-word and multi-word synonyms.
 * The scenario runs twice: once with the limit filter's {@code consumeAll} argument set to
 * {@code true} and once with it set to {@code false}.
 */
public void testMaxPosition3WithSynomyms() throws IOException {
  final boolean[] consumeAllModes = {true, false};
  for (int mode = 0; mode < consumeAllModes.length; mode++) {
    final boolean consumeAll = consumeAllModes[mode];
    final MockTokenizer source =
        new MockTokenizer(new StringReader("one two three four five"), MockTokenizer.WHITESPACE, false);
    // The tokenizer's checks can only be used when all tokens are consumed.
    source.setEnableChecks(consumeAll);

    final SynonymMap.Builder synonyms = new SynonymMap.Builder(true);
    for (String single : new String[]{"first", "alpha", "beguine"}) {
      synonyms.add(new CharsRef("one"), new CharsRef(single), true);
    }
    // A single scratch builder is reused for both multi-word synonyms.
    final CharsRefBuilder joined = new CharsRefBuilder();
    SynonymMap.Builder.join(new String[]{"and", "indubitably", "single", "only"}, joined);
    synonyms.add(new CharsRef("one"), joined.get(), true);
    SynonymMap.Builder.join(new String[]{"dopple", "ganger"}, joined);
    synonyms.add(new CharsRef("two"), joined.get(), true);

    final TokenStream limited = new LimitTokenPositionFilter(
        new SynonymFilter(source, synonyms.build(), true), 3, consumeAll);

    // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3.
    final String[] expectedTerms =
        {"one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger"};
    final int[] expectedPosIncrements = {1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0};
    assertTokenStreamContents(limited, expectedTerms, expectedPosIncrements);
  }
}
项目:lucenelab    文件:SynonymFilterExample.java   
/**
 * Demo entry point: tokenizes the text "dark sea green sea green" on whitespace, runs it through a
 * {@link SynonymFilter} whose map is filled via {@code addSynonym} (a helper defined elsewhere in
 * this class), and prints each emitted term with its absolute position and position length.
 *
 * @param args ignored
 * @throws Exception if building the synonym map or consuming the token stream fails
 */
@SuppressWarnings("resource")
public static void main(String[] args) throws Exception {
    final Tokenizer tok = new WhitespaceTokenizer();
    tok.setReader(new StringReader("dark sea green sea green"));

    final SynonymMap.Builder builder = new SynonymMap.Builder(true);
    addSynonym("dark sea green", "color", builder);
    addSynonym("green", "color", builder);
    addSynonym("dark sea", "color", builder);
    addSynonym("sea green", "color", builder);
    final SynonymMap synMap = builder.build();

    // try-with-resources guarantees close() even when reset()/incrementToken()/end() throws.
    try (TokenStream ts = new SynonymFilter(tok, synMap, true)) {
        final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        final PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
        final PositionLengthAttribute posLengthAtt = ts.addAttribute(PositionLengthAttribute.class);

        ts.reset();
        // Starts at -1 so that a first token with a position increment of 1 lands on position 0.
        int pos = -1;
        while (ts.incrementToken()) {
            pos += posIncrAtt.getPositionIncrement();
            System.out.println("term=" + termAtt + ", pos=" + pos + ", posLen=" + posLengthAtt.getPositionLength());
        }
        ts.end();
    }
}
项目:elasticsearch-analysis-dynamic-synonym    文件:DynamicSynonymFilter.java   
/**
 * Replaces the synonym map used by this filter and rebuilds the state derived from it
 * (FST reader, roll buffers and scratch arc).
 * <p>
 * Original author's note: every field assigned here was converted from {@code final}; these
 * fields must only be reassigned inside this method, otherwise bugs may result.
 * <p>
 * The map is validated before any field is touched, so a rejected map leaves the filter's
 * previous state intact.
 *
 * @param synonymMap the new synonym map; its {@code fst} must be non-null
 * @throws IllegalArgumentException if {@code synonymMap.fst} is {@code null}
 */
@Override
public void update(SynonymMap synonymMap) {
    if (synonymMap.fst == null) {
        throw new IllegalArgumentException("fst must be non-null");
    }

    this.synonyms = synonymMap;
    this.fst = synonymMap.fst;
    this.fstReader = this.fst.getBytesReader();

    // The roll buffers hold one slot more than the map's maximum horizontal context.
    this.rollBufferSize = 1 + synonymMap.maxHorizontalContext;
    this.futureInputs = new DynamicSynonymFilter.PendingInput[this.rollBufferSize];
    this.futureOutputs = new DynamicSynonymFilter.PendingOutputs[this.rollBufferSize];
    for (int pos = 0; pos < this.rollBufferSize; ++pos) {
        this.futureInputs[pos] = new DynamicSynonymFilter.PendingInput();
        this.futureOutputs[pos] = new DynamicSynonymFilter.PendingOutputs();
    }

    this.scratchArc = new FST.Arc<>();
}
项目:elasticsearch-analysis-dynamic-synonym    文件:DynamicSynonymTokenFilterFactory.java   
/**
 * Reload task: when the synonym file reports that a reload is needed, rebuilds the synonym map
 * and pushes it to every filter registered for this index in {@code dynamicSynonymFilters}.
 * A reload that yields no map (or a map without an FST) is logged and skipped, keeping the
 * current map. Any exception is logged together with its cause and is not propagated.
 */
@Override
public void run() {
    try {
        if (synonymFile.isNeedReloadSynonymMap()) {
            SynonymMap newSynonymMap = synonymFile.reloadSynonymMap();
            if (newSynonymMap == null || newSynonymMap.fst == null) {
                // NOTE(review): this branch is taken when the reloaded map IS null/empty; the
                // "non-null" wording of the message is kept unchanged for log compatibility.
                logger.error("Monitor thread reload remote synonym non-null! indexName:{} path:{}",
                        indexName, synonymFile.getLocation());
                return;
            }
            synonymMap = newSynonymMap;
            Iterator<SynonymDynamicSupport> filters = dynamicSynonymFilters.get(indexName).iterator();
            while (filters.hasNext()) {
                filters.next().update(synonymMap);
                logger.info("success reload synonym success! indexName:{} path:{}", indexName, synonymFile.getLocation());
            }
        }
    } catch (Exception e) {
        // Pass the exception itself so the cause and stack trace end up in the log;
        // the (message, throwable) form is used so the throwable is never mistaken for a parameter.
        logger.error("Monitor thread reload remote synonym error! indexName:" + indexName
                + " path:" + synonymFile.getLocation(), e);
    }
}
项目:search    文件:TestRemoveDuplicatesTokenFilter.java   
/**
 * Pushes random text through analyzers that chain a {@link SynonymFilter} (backed by a randomly
 * populated synonym map) with a {@link RemoveDuplicatesTokenFilter}.
 */
public void testRandomStrings() throws Exception {
  final int rounds = atLeast(10);
  int round = 0;
  while (round < rounds) {
    final SynonymMap.Builder synonymBuilder = new SynonymMap.Builder(random().nextBoolean());
    for (int remaining = atLeast(10); remaining > 0; remaining--) {
      add(synonymBuilder, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
    }
    final SynonymMap synonyms = synonymBuilder.build();
    final boolean caseInsensitive = random().nextBoolean();

    final Analyzer randomChain = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        final Tokenizer source = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        final TokenStream deduplicated =
            new RemoveDuplicatesTokenFilter(new SynonymFilter(source, synonyms, caseInsensitive));
        return new TokenStreamComponents(source, deduplicated);
      }
    };

    checkRandomData(random(), randomChain, 200);
    round++;
  }
}
项目:elasticsearch-analysis-synonym    文件:NGramSynonymTokenizer.java   
/**
 * Resets the tokenizer's per-stream state and, when the synonym loader reports a change since
 * {@code lastModified}, switches to the loader's current synonym map.
 * <p>
 * The new map is validated before any synonym-related field is overwritten, so a map without
 * an FST no longer leaves this tokenizer with a {@code null} {@code fst}.
 *
 * @throws IOException if the superclass reset fails
 * @throws IllegalArgumentException if the reloaded map has a {@code null} {@code fst}
 */
@Override
public void reset() throws IOException {
    super.reset();
    block.setLength(0);
    prevToken = null;
    // Setting the index to BUFFER_SIZE with a length of 0 marks the read buffer as containing no data.
    readBufferIndex = BUFFER_SIZE;
    readBufferLen = 0;
    ch = 0;
    blkStart = 0;
    nextBlkStart = 0;
    if (synonymLoader != null && synonymLoader.isUpdate(lastModified)) {
        lastModified = synonymLoader.getLastModified();
        final SynonymMap map = synonymLoader.getSynonymMap();
        if (map != null) {
            if (map.fst == null) {
                throw new IllegalArgumentException("fst must be non-null");
            }
            synonymMap = map;
            fst = map.fst;
            fstReader = fst.getBytesReader();
            scratchArc = new FST.Arc<>();
            clearAttributes();
        }
    }
}
项目:elasticsearch-analysis-synonym    文件:NGramSynonymTokenizerTest.java   
/**
 * Builds analysis components that consist solely of an {@link NGramSynonymTokenizer}
 * configured from the enclosing scope's {@code n}, {@code delimiters} and {@code expand}.
 *
 * @param fieldName unused
 */
protected TokenStreamComponents createComponents(String fieldName) {
  // Loader stub: always returns the synonym map from the enclosing scope and never builds one itself.
  final SynonymLoader fixedLoader = new SynonymLoader(null, null, expand, null) {
    @Override
    protected void createSynonymMap(boolean reload) {
      // nothing
    }

    @Override
    public SynonymMap getSynonymMap() {
      return synonyms;
    }
  };
  final Tokenizer ngramTokenizer = new NGramSynonymTokenizer(n, delimiters, expand, true, fixedLoader);
  return new TokenStreamComponents(ngramTokenizer);
}
项目:NYBC    文件:FSTSynonymFilterFactory.java   
/**
 * Loads synonyms written in the Solr format ("format=solr").
 * <p>
 * The "synonyms" argument is first tried as a single path on the local file system; if no such
 * file exists it is passed to {@code splitFileNames} and each resulting name is loaded in turn.
 * Input is decoded as UTF-8, and malformed or unmappable bytes are reported as errors.
 *
 * @throws IllegalArgumentException if the "synonyms" argument is missing
 */
private SynonymMap loadSolrSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  final boolean expand = getBoolean("expand", true);
  final String synonymsArg = args.get("synonyms");
  if (synonymsArg == null) {
    throw new IllegalArgumentException("Missing required argument 'synonyms'.");
  }

  final CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder()
    .onMalformedInput(CodingErrorAction.REPORT)
    .onUnmappableCharacter(CodingErrorAction.REPORT);

  final SolrSynonymParser solrParser = new SolrSynonymParser(dedup, expand, analyzer);
  if (new File(synonymsArg).exists()) {
    utf8Decoder.reset();
    solrParser.add(new InputStreamReader(loader.openResource(synonymsArg), utf8Decoder));
    return solrParser.build();
  }

  for (String resourceName : splitFileNames(synonymsArg)) {
    // The same decoder instance is shared, so it is reset before each resource.
    utf8Decoder.reset();
    solrParser.add(new InputStreamReader(loader.openResource(resourceName), utf8Decoder));
  }
  return solrParser.build();
}
项目:NYBC    文件:FSTSynonymFilterFactory.java   
/**
 * Loads synonyms written in the WordNet format ("format=wordnet").
 * <p>
 * The "synonyms" argument is first tried as a single path on the local file system; if no such
 * file exists it is passed to {@code splitFileNames} and each resulting name is loaded in turn.
 * Input is decoded as UTF-8, and malformed or unmappable bytes are reported as errors.
 *
 * @throws IllegalArgumentException if the "synonyms" argument is missing
 */
private SynonymMap loadWordnetSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  final boolean expand = getBoolean("expand", true);
  final String synonymsArg = args.get("synonyms");
  if (synonymsArg == null) {
    throw new IllegalArgumentException("Missing required argument 'synonyms'.");
  }

  final CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder()
    .onMalformedInput(CodingErrorAction.REPORT)
    .onUnmappableCharacter(CodingErrorAction.REPORT);

  final WordnetSynonymParser wordnetParser = new WordnetSynonymParser(dedup, expand, analyzer);
  if (new File(synonymsArg).exists()) {
    utf8Decoder.reset();
    wordnetParser.add(new InputStreamReader(loader.openResource(synonymsArg), utf8Decoder));
    return wordnetParser.build();
  }

  for (String resourceName : splitFileNames(synonymsArg)) {
    // The same decoder instance is shared, so it is reset before each resource.
    utf8Decoder.reset();
    wordnetParser.add(new InputStreamReader(loader.openResource(resourceName), utf8Decoder));
  }
  return wordnetParser.build();
}
项目:NYBC    文件:TestRemoveDuplicatesTokenFilter.java   
/**
 * Pushes random text through analyzers that chain a {@link SynonymFilter} (backed by a randomly
 * populated synonym map) with a {@link RemoveDuplicatesTokenFilter}.
 */
public void testRandomStrings() throws Exception {
  final int rounds = atLeast(10);
  int round = 0;
  while (round < rounds) {
    final SynonymMap.Builder synonymBuilder = new SynonymMap.Builder(random().nextBoolean());
    for (int remaining = atLeast(10); remaining > 0; remaining--) {
      add(synonymBuilder, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
    }
    final SynonymMap synonyms = synonymBuilder.build();
    final boolean caseInsensitive = random().nextBoolean();

    final Analyzer randomChain = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        final Tokenizer source = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        final TokenStream deduplicated =
            new RemoveDuplicatesTokenFilter(new SynonymFilter(source, synonyms, caseInsensitive));
        return new TokenStreamComponents(source, deduplicated);
      }
    };

    checkRandomData(random(), randomChain, 200);
    round++;
  }
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestRemoveDuplicatesTokenFilter.java   
/**
 * Pushes random text through analyzers that chain a {@link SynonymFilter} (backed by a randomly
 * populated synonym map) with a {@link RemoveDuplicatesTokenFilter}.
 */
public void testRandomStrings() throws Exception {
  final int rounds = atLeast(10);
  int round = 0;
  while (round < rounds) {
    final SynonymMap.Builder synonymBuilder = new SynonymMap.Builder(random().nextBoolean());
    for (int remaining = atLeast(10); remaining > 0; remaining--) {
      add(synonymBuilder, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
    }
    final SynonymMap synonyms = synonymBuilder.build();
    final boolean caseInsensitive = random().nextBoolean();

    final Analyzer randomChain = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        final Tokenizer source = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        final TokenStream deduplicated =
            new RemoveDuplicatesTokenFilter(new SynonymFilter(source, synonyms, caseInsensitive));
        return new TokenStreamComponents(source, deduplicated);
      }
    };

    checkRandomData(random(), randomChain, 200);
    round++;
  }
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestLimitTokenPositionFilter.java   
/**
 * Verifies that a {@link LimitTokenPositionFilter} limited to 3 positions truncates the output
 * of a {@link SynonymFilter} that injects both single-word and multi-word synonyms.
 */
public void testMaxPosition3WithSynomyms() throws IOException {
  final MockTokenizer source =
      new MockTokenizer(new StringReader("one two three four five"), MockTokenizer.WHITESPACE, false);
  // LimitTokenPositionFilter doesn't consume the entire stream that it wraps
  source.setEnableChecks(false);

  final SynonymMap.Builder synonyms = new SynonymMap.Builder(true);
  for (String single : new String[] { "first", "alpha", "beguine" }) {
    synonyms.add(new CharsRef("one"), new CharsRef(single), true);
  }
  // A single scratch CharsRef is reused for both multi-word synonyms.
  final CharsRef joined = new CharsRef();
  SynonymMap.Builder.join(new String[] { "and", "indubitably", "single", "only" }, joined);
  synonyms.add(new CharsRef("one"), joined, true);
  SynonymMap.Builder.join(new String[] { "dopple", "ganger" }, joined);
  synonyms.add(new CharsRef("two"), joined, true);

  // consumeAllTokens defaults to false
  final TokenStream limited =
      new LimitTokenPositionFilter(new SynonymFilter(source, synonyms.build(), true), 3);

  // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3.
  final String[] expectedTerms =
      { "one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger" };
  final int[] expectedPosIncrements = { 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0 };
  assertTokenStreamContents(limited, expectedTerms, expectedPosIncrements);
}
项目:lams    文件:FSTSynonymFilterFactory.java   
/**
 * Loads synonyms using the {@link SynonymMap.Parser} implementation named by {@code cname}.
 * <p>
 * The parser is created reflectively through its {@code (boolean, boolean, Analyzer)}
 * constructor. The configured {@code synonyms} value is first tried as a single path on the
 * local file system; if no such file exists it is passed to {@code splitFileNames} and each
 * resulting name is parsed in turn. Input is decoded as UTF-8, and malformed or unmappable
 * bytes are reported as errors.
 *
 * @throws RuntimeException wrapping any failure to instantiate the parser class
 */
private SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  final CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  final Class<? extends SynonymMap.Parser> parserClass = loader.findClass(cname, SynonymMap.Parser.class);
  SynonymMap.Parser synonymParser;
  try {
    synonymParser = parserClass
        .getConstructor(boolean.class, boolean.class, Analyzer.class)
        .newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  if (new File(synonyms).exists()) {
    utf8Decoder.reset();
    synonymParser.parse(new InputStreamReader(loader.openResource(synonyms), utf8Decoder));
    return synonymParser.build();
  }

  for (String resourceName : splitFileNames(synonyms)) {
    // The same decoder instance is shared, so it is reset before each resource.
    utf8Decoder.reset();
    synonymParser.parse(new InputStreamReader(loader.openResource(resourceName), utf8Decoder));
  }
  return synonymParser.build();
}
项目:elasticsearch-analysis-dynamic-synonym    文件:DynamicSynonymGraphFilter.java   
/**
 * Replaces the synonym map used by this filter and rebuilds the FST reader and scratch arc
 * derived from it.
 * <p>
 * Original author's note: every field assigned here was converted from {@code final}; these
 * fields must only be reassigned inside this method, otherwise bugs may result.
 * <p>
 * The map is validated before any field is touched, so a rejected map leaves the filter's
 * previous state intact.
 *
 * @param synonymMap the new synonym map; its {@code fst} must be non-null
 * @throws IllegalArgumentException if {@code synonymMap.fst} is {@code null}
 */
@Override
public void update(SynonymMap synonymMap) {
    if (synonymMap.fst == null) {
        throw new IllegalArgumentException("fst must be non-null");
    }

    this.synonyms = synonymMap;
    this.fst = synonymMap.fst;
    this.fstReader = this.fst.getBytesReader();
    this.scratchArc = new FST.Arc<>();
}
项目:query-autofiltering-component    文件:QueryAutoFilteringComponent.java   
/**
 * Registers, for every search field, each term that the given terms response lists for that
 * field by delegating to {@code addTerm}. Fields with no term list in the response are skipped.
 *
 * @param terms        raw terms data wrapped in a {@link TermsResponse}
 * @param fieldBuilder builder handed through to {@code addTerm}
 * @param termBuilder  builder handed through to {@code addTerm}
 * @param searchFields names of the fields to look up in the response
 */
private void addTerms( NamedList<NamedList<Number>> terms, SynonymMap.Builder fieldBuilder, SynonymMap.Builder termBuilder, ArrayList<String> searchFields ) throws IOException {
  final TermsResponse response = new TermsResponse( terms );
  for (String fieldName : searchFields ) {
    final CharsRef fieldChars = new CharsRef( fieldName );
    final List<TermsResponse.Term> fieldTerms = response.getTerms( fieldName );
    if (fieldTerms == null) {
      continue;
    }
    for (TermsResponse.Term entry : fieldTerms) {
      final String termText = entry.getTerm();
      Log.debug( "Add distributed term: " + fieldName + " = " + termText );
      addTerm( fieldChars, termText, fieldBuilder, termBuilder );
    }
  }
}
项目:query-autofiltering-component    文件:QueryAutoFilteringComponent.java   
/**
 * Rebuilds {@code fieldMap} and {@code termMap} from the values of the searcher's string fields.
 * <p>
 * Every term found in a field's {@link SortedSetDocValues} is passed to {@code addTerm}; terms
 * contributed by {@code addDistributedTerms} are added afterwards, and then both synonym maps
 * are built.
 * Original author's note: for each field value the intent is "lower case, stemmed, lookup
 * synonyms from synonyms.txt - map to fieldValue" (presumably done inside {@code addTerm}).
 *
 * @param rb response builder providing the request's searcher
 */
private void buildFieldMap( ResponseBuilder rb ) throws IOException {
  Log.debug( "buildFieldMap" );
  final SolrIndexSearcher indexSearcher = rb.req.getSearcher();
  final SynonymMap.Builder fieldBuilder = new SynonymMap.Builder( true );
  final SynonymMap.Builder termBuilder = new SynonymMap.Builder( true );

  final ArrayList<String> searchFields = getStringFields( indexSearcher );

  for (String searchField : searchFields ) {
    Log.debug( "adding searchField " + searchField );
    final CharsRef fieldChars = new CharsRef( searchField );
    final SortedSetDocValues docValues =
        FieldCache.DEFAULT.getDocTermOrds( indexSearcher.getAtomicReader( ), searchField );
    if (docValues != null) {
      Log.debug( "got SortedSetDocValues for " + searchField );
      final TermsEnum termsEnum = docValues.termsEnum();
      while (termsEnum.next() != null) {
        addTerm( fieldChars, termsEnum.term().utf8ToString( ), fieldBuilder, termBuilder );
      }
    }
  }

  addDistributedTerms( rb, fieldBuilder, termBuilder, searchFields );

  fieldMap = fieldBuilder.build( );
  termMap = termBuilder.build( );
}
项目:query-autofiltering-component    文件:QueryAutoFilteringComponent.java   
/**
 * Registers, for every search field, each term that the given terms response lists for that
 * field by delegating to {@code addTerm}. Fields with no term list in the response are skipped.
 *
 * @param terms        raw terms data wrapped in a {@link TermsResponse}
 * @param fieldBuilder builder handed through to {@code addTerm}
 * @param termBuilder  builder handed through to {@code addTerm}
 * @param searchFields names of the fields to look up in the response
 */
private void addTerms( NamedList<NamedList<Number>> terms, SynonymMap.Builder fieldBuilder, SynonymMap.Builder termBuilder, ArrayList<String> searchFields ) throws IOException {
  final TermsResponse response = new TermsResponse( terms );
  for (String fieldName : searchFields ) {
    final CharsRef fieldChars = new CharsRef( fieldName );
    final List<TermsResponse.Term> fieldTerms = response.getTerms( fieldName );
    if (fieldTerms == null) {
      continue;
    }
    for (TermsResponse.Term entry : fieldTerms) {
      final String termText = entry.getTerm();
      Log.debug( "Add distributed term: " + fieldName + " = " + termText );
      addTerm( fieldChars, termText, fieldBuilder, termBuilder );
    }
  }
}
项目:search    文件:FSTSynonymFilterFactory.java   
/**
 * Loads synonyms using the {@link SynonymMap.Parser} implementation named by {@code cname}.
 * <p>
 * The parser is created reflectively through its {@code (boolean, boolean, Analyzer)}
 * constructor. The configured {@code synonyms} value is first tried as a single path on the
 * local file system; if no such file exists it is passed to {@code splitFileNames} and each
 * resulting name is parsed in turn. Input is decoded as UTF-8, and malformed or unmappable
 * bytes are reported as errors.
 *
 * @throws RuntimeException wrapping any failure to instantiate the parser class
 */
private SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  final CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  final Class<? extends SynonymMap.Parser> parserClass = loader.findClass(cname, SynonymMap.Parser.class);
  SynonymMap.Parser synonymParser;
  try {
    synonymParser = parserClass
        .getConstructor(boolean.class, boolean.class, Analyzer.class)
        .newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  if (new File(synonyms).exists()) {
    utf8Decoder.reset();
    synonymParser.parse(new InputStreamReader(loader.openResource(synonyms), utf8Decoder));
    return synonymParser.build();
  }

  for (String resourceName : splitFileNames(synonyms)) {
    // The same decoder instance is shared, so it is reset before each resource.
    utf8Decoder.reset();
    synonymParser.parse(new InputStreamReader(loader.openResource(resourceName), utf8Decoder));
  }
  return synonymParser.build();
}
项目:search    文件:TestRandomChains.java   
/**
 * Produces a synonym map filled with random entries: the builder's flag and every entry added
 * through {@code addSyn} are drawn from the supplied random source.
 *
 * @param random source of randomness for the builder flag and the entries
 * @return the built synonym map
 */
@Override public Object create(Random random) {
  final SynonymMap.Builder synonymBuilder = new SynonymMap.Builder(random.nextBoolean());
  for (int remaining = atLeast(10); remaining > 0; remaining--) {
    addSyn(synonymBuilder, randomNonEmptyString(random), randomNonEmptyString(random), random.nextBoolean());
  }
  Object built = null;
  try {
    built = synonymBuilder.build();
  } catch (Exception ex) {
    // Rethrow.rethrow is expected to throw, so falling through with null should not happen.
    Rethrow.rethrow(ex);
  }
  return built;
}
项目:search    文件:FSTSynonymFilterFactory.java   
/**
 * Loads synonyms using the SynonymMap.Parser implementation named by {@code cname}.
 * <p>
 * The parser is created reflectively through its {@code (boolean, boolean, Analyzer)}
 * constructor. The configured {@code synonyms} value is first tried as a single path on the
 * local file system; if no such file exists it is passed to {@code splitFileNames} and each
 * resulting name is parsed in turn. Input is decoded as UTF-8, and malformed or unmappable
 * bytes are reported as errors.
 *
 * @throws RuntimeException wrapping any failure to instantiate the parser class
 */
protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  final CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  final Class<? extends SynonymMap.Parser> parserClass = loader.findClass(cname, SynonymMap.Parser.class);
  SynonymMap.Parser synonymParser;
  try {
    synonymParser = parserClass
        .getConstructor(boolean.class, boolean.class, Analyzer.class)
        .newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  if (new File(synonyms).exists()) {
    utf8Decoder.reset();
    synonymParser.parse(new InputStreamReader(loader.openResource(synonyms), utf8Decoder));
    return synonymParser.build();
  }

  for (String resourceName : splitFileNames(synonyms)) {
    // The same decoder instance is shared, so it is reset before each resource.
    utf8Decoder.reset();
    synonymParser.parse(new InputStreamReader(loader.openResource(resourceName), utf8Decoder));
  }
  return synonymParser.build();
}
项目:search    文件:ManagedSynonymFilterFactory.java   
/**
 * Callback that wires this factory to its managed synonym resource.
 * <p>
 * Per the original documentation this is called once, during core initialization, so that
 * text analysis stays consistent (especially in a distributed environment, where servers
 * should not apply differing synonym sets).
 * <p>
 * The method appends "synonyms", "expand" and "format" entries to {@code initArgs}, converts
 * all entries to strings, and creates a delegate {@link FSTSynonymFilterFactory} whose
 * synonym loading is overridden to read from the managed resource instead of a file.
 *
 * @param initArgs init arguments; the three entries above are added to this list
 * @param res      the managed resource, cast to {@code SynonymManager} when synonyms are loaded
 * @throws SolrException with {@code SERVER_ERROR} if informing the delegate fails with an I/O error
 */
@SuppressWarnings("unchecked")
@Override
public void onManagedResourceInitialized(NamedList<?> initArgs, final ManagedResource res) 
    throws SolrException
{
  final NamedList<Object> managedArgs = (NamedList<Object>) initArgs;
  managedArgs.add("synonyms", getResourceId());
  managedArgs.add("expand", "false");
  managedArgs.add("format", "solr");

  // The delegate factory takes a String-to-String map, so every argument value is stringified.
  final Map<String,String> factoryArgs = new HashMap<>();
  for (Map.Entry<String,?> arg : managedArgs) {
    final String argName = arg.getKey();
    factoryArgs.put(argName, arg.getValue().toString());
  }

  // The delegate pulls its mappings from the managed resource through a custom parser.
  delegate = new FSTSynonymFilterFactory(factoryArgs) {
    @Override
    protected SynonymMap loadSynonyms
        (ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer)
        throws IOException, ParseException {
      final ManagedSynonymParser managedParser =
          new ManagedSynonymParser((SynonymManager) res, dedup, analyzer);
      // null is safe here because there's no actual parsing done against a input Reader
      managedParser.parse(null);
      return managedParser.build();
    }
  };

  try {
    delegate.inform(res.getResourceLoader());
  } catch (IOException e) {
    throw new SolrException(ErrorCode.SERVER_ERROR, e);
  }
}
项目:elasticsearch-analysis-synonym    文件:SynonymFilter.java   
/**
 * Resets the filter's buffering state and, when the synonym loader reports a change since
 * {@code lastModified}, switches to the loader's current synonym map.
 * <p>
 * The new map is validated before any synonym-related field is overwritten, so a map without
 * an FST no longer leaves this filter with a {@code null} {@code fst}.
 *
 * @throws IOException if the superclass reset fails
 * @throws IllegalArgumentException if the reloaded map has a {@code null} {@code fst}
 */
@Override
public void reset() throws IOException {
  super.reset();
  captureCount = 0;
  finished = false;
  inputSkipCount = 0;
  nextRead = nextWrite = 0;

  // In normal usage these resets would not be needed,
  // since they reset-as-they-are-consumed, but the app
  // may not consume all input tokens (or we might hit an
  // exception), in which case we have leftover state
  // here:
  for (final PendingInput input : futureInputs) {
    input.reset();
  }
  for (final PendingOutputs output : futureOutputs) {
    output.reset();
  }

  if (synonymLoader != null && synonymLoader.isUpdate(lastModified)) {
      lastModified = synonymLoader.getLastModified();
      final SynonymMap map = synonymLoader.getSynonymMap();
      if (map != null) {
          if (map.fst == null) {
              throw new IllegalArgumentException("fst must be non-null");
          }
          synonyms = map;
          fst = map.fst;
          fstReader = fst.getBytesReader();
          scratchArc = new FST.Arc<>();
          clearAttributes();
      }
  }
}
项目:querqy    文件:AnalyzingQuerqyParserTest.java   
/**
 * Creates the two analyzers used by the tests: {@code queryAnalyzer}, which only tokenizes,
 * and {@code synonymAnalyzer}, which emits only the synonyms ("synonym1", "synonym2")
 * registered for the input term "test".
 */
@Before
public void createAnalyzers() throws Exception {
   queryAnalyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
         // MockTokenizer with default settings (original note: whitespace tokenizing, lower-casing).
         final MockTokenizer source = new MockTokenizer();
         return new TokenStreamComponents(source);
      }
   };

   final SynonymMap.Builder mapBuilder = new SynonymMap.Builder(true);
   for (String replacement : new String[] {"synonym1", "synonym2"}) {
      mapBuilder.add(new CharsRef("test"), new CharsRef(replacement), false);
   }
   final SynonymMap testSynonyms = mapBuilder.build();

   synonymAnalyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
         // MockTokenizer with default settings (original note: whitespace tokenizing, lower-casing).
         final MockTokenizer source = new MockTokenizer();
         // The synonym filter also emits the original token, so a type filter is stacked
         // on top of it to drop every token that is not of the synonym type.
         final TokenStream synonymsOnly = new TypeTokenFilter(
               new SynonymFilter(source, testSynonyms, true),
               Collections.singleton(SynonymFilter.TYPE_SYNONYM),
               true);
         return new TokenStreamComponents(source, synonymsOnly);
      }
   };
}
项目:lucenelab    文件:FstExample.java   
/**
 * Demo entry point: builds a synonym map that maps several color phrases to "color" and dumps
 * the map's FST in Graphviz dot format.
 *
 * @param args optional; {@code args[0]} is the output file path. When absent, the historical
 *             default {@code d:/tmp/syns.dot} is used.
 * @throws Exception if building the map or writing the dot file fails
 */
public static void main(String[] args) throws Exception {
    // The output location used to be hard-coded to a Windows path; it can now be overridden.
    final String dotPath = (args != null && args.length > 0) ? args[0] : "d:/tmp/syns.dot";

    final CharsRef output = new CharsRef("color");
    final SynonymMap.Builder builder = new SynonymMap.Builder(true);
    final String[] phrases = {"blue", "green", "pale green", "pale blue", "dark sea green"};
    for (final String phrase : phrases) {
        // Each phrase is split into words and re-joined into the builder's multi-word input form.
        builder.add(SynonymMap.Builder.join(phrase.split(" "), new CharsRefBuilder()), output, true);
    }
    final SynonymMap synMap = builder.build();

    try (PrintWriter pw = new PrintWriter(dotPath)) {
        Util.toDot(synMap.fst, pw, true, true);
    }
    System.out.println("Done!");
}
项目:NYBC    文件:TestRandomChains.java   
/**
 * Produces a synonym map filled with random entries: the builder's flag and every entry added
 * through {@code addSyn} are drawn from the supplied random source.
 *
 * @param random source of randomness for the builder flag and the entries
 * @return the built synonym map
 */
@Override public Object create(Random random) {
  final SynonymMap.Builder synonymBuilder = new SynonymMap.Builder(random.nextBoolean());
  for (int remaining = atLeast(10); remaining > 0; remaining--) {
    addSyn(synonymBuilder, randomNonEmptyString(random), randomNonEmptyString(random), random.nextBoolean());
  }
  Object built = null;
  try {
    built = synonymBuilder.build();
  } catch (Exception ex) {
    // Rethrow.rethrow is expected to throw, so falling through with null should not happen.
    Rethrow.rethrow(ex);
  }
  return built;
}
项目:read-open-source-code    文件:FSTSynonymFilterFactory.java   
/**
 * Loads synonyms using the {@link SynonymMap.Parser} implementation named by {@code cname}.
 * <p>
 * The parser is created reflectively through its {@code (boolean, boolean, Analyzer)}
 * constructor. The configured {@code synonyms} value is first tried as a single path on the
 * local file system; if no such file exists it is passed to {@code splitFileNames} and each
 * resulting name is parsed in turn. Input is decoded as UTF-8, and malformed or unmappable
 * bytes are reported as errors.
 *
 * @throws RuntimeException wrapping any failure to instantiate the parser class
 */
private SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  final CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  final Class<? extends SynonymMap.Parser> parserClass = loader.findClass(cname, SynonymMap.Parser.class);
  SynonymMap.Parser synonymParser;
  try {
    synonymParser = parserClass
        .getConstructor(boolean.class, boolean.class, Analyzer.class)
        .newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  if (new File(synonyms).exists()) {
    utf8Decoder.reset();
    synonymParser.parse(new InputStreamReader(loader.openResource(synonyms), utf8Decoder));
    return synonymParser.build();
  }

  for (String resourceName : splitFileNames(synonyms)) {
    // The same decoder instance is shared, so it is reset before each resource.
    utf8Decoder.reset();
    synonymParser.parse(new InputStreamReader(loader.openResource(resourceName), utf8Decoder));
  }
  return synonymParser.build();
}
项目:read-open-source-code    文件:FSTSynonymFilterFactory.java   
/**
 * Loads synonyms using the {@link SynonymMap.Parser} implementation named by {@code cname}.
 * <p>
 * The parser is created reflectively through its {@code (boolean, boolean, Analyzer)}
 * constructor. The configured {@code synonyms} value is first tried as a single path on the
 * local file system; if no such file exists it is passed to {@code splitFileNames} and each
 * resulting name is parsed in turn. Input is decoded as UTF-8, and malformed or unmappable
 * bytes are reported as errors.
 *
 * @throws RuntimeException wrapping any failure to instantiate the parser class
 */
private SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  final CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  final Class<? extends SynonymMap.Parser> parserClass = loader.findClass(cname, SynonymMap.Parser.class);
  SynonymMap.Parser synonymParser;
  try {
    synonymParser = parserClass
        .getConstructor(boolean.class, boolean.class, Analyzer.class)
        .newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  if (new File(synonyms).exists()) {
    utf8Decoder.reset();
    synonymParser.parse(new InputStreamReader(loader.openResource(synonyms), utf8Decoder));
    return synonymParser.build();
  }

  for (String resourceName : splitFileNames(synonyms)) {
    // The same decoder instance is shared, so it is reset before each resource.
    utf8Decoder.reset();
    synonymParser.parse(new InputStreamReader(loader.openResource(resourceName), utf8Decoder));
  }
  return synonymParser.build();
}
项目:read-open-source-code    文件:FSTSynonymFilterFactory.java   
/**
 * Loads synonyms using the {@link SynonymMap.Parser} implementation named by {@code cname}.
 * <p>
 * The parser is created reflectively through its {@code (boolean, boolean, Analyzer)}
 * constructor. The configured {@code synonyms} value is first tried as a single path on the
 * local file system; if no such file exists it is passed to {@code splitFileNames} and each
 * resulting name is parsed in turn. Input is decoded as UTF-8, and malformed or unmappable
 * bytes are reported as errors.
 *
 * @throws RuntimeException wrapping any failure to instantiate the parser class
 */
private SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  final CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  final Class<? extends SynonymMap.Parser> parserClass = loader.findClass(cname, SynonymMap.Parser.class);
  SynonymMap.Parser synonymParser;
  try {
    synonymParser = parserClass
        .getConstructor(boolean.class, boolean.class, Analyzer.class)
        .newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  if (new File(synonyms).exists()) {
    utf8Decoder.reset();
    synonymParser.parse(new InputStreamReader(loader.openResource(synonyms), utf8Decoder));
    return synonymParser.build();
  }

  for (String resourceName : splitFileNames(synonyms)) {
    // The same decoder instance is shared, so it is reset before each resource.
    utf8Decoder.reset();
    synonymParser.parse(new InputStreamReader(loader.openResource(resourceName), utf8Decoder));
  }
  return synonymParser.build();
}
项目:read-open-source-code    文件:FSTSynonymFilterFactory.java   
/**
 * Loads synonyms using the SynonymMap.Parser implementation named by {@code cname}.
 * <p>
 * The parser is created reflectively through its {@code (boolean, boolean, Analyzer)}
 * constructor. The configured {@code synonyms} value is first tried as a single path on the
 * local file system; if no such file exists it is passed to {@code splitFileNames} and each
 * resulting name is parsed in turn. Input is decoded as UTF-8, and malformed or unmappable
 * bytes are reported as errors.
 *
 * @throws RuntimeException wrapping any failure to instantiate the parser class
 */
protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  final CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  final Class<? extends SynonymMap.Parser> parserClass = loader.findClass(cname, SynonymMap.Parser.class);
  SynonymMap.Parser synonymParser;
  try {
    synonymParser = parserClass
        .getConstructor(boolean.class, boolean.class, Analyzer.class)
        .newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  if (new File(synonyms).exists()) {
    utf8Decoder.reset();
    synonymParser.parse(new InputStreamReader(loader.openResource(synonyms), utf8Decoder));
    return synonymParser.build();
  }

  for (String resourceName : splitFileNames(synonyms)) {
    // The same decoder instance is shared, so it is reset before each resource.
    utf8Decoder.reset();
    synonymParser.parse(new InputStreamReader(loader.openResource(resourceName), utf8Decoder));
  }
  return synonymParser.build();
}
项目:read-open-source-code    文件:ManagedSynonymFilterFactory.java   
/**
 * Callback that wires this factory to its managed synonym resource.
 * <p>
 * Per the original documentation this is called once, during core initialization, so that
 * text analysis stays consistent (especially in a distributed environment, where servers
 * should not apply differing synonym sets).
 * <p>
 * The method appends "synonyms", "expand" and "format" entries to {@code initArgs}, converts
 * all entries to strings, and creates a delegate {@link FSTSynonymFilterFactory} whose
 * synonym loading is overridden to read from the managed resource instead of a file.
 *
 * @param initArgs init arguments; the three entries above are added to this list
 * @param res      the managed resource, cast to {@code SynonymManager} when synonyms are loaded
 * @throws SolrException with {@code SERVER_ERROR} if informing the delegate fails with an I/O error
 */
@SuppressWarnings("unchecked")
@Override
public void onManagedResourceInitialized(NamedList<?> initArgs, final ManagedResource res) 
    throws SolrException
{
  final NamedList<Object> managedArgs = (NamedList<Object>) initArgs;
  managedArgs.add("synonyms", getResourceId());
  managedArgs.add("expand", "false");
  managedArgs.add("format", "solr");

  // The delegate factory takes a String-to-String map, so every argument value is stringified.
  final Map<String,String> factoryArgs = new HashMap<>();
  for (Map.Entry<String,?> arg : managedArgs) {
    final String argName = arg.getKey();
    factoryArgs.put(argName, arg.getValue().toString());
  }

  // The delegate pulls its mappings from the managed resource through a custom parser.
  delegate = new FSTSynonymFilterFactory(factoryArgs) {
    @Override
    protected SynonymMap loadSynonyms
        (ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer)
        throws IOException, ParseException {
      final ManagedSynonymParser managedParser =
          new ManagedSynonymParser((SynonymManager) res, dedup, analyzer);
      // null is safe here because there's no actual parsing done against a input Reader
      managedParser.parse(null);
      return managedParser.build();
    }
  };

  try {
    delegate.inform(res.getResourceLoader());
  } catch (IOException e) {
    throw new SolrException(ErrorCode.SERVER_ERROR, e);
  }
}
项目:fess-solr-plugin    文件:NGramSynonymTokenizerFactory.java   
/**
 * Loads synonyms using the {@code SynonymMap.Parser} implementation named by
 * {@code cname}.
 * <p>
 * The parser is created reflectively through its
 * {@code (boolean, boolean, Analyzer)} constructor, fed every synonym resource and
 * then asked to build the map. Resources are decoded as strict UTF-8: malformed or
 * unmappable input is reported as an error rather than replaced.
 *
 * @param loader   used to locate the parser class and to open the resources
 * @param cname    name of the parser class to instantiate
 * @param dedup    first constructor argument handed to the parser
 * @param analyzer analyzer handed to the parser
 * @param expand   second constructor argument handed to the parser
 * @param synonyms a single existing file name, or a value that
 *                 {@code splitFileNames} breaks into several resource names
 * @return the map produced by {@code parser.build()}
 * @throws IOException      if a resource cannot be opened, read or decoded
 * @throws ParseException   if the parser rejects the content of a resource
 * @throws RuntimeException if the parser class cannot be instantiated
 */
private SynonymMap loadSynonyms(final ResourceLoader loader,
        final String cname, final boolean dedup, final Analyzer analyzer,
        final boolean expand, final String synonyms) throws IOException,
        ParseException {
    final CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);

    SynonymMap.Parser parser;
    final Class<? extends SynonymMap.Parser> clazz = loader.findClass(
            cname, SynonymMap.Parser.class);
    try {
        parser = clazz.getConstructor(boolean.class, boolean.class,
                Analyzer.class).newInstance(dedup, expand, analyzer);
    } catch (final Exception e) {
        throw new RuntimeException(e);
    }

    // A value that names an existing file is used as-is; anything else is split.
    final File synonymFile = new File(synonyms);
    if (synonymFile.exists()) {
        decoder.reset();
        // try-with-resources releases the stream even if the parser throws or
        // never closes the reader itself; closing twice is harmless.
        try (InputStreamReader reader = new InputStreamReader(
                loader.openResource(synonyms), decoder)) {
            parser.parse(reader);
        }
    } else {
        final List<String> files = splitFileNames(synonyms);
        for (final String file : files) {
            // the decoder is shared, so drop state left by the previous resource
            decoder.reset();
            try (InputStreamReader reader = new InputStreamReader(
                    loader.openResource(file), decoder)) {
                parser.parse(reader);
            }
        }
    }
    return parser.build();
}
项目:fess-solr-plugin    文件:NGramSynonymTokenizer.java   
/**
 * Creates a tokenizer over {@code input} and initializes its working state.
 *
 * @param input      reader handed to the superclass constructor
 * @param n          stored in the {@code n} field
 * @param delimiters stored in the {@code delimiters} field
 * @param expand     stored in the {@code expand} field
 * @param ignoreCase stored in the {@code ignoreCase} field
 * @param map        synonym map to use; may be {@code null}, in which case no
 *                   FST lookup state is set up
 * @throws IllegalArgumentException if {@code map} is given but its FST is {@code null}
 */
protected NGramSynonymTokenizer(final Reader input, final int n,
        final String delimiters, final boolean expand,
        final boolean ignoreCase, final SynonymMap map) {
    super(input);

    // configuration
    this.n = n;
    this.delimiters = delimiters;
    this.expand = expand;
    this.ignoreCase = ignoreCase;
    this.map = map;

    // FST lookup state, only needed when a synonym map was supplied
    if (map != null) {
        if (map.fst == null) {
            throw new IllegalArgumentException("fst must be non-null");
        }
        fst = map.fst;
        fstReader = fst.getBytesReader();
        scratchArc = new FST.Arc<BytesRef>();
    }

    // read buffer; the index starts at BUFFER_SIZE while the length is 0
    readBuffer = new char[BUFFER_SIZE];
    readBufferIndex = BUFFER_SIZE;
    readBufferLen = 0;
    ch = 0;

    // block and token bookkeeping
    block = new StringBuilder();
    nextBlkStart = 0;
    synonyms = new ArrayList<NGramSynonymTokenizer.MyToken>();
    queue = new PriorityQueue<NGramSynonymTokenizer.MyToken>(100,
            new MyTokensComparator());
}
项目:fess-solr-plugin    文件:NGramSynonymTokenizerTest.java   
/**
 * Creates a test analyzer that simply records the given settings in its fields.
 *
 * @param n          stored in the {@code n} field
 * @param delimiters stored in the {@code delimiters} field
 * @param expand     stored in the {@code expand} field
 * @param synonyms   stored in the {@code synonyms} field
 */
public NGramSynonymTokenizerTestAnalyzer(final int n,
        final String delimiters, final boolean expand,
        final SynonymMap synonyms) {
    this.synonyms = synonyms;
    this.expand = expand;
    this.delimiters = delimiters;
    this.n = n;
}
项目:information-retrieval-adventure    文件:SynonymGraphFilterTest.java   
/**
 * Logs a synonym rule to standard output and registers it on the shared {@code builder}.
 * <p>
 * Both phrases are split on runs of spaces and re-joined with
 * {@code SynonymMap.Builder.join} before being added.
 *
 * @param input    phrase to match, words separated by spaces
 * @param output   phrase to map to, words separated by spaces
 * @param keepOrig passed through as the third argument of {@code builder.add}
 */
private static void add(String input, String output, boolean keepOrig) {
  System.out.println("  add input=" + input + " output=" + output + " keepOrig=" + keepOrig);

  String[] inputWords = input.split(" +");
  String[] outputWords = output.split(" +");

  CharsRefBuilder from = new CharsRefBuilder();
  CharsRefBuilder to = new CharsRefBuilder();
  SynonymMap.Builder.join(inputWords, from);
  SynonymMap.Builder.join(outputWords, to);

  builder.add(from.get(), to.get(), keepOrig);
}
项目:information-retrieval-adventure    文件:EntradaSalida.java   
/**
 * Prints a synonym rule and adds it to the shared {@code builder}.
 * <p>
 * Each phrase is split on one or more spaces and the resulting words are joined
 * with {@code SynonymMap.Builder.join} before the rule is added.
 *
 * @param input    source phrase, words separated by spaces
 * @param output   replacement phrase, words separated by spaces
 * @param keepOrig forwarded unchanged to {@code builder.add}
 */
private static void add(String input, String output, boolean keepOrig) {
  System.out.println("  add input=" + input + " output=" + output + " keepOrig=" + keepOrig);

  String[] sourceWords = input.split(" +");
  String[] targetWords = output.split(" +");

  CharsRefBuilder source = new CharsRefBuilder();
  CharsRefBuilder target = new CharsRefBuilder();
  SynonymMap.Builder.join(sourceWords, source);
  SynonymMap.Builder.join(targetWords, target);

  builder.add(source.get(), target.get(), keepOrig);
}
项目:information-retrieval-adventure    文件:EntradaSalidaTest.java   
@Test
    public void testSynonyms() throws Exception {
        final String entrada = "ALCALDE KOOPER";
        final String salida = "FEDERICO KOOPER";

        // One rule: the input phrase maps to the output phrase.
        final SynonymMap.Builder mapBuilder = new SynonymMap.Builder(true);
        final CharsRef from = SynonymMap.Builder.join(entrada.split(" "), new CharsRefBuilder());
        final CharsRef to = SynonymMap.Builder.join(salida.split(" "), new CharsRefBuilder());
        mapBuilder.add(from, to, true);

        final SuggestAnalizer analyzer = new SuggestAnalizer(mapBuilder.build());
        final TokenStream tokens = analyzer.createComponents(entrada).getTokenStream();
        final CharTermAttribute term = tokens.addAttribute(CharTermAttribute.class);

        // Drain the stream, printing every term it produces.
        tokens.reset();
        while (tokens.incrementToken()) {
            System.out.println(term.toString());
        }
        tokens.end();
        tokens.close();

        // TODO: this test only prints and asserts nothing. An earlier draft meant to
        // check (via assertTokenStreamContents / assertAnalyzesTo) that the output
        // is "FEDERICO".
    }
项目:Maskana-Gestor-de-Conocimiento    文件:FSTSynonymFilterFactory.java   
/**
 * Load synonyms with the given {@link SynonymMap.Parser} class.
 * <p>
 * The parser class named by {@code cname} is instantiated reflectively through its
 * {@code (boolean, boolean, Analyzer)} constructor, fed every synonym resource, and
 * then asked to build the final map. Resources are decoded as strict UTF-8: malformed
 * or unmappable input is reported as an error instead of being silently replaced.
 *
 * @param loader   used to locate the parser class and to open the synonym resources
 * @param cname    name of the parser implementation to instantiate
 * @param dedup    first constructor argument handed to the parser
 * @param analyzer analyzer handed to the parser
 * @return the map produced by {@code parser.build()}
 * @throws IOException      if a resource cannot be opened, read or decoded
 * @throws ParseException   if the parser rejects the content of a resource
 * @throws RuntimeException if the parser class cannot be instantiated
 */
private SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  SynonymMap.Parser parser;
  Class<? extends SynonymMap.Parser> clazz = loader.findClass(cname, SynonymMap.Parser.class);
  try {
    parser = clazz.getConstructor(boolean.class, boolean.class, Analyzer.class).newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  // If the whole attribute value names an existing file, treat it as a single resource;
  // otherwise hand it to splitFileNames (presumably a list of several resource names).
  File synonymFile = new File(synonyms);
  if (synonymFile.exists()) {
    decoder.reset();
    // try-with-resources guarantees the stream is released even if the parser
    // throws or does not close the reader itself; a second close() is a no-op.
    try (InputStreamReader reader = new InputStreamReader(loader.openResource(synonyms), decoder)) {
      parser.parse(reader);
    }
  } else {
    List<String> files = splitFileNames(synonyms);
    for (String file : files) {
      // the decoder is reused, so clear any state left over from the previous resource
      decoder.reset();
      try (InputStreamReader reader = new InputStreamReader(loader.openResource(file), decoder)) {
        parser.parse(reader);
      }
    }
  }
  return parser.build();
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestRandomChains.java   
/**
 * Produces a randomly populated {@code SynonymMap}.
 * <p>
 * The builder flag, the number of entries (at least 10), both strings of each entry
 * and its final boolean are all drawn at random, in that order.
 */
@Override public Object create(Random random) {
  final boolean dedup = random.nextBoolean();
  SynonymMap.Builder synonyms = new SynonymMap.Builder(dedup);

  for (int remaining = atLeast(10); remaining > 0; remaining--) {
    // draw in the same order the arguments are consumed
    String input = randomNonEmptyString(random);
    String output = randomNonEmptyString(random);
    boolean keepOrig = random.nextBoolean();
    addSyn(synonyms, input, output, keepOrig);
  }

  try {
    return synonyms.build();
  } catch (Exception ex) {
    Rethrow.rethrow(ex);
    return null; // unreachable code
  }
}
项目:elasticsearch-analysis-dynamic-synonym    文件:DynamicSynonymGraphFilter.java   
/**
 * Creates a filter over {@code input} that uses the given synonym map.
 *
 * @param input      upstream token stream, handed to the superclass constructor
 * @param synonyms   synonym map passed to {@code update}
 * @param ignoreCase stored in the {@code ignoreCase} field
 */
public DynamicSynonymGraphFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) {
    super(input);
    // set before update() runs — presumably update() may depend on it; confirm against update()
    this.ignoreCase = ignoreCase;
    // update() is defined elsewhere; presumably it installs the synonym map on this filter
    update(synonyms);
}
项目:elasticsearch-analysis-dynamic-synonym    文件:DynamicSynonymFilter.java   
/**
 * Creates a filter over {@code input} that uses the given synonym map.
 *
 * @param input      upstream token stream, handed to the superclass constructor
 * @param synonyms   synonym map passed to {@code update}
 * @param ignoreCase stored in the {@code ignoreCase} field
 */
public DynamicSynonymFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) {
    super(input);
    // set before update() runs — presumably update() may depend on it; confirm against update()
    this.ignoreCase = ignoreCase;

    // update() is defined elsewhere; presumably it installs the synonym map on this filter
    update(synonyms);
}