@Override public void inform(ResourceLoader loader) throws IOException { final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory); Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader) : factory.create(reader); TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer; return new TokenStreamComponents(tokenizer, stream); } }; try { String formatClass = format; if (format == null || format.equals("solr")) { formatClass = SolrSynonymParser.class.getName(); } else if (format.equals("wordnet")) { formatClass = WordnetSynonymParser.class.getName(); } // TODO: expose dedup as a parameter? map = loadSynonyms(loader, formatClass, true, analyzer); } catch (ParseException e) { throw new IOException("Error parsing synonyms file:", e); } }
/** * Load synonyms from the wordnet format, "format=wordnet". */ private SynonymMap loadWordnetSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException { final boolean expand = getBoolean("expand", true); String synonyms = args.get("synonyms"); if (synonyms == null) throw new IllegalArgumentException("Missing required argument 'synonyms'."); CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); WordnetSynonymParser parser = new WordnetSynonymParser(dedup, expand, analyzer); File synonymFile = new File(synonyms); if (synonymFile.exists()) { decoder.reset(); parser.add(new InputStreamReader(loader.openResource(synonyms), decoder)); } else { List<String> files = splitFileNames(synonyms); for (String file : files) { decoder.reset(); parser.add(new InputStreamReader(loader.openResource(file), decoder)); } } return parser.build(); }
@Override public void inform(final ResourceLoader loader) throws IOException { if (synonymFiles == null) { map = null; return; } final Analyzer analyzer = getAnalyzer(ignoreCase); try { String formatClass = format; if (format == null || format.equals("solr")) { formatClass = SolrSynonymParser.class.getName(); } else if (format.equals("wordnet")) { formatClass = WordnetSynonymParser.class.getName(); } // TODO: expose dedup as a parameter? map = loadSynonyms(loader, formatClass, true, analyzer, true, synonymFiles); // always expand=true in NGramSynonymTokenizer } catch (final ParseException e) { throw new IOException("Error parsing synonyms file:", e); } }
@Override public void inform(ResourceLoader loader) throws IOException { final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory); Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_43, reader) : factory.create(reader); TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_43, tokenizer) : tokenizer; return new TokenStreamComponents(tokenizer, stream); } }; try { String formatClass = format; if (format == null || format.equals("solr")) { formatClass = SolrSynonymParser.class.getName(); } else if (format.equals("wordnet")) { formatClass = WordnetSynonymParser.class.getName(); } // TODO: expose dedup as a parameter? map = loadSynonyms(loader, formatClass, true, analyzer); } catch (ParseException e) { throw new IOException("Error parsing synonyms file:", e); } }