@Override public void inform(ResourceLoader loader) throws IOException { final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory); Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader) : factory.create(reader); TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer; return new TokenStreamComponents(tokenizer, stream); } }; try { String formatClass = format; if (format == null || format.equals("solr")) { formatClass = SolrSynonymParser.class.getName(); } else if (format.equals("wordnet")) { formatClass = WordnetSynonymParser.class.getName(); } // TODO: expose dedup as a parameter? map = loadSynonyms(loader, formatClass, true, analyzer); } catch (ParseException e) { throw new IOException("Error parsing synonyms file:", e); } }
/**
 * Loads synonyms from the Solr synonyms file format ("format=solr").
 * The "synonyms" argument is required; it may name a single resource, or a
 * comma-separated list of resources if no file of that exact name exists.
 *
 * @param loader   resource loader used to open the synonym file(s)
 * @param dedup    whether the parser should de-duplicate entries
 * @param analyzer analyzer used to tokenize the synonym entries
 * @return the built synonym map
 * @throws IOException    if a resource cannot be read
 * @throws ParseException if a resource is not valid Solr synonym syntax
 */
private SynonymMap loadSolrSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer)
    throws IOException, ParseException {
  final boolean expand = getBoolean("expand", true);
  final String synonymResources = args.get("synonyms");
  if (synonymResources == null) {
    throw new IllegalArgumentException("Missing required argument 'synonyms'.");
  }

  // Strict UTF-8: malformed or unmappable bytes are reported, never silently replaced.
  final CharsetDecoder utf8 = Charset.forName("UTF-8").newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  final SolrSynonymParser parser = new SolrSynonymParser(dedup, expand, analyzer);

  if (new File(synonymResources).exists()) {
    // The value names a single existing file: feed it to the parser directly.
    utf8.reset();
    parser.add(new InputStreamReader(loader.openResource(synonymResources), utf8));
  } else {
    // Otherwise interpret the value as a comma-separated list of resource names.
    for (String resource : splitFileNames(synonymResources)) {
      utf8.reset();
      parser.add(new InputStreamReader(loader.openResource(resource), utf8));
    }
  }
  return parser.build();
}
@Override public void inform(final ResourceLoader loader) throws IOException { if (synonymFiles == null) { map = null; return; } final Analyzer analyzer = getAnalyzer(ignoreCase); try { String formatClass = format; if (format == null || format.equals("solr")) { formatClass = SolrSynonymParser.class.getName(); } else if (format.equals("wordnet")) { formatClass = WordnetSynonymParser.class.getName(); } // TODO: expose dedup as a parameter? map = loadSynonyms(loader, formatClass, true, analyzer, true, synonymFiles); // always expand=true in NGramSynonymTokenizer } catch (final ParseException e) { throw new IOException("Error parsing synonyms file:", e); } }
@Override public void inform(ResourceLoader loader) throws IOException { final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory); Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_43, reader) : factory.create(reader); TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_43, tokenizer) : tokenizer; return new TokenStreamComponents(tokenizer, stream); } }; try { String formatClass = format; if (format == null || format.equals("solr")) { formatClass = SolrSynonymParser.class.getName(); } else if (format.equals("wordnet")) { formatClass = WordnetSynonymParser.class.getName(); } // TODO: expose dedup as a parameter? map = loadSynonyms(loader, formatClass, true, analyzer); } catch (ParseException e) { throw new IOException("Error parsing synonyms file:", e); } }
@Override public void inform( SolrCore core ) { if (initParams != null) { SolrResourceLoader resourceLoader = core.getResourceLoader( ); synonymsFile = (String)initParams.get( "synonyms" ); if (synonymsFile != null) { Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new KeywordTokenizer(); return new TokenStreamComponents(tokenizer, tokenizer ); } }; try { SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer); CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); parser.parse(new InputStreamReader( resourceLoader.openResource(synonymsFile), decoder)); this.synonyms = parser.build( ); } catch ( Exception e ) { // ??? Log.warn( "Parsing Synonyms Got Exception " + e ); } } String stopwordsFile = (String)initParams.get( "stopwords" ); if (stopwordsFile != null) { this.stopwords = new HashSet<String>( ); try { BufferedReader br = new BufferedReader( new InputStreamReader( resourceLoader.openResource( stopwordsFile ))); String line = null; while ((line = br.readLine( )) != null) { stopwords.add( line.toLowerCase( ) ); } br.close( ); } catch ( IOException ioe ) { Log.warn( "Adding Stopwords Got Exception " + ioe ); } } } core.registerFirstSearcherListener( this ); core.registerNewSearcherListener( this ); }
@Override public void inform( SolrCore core ) { if (initParams != null) { SolrResourceLoader resourceLoader = core.getResourceLoader( ); synonymsFile = (String)initParams.get( "synonyms" ); if (synonymsFile != null) { Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer tokenizer = new KeywordTokenizer( reader ); return new TokenStreamComponents(tokenizer, tokenizer ); } }; try { SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer); CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); parser.parse(new InputStreamReader( resourceLoader.openResource(synonymsFile), decoder)); this.synonyms = parser.build( ); } catch ( Exception e ) { // ??? Log.warn( "Parsing Synonyms Got Exception " + e ); } } String stopwordsFile = (String)initParams.get( "stopwords" ); if (stopwordsFile != null) { this.stopwords = new HashSet<String>( ); try { BufferedReader br = new BufferedReader( new InputStreamReader( resourceLoader.openResource( stopwordsFile ))); String line = null; while ((line = br.readLine( )) != null) { stopwords.add( line.toLowerCase( ) ); } br.close( ); } catch ( IOException ioe ) { Log.warn( "Adding Stopwords Got Exception " + ioe ); } } } core.registerFirstSearcherListener( this ); core.registerNewSearcherListener( this ); }