/**
 * Builds an {@link Analyzer} from an XML token-filter description.
 *
 * <p>The file is unmarshalled with JAXB into a {@code lucene4ir.utils.TokenFilters}
 * object. Its tokenizer and each listed token filter (with the filter's
 * key/value parameters, if it has any) are registered on a
 * {@link CustomAnalyzer} builder. When the description names a resource
 * directory, the builder is created against that path.
 *
 * @param tokenFilterFile path of the XML file describing the tokenizer and filters
 * @return the assembled analyzer, or {@code null} if an {@link IOException}
 *         was raised while assembling it (the failure is printed to stdout)
 */
public Analyzer createAnalyzer(String tokenFilterFile) {
    try {
        lucene4ir.utils.TokenFilters spec =
                JAXB.unmarshal(new File(tokenFilterFile), lucene4ir.utils.TokenFilters.class);

        // Use the resource-directory-aware builder only when a directory is configured.
        CustomAnalyzer.Builder analyzerBuilder = (spec.getResourceDir() == null)
                ? CustomAnalyzer.builder()
                : CustomAnalyzer.builder(Paths.get(spec.getResourceDir()));

        analyzerBuilder.withTokenizer(spec.getTokenizer());

        for (lucene4ir.utils.TokenFilter tokenFilter : spec.getTokenFilters()) {
            System.out.println("Token filter: " + tokenFilter.getName());

            // Collect the filter's parameters; an empty map means "no parameters".
            Map<String, String> arguments = new HashMap<>();
            for (lucene4ir.utils.Param entry : tokenFilter.getParams()) {
                arguments.put(entry.getKey(), entry.getValue());
            }

            if (arguments.isEmpty()) {
                analyzerBuilder.addTokenFilter(tokenFilter.getName());
            } else {
                analyzerBuilder.addTokenFilter(tokenFilter.getName(), arguments);
            }
        }

        return analyzerBuilder.build();
    } catch (IOException ioe) {
        System.out.println(" caught a " + ioe.getClass() + "\n with message: " + ioe.getMessage());
        return null;
    }
}
/**
 * Indexes the given songs on the executor and publishes the resulting
 * in-memory index under {@code indexType}.
 *
 * <p>The submitted task builds a {@link RAMDirectory}, analyses every song
 * with a standard-tokenizer / lowercase / n-gram (1..25) analyzer, writes one
 * document per song and records each song in {@code songByUuid}. The
 * directory is handed to {@code putIndex} in a {@code finally} block, so it
 * is published even when analyzer creation or indexing failed (failures are
 * logged, not rethrown).
 *
 * @param indexType the key under which the built directory is registered
 * @param songs     the songs to add to the index
 */
@Override
public void index(final IndexType indexType, final Collection<Song> songs) {
    executor.execute(new Runnable() {
        @Override
        public void run() {
            Stopwatch stopwatch = Stopwatch.createStarted();
            Directory directory = new RAMDirectory();
            try {
                LOG.debug("available tokenizers: {}", TokenizerFactory.availableTokenizers());
                LOG.debug("available token filters: {}", TokenFilterFactory.availableTokenFilters());

                // The analyzer is Closeable and nothing else in this method closes it;
                // try-with-resources releases it once the writer is done with it.
                try (Analyzer analyzer = CustomAnalyzer.builder()
                        .withTokenizer("standard")
                        .addTokenFilter("lowercase")
                        .addTokenFilter("ngram", "minGramSize", "1", "maxGramSize", "25")
                        .build()) {
                    IndexWriterConfig config = new IndexWriterConfig(analyzer);
                    try (IndexWriter writer = new IndexWriter(directory, config)) {
                        for (Song song : songs) {
                            Document document = createDocument(song);
                            writer.addDocument(document);
                            songByUuid.put(song.getUUID(), song);
                        }
                    } catch (IOException e) {
                        LOG.warn("couldn't index songs", e);
                    }
                }
            } catch (IOException e1) {
                LOG.warn("couldn't create analyzer", e1);
            } finally {
                // Publish whatever was built, even after a failure, and report timing.
                putIndex(indexType, directory);
                stopwatch.stop();
                LOG.info("indexing songs in background thread took {}", stopwatch.toString());
            }
        }
    });
}
/**
 * Creates an analyzer that splits text with {@link WhitespaceTokenizerFactory}
 * and then applies {@link LowerCaseFilterFactory}.
 *
 * @return the newly built analyzer
 * @throws IOException if the {@link CustomAnalyzer} builder fails while
 *                     configuring or building the analyzer
 */
private Analyzer makeAnalyzer() throws IOException {
    CustomAnalyzer.Builder analyzerBuilder = CustomAnalyzer.builder();
    analyzerBuilder.withTokenizer(WhitespaceTokenizerFactory.class);
    analyzerBuilder.addTokenFilter(LowerCaseFilterFactory.class);
    return analyzerBuilder.build();
}