/**
 * Creates the factory and loads the hyphenation tree from the file named by the
 * required {@code hyphenation_patterns_path} setting. The setting value is resolved
 * against the path returned by {@code env.configFile()}.
 *
 * @param indexSettings forwarded to the superclass constructor
 * @param env           environment used to resolve the patterns file; also forwarded to the superclass
 * @param name          forwarded to the superclass constructor
 * @param settings      filter settings; must contain {@code hyphenation_patterns_path}
 * @throws IllegalArgumentException if {@code hyphenation_patterns_path} is not set, or if
 *                                  opening, parsing or closing the patterns file fails
 *                                  (the underlying exception is attached as the cause)
 */
public HyphenationCompoundWordTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, env, name, settings);

    String hyphenationPatternsPath = settings.get("hyphenation_patterns_path", null);
    if (hyphenationPatternsPath == null) {
        throw new IllegalArgumentException("hyphenation_patterns_path is a required setting.");
    }

    Path hyphenationPatternsFile = env.configFile().resolve(hyphenationPatternsPath);
    // try-with-resources guarantees the file handle is released even when parsing fails;
    // previously the stream was handed to the parser and never explicitly closed.
    try (java.io.InputStream patternsStream = Files.newInputStream(hyphenationPatternsFile)) {
        hyphenationTree = HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(patternsStream));
    } catch (Exception e) {
        throw new IllegalArgumentException("Exception while reading hyphenation_patterns_path.", e);
    }
}
/**
 * Injection constructor. Creates the factory and loads the hyphenation tree from the
 * file named by the required {@code hyphenation_patterns_path} setting. The setting
 * value is resolved against the path returned by {@code env.configFile()}.
 *
 * @param index                forwarded to the superclass constructor
 * @param indexSettingsService its {@code getSettings()} result is forwarded to the superclass constructor
 * @param env                  environment used to resolve the patterns file; also forwarded to the superclass
 * @param name                 forwarded to the superclass constructor
 * @param settings             filter settings; must contain {@code hyphenation_patterns_path}
 * @throws IllegalArgumentException if {@code hyphenation_patterns_path} is not set, or if
 *                                  opening, parsing or closing the patterns file fails
 *                                  (the underlying exception is attached as the cause)
 */
@Inject
public HyphenationCompoundWordTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, Environment env,
                                                 @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), env, name, settings);

    String hyphenationPatternsPath = settings.get("hyphenation_patterns_path", null);
    if (hyphenationPatternsPath == null) {
        throw new IllegalArgumentException("hyphenation_patterns_path is a required setting.");
    }

    Path hyphenationPatternsFile = env.configFile().resolve(hyphenationPatternsPath);
    // try-with-resources guarantees the file handle is released even when parsing fails;
    // previously the stream was handed to the parser and never explicitly closed.
    try (java.io.InputStream patternsStream = Files.newInputStream(hyphenationPatternsFile)) {
        hyphenationTree = HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(patternsStream));
    } catch (Exception e) {
        // Keep the original message text, but chain the cause so the stack trace of the
        // real failure is not lost.
        throw new IllegalArgumentException("Exception while reading hyphenation_patterns_path: " + e.getMessage(), e);
    }
}
@Override public void inform(ResourceLoader loader) throws IOException { InputStream stream = null; try { if (dictFile != null) // the dictionary can be empty. dictionary = getWordSet(loader, dictFile, false); // TODO: Broken, because we cannot resolve real system id // ResourceLoader should also supply method like ClassLoader to get resource URL stream = loader.openResource(hypFile); final InputSource is = new InputSource(stream); is.setEncoding(encoding); // if it's null let xml parser decide is.setSystemId(hypFile); hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is); } finally { IOUtils.closeWhileHandlingException(stream); } }
/**
 * Wraps the given token stream in a hyphenation compound-word filter, choosing the
 * implementation based on {@code version}.
 *
 * @param tokenStream the stream to wrap
 * @return a {@code Lucene43HyphenationCompoundWordTokenFilter} when {@code version} is
 *         before {@code Version.LUCENE_4_4_0}, otherwise a
 *         {@code HyphenationCompoundWordTokenFilter}; both are built with the same arguments
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    // Guard clause: versions older than 4.4.0 get the 4.3 implementation.
    if (!version.onOrAfter(Version.LUCENE_4_4_0)) {
        return new Lucene43HyphenationCompoundWordTokenFilter(
                tokenStream, hyphenationTree, wordList,
                minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
    }
    return new HyphenationCompoundWordTokenFilter(
            tokenStream, hyphenationTree, wordList,
            minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
}
@Override public Object create(Random random) { // TODO: make nastier try { InputSource is = new InputSource(TestCompoundWordTokenFilter.class.getResource("da_UTF8.xml").toExternalForm()); HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is); return hyphenator; } catch (Exception ex) { Rethrow.rethrow(ex); return null; // unreachable code } }
/**
 * Wraps the given token stream in a {@code HyphenationCompoundWordTokenFilter} built
 * from this factory's hyphenation tree, word list and size/match fields.
 *
 * @param tokenStream the stream to wrap
 * @return the new filter wrapping {@code tokenStream}
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    final TokenStream compoundFilter = new HyphenationCompoundWordTokenFilter(
            tokenStream,
            hyphenationTree,
            wordList,
            minWordSize,
            minSubwordSize,
            maxSubwordSize,
            onlyLongestMatch);
    return compoundFilter;
}
/**
 * Wraps the given token stream in a {@code HyphenationCompoundWordTokenFilter} built
 * from {@code luceneMatchVersion}, the hyphenator, the dictionary and this factory's
 * size/match fields.
 *
 * @param input the stream to wrap
 * @return the new filter wrapping {@code input}
 */
@Override
public HyphenationCompoundWordTokenFilter create(TokenStream input) {
    final HyphenationCompoundWordTokenFilter compoundFilter = new HyphenationCompoundWordTokenFilter(
            luceneMatchVersion,
            input,
            hyphenator,
            dictionary,
            minWordSize,
            minSubwordSize,
            maxSubwordSize,
            onlyLongestMatch);
    return compoundFilter;
}