/**
 * Parses stemmer-override rules and registers each resulting pair with the given builder.
 *
 * <p>Every rule is split with {@code Strings.splitSmart(rule, mappingSep, false)} and must yield
 * exactly two parts. Both parts are trimmed, and neither may be empty after trimming.
 *
 * @param rules      the rule strings to parse, one mapping per entry
 * @param builder    receives one {@code add(key, override)} call per valid rule
 * @param mappingSep the separator between the key and its override inside a rule
 * @throws IllegalArgumentException if a rule does not split into exactly two parts, or if the
 *         trimmed key or the trimmed override is empty
 */
static void parseRules(List<String> rules, StemmerOverrideFilter.Builder builder, String mappingSep) {
    for (String rule : rules) {
        List<String> mapping = Strings.splitSmart(rule, mappingSep, false);
        if (mapping.size() != 2) {
            // IllegalArgumentException is still a RuntimeException, so existing callers keep working.
            throw new IllegalArgumentException("Invalid Keyword override Rule:" + rule);
        }

        String key = mapping.get(0).trim();
        String override = mapping.get(1).trim();
        if (key.isEmpty() || override.isEmpty()) {
            throw new IllegalArgumentException("Invalid Keyword override Rule:" + rule);
        }

        builder.add(key, override);
    }
}
@Override public Object create(Random random) { int num = random.nextInt(10); StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random.nextBoolean()); for (int i = 0; i < num; i++) { String input = ""; do { input = TestUtil.randomRealisticUnicodeString(random); } while(input.isEmpty()); String out = ""; TestUtil.randomSimpleString(random); do { out = TestUtil.randomRealisticUnicodeString(random); } while(out.isEmpty()); builder.add(input, out); } try { return builder.build(); } catch (Exception ex) { Rethrow.rethrow(ex); return null; // unreachable code } }
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) { this.matchVersion = matchVersion; this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords)); this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable)); if (stemOverrideDict.isEmpty() || !matchVersion.onOrAfter(Version.LUCENE_31)) { this.stemdict = null; this.origStemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict)); } else { this.origStemdict = null; // we don't need to ignore case here since we lowercase in this analyzer anyway StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false); CharArrayMap<String>.EntryIterator iter = stemOverrideDict.entrySet().iterator(); CharsRef spare = new CharsRef(); while (iter.hasNext()) { char[] nextKey = iter.nextKey(); spare.copyChars(nextKey, 0, nextKey.length); builder.add(spare, iter.currentValue()); } try { this.stemdict = builder.build(); } catch (IOException ex) { throw new RuntimeException("can not build stem dict", ex); } } }
@Override public Object create(Random random) { int num = random.nextInt(10); StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random.nextBoolean()); for (int i = 0; i < num; i++) { String input = ""; do { input = _TestUtil.randomRealisticUnicodeString(random); } while(input.isEmpty()); String out = ""; _TestUtil.randomSimpleString(random); do { out = _TestUtil.randomRealisticUnicodeString(random); } while(out.isEmpty()); builder.add(input, out); } try { return builder.build(); } catch (Exception ex) { Rethrow.rethrow(ex); return null; // unreachable code } }
/**
 * Creates the factory and compiles its override map from the configured rules.
 *
 * <p>The rules are obtained through {@code Analysis.getWordList(env, settings, "rules")} and
 * parsed with {@code "=>"} as the key/override separator.
 *
 * @param indexSettings passed through to the superclass constructor
 * @param env           environment handed to {@code Analysis.getWordList}
 * @param name          passed through to the superclass constructor
 * @param settings      filter settings from which the rules are read
 * @throws IllegalArgumentException if no rules are configured
 * @throws IOException declared by this constructor; NOTE(review): presumably raised while
 *         loading the rules or building the map - confirm against the callees
 */
public StemmerOverrideTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) throws IOException {
    super(indexSettings, name, settings);

    final List<String> ruleLines = Analysis.getWordList(env, settings, "rules");
    if (ruleLines == null) {
        throw new IllegalArgumentException("stemmer override filter requires either `rules` or `rules_path` to be configured");
    }

    final StemmerOverrideFilter.Builder mapBuilder = new StemmerOverrideFilter.Builder(false);
    parseRules(ruleLines, mapBuilder, "=>");
    overrideMap = mapBuilder.build();
}
/** * @deprecated Use {@link #DutchAnalyzer(CharArraySet,CharArraySet,CharArrayMap)} */ @Deprecated public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) { setVersion(matchVersion); this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords)); this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable)); if (stemOverrideDict.isEmpty() || !matchVersion.onOrAfter(Version.LUCENE_3_1)) { this.stemdict = null; this.origStemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict)); } else { this.origStemdict = null; // we don't need to ignore case here since we lowercase in this analyzer anyway StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false); CharArrayMap<String>.EntryIterator iter = stemOverrideDict.entrySet().iterator(); CharsRefBuilder spare = new CharsRefBuilder(); while (iter.hasNext()) { char[] nextKey = iter.nextKey(); spare.copyChars(nextKey, 0, nextKey.length); builder.add(spare.get(), iter.currentValue()); } try { this.stemdict = builder.build(); } catch (IOException ex) { throw new RuntimeException("can not build stem dict", ex); } } }
/**
 * Creates the factory and compiles its override map from the configured rules.
 *
 * <p>The rules are obtained through {@code Analysis.getWordList(env, settings, "rules")} and
 * parsed with {@code "=>"} as the key/override separator.
 *
 * @param index                passed through to the superclass constructor
 * @param indexSettingsService supplies the index settings passed to the superclass constructor
 * @param env                  environment handed to {@code Analysis.getWordList}
 * @param name                 passed through to the superclass constructor
 * @param settings             filter settings from which the rules are read
 * @throws IllegalArgumentException if no rules are configured
 * @throws IOException declared by this constructor; NOTE(review): presumably raised while
 *         loading the rules or building the map - confirm against the callees
 */
@Inject
public StemmerOverrideTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) throws IOException {
    super(index, indexSettingsService.getSettings(), name, settings);

    final List<String> ruleLines = Analysis.getWordList(env, settings, "rules");
    if (ruleLines == null) {
        throw new IllegalArgumentException("stemmer override filter requires either `rules` or `rules_path` to be configured");
    }

    final StemmerOverrideFilter.Builder mapBuilder = new StemmerOverrideFilter.Builder(false);
    parseRules(ruleLines, mapBuilder, "=>");
    overrideMap = mapBuilder.build();
}
/**
 * Wraps the given stream in a {@code StemmerOverrideFilter} that uses this factory's
 * {@code overrideMap}.
 *
 * @param tokenStream the stream to wrap
 * @return a new filter over {@code tokenStream}
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    final TokenStream overridden = new StemmerOverrideFilter(tokenStream, overrideMap);
    return overridden;
}
/**
 * Applies the stemmer override dictionary to the given stream when one is present.
 *
 * @param input the stream to filter
 * @return a {@code StemmerOverrideFilter} over {@code input} when {@code dictionary} is non-null,
 *         otherwise {@code input} itself, unwrapped
 */
@Override
public TokenStream create(TokenStream input) {
  if (dictionary != null) {
    return new StemmerOverrideFilter(input, dictionary);
  }
  // No dictionary available: pass the stream through untouched.
  return input;
}