private static StringDistance resolveDistance(String distanceVal) { distanceVal = distanceVal.toLowerCase(Locale.US); if ("internal".equals(distanceVal)) { return DirectSpellChecker.INTERNAL_LEVENSHTEIN; } else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) { return new LuceneLevenshteinDistance(); } else if ("levenstein".equals(distanceVal)) { return new LevensteinDistance(); // TODO Jaro and Winkler are 2 people - so apply same naming logic // as damerau_levenshtein } else if ("jarowinkler".equals(distanceVal)) { return new JaroWinklerDistance(); } else if ("ngram".equals(distanceVal)) { return new NGramDistance(); } else { throw new IllegalArgumentException("Illegal distance option " + distanceVal); } }
/**
 * Brute-force fallback for needles that the lucene approach could not
 * resolve. The label of every needle is scored against the label of every
 * candidate, so this is significantly slower than the lucene path.
 *
 * @param needles the URIs that produced errors in lucene
 * @param possibles every candidate URI mapped to its label
 * @param hits multimap that receives one hit per qualifying needle/candidate pair
 * @param levy the string distance object used to score a pair of labels
 * @param minDistance the lowest score (inclusive) at which a candidate is recorded
 */
private void fallbackResolve( Collection<URI> needles, Map<URI, String> possibles,
		MultiMap<URI, Hit> hits, StringDistance levy, float minDistance ) {
	log.debug( "falling back to resolve " + needles.size() + " items" );

	for ( URI needle : needles ) {
		String needleText = labels.get( needle );

		for ( Map.Entry<URI, String> candidate : possibles.entrySet() ) {
			URI candidateUri = candidate.getKey();
			String candidateText = candidate.getValue();
			float score = levy.getDistance( needleText, candidateText );

			// Skip weak scores first; only then rule out the needle matching itself.
			boolean scoreTooLow = !( score >= minDistance );
			if ( scoreTooLow || candidateUri.equals( needle ) ) {
				continue;
			}

			hits.add( needle,
					new Hit( candidateUri, candidateText, uriToTypeLkp.get( candidateUri ), score ) );
		}
	}
}
/**
 * Creates the panel: initialises its components, installs the renderers on the
 * concept and relation lists, and registers the selectable string distance
 * implementations together with their display labels.
 */
public CheckConsistencyPanel() {
	initComponents();
	conceptList.setCellRenderer( crenderer );
	relationList.setCellRenderer( rrenderer );

	LabeledPairRenderer<StringDistance> algorithmRenderer = new LabeledPairRenderer<>();
	algorithm.setRenderer( algorithmRenderer );

	// LinkedHashMap: the items are added below in this insertion order
	Map<StringDistance, String> labelsByDistance = new LinkedHashMap<>();
	labelsByDistance.put( new LevensteinDistance(), "Levenstein" );
	labelsByDistance.put( new DoubleMetaphoneDistance(), "Double Metaphone" );
	labelsByDistance.put( new MetaphoneDistance(), "Metaphone" );
	labelsByDistance.put( new SoundexDistance(), "Soundex" );
	algorithmRenderer.cache( labelsByDistance );

	for ( StringDistance choice : labelsByDistance.keySet() ) {
		algorithm.addItem( choice );
	}
}
/**
 * Returns the string distance held by this instance, deferring to the
 * superclass implementation when none has been set.
 */
@Override
protected StringDistance getStringDistance() {
    return stringDistance != null ? stringDistance : super.getStringDistance();
}
/**
 * Configures an index-based spell checker with {@code JaroWinklerDistance} as
 * its string distance and checks that the built Lucene {@code SpellChecker}
 * reports an instance of that class.
 */
@Test
public void testAlternateDistance() throws Exception {
    TestSpellChecker checker = new TestSpellChecker();

    NamedList config = new NamedList();
    config.add("classname", IndexBasedSpellChecker.class.getName());
    File indexDir = createTempDir();
    config.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
    config.add(AbstractLuceneSpellChecker.FIELD, "title");
    config.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, config);
    config.add(AbstractLuceneSpellChecker.STRING_DISTANCE, JaroWinklerDistance.class.getName());

    SolrCore core = h.getCore();
    String dictName = checker.init(config, core);
    assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
        dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME));

    RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
    SolrIndexSearcher searcher = searcherRef.get();
    try {
        checker.build(core, searcher);

        SpellChecker luceneChecker = checker.getSpellChecker();
        assertTrue("sc is null and it shouldn't be", luceneChecker != null);

        StringDistance distance = luceneChecker.getStringDistance();
        assertTrue("sd is null and it shouldn't be", distance != null);
        assertTrue("sd is not an instance of " + JaroWinklerDistance.class.getName(),
            distance instanceof JaroWinklerDistance);
    } finally {
        // always release the searcher reference obtained above
        searcherRef.decref();
    }
}
/**
 * Configures an index-based spell checker with {@code JaroWinklerDistance} as
 * its string distance and checks that the built Lucene {@code SpellChecker}
 * reports an instance of that class.
 */
@Test
public void testAlternateDistance() throws Exception {
    TestSpellChecker checker = new TestSpellChecker();
    NamedList spellchecker = new NamedList();
    spellchecker.add("classname", IndexBasedSpellChecker.class.getName());

    File indexDir = new File(TEMP_DIR, "spellingIdx" + System.currentTimeMillis());
    // mkdirs() returns false both on failure and when the directory already exists,
    // so only fail when no directory is present afterwards.
    assertTrue("could not create index directory " + indexDir.getAbsolutePath(),
        indexDir.mkdirs() || indexDir.isDirectory());

    spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
    spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
    spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
    spellchecker.add(AbstractLuceneSpellChecker.STRING_DISTANCE, JaroWinklerDistance.class.getName());

    SolrCore core = h.getCore();
    String dictName = checker.init(spellchecker, core);
    assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
        dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME));

    RefCounted<SolrIndexSearcher> holder = core.getSearcher();
    SolrIndexSearcher searcher = holder.get();
    try {
        checker.build(core, searcher);
        SpellChecker sc = checker.getSpellChecker();
        assertTrue("sc is null and it shouldn't be", sc != null);
        StringDistance sd = sc.getStringDistance();
        assertTrue("sd is null and it shouldn't be", sd != null);
        assertTrue("sd is not an instance of " + JaroWinklerDistance.class.getName(),
            sd instanceof JaroWinklerDistance);
    } finally {
        // always release the searcher reference obtained above
        holder.decref();
    }
}
/**
 * Loads the string distance classes by name, in a fixed order, and fails with a
 * {@link RuntimeException} if any of them cannot be found.
 */
private static void check() {
    requireLoadable(JaroWinklerDistance.class);
    requireLoadable(LevensteinDistance.class);
    requireLoadable(StringDistance.class);
}

/** Looks the given class up by name, rethrowing a lookup failure unchecked. */
private static void requireLoadable(Class<?> type) {
    try {
        Class.forName(type.getName());
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Returns the string distance currently held by this object.
 *
 * @return the value of the {@code stringDistance} field
 */
public StringDistance stringDistance() {
    return this.stringDistance;
}
/**
 * Stores the string distance to be held by this object.
 *
 * @param distance the new value of the {@code stringDistance} field
 */
public void stringDistance(StringDistance distance) {
    this.stringDistance = distance;
}
/**
 * Returns the shared {@link DirectSpellChecker#INTERNAL_LEVENSHTEIN} constant.
 */
@Override
public StringDistance toLucene() {
    return DirectSpellChecker.INTERNAL_LEVENSHTEIN;
}
/**
 * Returns a new {@link LuceneLevenshteinDistance} on every call.
 */
@Override
public StringDistance toLucene() {
    return new LuceneLevenshteinDistance();
}
/**
 * Returns a new {@link LevensteinDistance} on every call.
 */
@Override
public StringDistance toLucene() {
    return new LevensteinDistance();
}
/**
 * Returns a new {@link JaroWinklerDistance} on every call.
 */
@Override
public StringDistance toLucene() {
    return new JaroWinklerDistance();
}
/**
 * Returns a new {@link NGramDistance} on every call.
 */
@Override
public StringDistance toLucene() {
    return new NGramDistance();
}
/**
 * Creates a checker that stores the given engine, flag and string distance.
 *
 * @param eng the engine stored in {@code engine}
 * @param across the flag stored in {@code across}
 * @param dist the string distance stored in {@code strdist}
 */
public EngineConsistencyChecker( IEngine eng, boolean across, StringDistance dist ) {
	this.strdist = dist;
	this.across = across;
	this.engine = eng;
}
/**
 * Returns the item of the {@code algorithm} component at its currently
 * selected index.
 *
 * @return whatever {@code algorithm.getItemAt} yields for the selected index
 */
public StringDistance getDistanceAlg() {
	int selectedIndex = algorithm.getSelectedIndex();
	return algorithm.getItemAt( selectedIndex );
}
/**
 * Passes both arguments to the superclass constructor and additionally keeps
 * a reference to the spell index directory.
 *
 * @param spellIndex the directory handed to the superclass and stored in {@code _spellIndex}
 * @param sd the string distance handed to the superclass
 * @throws IOException declared by this constructor; presumably raised by the superclass constructor
 */
private MemoryAwareSpellChecker(Directory spellIndex, StringDistance sd) throws IOException {
    super(spellIndex, sd);
    this._spellIndex = spellIndex;
}
/**
 * Get the distance implementation used by this spellchecker.
 *
 * <p>This base implementation has no distance to offer and always throws. It
 * does not return {@code null}. Subclasses that use a distance implementation
 * should override this method.
 *
 * @return never returns normally from this implementation
 * @throws UnsupportedOperationException always, in this implementation
 */
protected StringDistance getStringDistance() {
    throw new UnsupportedOperationException(
        getClass().getName() + " does not provide a StringDistance");
}
/**
 * Returns the distance reported by the wrapped {@code checker}.
 */
@Override
public StringDistance getStringDistance() {
    return this.checker.getDistance();
}
/**
 * Returns the string distance held in the {@code sd} field.
 */
@Override
public StringDistance getStringDistance() {
    return this.sd;
}
/**
 * Creates a function over two value sources and a string distance measure.
 *
 * @param str1 the value source stored in {@code str1}
 * @param str2 the value source stored in {@code str2}
 * @param measure the string distance stored in {@code dist}
 */
public StringDistanceFunction(ValueSource str1, ValueSource str2, StringDistance measure) {
    this.dist = measure;
    this.str1 = str1;
    this.str2 = str2;
}