private static StringDistance resolveDistance(String distanceVal) { distanceVal = distanceVal.toLowerCase(Locale.US); if ("internal".equals(distanceVal)) { return DirectSpellChecker.INTERNAL_LEVENSHTEIN; } else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) { return new LuceneLevenshteinDistance(); } else if ("levenstein".equals(distanceVal)) { return new LevensteinDistance(); // TODO Jaro and Winkler are 2 people - so apply same naming logic // as damerau_levenshtein } else if ("jarowinkler".equals(distanceVal)) { return new JaroWinklerDistance(); } else if ("ngram".equals(distanceVal)) { return new NGramDistance(); } else { throw new IllegalArgumentException("Illegal distance option " + distanceVal); } }
/**
 * Validates that the setting registered under {@code key} is valid for the given settings.
 * <p>
 * If no setting is registered for {@code key}, an {@link IllegalArgumentException} is thrown.
 * Its message lists registered keys whose {@link LevensteinDistance} score against
 * {@code key} is above 0.7, best match first. If there are none, it tells the user to check
 * installed plugins and the breaking-changes documentation instead.
 *
 * @param key      the setting key to validate
 * @param settings the settings the value is read from
 * @throws IllegalArgumentException if {@code key} is not a known setting; reading the value
 *                                  of a known setting via {@code setting.get(settings)} may
 *                                  also throw
 */
public final void validate(String key, Settings settings) {
    Setting<?> setting = get(key);
    if (setting == null) {
        LevensteinDistance ld = new LevensteinDistance();
        List<Tuple<Float, String>> scoredKeys = new ArrayList<>();
        for (String k : this.keySettings.keySet()) {
            float distance = ld.getDistance(key, k);
            if (distance > 0.7f) {
                scoredKeys.add(new Tuple<>(distance, k));
            }
        }
        // Highest score first, so the closest candidate leads the suggestion list.
        CollectionUtil.timSort(scoredKeys, (a, b) -> b.v1().compareTo(a.v1()));

        String msgPrefix = "unknown setting";
        SecureSettings secureSettings = settings.getSecureSettings();
        // Reuse the null-checked local instead of fetching the secure settings a second time.
        if (secureSettings != null && secureSettings.getSettingNames().contains(key)) {
            msgPrefix = "unknown secure setting";
        }
        String msg = msgPrefix + " [" + key + "]";

        List<String> keys = scoredKeys.stream().map(Tuple::v2).collect(Collectors.toList());
        if (keys.isEmpty() == false) {
            msg += " did you mean " + (keys.size() == 1 ? "[" + keys.get(0) + "]" : "any of " + keys.toString()) + "?";
        } else {
            msg += " please check that any required plugins are installed, or check the breaking changes documentation for removed "
                + "settings";
        }
        throw new IllegalArgumentException(msg);
    }
    // Known setting: reading the value is what performs the actual validation.
    setting.get(settings);
}
/**
 * Builds the panel: initializes the generated components, installs the cell renderers on
 * the concept and relation lists, and fills the {@code algorithm} selector with the
 * available string-distance algorithms together with their display labels.
 */
public CheckConsistencyPanel() {
    initComponents();

    conceptList.setCellRenderer( crenderer );
    relationList.setCellRenderer( rrenderer );

    LabeledPairRenderer<StringDistance> algorithmRenderer = new LabeledPairRenderer<>();
    algorithm.setRenderer( algorithmRenderer );

    // LinkedHashMap keeps insertion order, so the algorithms are added to the selector
    // in exactly the order they are registered here.
    Map<StringDistance, String> labelsByDistance = new LinkedHashMap<>();
    labelsByDistance.put( new LevensteinDistance(), "Levenstein" );
    labelsByDistance.put( new DoubleMetaphoneDistance(), "Double Metaphone" );
    labelsByDistance.put( new MetaphoneDistance(), "Metaphone" );
    labelsByDistance.put( new SoundexDistance(), "Soundex" );
    algorithmRenderer.cache( labelsByDistance );

    for ( Map.Entry<StringDistance, String> labeled : labelsByDistance.entrySet() ) {
        algorithm.addItem( labeled.getKey() );
    }
}
public static void main(String[] args) throws IOException { if (args.length != 2) { LOGGER.info("Usage: java lia.tools.SpellCheckerTest SpellCheckerIndexDir wordToRespell"); System.exit(1); } String spellCheckDir = args[0]; String wordToRespell = args[1]; Directory dir = FSDirectory.open(new File(spellCheckDir)); if (!IndexReader.indexExists(dir)) { LOGGER.info("\nERROR: No spellchecker index at path \"" + spellCheckDir + "\"; please run CreateSpellCheckerIndex first\n"); System.exit(1); } SpellChecker spell = new SpellChecker(dir); // #A spell.setStringDistance(new LevensteinDistance()); // #B // spell.setStringDistance(new JaroWinklerDistance()); String[] suggestions = spell.suggestSimilar(wordToRespell, 5); // #C LOGGER.info(suggestions.length + " suggestions for '" + wordToRespell + "':"); for (String suggestion : suggestions) LOGGER.info(" " + suggestion); }
protected final String unrecognized( final RestRequest request, final Set<String> invalids, final Set<String> candidates, final String detail) { String message = String.format( Locale.ROOT, "request [%s] contains unrecognized %s%s: ", request.path(), detail, invalids.size() > 1 ? "s" : ""); boolean first = true; for (final String invalid : invalids) { final LevensteinDistance ld = new LevensteinDistance(); final List<Tuple<Float, String>> scoredParams = new ArrayList<>(); for (final String candidate : candidates) { final float distance = ld.getDistance(invalid, candidate); if (distance > 0.5f) { scoredParams.add(new Tuple<>(distance, candidate)); } } CollectionUtil.timSort(scoredParams, (a, b) -> { // sort by distance in reverse order, then parameter name for equal distances int compare = a.v1().compareTo(b.v1()); if (compare != 0) return -compare; else return a.v2().compareTo(b.v2()); }); if (first == false) { message += ", "; } message += "[" + invalid + "]"; final List<String> keys = scoredParams.stream().map(Tuple::v2).collect(Collectors.toList()); if (keys.isEmpty() == false) { message += " -> did you mean " + (keys.size() == 1 ? "[" + keys.get(0) + "]" : "any of " + keys.toString()) + "?"; } first = false; } return message; }
/**
 * Returns all the official plugin names that look similar to pluginId.
 * <p>
 * A name is kept when its {@link LevensteinDistance} score against {@code pluginId} is
 * above 0.7; the result is ordered from highest to lowest score.
 */
private List<String> checkMisspelledPlugin(String pluginId) {
    final LevensteinDistance levenstein = new LevensteinDistance();

    final List<Tuple<Float, String>> candidates = new ArrayList<>();
    for (String official : OFFICIAL_PLUGINS) {
        final float score = levenstein.getDistance(pluginId, official);
        if (score > 0.7f) {
            candidates.add(new Tuple<>(score, official));
        }
    }

    // Best-scoring names first.
    CollectionUtil.timSort(candidates, (left, right) -> right.v1().compareTo(left.v1()));

    final List<String> similarNames = new ArrayList<>();
    for (Tuple<Float, String> candidate : candidates) {
        similarNames.add(candidate.v2());
    }
    return similarNames;
}
/**
 * Loads {@code JaroWinklerDistance}, {@code LevensteinDistance} and {@code StringDistance}
 * by name, in that order, via {@link Class#forName(String)}.
 * <p>
 * NOTE(review): presumably a fail-fast check that the string-distance classes are available
 * on the classpath — confirm against the callers.
 *
 * @throws RuntimeException wrapping the {@link ClassNotFoundException} if any lookup fails
 */
private static void check() {
    try {
        Class.forName(JaroWinklerDistance.class.getName());
        Class.forName(LevensteinDistance.class.getName());
        Class.forName(StringDistance.class.getName());
    } catch (ClassNotFoundException e) {
        // Rethrown unchecked, keeping the original exception as the cause.
        throw new RuntimeException(e);
    }
}
/**
 * Returns the Lucene string-distance implementation for this value.
 *
 * @return a new {@link LevensteinDistance} instance on every call
 */
@Override
public StringDistance toLucene() {
    return new LevensteinDistance();
}
/**
 * Test fixture (annotated {@code @Before}): assigns a new {@code EngineConsistencyChecker}
 * to {@code ecc}, built from the {@code engine} field and a {@code LevensteinDistance}.
 */
@Before
public void setUp() {
    // NOTE(review): the meaning of the `false` argument is not visible from here —
    // check the EngineConsistencyChecker constructor.
    ecc = new EngineConsistencyChecker( engine, false, new LevensteinDistance() );
}
/**
 * Command-line entry point that evaluates a query log with two approaches (string distance
 * and time-based) and writes the combined result.
 * <p>
 * Expects 11 or 12 arguments, in this order: redis host, redis port, time between queries,
 * suggestion threshold, similarity threshold, more-similar flag, distance boost, time-based
 * boost, non-zero-hits-only flag, query log path, output target, and optionally an
 * Elasticsearch index name. The system property named by {@code Settings.ES_OUTPUT_ENABLED}
 * selects the Elasticsearch writer; otherwise the file writer is used. With any other
 * argument count the usage text is printed and the JVM exits with status -1.
 *
 * @param args
 *            command-line arguments as described above
 * @throws IOException
 *             thrown when an I/O error occurs
 */
public static void main(String[] args) throws IOException {
    if (args.length != 11 && args.length != 12) {
        System.err.println("Usage:");
        // args[0] is the Redis host, so the first placeholder is <redis_host>.
        System.err.println("java -jar RelatedQueries.jar <redis_host> <redis_port> <time_between_queries> "
                + "<suggestion_threshold> <similarity_threshold> <more_similar> <distance_boost> "
                + "<timebased_boost> <non_zero_hits_only> <query_log> <output_file>");
        System.err.println("java -jar RelatedQueries.jar <redis_host> <redis_port> <time_between_queries> "
                + "<suggestion_threshold> <similarity_threshold> <more_similar> <distance_boost> "
                + "<timebased_boost> <non_zero_hits_only> <query_log> <es_host> <es_index>");
        System.exit(-1);
    }

    String redisHost = args[0];
    int redisPort = Integer.parseInt(args[1]);
    int timeBetweenQueries = Integer.parseInt(args[2]);
    double suggestionThreshold = Double.parseDouble(args[3]);
    float similarityThreshold = Float.parseFloat(args[4]);
    boolean moreSimilar = Boolean.parseBoolean(args[5]);
    float distanceBoost = Float.parseFloat(args[6]);
    float timebasedBoost = Float.parseFloat(args[7]);
    boolean nonZeroHitsOnly = Boolean.parseBoolean(args[8]);
    String queryLogPath = args[9];
    // Per the usage text this slot carries the ES host when Elasticsearch output is used.
    String outputFile = args[10];
    String esIndex = null;
    if (args.length == 12) {
        esIndex = args[11];
    }

    // Boosts and prefixes are parallel lists: entry i of one belongs to entry i of the other.
    List<Float> boosts = new ArrayList<Float>();
    boosts.add(distanceBoost);
    boosts.add(timebasedBoost);

    List<String> prefixes = new ArrayList<String>();
    prefixes.add(Settings.STRING_DISTANCE_PREFIX);
    prefixes.add(Settings.TIME_CLICK_PREFIX);

    LookBackTrigger lookBackTrigger = null;

    SegmentProcessorQueriesEvaluator stringDistanceApproach = new SegmentProcessorQueriesEvaluator(redisHost,
            redisPort, queryLogPath, new TimeBasedLookBackStrategy(60 * 1000), lookBackTrigger,
            new SimilarityCond(new LevensteinDistance(), similarityThreshold, moreSimilar), suggestionThreshold,
            Settings.STRING_DISTANCE_PREFIX, nonZeroHitsOnly);

    SegmentProcessorQueriesEvaluator timeBasedApproach = new SegmentProcessorQueriesEvaluator(redisHost,
            redisPort, queryLogPath, new TimeBasedLookBackStrategy(timeBetweenQueries * 1000), lookBackTrigger,
            new SimilarityCond(new LevensteinDistance(), 0.1f, true), suggestionThreshold,
            Settings.TIME_CLICK_PREFIX, nonZeroHitsOnly);

    List<SegmentProcessorQueriesEvaluator> approachesList = new ArrayList<SegmentProcessorQueriesEvaluator>();
    approachesList.add(stringDistanceApproach);
    approachesList.add(timeBasedApproach);

    // Boolean.parseBoolean(null) is false, so an unset property means "ES output disabled".
    boolean isEsEnabled = Boolean.parseBoolean(System.getProperty(Settings.ES_OUTPUT_ENABLED));

    OutputWriter writer;
    if (isEsEnabled) {
        writer = new CombinedQueriesEvaluatorESWriter(outputFile, esIndex, boosts, prefixes, 2, nonZeroHitsOnly);
    } else {
        writer = new CombinedQueriesEvaluatorFileWriter(outputFile, boosts, prefixes, 2, nonZeroHitsOnly);
    }

    try {
        writer.write(approachesList);
    } finally {
        // Close the writer even when writing fails, so its resources are not leaked.
        writer.close();
    }
}
/** * Main class. * * @param args * arguments * @throws IOException * throw when error occurs */ public static void main(String[] args) throws IOException { if (args.length < 7 || args.length > 9) { System.err.println("Usage:"); System.err.println("java -jar RelatedQueries.jar <redis_port> <redis_port> <time_between_queries> " + "<suggestion_threshold> <similarity_threshold> <more_similar> <query_log>"); System.err.println("java -jar RelatedQueries.jar <redis_port> <redis_port> <time_between_queries> " + "<suggestion_threshold> <similarity_threshold> <more_similar> <query_log> <output_file>"); System.err.println("java -jar RelatedQueries.jar <redis_port> <redis_port> <time_between_queries> " + "<suggestion_threshold> <similarity_threshold> <more_similar> <query_log> <elasticsearch_address> <index_name>"); System.exit(-1); } String redisHost = args[0]; int redisPort = Integer.parseInt(args[1]); int timeBetweenQueries = Integer.parseInt(args[2]); double suggestionThreshold = Double.parseDouble(args[3]); float similarityThreshold = Float.parseFloat(args[4]); boolean moreSimilar = Boolean.parseBoolean(args[5]); String queryLogPath = args[6]; String outputFile = null; String host = null; String index = null; if (args.length == 8) { outputFile = args[7]; } else if (args.length == 9) { host = args[7]; index = args[8]; } List<SegmentProcessorQueriesEvaluator> sys = new LinkedList<SegmentProcessorQueriesEvaluator>(); EntryAcceptCond conds[] = { new SimilarityCond(new LevensteinDistance(), similarityThreshold, moreSimilar) }; // FIXME: extract to configuration LookBackStrategy strategies[] = { new TimeBasedLookBackStrategy(timeBetweenQueries * 1000) }; // FIXME: extract to configuration LookBackTrigger triggers[] = { null }; // initialize for (int i = 0; i < triggers.length; i++) { for (int j = 0; j < strategies.length; j++) { for (int k = 0; k < conds.length; k++) { sys.add(new SegmentProcessorQueriesEvaluator(redisHost, redisPort, queryLogPath, strategies[j], triggers[i], 
conds[k], suggestionThreshold, Settings.REDIS_PREFIX, false)); } } } OutputWriter outputWriter; if (outputFile != null) { outputWriter = new SingleFileOutputWriter(outputFile, Settings.REDIS_PREFIX); } else if (host != null && index != null) { outputWriter = new ElasticsearchHTTPOutputWriter(host, index, Settings.REDIS_PREFIX); } else { outputWriter = new PerEvaluationFileOutputWriter(Settings.REDIS_PREFIX); } outputWriter.write(sys); outputWriter.close(); }