/** * Create a new FuzzyQuery that will match terms with an edit distance * of at most <code>maxEdits</code> to <code>term</code>. * If a <code>prefixLength</code> > 0 is specified, a common prefix * of that length is also required. * * @param term the term to search for * @param maxEdits must be >= 0 and <= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}. * @param prefixLength length of common (non-fuzzy) prefix * @param maxExpansions the maximum number of terms to match. If this number is * greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten, * then the maxClauseCount will be used instead. * @param transpositions true if transpositions should be treated as a primitive * edit operation. If this is false, comparisons will implement the classic * Levenshtein algorithm. */ public FuzzyQuery(Term term, int maxEdits, int prefixLength, int maxExpansions, boolean transpositions) { super(term.field()); if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); } if (prefixLength < 0) { throw new IllegalArgumentException("prefixLength cannot be negative."); } if (maxExpansions <= 0) { throw new IllegalArgumentException("maxExpansions must be positive."); } this.term = term; this.maxEdits = maxEdits; this.prefixLength = prefixLength; this.transpositions = transpositions; this.maxExpansions = maxExpansions; setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions)); }
/** initialize levenshtein DFAs up to maxDistance, if possible */ private List<CompiledAutomaton> initAutomata(int maxDistance) { final List<CompiledAutomaton> runAutomata = dfaAtt.automata(); //System.out.println("cached automata size: " + runAutomata.size()); if (runAutomata.size() <= maxDistance && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { LevenshteinAutomata builder = new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions); String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength); for (int i = runAutomata.size(); i <= maxDistance; i++) { Automaton a = builder.toAutomaton(i, prefix); //System.out.println("compute automaton n=" + i); runAutomata.add(new CompiledAutomaton(a, true, false)); } } return runAutomata; }
/**
 * Tests fuzzy query parsing: a well-formed {@code ~N} suffix yields a
 * {@link FuzzyQuery}, a missing or malformed fuzziness is expected to parse as a
 * plain {@link TermQuery}, and a fuzziness above
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} is expected to be capped
 * at that maximum.
 */
public void testFuzzy() throws Exception {
  Query regular = new TermQuery(new Term("field", "foobar"));
  Query expected = new FuzzyQuery(new Term("field", "foobar"), 2);

  assertEquals(expected, parse("foobar~2"));
  assertEquals(regular, parse("foobar~"));
  assertEquals(regular, parse("foobar~a"));
  assertEquals(regular, parse("foobar~1a"));

  BooleanQuery bool = new BooleanQuery();
  FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", "foo"), LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  bool.add(fuzzy, Occur.MUST);
  bool.add(new TermQuery(new Term("field", "bar")), Occur.MUST);

  // Parenthesise the sum: without it, string concatenation appends the digit "1"
  // to the maximum (producing e.g. "foo~21") instead of requesting max + 1 edits.
  int overLimit = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + 1;
  assertEquals(bool, parse("foo~" + overLimit + " bar"));
}
/**
 * Create a new FuzzyQuery that will match terms with an edit distance
 * of at most {@code maxEdits} to {@code term}.
 * If a {@code prefixLength} greater than 0 is specified, a common prefix
 * of that length is also required.
 *
 * @param term the term to search for
 * @param maxEdits must be at least 0 and at most
 *        {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * @param prefixLength length of common (non-fuzzy) prefix; must not be negative
 * @param maxExpansions the maximum number of terms to match; must be positive. If this
 *        number is greater than {@link BooleanQuery#getMaxClauseCount} when the query is
 *        rewritten, then the maxClauseCount will be used instead.
 * @param transpositions true if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 * @throws IllegalArgumentException if {@code maxEdits} is out of range, {@code prefixLength}
 *         is negative, or {@code maxExpansions} is not positive
 */
public FuzzyQuery(Term term, int maxEdits, int prefixLength, int maxExpansions, boolean transpositions) {
  super(term.field());

  if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new IllegalArgumentException(
        "maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  if (prefixLength < 0) {
    throw new IllegalArgumentException("prefixLength cannot be negative.");
  }
  // Zero is rejected as well: a top-terms rewrite that may keep no terms at all
  // cannot produce a meaningful query, so fail fast here instead of at rewrite time.
  if (maxExpansions <= 0) {
    throw new IllegalArgumentException("maxExpansions must be positive.");
  }

  this.term = term;
  this.maxEdits = maxEdits;
  this.prefixLength = prefixLength;
  this.transpositions = transpositions;
  this.maxExpansions = maxExpansions;
  setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions));
}
/** initialize levenshtein DFAs up to maxDistance, if possible */ private List<CompiledAutomaton> initAutomata(int maxDistance) { final List<CompiledAutomaton> runAutomata = dfaAtt.automata(); //System.out.println("cached automata size: " + runAutomata.size()); if (runAutomata.size() <= maxDistance && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { LevenshteinAutomata builder = new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions); for (int i = runAutomata.size(); i <= maxDistance; i++) { Automaton a = builder.toAutomaton(i); //System.out.println("compute automaton n=" + i); // constant prefix if (realPrefixLength > 0) { Automaton prefix = BasicAutomata.makeString( UnicodeUtil.newString(termText, 0, realPrefixLength)); a = BasicOperations.concatenate(prefix, a); } runAutomata.add(new CompiledAutomaton(a, true, false)); } } return runAutomata; }
/**
 * Materialises this builder into a {@link PhraseSuggestionContext.DirectCandidateGenerator}.
 * Optional settings are copied only when they were set on the builder; the pre- and
 * post-filter names are resolved to analyzers through {@code mapperService}.
 *
 * @param mapperService source of the index analyzers used to resolve filter names
 * @return the configured candidate generator
 * @throws IllegalArgumentException if a named filter analyzer cannot be found, or if the
 *         resulting max edits value is below 1 or above
 *         {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 * @throws IOException declared by the overridden method
 */
@Override
public PhraseSuggestionContext.DirectCandidateGenerator build(MapperService mapperService) throws IOException {
  final PhraseSuggestionContext.DirectCandidateGenerator candidateGenerator =
      new PhraseSuggestionContext.DirectCandidateGenerator();
  candidateGenerator.setField(this.field);
  transferIfNotNull(this.size, candidateGenerator::size);

  // Analyzer names are resolved eagerly so that a typo fails here.
  if (this.preFilter != null) {
    candidateGenerator.preFilter(mapperService.getIndexAnalyzers().get(this.preFilter));
    if (candidateGenerator.preFilter() == null) {
      throw new IllegalArgumentException("Analyzer [" + this.preFilter + "] doesn't exists");
    }
  }
  if (this.postFilter != null) {
    candidateGenerator.postFilter(mapperService.getIndexAnalyzers().get(this.postFilter));
    if (candidateGenerator.postFilter() == null) {
      throw new IllegalArgumentException("Analyzer [" + this.postFilter + "] doesn't exists");
    }
  }

  transferIfNotNull(this.accuracy, candidateGenerator::accuracy);
  if (this.suggestMode != null) {
    candidateGenerator.suggestMode(resolveSuggestMode(this.suggestMode));
  }
  if (this.sort != null) {
    candidateGenerator.sort(SortBy.resolve(this.sort));
  }
  if (this.stringDistance != null) {
    candidateGenerator.stringDistance(resolveDistance(this.stringDistance));
  }

  // The range check runs on the generator's value, so it also covers the case
  // where max edits was not set on this builder.
  transferIfNotNull(this.maxEdits, candidateGenerator::maxEdits);
  final boolean editsInRange = candidateGenerator.maxEdits() >= 1
      && candidateGenerator.maxEdits() <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
  if (!editsInRange) {
    throw new IllegalArgumentException("Illegal max_edits value " + candidateGenerator.maxEdits());
  }

  transferIfNotNull(this.maxInspections, candidateGenerator::maxInspections);
  transferIfNotNull(this.maxTermFreq, candidateGenerator::maxTermFreq);
  transferIfNotNull(this.prefixLength, candidateGenerator::prefixLength);
  transferIfNotNull(this.minWordLength, candidateGenerator::minWordLength);
  transferIfNotNull(this.minDocFreq, candidateGenerator::minDocFreq);
  return candidateGenerator;
}
/** * Helper function to convert from deprecated "minimumSimilarity" fractions * to raw edit distances. * * @param minimumSimilarity scaled similarity * @param termLen length (in unicode codepoints) of the term. * @return equivalent number of maxEdits * @deprecated pass integer edit distances instead. */ @Deprecated public static int floatToEdits(float minimumSimilarity, int termLen) { if (minimumSimilarity >= 1f) { return (int) Math.min(minimumSimilarity, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); } else if (minimumSimilarity == 0.0f) { return 0; // 0 means exact, not infinite # of edits! } else { return Math.min((int) ((1D-minimumSimilarity) * termLen), LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); } }
/**
 * Applies one direct-spellchecker option, identified by {@code fieldName}, to
 * {@code suggestion}, reading its value from the parser's current token.
 *
 * @param parser parser positioned on the option's value
 * @param fieldName name of the option being parsed
 * @param suggestion settings object that receives the parsed value
 * @param parseFieldMatcher matcher used to compare {@code fieldName} against known fields
 * @return {@code true} if {@code fieldName} was recognised and applied, {@code false} otherwise
 * @throws IllegalArgumentException if a max edits value is below 1 or above
 *         {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 * @throws IOException if reading the value from the parser fails
 */
public static boolean parseDirectSpellcheckerSettings(XContentParser parser, String fieldName,
    DirectSpellcheckerSettings suggestion, ParseFieldMatcher parseFieldMatcher) throws IOException {
  // The checks run in a fixed order; each recognised option returns immediately.
  if ("accuracy".equals(fieldName)) {
    suggestion.accuracy(parser.floatValue());
    return true;
  }
  if (parseFieldMatcher.match(fieldName, Fields.SUGGEST_MODE)) {
    suggestion.suggestMode(SuggestUtils.resolveSuggestMode(parser.text()));
    return true;
  }
  if ("sort".equals(fieldName)) {
    suggestion.sort(SuggestUtils.resolveSort(parser.text()));
    return true;
  }
  if (parseFieldMatcher.match(fieldName, Fields.STRING_DISTANCE)) {
    suggestion.stringDistance(SuggestUtils.resolveDistance(parser.text()));
    return true;
  }
  if (parseFieldMatcher.match(fieldName, Fields.MAX_EDITS)) {
    suggestion.maxEdits(parser.intValue());
    final boolean editsInRange = suggestion.maxEdits() >= 1
        && suggestion.maxEdits() <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
    if (!editsInRange) {
      throw new IllegalArgumentException("Illegal max_edits value " + suggestion.maxEdits());
    }
    return true;
  }
  if (parseFieldMatcher.match(fieldName, Fields.MAX_INSPECTIONS)) {
    suggestion.maxInspections(parser.intValue());
    return true;
  }
  if (parseFieldMatcher.match(fieldName, Fields.MAX_TERM_FREQ)) {
    suggestion.maxTermFreq(parser.floatValue());
    return true;
  }
  if (parseFieldMatcher.match(fieldName, Fields.PREFIX_LENGTH)) {
    suggestion.prefixLength(parser.intValue());
    return true;
  }
  if (parseFieldMatcher.match(fieldName, Fields.MIN_WORD_LENGTH)) {
    // The min word length option is stored through the minQueryLength setter.
    suggestion.minQueryLength(parser.intValue());
    return true;
  }
  if (parseFieldMatcher.match(fieldName, Fields.MIN_DOC_FREQ)) {
    suggestion.minDocFreq(parser.floatValue());
    return true;
  }
  return false;
}
/** * Helper function to convert from deprecated "minimumSimilarity" fractions * to raw edit distances. * * @param minimumSimilarity * scaled similarity * @param termLen * length (in unicode codepoints) of the term. * @return equivalent number of maxEdits * @deprecated pass integer edit distances instead. */ @Deprecated public static int floatToEdits(final float minimumSimilarity, final int termLen) { if (minimumSimilarity >= 1f) { return (int) Math.min(minimumSimilarity, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); } else if (minimumSimilarity == 0.0f) { return 0; // 0 means exact, not infinite # of edits! } else { return Math.min((int) ((1D - minimumSimilarity) * termLen), LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); } }
/** * Create a new FuzzyQuery that will match terms with an edit distance of at * most <code>maxEdits</code> to <code>term</code>. If a * <code>prefixLength</code> > 0 is specified, a common prefix of that * length is also required. * * @param term * the term to search for * @param maxEdits * must be >= 0 and <= * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}. * @param prefixLength * length of common (non-fuzzy) prefix * @param maxExpansions * the maximum number of terms to match. If this number is * greater than {@link BooleanQuery#getMaxClauseCount} when the * query is rewritten, then the maxClauseCount will be used * instead. * @param transpositions * true if transpositions should be treated as a primitive edit * operation. If this is false, comparisons will implement the * classic Levenshtein algorithm. */ public LearnToRankFuzzyQuery(final Term term, final int maxEdits, final int prefixLength, final int maxExpansions, final boolean transpositions, final Similarity sim) { super(term.field()); if ((maxEdits < 0) || (maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)) { throw new IllegalArgumentException( "maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); } if (prefixLength < 0) { throw new IllegalArgumentException( "prefixLength cannot be negative."); } if (maxExpansions < 0) { throw new IllegalArgumentException( "maxExpansions cannot be negative."); } this.term = term; this.maxEdits = maxEdits; this.prefixLength = prefixLength; this.transpositions = transpositions; this.maxExpansions = maxExpansions; setRewriteMethod(new LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite( maxExpansions, sim)); // setRewriteMethod(new // LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite( // maxExpansions)); }
/** * Create a new FuzzyQuery that will match terms with an edit distance of at * most <code>maxEdits</code> to <code>term</code>. If a * <code>prefixLength</code> > 0 is specified, a common prefix of that * length is also required. * * @param term * the term to search for * @param maxEdits * must be >= 0 and <= * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}. * @param prefixLength * length of common (non-fuzzy) prefix * @param maxExpansions * the maximum number of terms to match. If this number is * greater than {@link BooleanQuery#getMaxClauseCount} when the * query is rewritten, then the maxClauseCount will be used * instead. * @param transpositions * true if transpositions should be treated as a primitive edit * operation. If this is false, comparisons will implement the * classic Levenshtein algorithm. */ public LearnToRankFuzzyQuery(Term term, int maxEdits, int prefixLength, int maxExpansions, boolean transpositions, Similarity sim) { super(term.field()); if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { throw new IllegalArgumentException( "maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); } if (prefixLength < 0) { throw new IllegalArgumentException( "prefixLength cannot be negative."); } if (maxExpansions < 0) { throw new IllegalArgumentException( "maxExpansions cannot be negative."); } this.term = term; this.maxEdits = maxEdits; this.prefixLength = prefixLength; this.transpositions = transpositions; this.maxExpansions = maxExpansions; LearnToRankFuzzyQuery.sim = sim; setRewriteMethod(new LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite(maxExpansions)); // setRewriteMethod(new LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite( // maxExpansions)); }
/** * Helper function to convert from deprecated "minimumSimilarity" fractions * to raw edit distances. * * @param minimumSimilarity * scaled similarity * @param termLen * length (in unicode codepoints) of the term. * @return equivalent number of maxEdits * @deprecated pass integer edit distances instead. */ @Deprecated public static int floatToEdits(float minimumSimilarity, int termLen) { if (minimumSimilarity >= 1f) { return (int) Math.min(minimumSimilarity, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); } else if (minimumSimilarity == 0.0f) { return 0; // 0 means exact, not infinite # of edits! } else { return Math.min((int) ((1D - minimumSimilarity) * termLen), LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); } }
/**
 * Creates an {@link XFuzzySuggester} instance.
 *
 * @param indexAnalyzer Analyzer that will be used for
 *        analyzing suggestions while building the index.
 * @param queryPrefix automaton forwarded unchanged to the superclass constructor
 * @param queryAnalyzer Analyzer that will be used for
 *        analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *        surface forms to keep for a single analyzed form.
 *        When there are too many surface forms we discard the
 *        lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *        to expand from the analyzed form.  Set this to -1 for
 *        no limit.
 * @param maxEdits must be at least 0 and at most
 *        {@link org.apache.lucene.util.automaton.LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * @param transpositions <code>true</code> if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 * @param nonFuzzyPrefix length of common (non-fuzzy) prefix; must not be negative
 *        (see default {@link #DEFAULT_NON_FUZZY_PREFIX})
 * @param minFuzzyLength minimum length of lookup key before any edits are allowed; must not
 *        be negative (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
 * @param unicodeAware stored on this suggester; presumably selects code-point rather than
 *        byte-level edits (confirm against the lookup code)
 * @param fst forwarded unchanged to the superclass constructor
 * @param hasPayloads forwarded unchanged to the superclass constructor
 * @param maxAnalyzedPathsForOneInput forwarded unchanged to the superclass constructor
 * @param sepLabel separation label
 * @param payloadSep payload separator byte
 * @param endByte end byte marker byte
 * @param holeCharacter forwarded unchanged to the superclass constructor
 * @throws IllegalArgumentException if {@code maxEdits} is out of range, or
 *         {@code nonFuzzyPrefix} or {@code minFuzzyLength} is negative
 */
public XFuzzySuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer, int options,
    int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, int maxEdits, boolean transpositions,
    int nonFuzzyPrefix, int minFuzzyLength, boolean unicodeAware, FST<PairOutputs.Pair<Long, BytesRef>> fst,
    boolean hasPayloads, int maxAnalyzedPathsForOneInput, int sepLabel, int payloadSep, int endByte,
    int holeCharacter) {
  // NOTE(review): the literal `true` is presumably preservePositionIncrements - confirm
  // against the superclass constructor signature.
  super(indexAnalyzer, queryPrefix, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions,
      true, fst, hasPayloads, maxAnalyzedPathsForOneInput, sepLabel, payloadSep, endByte, holeCharacter);

  if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new IllegalArgumentException(
        "maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  // The messages below used to read "must not be >= 0", the opposite of the
  // condition that is actually enforced.
  if (nonFuzzyPrefix < 0) {
    throw new IllegalArgumentException("nonFuzzyPrefix must be >= 0 (got " + nonFuzzyPrefix + ")");
  }
  if (minFuzzyLength < 0) {
    throw new IllegalArgumentException("minFuzzyLength must be >= 0 (got " + minFuzzyLength + ")");
  }

  this.maxEdits = maxEdits;
  this.transpositions = transpositions;
  this.nonFuzzyPrefix = nonFuzzyPrefix;
  this.minFuzzyLength = minFuzzyLength;
  this.unicodeAware = unicodeAware;
}
/** * Constructor for enumeration of all terms from specified <code>reader</code> which share a prefix of * length <code>prefixLength</code> with <code>term</code> and which have a fuzzy similarity > * <code>minSimilarity</code>. * <p> * After calling the constructor the enumeration is already pointing to the first * valid term if such a term exists. * * @param terms Delivers terms. * @param atts {@link AttributeSource} created by the rewrite method of {@link MultiTermQuery} * thats contains information about competitive boosts during rewrite. It is also used * to cache DFAs between segment transitions. * @param term Pattern term. * @param minSimilarity Minimum required similarity for terms from the reader. Pass an integer value * representing edit distance. Passing a fraction is deprecated. * @param prefixLength Length of required common prefix. Default value is 0. * @throws IOException if there is a low-level IO error */ public FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term, final float minSimilarity, final int prefixLength, boolean transpositions) throws IOException { if (minSimilarity >= 1.0f && minSimilarity != (int)minSimilarity) throw new IllegalArgumentException("fractional edit distances are not allowed"); if (minSimilarity < 0.0f) throw new IllegalArgumentException("minimumSimilarity cannot be less than 0"); if(prefixLength < 0) throw new IllegalArgumentException("prefixLength cannot be less than 0"); this.terms = terms; this.term = term; // convert the string into a utf32 int[] representation for fast comparisons final String utf16 = term.text(); this.termText = new int[utf16.codePointCount(0, utf16.length())]; for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) termText[j++] = cp = utf16.codePointAt(i); this.termLength = termText.length; this.dfaAtt = atts.addAttribute(LevenshteinAutomataAttribute.class); //The prefix could be longer than the word. //It's kind of silly though. It means we must match the entire word. 
this.realPrefixLength = prefixLength > termLength ? termLength : prefixLength; // if minSimilarity >= 1, we treat it as number of edits if (minSimilarity >= 1f) { this.minSimilarity = 0; // just driven by number of edits maxEdits = (int) minSimilarity; raw = true; } else { this.minSimilarity = minSimilarity; // calculate the maximum k edits for this similarity maxEdits = initialMaxDistance(this.minSimilarity, termLength); raw = false; } if (transpositions && maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { throw new UnsupportedOperationException("with transpositions enabled, distances > " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + " are not supported "); } this.transpositions = transpositions; this.scale_factor = 1.0f / (1.0f - this.minSimilarity); this.maxBoostAtt = atts.addAttribute(MaxNonCompetitiveBoostAttribute.class); bottom = maxBoostAtt.getMaxNonCompetitiveBoost(); bottomTerm = maxBoostAtt.getCompetitiveTerm(); bottomChanged(null, true); }
/**
 * Creates a {@link FuzzySuggester} instance.
 *
 * @param indexAnalyzer Analyzer that will be used for
 *        analyzing suggestions while building the index.
 * @param queryAnalyzer Analyzer that will be used for
 *        analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *        surface forms to keep for a single analyzed form.
 *        When there are too many surface forms we discard the
 *        lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *        to expand from the analyzed form.  Set this to -1 for
 *        no limit.
 * @param preservePositionIncrements Whether position holes should appear in the automaton
 * @param maxEdits must be at least 0 and at most
 *        {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * @param transpositions <code>true</code> if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 * @param nonFuzzyPrefix length of common (non-fuzzy) prefix; must not be negative
 *        (see default {@link #DEFAULT_NON_FUZZY_PREFIX})
 * @param minFuzzyLength minimum length of lookup key before any edits are allowed; must not
 *        be negative (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
 * @param unicodeAware operate Unicode code points instead of bytes.
 * @throws IllegalArgumentException if {@code maxEdits} is out of range, or
 *         {@code nonFuzzyPrefix} or {@code minFuzzyLength} is negative
 */
public FuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, int options,
    int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, boolean preservePositionIncrements,
    int maxEdits, boolean transpositions, int nonFuzzyPrefix, int minFuzzyLength, boolean unicodeAware) {
  super(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions,
      preservePositionIncrements);

  if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new IllegalArgumentException(
        "maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  // The messages below used to read "must not be >= 0", the opposite of the
  // condition that is actually enforced.
  if (nonFuzzyPrefix < 0) {
    throw new IllegalArgumentException("nonFuzzyPrefix must be >= 0 (got " + nonFuzzyPrefix + ")");
  }
  if (minFuzzyLength < 0) {
    throw new IllegalArgumentException("minFuzzyLength must be >= 0 (got " + minFuzzyLength + ")");
  }

  this.maxEdits = maxEdits;
  this.transpositions = transpositions;
  this.nonFuzzyPrefix = nonFuzzyPrefix;
  this.minFuzzyLength = minFuzzyLength;
  this.unicodeAware = unicodeAware;
}
/**
 * Creates a {@link FuzzySuggester} instance.
 *
 * @param indexAnalyzer Analyzer that will be used for
 *        analyzing suggestions while building the index.
 * @param queryAnalyzer Analyzer that will be used for
 *        analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *        surface forms to keep for a single analyzed form.
 *        When there are too many surface forms we discard the
 *        lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *        to expand from the analyzed form.  Set this to -1 for
 *        no limit.
 * @param maxEdits must be at least 0 and at most
 *        {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * @param transpositions <code>true</code> if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 * @param nonFuzzyPrefix length of common (non-fuzzy) prefix; must not be negative
 *        (see default {@link #DEFAULT_NON_FUZZY_PREFIX})
 * @param minFuzzyLength minimum length of lookup key before any edits are allowed; must not
 *        be negative (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
 * @throws IllegalArgumentException if {@code maxEdits} is out of range, or
 *         {@code nonFuzzyPrefix} or {@code minFuzzyLength} is negative
 */
public FuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, int options,
    int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, int maxEdits, boolean transpositions,
    int nonFuzzyPrefix, int minFuzzyLength) {
  super(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions);

  if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new IllegalArgumentException(
        "maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  // The messages below used to read "must not be >= 0", the opposite of the
  // condition that is actually enforced.
  if (nonFuzzyPrefix < 0) {
    throw new IllegalArgumentException("nonFuzzyPrefix must be >= 0 (got " + nonFuzzyPrefix + ")");
  }
  if (minFuzzyLength < 0) {
    throw new IllegalArgumentException("minFuzzyLength must be >= 0 (got " + minFuzzyLength + ")");
  }

  this.maxEdits = maxEdits;
  this.transpositions = transpositions;
  this.nonFuzzyPrefix = nonFuzzyPrefix;
  this.minFuzzyLength = minFuzzyLength;
}