Java 类org.apache.lucene.util.automaton.LevenshteinAutomata 实例源码

项目:lams    文件:FuzzyQuery.java   
/**
 * Create a new FuzzyQuery that will match terms with an edit distance 
 * of at most <code>maxEdits</code> to <code>term</code>.
 * If a <code>prefixLength</code> &gt; 0 is specified, a common prefix
 * of that length is also required.
 * 
 * @param term the term to search for
 * @param maxEdits must be >= 0 and <= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * @param prefixLength length of common (non-fuzzy) prefix
 * @param maxExpansions the maximum number of terms to match. If this number is
 *  greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten, 
 *  then the maxClauseCount will be used instead.
 * @param transpositions true if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 */
public FuzzyQuery(Term term, int maxEdits, int prefixLength, int maxExpansions, boolean transpositions) {
  super(term.field());

  if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  if (prefixLength < 0) {
    throw new IllegalArgumentException("prefixLength cannot be negative.");
  }
  if (maxExpansions <= 0) {
    throw new IllegalArgumentException("maxExpansions must be positive.");
  }

  this.term = term;
  this.maxEdits = maxEdits;
  this.prefixLength = prefixLength;
  this.transpositions = transpositions;
  this.maxExpansions = maxExpansions;
  setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions));
}
项目:lams    文件:FuzzyTermsEnum.java   
/**
 * Extends the automata list held by <code>dfaAtt</code> with one compiled
 * Levenshtein automaton per distance value, from the current list size up to
 * and including <code>maxDistance</code>. Nothing is built when the list is
 * already longer than <code>maxDistance</code> or when <code>maxDistance</code>
 * exceeds {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param maxDistance highest distance value an automaton is requested for
 * @return the (possibly extended) automata list
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> cached = dfaAtt.automata();
  final int alreadyBuilt = cached.size();
  if (alreadyBuilt > maxDistance || maxDistance > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    return cached;
  }

  // The fuzzy part is everything after the fixed prefix of termText.
  final String fuzzyPart =
      UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
  final LevenshteinAutomata levBuilder = new LevenshteinAutomata(fuzzyPart, transpositions);
  final String fixedPrefix = UnicodeUtil.newString(termText, 0, realPrefixLength);

  int distance = alreadyBuilt;
  while (distance <= maxDistance) {
    final Automaton automaton = levBuilder.toAutomaton(distance, fixedPrefix);
    cached.add(new CompiledAutomaton(automaton, true, false));
    distance++;
  }
  return cached;
}
项目:search    文件:TestSimpleQueryParser.java   
/**
 * Tests parsing of the fuzzy operator ({@code ~}): a well-formed integer edit
 * distance produces a {@link FuzzyQuery}, a missing or malformed distance
 * produces a plain {@link TermQuery}, and a distance one above
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} is expected to be
 * treated as the maximum supported distance.
 */
public void testFuzzy() throws Exception {
  Query regular = new TermQuery(new Term("field", "foobar"));
  Query expected = new FuzzyQuery(new Term("field", "foobar"), 2);

  assertEquals(expected, parse("foobar~2"));
  // No distance, or a distance that is not a pure integer: expect a plain term query.
  assertEquals(regular, parse("foobar~"));
  assertEquals(regular, parse("foobar~a"));
  assertEquals(regular, parse("foobar~1a"));

  BooleanQuery bool = new BooleanQuery();
  FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", "foo"), LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  bool.add(fuzzy, Occur.MUST);
  bool.add(new TermQuery(new Term("field", "bar")), Occur.MUST);

  // The addition is parenthesized so the query carries MAX + 1 as a number.
  // Without the parentheses, left-to-right string concatenation appends the
  // digit "1" to the digits of MAX instead of adding one.
  assertEquals(bool, parse("foo~" + (LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + 1) + " bar"));
}
项目:search    文件:FuzzyQuery.java   
/**
 * Create a new FuzzyQuery that will match terms with an edit distance 
 * of at most <code>maxEdits</code> to <code>term</code>.
 * If a <code>prefixLength</code> &gt; 0 is specified, a common prefix
 * of that length is also required.
 * 
 * @param term the term to search for
 * @param maxEdits must be >= 0 and <= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * @param prefixLength length of common (non-fuzzy) prefix
 * @param maxExpansions the maximum number of terms to match. If this number is
 *  greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten, 
 *  then the maxClauseCount will be used instead.
 * @param transpositions true if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 */
public FuzzyQuery(Term term, int maxEdits, int prefixLength, int maxExpansions, boolean transpositions) {
  super(term.field());

  if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  if (prefixLength < 0) {
    throw new IllegalArgumentException("prefixLength cannot be negative.");
  }
  if (maxExpansions <= 0) {
    throw new IllegalArgumentException("maxExpansions must be positive.");
  }

  this.term = term;
  this.maxEdits = maxEdits;
  this.prefixLength = prefixLength;
  this.transpositions = transpositions;
  this.maxExpansions = maxExpansions;
  setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions));
}
项目:search    文件:FuzzyTermsEnum.java   
/**
 * Builds any Levenshtein automata still missing from the list returned by
 * <code>dfaAtt.automata()</code>, one per distance value up to
 * <code>maxDistance</code>, provided <code>maxDistance</code> does not exceed
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param maxDistance highest distance value to build an automaton for
 * @return the automata list, extended in place when new automata were built
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> automataList = dfaAtt.automata();
  final boolean needsMore = automataList.size() <= maxDistance;
  final boolean supported = maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;

  if (needsMore && supported) {
    final int tailLength = termText.length - realPrefixLength;
    final LevenshteinAutomata factory =
        new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, tailLength), transpositions);
    final String constantPrefix = UnicodeUtil.newString(termText, 0, realPrefixLength);

    // Continue from wherever the existing list stops.
    for (int n = automataList.size(); n <= maxDistance; n++) {
      automataList.add(new CompiledAutomaton(factory.toAutomaton(n, constantPrefix), true, false));
    }
  }
  return automataList;
}
项目:NYBC    文件:FuzzyQuery.java   
/**
 * Create a new FuzzyQuery that will match terms with an edit distance 
 * of at most <code>maxEdits</code> to <code>term</code>.
 * If a <code>prefixLength</code> &gt; 0 is specified, a common prefix
 * of that length is also required.
 * 
 * @param term the term to search for
 * @param maxEdits must be >= 0 and <= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * @param prefixLength length of common (non-fuzzy) prefix
 * @param maxExpansions the maximum number of terms to match. If this number is
 *  greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten, 
 *  then the maxClauseCount will be used instead.
 * @param transpositions true if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 */
public FuzzyQuery(Term term, int maxEdits, int prefixLength, int maxExpansions, boolean transpositions) {
  super(term.field());

  if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  if (prefixLength < 0) {
    throw new IllegalArgumentException("prefixLength cannot be negative.");
  }
  if (maxExpansions < 0) {
    throw new IllegalArgumentException("maxExpansions cannot be negative.");
  }

  this.term = term;
  this.maxEdits = maxEdits;
  this.prefixLength = prefixLength;
  this.transpositions = transpositions;
  this.maxExpansions = maxExpansions;
  setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions));
}
项目:NYBC    文件:FuzzyTermsEnum.java   
/**
 * Extends the automata list held by <code>dfaAtt</code> with one compiled
 * Levenshtein automaton per distance value, from the current list size up to
 * and including <code>maxDistance</code>. When <code>realPrefixLength</code>
 * is positive, each automaton is preceded by an automaton for the constant
 * prefix of <code>termText</code>. Nothing is built when the list is already
 * longer than <code>maxDistance</code> or when <code>maxDistance</code>
 * exceeds {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param maxDistance highest distance value an automaton is requested for
 * @return the (possibly extended) automata list
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> cached = dfaAtt.automata();
  final int alreadyBuilt = cached.size();
  if (alreadyBuilt > maxDistance || maxDistance > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    return cached;
  }

  // The fuzzy part is everything after the fixed prefix of termText.
  final String fuzzyPart =
      UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
  final LevenshteinAutomata levBuilder = new LevenshteinAutomata(fuzzyPart, transpositions);

  int distance = alreadyBuilt;
  while (distance <= maxDistance) {
    Automaton automaton = levBuilder.toAutomaton(distance);
    if (realPrefixLength > 0) {
      // Prepend the constant prefix; a fresh prefix automaton is built per iteration.
      automaton = BasicOperations.concatenate(
          BasicAutomata.makeString(UnicodeUtil.newString(termText, 0, realPrefixLength)),
          automaton);
    }
    cached.add(new CompiledAutomaton(automaton, true, false));
    distance++;
  }
  return cached;
}
项目:read-open-source-code    文件:FuzzyQuery.java   
/**
 * Create a new FuzzyQuery that will match terms with an edit distance 
 * of at most <code>maxEdits</code> to <code>term</code>.
 * If a <code>prefixLength</code> &gt; 0 is specified, a common prefix
 * of that length is also required.
 * 
 * @param term the term to search for
 * @param maxEdits must be >= 0 and <= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * @param prefixLength length of common (non-fuzzy) prefix
 * @param maxExpansions the maximum number of terms to match. If this number is
 *  greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten, 
 *  then the maxClauseCount will be used instead.
 * @param transpositions true if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 */
public FuzzyQuery(Term term, int maxEdits, int prefixLength, int maxExpansions, boolean transpositions) {
  super(term.field());

  if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  if (prefixLength < 0) {
    throw new IllegalArgumentException("prefixLength cannot be negative.");
  }
  if (maxExpansions < 0) {
    throw new IllegalArgumentException("maxExpansions cannot be negative.");
  }

  this.term = term;
  this.maxEdits = maxEdits;
  this.prefixLength = prefixLength;
  this.transpositions = transpositions;
  this.maxExpansions = maxExpansions;
  setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions));
}
项目:read-open-source-code    文件:FuzzyTermsEnum.java   
/**
 * Builds any Levenshtein automata still missing from the list returned by
 * <code>dfaAtt.automata()</code>, one per distance value up to
 * <code>maxDistance</code>, provided <code>maxDistance</code> does not exceed
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}. A positive
 * <code>realPrefixLength</code> causes the constant prefix of
 * <code>termText</code> to be concatenated in front of each automaton.
 *
 * @param maxDistance highest distance value to build an automaton for
 * @return the automata list, extended in place when new automata were built
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> automataList = dfaAtt.automata();
  final boolean needsMore = automataList.size() <= maxDistance;
  final boolean supported = maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;

  if (needsMore && supported) {
    final int tailLength = termText.length - realPrefixLength;
    final LevenshteinAutomata factory =
        new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, tailLength), transpositions);
    final boolean hasPrefix = realPrefixLength > 0;

    // Continue from wherever the existing list stops.
    for (int n = automataList.size(); n <= maxDistance; n++) {
      final Automaton fuzzy = factory.toAutomaton(n);
      final Automaton complete;
      if (hasPrefix) {
        final Automaton head = BasicAutomata.makeString(UnicodeUtil.newString(termText, 0, realPrefixLength));
        complete = BasicOperations.concatenate(head, fuzzy);
      } else {
        complete = fuzzy;
      }
      automataList.add(new CompiledAutomaton(complete, true, false));
    }
  }
  return automataList;
}
项目:read-open-source-code    文件:FuzzyQuery.java   
/**
 * Create a new FuzzyQuery that will match terms with an edit distance 
 * of at most <code>maxEdits</code> to <code>term</code>.
 * If a <code>prefixLength</code> &gt; 0 is specified, a common prefix
 * of that length is also required.
 * 
 * @param term the term to search for
 * @param maxEdits must be >= 0 and <= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * @param prefixLength length of common (non-fuzzy) prefix
 * @param maxExpansions the maximum number of terms to match. If this number is
 *  greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten, 
 *  then the maxClauseCount will be used instead.
 * @param transpositions true if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 */
public FuzzyQuery(Term term, int maxEdits, int prefixLength, int maxExpansions, boolean transpositions) {
  super(term.field());

  if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  if (prefixLength < 0) {
    throw new IllegalArgumentException("prefixLength cannot be negative.");
  }
  if (maxExpansions < 0) {
    throw new IllegalArgumentException("maxExpansions cannot be negative.");
  }

  this.term = term;
  this.maxEdits = maxEdits;
  this.prefixLength = prefixLength;
  this.transpositions = transpositions;
  this.maxExpansions = maxExpansions;
  setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions));
}
项目:read-open-source-code    文件:FuzzyTermsEnum.java   
/**
 * Appends compiled Levenshtein automata to the list obtained from
 * <code>dfaAtt</code> until it holds one entry for every distance value up to
 * <code>maxDistance</code>. The list is returned untouched when it already has
 * more than <code>maxDistance</code> entries or when <code>maxDistance</code>
 * is above {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param maxDistance highest distance value an automaton is requested for
 * @return the automata list
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> result = dfaAtt.automata();
  if (!(result.size() <= maxDistance)) {
    return result;
  }
  if (!(maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)) {
    return result;
  }

  final String editablePart =
      UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
  final LevenshteinAutomata lev = new LevenshteinAutomata(editablePart, transpositions);

  for (int k = result.size(); k <= maxDistance; k++) {
    Automaton current = lev.toAutomaton(k);
    // With a non-empty constant prefix, the prefix automaton goes in front.
    if (realPrefixLength > 0) {
      final String prefixText = UnicodeUtil.newString(termText, 0, realPrefixLength);
      current = BasicOperations.concatenate(BasicAutomata.makeString(prefixText), current);
    }
    result.add(new CompiledAutomaton(current, true, false));
  }
  return result;
}
项目:read-open-source-code    文件:FuzzyQuery.java   
/**
 * Create a new FuzzyQuery that will match terms with an edit distance 
 * of at most <code>maxEdits</code> to <code>term</code>.
 * If a <code>prefixLength</code> &gt; 0 is specified, a common prefix
 * of that length is also required.
 * 
 * @param term the term to search for
 * @param maxEdits must be >= 0 and <= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * @param prefixLength length of common (non-fuzzy) prefix
 * @param maxExpansions the maximum number of terms to match. If this number is
 *  greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten, 
 *  then the maxClauseCount will be used instead.
 * @param transpositions true if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 */
public FuzzyQuery(Term term, int maxEdits, int prefixLength, int maxExpansions, boolean transpositions) {
  super(term.field());

  if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  if (prefixLength < 0) {
    throw new IllegalArgumentException("prefixLength cannot be negative.");
  }
  if (maxExpansions <= 0) {
    throw new IllegalArgumentException("maxExpansions must be positive.");
  }

  this.term = term;
  this.maxEdits = maxEdits;
  this.prefixLength = prefixLength;
  this.transpositions = transpositions;
  this.maxExpansions = maxExpansions;
  setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions));
}
项目:read-open-source-code    文件:FuzzyTermsEnum.java   
/**
 * Appends compiled Levenshtein automata to the list obtained from
 * <code>dfaAtt</code> until it holds one entry for every distance value up to
 * <code>maxDistance</code>. The list is returned untouched when it already has
 * more than <code>maxDistance</code> entries or when <code>maxDistance</code>
 * is above {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param maxDistance highest distance value an automaton is requested for
 * @return the automata list
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> result = dfaAtt.automata();
  if (!(result.size() <= maxDistance)) {
    return result;
  }
  if (!(maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)) {
    return result;
  }

  final String editablePart =
      UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
  final LevenshteinAutomata lev = new LevenshteinAutomata(editablePart, transpositions);
  // The constant prefix is passed to the builder along with each distance value.
  final String prefixText = UnicodeUtil.newString(termText, 0, realPrefixLength);

  for (int k = result.size(); k <= maxDistance; k++) {
    final Automaton current = lev.toAutomaton(k, prefixText);
    result.add(new CompiledAutomaton(current, true, false));
  }
  return result;
}
项目:Maskana-Gestor-de-Conocimiento    文件:FuzzyQuery.java   
/**
 * Create a new FuzzyQuery that will match terms with an edit distance 
 * of at most <code>maxEdits</code> to <code>term</code>.
 * If a <code>prefixLength</code> &gt; 0 is specified, a common prefix
 * of that length is also required.
 * 
 * @param term the term to search for
 * @param maxEdits must be >= 0 and <= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * @param prefixLength length of common (non-fuzzy) prefix
 * @param maxExpansions the maximum number of terms to match. If this number is
 *  greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten, 
 *  then the maxClauseCount will be used instead.
 * @param transpositions true if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 */
public FuzzyQuery(Term term, int maxEdits, int prefixLength, int maxExpansions, boolean transpositions) {
  super(term.field());

  if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  if (prefixLength < 0) {
    throw new IllegalArgumentException("prefixLength cannot be negative.");
  }
  if (maxExpansions < 0) {
    throw new IllegalArgumentException("maxExpansions cannot be negative.");
  }

  this.term = term;
  this.maxEdits = maxEdits;
  this.prefixLength = prefixLength;
  this.transpositions = transpositions;
  this.maxExpansions = maxExpansions;
  setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions));
}
项目:Maskana-Gestor-de-Conocimiento    文件:FuzzyTermsEnum.java   
/**
 * Tops up the automata list from <code>dfaAtt</code> with compiled
 * Levenshtein automata, one per distance value from the current list size
 * through <code>maxDistance</code>. No work is done if the list already has
 * more than <code>maxDistance</code> entries or if <code>maxDistance</code> is
 * greater than {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param maxDistance highest distance value to build an automaton for
 * @return the automata list
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> known = dfaAtt.automata();
  final int firstMissing = known.size();
  final boolean buildable =
      firstMissing <= maxDistance && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
  if (!buildable) {
    return known;
  }

  final LevenshteinAutomata generator = new LevenshteinAutomata(
      UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength),
      transpositions);

  for (int edits = firstMissing; edits <= maxDistance; edits++) {
    Automaton candidate = generator.toAutomaton(edits);
    if (realPrefixLength > 0) {
      // Constant prefix: concatenate its automaton ahead of the fuzzy one.
      final Automaton lead =
          BasicAutomata.makeString(UnicodeUtil.newString(termText, 0, realPrefixLength));
      candidate = BasicOperations.concatenate(lead, candidate);
    }
    known.add(new CompiledAutomaton(candidate, true, false));
  }
  return known;
}
项目:elasticsearch_my    文件:DirectCandidateGeneratorBuilder.java   
/**
 * Creates a {@link PhraseSuggestionContext.DirectCandidateGenerator} and copies
 * this builder's settings onto it. Optional settings are transferred only when
 * they are non-null here; the pre- and post-filter analyzers are looked up by
 * name through the mapper service's index analyzers.
 *
 * @param mapperService source of the index analyzers used to resolve the filter names
 * @return the configured generator
 * @throws IllegalArgumentException if a named filter analyzer resolves to
 *         {@code null}, or if the generator's max edits value is below 1 or above
 *         {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 * @throws IOException declared by the method signature
 */
@Override
public PhraseSuggestionContext.DirectCandidateGenerator build(MapperService mapperService) throws IOException {
    PhraseSuggestionContext.DirectCandidateGenerator candidate = new PhraseSuggestionContext.DirectCandidateGenerator();
    candidate.setField(this.field);
    transferIfNotNull(this.size, candidate::size);

    // Filter analyzers: resolve by name, then fail if the generator holds none.
    if (null != this.preFilter) {
        candidate.preFilter(mapperService.getIndexAnalyzers().get(this.preFilter));
        if (null == candidate.preFilter()) {
            throw new IllegalArgumentException("Analyzer [" + this.preFilter + "] doesn't exists");
        }
    }
    if (null != this.postFilter) {
        candidate.postFilter(mapperService.getIndexAnalyzers().get(this.postFilter));
        if (null == candidate.postFilter()) {
            throw new IllegalArgumentException("Analyzer [" + this.postFilter + "] doesn't exists");
        }
    }

    transferIfNotNull(this.accuracy, candidate::accuracy);
    if (null != this.suggestMode) {
        candidate.suggestMode(resolveSuggestMode(this.suggestMode));
    }
    if (null != this.sort) {
        candidate.sort(SortBy.resolve(this.sort));
    }
    if (null != this.stringDistance) {
        candidate.stringDistance(resolveDistance(this.stringDistance));
    }

    // The range check reads the generator's value, so it also runs when this.maxEdits is null.
    transferIfNotNull(this.maxEdits, candidate::maxEdits);
    if (!(candidate.maxEdits() >= 1 && candidate.maxEdits() <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)) {
        throw new IllegalArgumentException("Illegal max_edits value " + candidate.maxEdits());
    }

    transferIfNotNull(this.maxInspections, candidate::maxInspections);
    transferIfNotNull(this.maxTermFreq, candidate::maxTermFreq);
    transferIfNotNull(this.prefixLength, candidate::prefixLength);
    transferIfNotNull(this.minWordLength, candidate::minWordLength);
    transferIfNotNull(this.minDocFreq, candidate::minDocFreq);
    return candidate;
}
项目:lams    文件:FuzzyQuery.java   
/**
 * Converts a deprecated "minimumSimilarity" value into a raw edit distance.
 * <ul>
 *   <li>Values of 1 or more are truncated to an integer edit count.</li>
 *   <li>Exactly 0 yields 0 edits.</li>
 *   <li>Any other value yields <code>(1 - minimumSimilarity) * termLen</code>,
 *       truncated to an integer.</li>
 * </ul>
 * Results of the first and last case are capped at
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param minimumSimilarity scaled similarity
 * @param termLen length (in unicode codepoints) of the term.
 * @return equivalent number of maxEdits
 * @deprecated pass integer edit distances instead.
 */
@Deprecated
public static int floatToEdits(float minimumSimilarity, int termLen) {
  final int editCap = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
  if (minimumSimilarity == 0.0f) {
    // 0 means exact, not infinite # of edits!
    return 0;
  }
  if (minimumSimilarity >= 1f) {
    return (int) Math.min(minimumSimilarity, editCap);
  }
  final int scaledEdits = (int) ((1D - minimumSimilarity) * termLen);
  return Math.min(scaledEdits, editCap);
}
项目:Elasticsearch    文件:SuggestUtils.java   
/**
 * Applies a single direct-spellchecker setting to {@code suggestion}, reading
 * the value from the parser's current token. The setting is selected by
 * {@code fieldName}, either by literal name ("accuracy", "sort") or by
 * matching one of the {@code Fields} constants through {@code parseFieldMatcher}.
 *
 * @param parser parser positioned on the value to read
 * @param fieldName name of the setting being parsed
 * @param suggestion settings object that receives the value
 * @param parseFieldMatcher matcher used to compare {@code fieldName} against the known fields
 * @return {@code true} if {@code fieldName} was recognized and applied, {@code false} otherwise
 * @throws IllegalArgumentException if the max edits value is below 1 or above
 *         {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 * @throws IOException declared by the method signature
 */
public static boolean parseDirectSpellcheckerSettings(XContentParser parser, String fieldName,
            DirectSpellcheckerSettings suggestion, ParseFieldMatcher parseFieldMatcher) throws IOException {
    // Candidates are tested in a fixed order; the first match wins.
    if ("accuracy".equals(fieldName)) {
        suggestion.accuracy(parser.floatValue());
        return true;
    }
    if (parseFieldMatcher.match(fieldName, Fields.SUGGEST_MODE)) {
        suggestion.suggestMode(SuggestUtils.resolveSuggestMode(parser.text()));
        return true;
    }
    if ("sort".equals(fieldName)) {
        suggestion.sort(SuggestUtils.resolveSort(parser.text()));
        return true;
    }
    if (parseFieldMatcher.match(fieldName, Fields.STRING_DISTANCE)) {
        suggestion.stringDistance(SuggestUtils.resolveDistance(parser.text()));
        return true;
    }
    if (parseFieldMatcher.match(fieldName, Fields.MAX_EDITS)) {
        suggestion.maxEdits(parser.intValue());
        // Validate the stored value right after setting it.
        if (!(suggestion.maxEdits() >= 1 && suggestion.maxEdits() <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)) {
            throw new IllegalArgumentException("Illegal max_edits value " + suggestion.maxEdits());
        }
        return true;
    }
    if (parseFieldMatcher.match(fieldName, Fields.MAX_INSPECTIONS)) {
        suggestion.maxInspections(parser.intValue());
        return true;
    }
    if (parseFieldMatcher.match(fieldName, Fields.MAX_TERM_FREQ)) {
        suggestion.maxTermFreq(parser.floatValue());
        return true;
    }
    if (parseFieldMatcher.match(fieldName, Fields.PREFIX_LENGTH)) {
        suggestion.prefixLength(parser.intValue());
        return true;
    }
    if (parseFieldMatcher.match(fieldName, Fields.MIN_WORD_LENGTH)) {
        // The min word length field is stored through minQueryLength.
        suggestion.minQueryLength(parser.intValue());
        return true;
    }
    if (parseFieldMatcher.match(fieldName, Fields.MIN_DOC_FREQ)) {
        suggestion.minDocFreq(parser.floatValue());
        return true;
    }
    return false;
}
项目:DoSeR-Disambiguation    文件:LearnToRankFuzzyQuery.java   
/**
 * Maps a deprecated "minimumSimilarity" fraction onto a raw edit distance.
 * A value of at least 1 is taken as an edit count and truncated; a value of
 * exactly 0 gives 0; anything else gives
 * <code>(1 - minimumSimilarity) * termLen</code>, truncated. The first and
 * last results never exceed
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param minimumSimilarity
 *            scaled similarity
 * @param termLen
 *            length (in unicode codepoints) of the term.
 * @return equivalent number of maxEdits
 * @deprecated pass integer edit distances instead.
 */
@Deprecated
public static int floatToEdits(final float minimumSimilarity,
        final int termLen) {
    final int ceiling = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
    final int edits;
    if (minimumSimilarity >= 1f) {
        edits = (int) Math.min(minimumSimilarity, ceiling);
    } else if (minimumSimilarity == 0.0f) {
        // 0 means exact, not infinite # of edits!
        edits = 0;
    } else {
        final int fromFraction = (int) ((1D - minimumSimilarity) * termLen);
        edits = Math.min(fromFraction, ceiling);
    }
    return edits;
}
项目:DoSeR-Disambiguation    文件:LearnToRankFuzzyQuery.java   
/**
 * Create a new FuzzyQuery that will match terms with an edit distance of at
 * most <code>maxEdits</code> to <code>term</code>. If a
 * <code>prefixLength</code> &gt; 0 is specified, a common prefix of that
 * length is also required.
 * 
 * @param term
 *            the term to search for
 * @param maxEdits
 *            must be >= 0 and <=
 *            {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * @param prefixLength
 *            length of common (non-fuzzy) prefix
 * @param maxExpansions
 *            the maximum number of terms to match. If this number is
 *            greater than {@link BooleanQuery#getMaxClauseCount} when the
 *            query is rewritten, then the maxClauseCount will be used
 *            instead.
 * @param transpositions
 *            true if transpositions should be treated as a primitive edit
 *            operation. If this is false, comparisons will implement the
 *            classic Levenshtein algorithm.
 */
public LearnToRankFuzzyQuery(final Term term, final int maxEdits,
        final int prefixLength, final int maxExpansions,
        final boolean transpositions, final Similarity sim) {
    super(term.field());

    if ((maxEdits < 0)
            || (maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)) {
        throw new IllegalArgumentException(
                "maxEdits must be between 0 and "
                        + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
    }
    if (prefixLength < 0) {
        throw new IllegalArgumentException(
                "prefixLength cannot be negative.");
    }
    if (maxExpansions < 0) {
        throw new IllegalArgumentException(
                "maxExpansions cannot be negative.");
    }

    this.term = term;
    this.maxEdits = maxEdits;
    this.prefixLength = prefixLength;
    this.transpositions = transpositions;
    this.maxExpansions = maxExpansions;
    setRewriteMethod(new LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite(
            maxExpansions, sim));
    // setRewriteMethod(new
    // LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite(
    // maxExpansions));
}
项目:search    文件:FuzzyQuery.java   
/**
 * Translates a deprecated "minimumSimilarity" value into the corresponding
 * raw edit distance. Values of 1 or above are read as an edit count
 * (truncated); a value of exactly 0 maps to 0; every other value maps to
 * <code>(1 - minimumSimilarity) * termLen</code>, truncated. Apart from the
 * zero case, the result is limited to
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param minimumSimilarity scaled similarity
 * @param termLen length (in unicode codepoints) of the term.
 * @return equivalent number of maxEdits
 * @deprecated pass integer edit distances instead.
 */
@Deprecated
public static int floatToEdits(float minimumSimilarity, int termLen) {
  final boolean isEditCount = minimumSimilarity >= 1f;
  if (isEditCount) {
    final float capped = Math.min(minimumSimilarity, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
    return (int) capped;
  }
  final boolean isExact = minimumSimilarity == 0.0f;
  if (isExact) {
    return 0; // 0 means exact, not infinite # of edits!
  }
  final double dissimilarity = 1D - minimumSimilarity;
  return Math.min((int) (dissimilarity * termLen), LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
}
项目:DoSeR    文件:LearnToRankFuzzyQuery.java   
/**
     * Create a new FuzzyQuery that will match terms with an edit distance of at
     * most <code>maxEdits</code> to <code>term</code>. If a
     * <code>prefixLength</code> &gt; 0 is specified, a common prefix of that
     * length is also required.
     * 
     * @param term
     *            the term to search for
     * @param maxEdits
     *            must be >= 0 and <=
     *            {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
     * @param prefixLength
     *            length of common (non-fuzzy) prefix
     * @param maxExpansions
     *            the maximum number of terms to match. If this number is
     *            greater than {@link BooleanQuery#getMaxClauseCount} when the
     *            query is rewritten, then the maxClauseCount will be used
     *            instead.
     * @param transpositions
     *            true if transpositions should be treated as a primitive edit
     *            operation. If this is false, comparisons will implement the
     *            classic Levenshtein algorithm.
     */
    public LearnToRankFuzzyQuery(Term term, int maxEdits, int prefixLength,
            int maxExpansions, boolean transpositions, Similarity sim) {
        super(term.field());

        if (maxEdits < 0
                || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
            throw new IllegalArgumentException(
                    "maxEdits must be between 0 and "
                            + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
        }
        if (prefixLength < 0) {
            throw new IllegalArgumentException(
                    "prefixLength cannot be negative.");
        }
        if (maxExpansions < 0) {
            throw new IllegalArgumentException(
                    "maxExpansions cannot be negative.");
        }

        this.term = term;
        this.maxEdits = maxEdits;
        this.prefixLength = prefixLength;
        this.transpositions = transpositions;
        this.maxExpansions = maxExpansions;
        LearnToRankFuzzyQuery.sim = sim;
        setRewriteMethod(new LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite(maxExpansions));
//      setRewriteMethod(new LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite(
//              maxExpansions));
    }
项目:DoSeR    文件:LearnToRankFuzzyQuery.java   
/**
 * Converts a deprecated "minimumSimilarity" value into a raw edit distance.
 * Values of 1 or more are truncated to an integer edit count; exactly 0
 * yields 0 edits; any other value yields
 * <code>(1 - minimumSimilarity) * termLen</code>, truncated to an integer.
 * Results of the first and last case are capped at
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param minimumSimilarity
 *            scaled similarity
 * @param termLen
 *            length (in unicode codepoints) of the term.
 * @return equivalent number of maxEdits
 * @deprecated pass integer edit distances instead.
 */
@Deprecated
public static int floatToEdits(float minimumSimilarity, int termLen) {
    final int editCap = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
    if (minimumSimilarity == 0.0f) {
        // 0 means exact, not infinite # of edits!
        return 0;
    }
    if (minimumSimilarity >= 1f) {
        return (int) Math.min(minimumSimilarity, editCap);
    }
    final int scaledEdits = (int) ((1D - minimumSimilarity) * termLen);
    return Math.min(scaledEdits, editCap);
}
项目:DoSeR    文件:LearnToRankFuzzyQuery.java   
/**
 * Translates a deprecated "minimumSimilarity" value into the corresponding
 * raw edit distance. Values of 1 or above are read as an edit count
 * (truncated); a value of exactly 0 maps to 0; every other value maps to
 * <code>(1 - minimumSimilarity) * termLen</code>, truncated. Apart from the
 * zero case, the result is limited to
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param minimumSimilarity
 *            scaled similarity
 * @param termLen
 *            length (in unicode codepoints) of the term.
 * @return equivalent number of maxEdits
 * @deprecated pass integer edit distances instead.
 */
@Deprecated
public static int floatToEdits(final float minimumSimilarity,
        final int termLen) {
    final boolean isEditCount = minimumSimilarity >= 1f;
    if (isEditCount) {
        final float capped = Math.min(minimumSimilarity, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
        return (int) capped;
    }
    final boolean isExact = minimumSimilarity == 0.0f;
    if (isExact) {
        return 0; // 0 means exact, not infinite # of edits!
    }
    final double dissimilarity = 1D - minimumSimilarity;
    return Math.min((int) (dissimilarity * termLen), LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
}
项目:DoSeR    文件:LearnToRankFuzzyQuery.java   
/**
 * Create a new FuzzyQuery that will match terms with an edit distance of at
 * most <code>maxEdits</code> to <code>term</code>. If a
 * <code>prefixLength</code> &gt; 0 is specified, a common prefix of that
 * length is also required.
 * 
 * @param term
 *            the term to search for
 * @param maxEdits
 *            must be >= 0 and <=
 *            {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * @param prefixLength
 *            length of common (non-fuzzy) prefix
 * @param maxExpansions
 *            the maximum number of terms to match. If this number is
 *            greater than {@link BooleanQuery#getMaxClauseCount} when the
 *            query is rewritten, then the maxClauseCount will be used
 *            instead.
 * @param transpositions
 *            true if transpositions should be treated as a primitive edit
 *            operation. If this is false, comparisons will implement the
 *            classic Levenshtein algorithm.
 */
public LearnToRankFuzzyQuery(final Term term, final int maxEdits,
        final int prefixLength, final int maxExpansions,
        final boolean transpositions, final Similarity sim) {
    super(term.field());

    if ((maxEdits < 0)
            || (maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)) {
        throw new IllegalArgumentException(
                "maxEdits must be between 0 and "
                        + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
    }
    if (prefixLength < 0) {
        throw new IllegalArgumentException(
                "prefixLength cannot be negative.");
    }
    if (maxExpansions < 0) {
        throw new IllegalArgumentException(
                "maxExpansions cannot be negative.");
    }

    this.term = term;
    this.maxEdits = maxEdits;
    this.prefixLength = prefixLength;
    this.transpositions = transpositions;
    this.maxExpansions = maxExpansions;
    setRewriteMethod(new LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite(
            maxExpansions, sim));
    // setRewriteMethod(new
    // LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite(
    // maxExpansions));
}
项目:NYBC    文件:FuzzyQuery.java   
/**
 * Converts a deprecated "minimumSimilarity" value into a raw edit distance,
 * capped at {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * 
 * @param minimumSimilarity scaled similarity; 1 or more is read as an edit count
 * @param termLen length (in unicode codepoints) of the term.
 * @return equivalent number of maxEdits
 * @deprecated pass integer edit distances instead.
 */
@Deprecated
public static int floatToEdits(float minimumSimilarity, int termLen) {
  final int cap = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;

  // The caller already supplied an edit count.
  if (minimumSimilarity >= 1f) {
    return (int) Math.min(minimumSimilarity, cap);
  }
  // 0 means exact, not infinite # of edits!
  if (minimumSimilarity == 0.0f) {
    return 0;
  }
  // Fractional similarity: scale the dissimilar share by the term length.
  final int scaledEdits = (int) ((1D - minimumSimilarity) * termLen);
  return Math.min(scaledEdits, cap);
}
项目:read-open-source-code    文件:FuzzyQuery.java   
/**
 * Turns a legacy "minimumSimilarity" into the matching maximum number of
 * edits; the result never exceeds
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * 
 * @param minimumSimilarity scaled similarity, or an edit count when 1 or more
 * @param termLen length (in unicode codepoints) of the term.
 * @return equivalent number of maxEdits
 * @deprecated pass integer edit distances instead.
 */
@Deprecated
public static int floatToEdits(float minimumSimilarity, int termLen) {
  if (minimumSimilarity == 0.0f) {
    return 0; // 0 means exact, not infinite # of edits!
  }
  final int limit = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
  return (minimumSimilarity >= 1f)
      ? (int) Math.min(minimumSimilarity, limit)
      : Math.min((int) ((1D - minimumSimilarity) * termLen), limit);
}
项目:read-open-source-code    文件:FuzzyQuery.java   
/**
 * Derives a raw edit distance from a deprecated "minimumSimilarity" value.
 * The result is bounded by
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * 
 * @param minimumSimilarity scaled similarity; exactly 0 yields 0 edits
 * @param termLen length (in unicode codepoints) of the term.
 * @return equivalent number of maxEdits
 * @deprecated pass integer edit distances instead.
 */
@Deprecated
public static int floatToEdits(float minimumSimilarity, int termLen) {
  final int edits;
  if (minimumSimilarity >= 1f) {
    // treat the value as an edit count and cap it
    edits = (int) Math.min(minimumSimilarity, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  } else if (minimumSimilarity == 0.0f) {
    // 0 means exact, not infinite # of edits!
    edits = 0;
  } else {
    // scale the dissimilar share by the term length and cap it
    edits = Math.min((int) ((1D - minimumSimilarity) * termLen),
        LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  return edits;
}
项目:read-open-source-code    文件:FuzzyQuery.java   
/**
 * Computes the edit distance equivalent to a deprecated
 * "minimumSimilarity" fraction, limited to
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * 
 * @param minimumSimilarity scaled similarity; 1 or more is already an edit count
 * @param termLen length (in unicode codepoints) of the term.
 * @return equivalent number of maxEdits
 * @deprecated pass integer edit distances instead.
 */
@Deprecated
public static int floatToEdits(float minimumSimilarity, int termLen) {
  if (minimumSimilarity >= 1f) {
    final float capped = Math.min(minimumSimilarity, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
    return (int) capped;
  }
  if (minimumSimilarity == 0.0f) {
    return 0; // 0 means exact, not infinite # of edits!
  }
  // Share of the term allowed to differ, expressed in code points.
  final double allowedDifference = (1D - minimumSimilarity) * termLen;
  return Math.min((int) allowedDifference, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
}
项目:Maskana-Gestor-de-Conocimiento    文件:FuzzyQuery.java   
/**
 * Helper that re-expresses a deprecated "minimumSimilarity" value as a raw
 * edit distance no larger than
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * 
 * @param minimumSimilarity scaled similarity, or an edit count when 1 or more
 * @param termLen length (in unicode codepoints) of the term.
 * @return equivalent number of maxEdits
 * @deprecated pass integer edit distances instead.
 */
@Deprecated
public static int floatToEdits(float minimumSimilarity, int termLen) {
  // The negated comparison keeps NaN on the fractional path.
  if (!(minimumSimilarity >= 1f)) {
    if (minimumSimilarity == 0.0f) {
      return 0; // 0 means exact, not infinite # of edits!
    }
    final int scaled = (int) ((1D - minimumSimilarity) * termLen);
    return Math.min(scaled, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  // The value is an edit count: truncate and cap it.
  return (int) Math.min(minimumSimilarity, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
}
项目:elasticsearch_my    文件:XFuzzySuggester.java   
/**
 * Creates an {@link XFuzzySuggester} instance.
 * <p>
 * Every argument that is not fuzzy-specific is forwarded unchanged to the
 * superclass constructor. The fuzzy settings are validated and then stored
 * on this instance.
 *
 * @param indexAnalyzer Analyzer that will be used for
 *        analyzing suggestions while building the index.
 * @param queryPrefix forwarded to the superclass constructor
 * @param queryAnalyzer Analyzer that will be used for
 *        analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *        surface forms to keep for a single analyzed form.
 *        When there are too many surface forms we discard the
 *        lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *        to expand from the analyzed form.  Set this to -1 for
 *        no limit.
 * @param maxEdits must be &gt;= 0 and &lt;= {@link org.apache.lucene.util.automaton.LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} .
 * @param transpositions <code>true</code> if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 * @param nonFuzzyPrefix length of common (non-fuzzy) prefix, must be &gt;= 0 (see default {@link #DEFAULT_NON_FUZZY_PREFIX})
 * @param minFuzzyLength minimum length of lookup key before any edits are allowed, must be &gt;= 0
 *        (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
 * @param unicodeAware stored on this instance
 * @param fst forwarded to the superclass constructor
 * @param hasPayloads forwarded to the superclass constructor
 * @param maxAnalyzedPathsForOneInput forwarded to the superclass constructor
 * @param sepLabel separation label
 * @param payloadSep payload separator byte
 * @param endByte end byte marker byte
 * @param holeCharacter forwarded to the superclass constructor
 * @throws IllegalArgumentException if <code>maxEdits</code> is out of range, or if
 *         <code>nonFuzzyPrefix</code> or <code>minFuzzyLength</code> is negative
 */
public XFuzzySuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer,
                       int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
                       int maxEdits, boolean transpositions, int nonFuzzyPrefix, int minFuzzyLength,
                       boolean unicodeAware, FST<PairOutputs.Pair<Long, BytesRef>> fst, boolean hasPayloads,
                       int maxAnalyzedPathsForOneInput, int sepLabel, int payloadSep, int endByte, int holeCharacter) {
    super(indexAnalyzer, queryPrefix, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions,
        true, fst, hasPayloads, maxAnalyzedPathsForOneInput, sepLabel, payloadSep, endByte, holeCharacter);
    if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
        throw new IllegalArgumentException(
            "maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
    }
    // The messages below state the actual requirement; the earlier wording
    // ("must not be >= 0") said the opposite of what is checked.
    if (nonFuzzyPrefix < 0) {
        throw new IllegalArgumentException("nonFuzzyPrefix must be >= 0 (got " + nonFuzzyPrefix + ")");
    }
    if (minFuzzyLength < 0) {
        throw new IllegalArgumentException("minFuzzyLength must be >= 0 (got " + minFuzzyLength + ")");
    }

    this.maxEdits = maxEdits;
    this.transpositions = transpositions;
    this.nonFuzzyPrefix = nonFuzzyPrefix;
    this.minFuzzyLength = minFuzzyLength;
    this.unicodeAware = unicodeAware;
}
项目:lams    文件:FuzzyTermsEnum.java   
/**
 * Builds an enumeration over the terms of <code>terms</code> that share a prefix of
 * length <code>prefixLength</code> with <code>term</code> and whose fuzzy similarity is &gt;
 * <code>minSimilarity</code>.
 * <p>
 * After calling the constructor the enumeration is already pointing to the first 
 * valid term if such a term exists. 
 * 
 * @param terms Delivers terms.
 * @param atts {@link AttributeSource} created by the rewrite method of {@link MultiTermQuery}
 *        that contains information about competitive boosts during rewrite. It is also used
 *        to cache DFAs between segment transitions.
 * @param term Pattern term.
 * @param minSimilarity Minimum required similarity for terms from the reader. Pass an integer value
 *        representing edit distance. Passing a fraction is deprecated.
 * @param prefixLength Length of required common prefix. Default value is 0.
 * @param transpositions when <code>true</code>, an edit distance above
 *        {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} is rejected
 * @throws IOException if there is a low-level IO error
 * @throws IllegalArgumentException if <code>minSimilarity</code> is negative or is a
 *         non-integral value of 1 or more, or if <code>prefixLength</code> is negative
 * @throws UnsupportedOperationException if transpositions are enabled and the resulting
 *         edit distance exceeds {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 */
public FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term, 
    final float minSimilarity, final int prefixLength, boolean transpositions) throws IOException {
  // A value of 1 or more is an edit count rather than a similarity.
  final boolean editCountGiven = minSimilarity >= 1.0f;
  if (editCountGiven && minSimilarity != (int) minSimilarity) {
    throw new IllegalArgumentException("fractional edit distances are not allowed");
  }
  if (minSimilarity < 0.0f) {
    throw new IllegalArgumentException("minimumSimilarity cannot be less than 0");
  }
  if (prefixLength < 0) {
    throw new IllegalArgumentException("prefixLength cannot be less than 0");
  }
  this.terms = terms;
  this.term = term;

  // Decode the UTF-16 text into one int per code point for fast comparisons.
  final String text = term.text();
  final int[] codePoints = new int[text.codePointCount(0, text.length())];
  int filled = 0;
  int offset = 0;
  while (offset < text.length()) {
    final int codePoint = text.codePointAt(offset);
    codePoints[filled++] = codePoint;
    offset += Character.charCount(codePoint);
  }
  this.termText = codePoints;
  this.termLength = codePoints.length;
  this.dfaAtt = atts.addAttribute(LevenshteinAutomataAttribute.class);

  // A prefix longer than the word is clamped: the entire word must then match.
  this.realPrefixLength = Math.min(prefixLength, termLength);

  if (editCountGiven) {
    this.minSimilarity = 0; // just driven by number of edits
    maxEdits = (int) minSimilarity;
    raw = true;
  } else {
    this.minSimilarity = minSimilarity;
    // calculate the maximum k edits for this similarity
    maxEdits = initialMaxDistance(this.minSimilarity, termLength);
    raw = false;
  }
  if (transpositions && maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new UnsupportedOperationException("with transpositions enabled, distances > " 
      + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + " are not supported ");
  }
  this.transpositions = transpositions;
  this.scale_factor = 1.0f / (1.0f - this.minSimilarity);

  // Pick up the current competitive state from the shared attribute.
  this.maxBoostAtt = atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  bottom = maxBoostAtt.getMaxNonCompetitiveBoost();
  bottomTerm = maxBoostAtt.getCompetitiveTerm();
  bottomChanged(null, true);
}
项目:search    文件:FuzzySuggester.java   
/**
 * Creates a {@link FuzzySuggester} instance.
 * 
 * @param indexAnalyzer Analyzer that will be used for
 *        analyzing suggestions while building the index.
 * @param queryAnalyzer Analyzer that will be used for
 *        analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *        surface forms to keep for a single analyzed form.
 *        When there are too many surface forms we discard the
 *        lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *        to expand from the analyzed form.  Set this to -1 for
 *        no limit.
 * @param preservePositionIncrements Whether position holes should appear in the automaton
 * @param maxEdits must be &gt;= 0 and &lt;= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} .
 * @param transpositions <code>true</code> if transpositions should be treated as a primitive 
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 * @param nonFuzzyPrefix length of common (non-fuzzy) prefix, must be &gt;= 0 (see default {@link #DEFAULT_NON_FUZZY_PREFIX})
 * @param minFuzzyLength minimum length of lookup key before any edits are allowed, must be &gt;= 0
 *        (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
 * @param unicodeAware operate Unicode code points instead of bytes.
 * @throws IllegalArgumentException if <code>maxEdits</code> is out of range, or if
 *         <code>nonFuzzyPrefix</code> or <code>minFuzzyLength</code> is negative
 */
public FuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer,
                      int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
                      boolean preservePositionIncrements, int maxEdits, boolean transpositions,
                      int nonFuzzyPrefix, int minFuzzyLength, boolean unicodeAware) {
  super(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements);
  if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  // The messages below state the actual requirement; the earlier wording
  // ("must not be >= 0") said the opposite of what is checked.
  if (nonFuzzyPrefix < 0) {
    throw new IllegalArgumentException("nonFuzzyPrefix must be >= 0 (got " + nonFuzzyPrefix + ")");
  }
  if (minFuzzyLength < 0) {
    throw new IllegalArgumentException("minFuzzyLength must be >= 0 (got " + minFuzzyLength + ")");
  }

  this.maxEdits = maxEdits;
  this.transpositions = transpositions;
  this.nonFuzzyPrefix = nonFuzzyPrefix;
  this.minFuzzyLength = minFuzzyLength;
  this.unicodeAware = unicodeAware;
}
项目:search    文件:FuzzyTermsEnum.java   
/**
 * Creates an enumeration of all terms from <code>terms</code> which share a prefix of
 * length <code>prefixLength</code> with <code>term</code> and which have a fuzzy similarity &gt;
 * <code>minSimilarity</code>.
 * <p>
 * After calling the constructor the enumeration is already pointing to the first 
 * valid term if such a term exists. 
 * 
 * @param terms Delivers terms.
 * @param atts {@link AttributeSource} created by the rewrite method of {@link MultiTermQuery}
 *        that contains information about competitive boosts during rewrite. It is also used
 *        to cache DFAs between segment transitions.
 * @param term Pattern term.
 * @param minSimilarity Minimum required similarity for terms from the reader. Pass an integer value
 *        representing edit distance. Passing a fraction is deprecated.
 * @param prefixLength Length of required common prefix. Default value is 0.
 * @param transpositions when <code>true</code>, an edit distance above
 *        {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} is rejected
 * @throws IOException if there is a low-level IO error
 * @throws IllegalArgumentException if <code>minSimilarity</code> is negative or is a
 *         non-integral value of 1 or more, or if <code>prefixLength</code> is negative
 * @throws UnsupportedOperationException if transpositions are enabled and the resulting
 *         edit distance exceeds {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 */
public FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term, 
    final float minSimilarity, final int prefixLength, boolean transpositions) throws IOException {
  if (minSimilarity >= 1.0f && minSimilarity != (int) minSimilarity) {
    throw new IllegalArgumentException("fractional edit distances are not allowed");
  }
  if (minSimilarity < 0.0f) {
    throw new IllegalArgumentException("minimumSimilarity cannot be less than 0");
  }
  if (prefixLength < 0) {
    throw new IllegalArgumentException("prefixLength cannot be less than 0");
  }
  this.terms = terms;
  this.term = term;

  // Expand the UTF-16 string into a UTF-32 int[] so later comparisons are per code point.
  final String source = term.text();
  final int charCount = source.length();
  this.termText = new int[source.codePointCount(0, charCount)];
  int slot = 0;
  for (int pos = 0; pos < charCount; ) {
    final int codePoint = source.codePointAt(pos);
    termText[slot] = codePoint;
    slot++;
    pos += Character.charCount(codePoint);
  }
  this.termLength = termText.length;
  this.dfaAtt = atts.addAttribute(LevenshteinAutomataAttribute.class);

  // The prefix could be longer than the word; it is then cut back to the
  // word length, which means the entire word must match.
  if (prefixLength > termLength) {
    this.realPrefixLength = termLength;
  } else {
    this.realPrefixLength = prefixLength;
  }

  // if minSimilarity >= 1, we treat it as number of edits
  final boolean rawEdits = minSimilarity >= 1f;
  if (rawEdits) {
    this.minSimilarity = 0; // just driven by number of edits
    maxEdits = (int) minSimilarity;
  } else {
    this.minSimilarity = minSimilarity;
    // calculate the maximum k edits for this similarity
    maxEdits = initialMaxDistance(this.minSimilarity, termLength);
  }
  raw = rawEdits;

  if (transpositions && maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new UnsupportedOperationException("with transpositions enabled, distances > " 
      + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + " are not supported ");
  }
  this.transpositions = transpositions;
  this.scale_factor = 1.0f / (1.0f - this.minSimilarity);

  this.maxBoostAtt = atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  bottom = maxBoostAtt.getMaxNonCompetitiveBoost();
  bottomTerm = maxBoostAtt.getCompetitiveTerm();
  bottomChanged(null, true);
}
项目:NYBC    文件:FuzzySuggester.java   
/**
 * Creates a {@link FuzzySuggester} instance.
 * 
 * @param indexAnalyzer Analyzer that will be used for
 *        analyzing suggestions while building the index.
 * @param queryAnalyzer Analyzer that will be used for
 *        analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *        surface forms to keep for a single analyzed form.
 *        When there are too many surface forms we discard the
 *        lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *        to expand from the analyzed form.  Set this to -1 for
 *        no limit.
 * @param maxEdits must be &gt;= 0 and &lt;= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} .
 * @param transpositions <code>true</code> if transpositions should be treated as a primitive 
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 * @param nonFuzzyPrefix length of common (non-fuzzy) prefix, must be &gt;= 0 (see default {@link #DEFAULT_NON_FUZZY_PREFIX})
 * @param minFuzzyLength minimum length of lookup key before any edits are allowed, must be &gt;= 0
 *        (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
 * @throws IllegalArgumentException if <code>maxEdits</code> is out of range, or if
 *         <code>nonFuzzyPrefix</code> or <code>minFuzzyLength</code> is negative
 */
public FuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer,
                      int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
                      int maxEdits, boolean transpositions, int nonFuzzyPrefix,
                      int minFuzzyLength) {
  super(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions);
  if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  // The messages below state the actual requirement; the earlier wording
  // ("must not be >= 0") said the opposite of what is checked.
  if (nonFuzzyPrefix < 0) {
    throw new IllegalArgumentException("nonFuzzyPrefix must be >= 0 (got " + nonFuzzyPrefix + ")");
  }
  if (minFuzzyLength < 0) {
    throw new IllegalArgumentException("minFuzzyLength must be >= 0 (got " + minFuzzyLength + ")");
  }

  this.maxEdits = maxEdits;
  this.transpositions = transpositions;
  this.nonFuzzyPrefix = nonFuzzyPrefix;
  this.minFuzzyLength = minFuzzyLength;
}
项目:NYBC    文件:FuzzyTermsEnum.java   
/**
 * Sets up an enumeration of the terms from <code>terms</code> that share a prefix of
 * length <code>prefixLength</code> with <code>term</code> and have a fuzzy similarity &gt;
 * <code>minSimilarity</code>.
 * <p>
 * After calling the constructor the enumeration is already pointing to the first 
 * valid term if such a term exists. 
 * 
 * @param terms Delivers terms.
 * @param atts {@link AttributeSource} created by the rewrite method of {@link MultiTermQuery}
 *        that contains information about competitive boosts during rewrite. It is also used
 *        to cache DFAs between segment transitions.
 * @param term Pattern term.
 * @param minSimilarity Minimum required similarity for terms from the reader. Pass an integer value
 *        representing edit distance. Passing a fraction is deprecated.
 * @param prefixLength Length of required common prefix. Default value is 0.
 * @param transpositions when <code>true</code>, an edit distance above
 *        {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} is rejected
 * @throws IOException if there is a low-level IO error
 * @throws IllegalArgumentException if <code>minSimilarity</code> is negative or is a
 *         non-integral value of 1 or more, or if <code>prefixLength</code> is negative
 * @throws UnsupportedOperationException if transpositions are enabled and the resulting
 *         edit distance exceeds {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 */
public FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term, 
    final float minSimilarity, final int prefixLength, boolean transpositions) throws IOException {
  // Argument checks, in the order callers observe them.
  final boolean isEditCount = minSimilarity >= 1.0f;
  if (isEditCount && minSimilarity != (int) minSimilarity) {
    throw new IllegalArgumentException("fractional edit distances are not allowed");
  }
  if (minSimilarity < 0.0f) {
    throw new IllegalArgumentException("minimumSimilarity cannot be less than 0");
  }
  if (prefixLength < 0) {
    throw new IllegalArgumentException("prefixLength cannot be less than 0");
  }

  this.terms = terms;
  this.term = term;

  // convert the string into a utf32 int[] representation for fast comparisons
  final String utf16Text = term.text();
  final int[] utf32 = new int[utf16Text.codePointCount(0, utf16Text.length())];
  for (int read = 0, write = 0; read < utf16Text.length(); write++) {
    final int codePoint = utf16Text.codePointAt(read);
    utf32[write] = codePoint;
    read += Character.charCount(codePoint);
  }
  this.termText = utf32;
  this.termLength = utf32.length;
  this.dfaAtt = atts.addAttribute(LevenshteinAutomataAttribute.class);

  // A prefix longer than the word is clamped to the word length,
  // which means the entire word has to match.
  this.realPrefixLength = Math.min(prefixLength, termLength);

  if (isEditCount) {
    // the value is a number of edits; similarity plays no part
    this.minSimilarity = 0;
    maxEdits = (int) minSimilarity;
    raw = true;
  } else {
    this.minSimilarity = minSimilarity;
    // calculate the maximum k edits for this similarity
    maxEdits = initialMaxDistance(this.minSimilarity, termLength);
    raw = false;
  }

  final boolean beyondSupported = maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
  if (transpositions && beyondSupported) {
    throw new UnsupportedOperationException("with transpositions enabled, distances > " 
      + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + " are not supported ");
  }
  this.transpositions = transpositions;
  this.scale_factor = 1.0f / (1.0f - this.minSimilarity);

  this.maxBoostAtt = atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  bottom = maxBoostAtt.getMaxNonCompetitiveBoost();
  bottomTerm = maxBoostAtt.getCompetitiveTerm();
  bottomChanged(null, true);
}
项目:read-open-source-code    文件:FuzzySuggester.java   
/**
 * Creates a {@link FuzzySuggester} instance.
 * 
 * @param indexAnalyzer Analyzer that will be used for
 *        analyzing suggestions while building the index.
 * @param queryAnalyzer Analyzer that will be used for
 *        analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *        surface forms to keep for a single analyzed form.
 *        When there are too many surface forms we discard the
 *        lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *        to expand from the analyzed form.  Set this to -1 for
 *        no limit.
 * @param preservePositionIncrements Whether position holes should appear in the automaton
 * @param maxEdits must be &gt;= 0 and &lt;= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} .
 * @param transpositions <code>true</code> if transpositions should be treated as a primitive 
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 * @param nonFuzzyPrefix length of common (non-fuzzy) prefix, must be &gt;= 0 (see default {@link #DEFAULT_NON_FUZZY_PREFIX})
 * @param minFuzzyLength minimum length of lookup key before any edits are allowed, must be &gt;= 0
 *        (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
 * @param unicodeAware operate Unicode code points instead of bytes.
 * @throws IllegalArgumentException if <code>maxEdits</code> is out of range, or if
 *         <code>nonFuzzyPrefix</code> or <code>minFuzzyLength</code> is negative
 */
public FuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer,
                      int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
                      boolean preservePositionIncrements, int maxEdits, boolean transpositions,
                      int nonFuzzyPrefix, int minFuzzyLength, boolean unicodeAware) {
  super(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements);
  if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  // The messages below state the actual requirement; the earlier wording
  // ("must not be >= 0") said the opposite of what is checked.
  if (nonFuzzyPrefix < 0) {
    throw new IllegalArgumentException("nonFuzzyPrefix must be >= 0 (got " + nonFuzzyPrefix + ")");
  }
  if (minFuzzyLength < 0) {
    throw new IllegalArgumentException("minFuzzyLength must be >= 0 (got " + minFuzzyLength + ")");
  }

  this.maxEdits = maxEdits;
  this.transpositions = transpositions;
  this.nonFuzzyPrefix = nonFuzzyPrefix;
  this.minFuzzyLength = minFuzzyLength;
  this.unicodeAware = unicodeAware;
}
项目:read-open-source-code    文件:FuzzyTermsEnum.java   
/**
 * Constructs an enumeration of all terms from <code>terms</code> which share a prefix of
 * length <code>prefixLength</code> with <code>term</code> and whose fuzzy similarity is &gt;
 * <code>minSimilarity</code>.
 * <p>
 * After calling the constructor the enumeration is already pointing to the first 
 * valid term if such a term exists. 
 * 
 * @param terms Delivers terms.
 * @param atts {@link AttributeSource} created by the rewrite method of {@link MultiTermQuery}
 *        that contains information about competitive boosts during rewrite. It is also used
 *        to cache DFAs between segment transitions.
 * @param term Pattern term.
 * @param minSimilarity Minimum required similarity for terms from the reader. Pass an integer value
 *        representing edit distance. Passing a fraction is deprecated.
 * @param prefixLength Length of required common prefix. Default value is 0.
 * @param transpositions when <code>true</code>, an edit distance above
 *        {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} is rejected
 * @throws IOException if there is a low-level IO error
 * @throws IllegalArgumentException if <code>minSimilarity</code> is negative or is a
 *         non-integral value of 1 or more, or if <code>prefixLength</code> is negative
 * @throws UnsupportedOperationException if transpositions are enabled and the resulting
 *         edit distance exceeds {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 */
public FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term, 
    final float minSimilarity, final int prefixLength, boolean transpositions) throws IOException {
  if (minSimilarity >= 1.0f && minSimilarity != (int) minSimilarity) {
    throw new IllegalArgumentException("fractional edit distances are not allowed");
  }
  if (minSimilarity < 0.0f) {
    throw new IllegalArgumentException("minimumSimilarity cannot be less than 0");
  }
  if (prefixLength < 0) {
    throw new IllegalArgumentException("prefixLength cannot be less than 0");
  }
  this.terms = terms;
  this.term = term;

  // Build the code point (UTF-32) view of the term text used for fast comparisons.
  final String chars = term.text();
  this.termText = new int[chars.codePointCount(0, chars.length())];
  int next = 0;
  int index = 0;
  while (index < chars.length()) {
    final int codePoint = chars.codePointAt(index);
    index += Character.charCount(codePoint);
    termText[next++] = codePoint;
  }
  this.termLength = termText.length;
  this.dfaAtt = atts.addAttribute(LevenshteinAutomataAttribute.class);

  //The prefix could be longer than the word.
  //It is then limited to the word length, so the entire word must match.
  this.realPrefixLength = (prefixLength <= termLength) ? prefixLength : termLength;

  // if minSimilarity >= 1, we treat it as number of edits
  final boolean givenAsEdits = minSimilarity >= 1f;
  if (givenAsEdits) {
    this.minSimilarity = 0; // just driven by number of edits
    maxEdits = (int) minSimilarity;
    raw = true;
  } else {
    this.minSimilarity = minSimilarity;
    // calculate the maximum k edits for this similarity
    maxEdits = initialMaxDistance(this.minSimilarity, termLength);
    raw = false;
  }
  if (transpositions && maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new UnsupportedOperationException("with transpositions enabled, distances > " 
      + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + " are not supported ");
  }
  this.transpositions = transpositions;
  this.scale_factor = 1.0f / (1.0f - this.minSimilarity);

  // Read the current competitive boost and term before the first bottomChanged call.
  this.maxBoostAtt = atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  bottom = maxBoostAtt.getMaxNonCompetitiveBoost();
  bottomTerm = maxBoostAtt.getCompetitiveTerm();
  bottomChanged(null, true);
}
项目:read-open-source-code    文件:FuzzySuggester.java   
/**
 * Creates a {@link FuzzySuggester} instance.
 * 
 * @param indexAnalyzer Analyzer that will be used for
 *        analyzing suggestions while building the index.
 * @param queryAnalyzer Analyzer that will be used for
 *        analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *        surface forms to keep for a single analyzed form.
 *        When there are too many surface forms we discard the
 *        lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *        to expand from the analyzed form.  Set this to -1 for
 *        no limit.
 * @param preservePositionIncrements Whether position holes should appear in the automaton
 * @param maxEdits must be &gt;= 0 and &lt;= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} .
 * @param transpositions <code>true</code> if transpositions should be treated as a primitive 
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 * @param nonFuzzyPrefix length of common (non-fuzzy) prefix, must be &gt;= 0 (see default {@link #DEFAULT_NON_FUZZY_PREFIX})
 * @param minFuzzyLength minimum length of lookup key before any edits are allowed, must be &gt;= 0
 *        (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
 * @param unicodeAware operate Unicode code points instead of bytes.
 * @throws IllegalArgumentException if <code>maxEdits</code> is out of range, or if
 *         <code>nonFuzzyPrefix</code> or <code>minFuzzyLength</code> is negative
 */
public FuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer,
                      int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
                      boolean preservePositionIncrements, int maxEdits, boolean transpositions,
                      int nonFuzzyPrefix, int minFuzzyLength, boolean unicodeAware) {
  super(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements);
  if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  // The messages below state the actual requirement; the earlier wording
  // ("must not be >= 0") said the opposite of what is checked.
  if (nonFuzzyPrefix < 0) {
    throw new IllegalArgumentException("nonFuzzyPrefix must be >= 0 (got " + nonFuzzyPrefix + ")");
  }
  if (minFuzzyLength < 0) {
    throw new IllegalArgumentException("minFuzzyLength must be >= 0 (got " + minFuzzyLength + ")");
  }

  this.maxEdits = maxEdits;
  this.transpositions = transpositions;
  this.nonFuzzyPrefix = nonFuzzyPrefix;
  this.minFuzzyLength = minFuzzyLength;
  this.unicodeAware = unicodeAware;
}
项目:read-open-source-code    文件:FuzzyTermsEnum.java   
/**
 * Builds an enumeration over the terms supplied by <code>terms</code> that share a prefix of
 * length <code>prefixLength</code> with <code>term</code> and whose fuzzy similarity is &gt;
 * <code>minSimilarity</code>.
 * <p>
 * After calling the constructor the enumeration is already pointing to the first 
 * valid term if such a term exists. 
 * 
 * @param terms Delivers terms.
 * @param atts {@link AttributeSource} created by the rewrite method of {@link MultiTermQuery}
 * that contains information about competitive boosts during rewrite. It is also used
 * to cache DFAs between segment transitions.
 * @param term Pattern term.
 * @param minSimilarity Minimum required similarity for terms from the reader. A value
 *        &gt;= 1 must be a whole number and is taken as the maximum edit distance;
 *        a value in [0, 1) is treated as a similarity fraction (deprecated usage).
 * @param prefixLength Length of required common prefix. Default value is 0. A value longer
 *        than the term is clamped to the term length.
 * @param transpositions true if transpositions should be treated as a primitive edit
 *        operation.
 * @throws IllegalArgumentException if <code>minSimilarity</code> is negative or is a
 *         non-integral value &gt;= 1, or if <code>prefixLength</code> is negative
 * @throws UnsupportedOperationException if <code>transpositions</code> is true and the
 *         resulting edit distance exceeds {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 * @throws IOException if there is a low-level IO error
 */
public FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term, 
    final float minSimilarity, final int prefixLength, boolean transpositions) throws IOException {
  // Values >= 1 are interpreted as an edit count rather than a similarity fraction.
  final boolean editCountGiven = minSimilarity >= 1.0f;

  if (editCountGiven && minSimilarity != (int) minSimilarity) {
    throw new IllegalArgumentException("fractional edit distances are not allowed");
  }
  if (minSimilarity < 0.0f) {
    throw new IllegalArgumentException("minimumSimilarity cannot be less than 0");
  }
  if (prefixLength < 0) {
    throw new IllegalArgumentException("prefixLength cannot be less than 0");
  }

  this.terms = terms;
  this.term = term;

  // Decode the UTF-16 pattern text into one int per code point for fast comparisons.
  final String patternText = term.text();
  final int charCount = patternText.length();
  final int[] codePoints = new int[patternText.codePointCount(0, charCount)];
  int charIndex = 0;
  int slot = 0;
  while (charIndex < charCount) {
    final int codePoint = patternText.codePointAt(charIndex);
    codePoints[slot++] = codePoint;
    charIndex += Character.charCount(codePoint);
  }
  this.termText = codePoints;
  this.termLength = codePoints.length;
  this.dfaAtt = atts.addAttribute(LevenshteinAutomataAttribute.class);

  // A prefix longer than the word simply means the entire word must match.
  this.realPrefixLength = Math.min(prefixLength, termLength);

  if (editCountGiven) {
    // Matching is driven purely by the number of edits.
    this.minSimilarity = 0;
    this.maxEdits = (int) minSimilarity;
    this.raw = true;
  } else {
    // Derive the maximum number of edits from the similarity fraction.
    this.minSimilarity = minSimilarity;
    this.maxEdits = initialMaxDistance(this.minSimilarity, termLength);
    this.raw = false;
  }

  final boolean beyondSupportedDistance = maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
  if (transpositions && beyondSupportedDistance) {
    throw new UnsupportedOperationException("with transpositions enabled, distances > " 
      + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + " are not supported ");
  }
  this.transpositions = transpositions;
  this.scale_factor = 1.0f / (1.0f - this.minSimilarity);

  // Pick up the current competitive-boost state before positioning the enum.
  this.maxBoostAtt = atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  this.bottom = maxBoostAtt.getMaxNonCompetitiveBoost();
  this.bottomTerm = maxBoostAtt.getCompetitiveTerm();
  bottomChanged(null, true);
}