Java 类org.apache.lucene.util.automaton.CompiledAutomaton 实例源码

项目:lams    文件:FuzzyTermsEnum.java   
/** initialize levenshtein DFAs up to maxDistance, if possible */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
  //System.out.println("cached automata size: " + runAutomata.size());
  if (runAutomata.size() <= maxDistance &&
      maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    LevenshteinAutomata builder = 
      new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions);

    String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);
    for (int i = runAutomata.size(); i <= maxDistance; i++) {
      Automaton a = builder.toAutomaton(i, prefix);
      //System.out.println("compute automaton n=" + i);
      runAutomata.add(new CompiledAutomaton(a, true, false));
    }
  }
  return runAutomata;
}
项目:lams    文件:MultiTerms.java   
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<>();
  for(int i=0;i<subs.length;i++) {
    final TermsEnum termsEnum = subs[i].intersect(compiled, startTerm);
    if (termsEnum != null) {
      termsEnums.add(new MultiTermsEnum.TermsEnumIndex(termsEnum, i));
    }
  }

  if (termsEnums.size() > 0) {
    return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
  } else {
    return TermsEnum.EMPTY;
  }
}
项目:lams    文件:Terms.java   
/** Returns a TermsEnum that iterates over all terms that
 *  are accepted by the provided {@link
 *  CompiledAutomaton}.  If the <code>startTerm</code> is
 *  provided then the returned enum will only accept terms
 *  > <code>startTerm</code>, but you still must call
 *  next() first to get to the first term.  Note that the
 *  provided <code>startTerm</code> must be accepted by
 *  the automaton.
 *
 * <p><b>NOTE</b>: the returned TermsEnum cannot
 * seek</p>. */
public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) throws IOException {
  // TODO: eventually we could support seekCeil/Exact on
  // the returned enum, instead of only being able to seek
  // at the start
  if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
  }
  if (startTerm == null) {
    return new AutomatonTermsEnum(iterator(null), compiled);
  } else {
    return new AutomatonTermsEnum(iterator(null), compiled) {
      @Override
      protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
        if (term == null) {
          term = startTerm;
        }
        return super.nextSeekTerm(term);
      }
    };
  }
}
项目:search    文件:FuzzyTermsEnum.java   
/** initialize levenshtein DFAs up to maxDistance, if possible */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
  //System.out.println("cached automata size: " + runAutomata.size());
  if (runAutomata.size() <= maxDistance &&
      maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    LevenshteinAutomata builder = 
      new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions);

    String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);
    for (int i = runAutomata.size(); i <= maxDistance; i++) {
      Automaton a = builder.toAutomaton(i, prefix);
      //System.out.println("compute automaton n=" + i);
      runAutomata.add(new CompiledAutomaton(a, true, false));
    }
  }
  return runAutomata;
}
项目:search    文件:MultiTerms.java   
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<>();
  for(int i=0;i<subs.length;i++) {
    final TermsEnum termsEnum = subs[i].intersect(compiled, startTerm);
    if (termsEnum != null) {
      termsEnums.add(new MultiTermsEnum.TermsEnumIndex(termsEnum, i));
    }
  }

  if (termsEnums.size() > 0) {
    return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
  } else {
    return TermsEnum.EMPTY;
  }
}
项目:search    文件:Terms.java   
/** Returns a TermsEnum that iterates over all terms that
 *  are accepted by the provided {@link
 *  CompiledAutomaton}.  If the <code>startTerm</code> is
 *  provided then the returned enum will only accept terms
 *  > <code>startTerm</code>, but you still must call
 *  next() first to get to the first term.  Note that the
 *  provided <code>startTerm</code> must be accepted by
 *  the automaton.
 *
 * <p><b>NOTE</b>: the returned TermsEnum cannot
 * seek</p>. */
public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) throws IOException {
  // TODO: eventually we could support seekCeil/Exact on
  // the returned enum, instead of only being able to seek
  // at the start
  if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
  }
  if (startTerm == null) {
    return new AutomatonTermsEnum(iterator(null), compiled);
  } else {
    return new AutomatonTermsEnum(iterator(null), compiled) {
      @Override
      protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
        if (term == null) {
          term = startTerm;
        }
        return super.nextSeekTerm(term);
      }
    };
  }
}
项目:NYBC    文件:FuzzyTermsEnum.java   
/** initialize levenshtein DFAs up to maxDistance, if possible */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
  //System.out.println("cached automata size: " + runAutomata.size());
  if (runAutomata.size() <= maxDistance && 
      maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    LevenshteinAutomata builder = 
      new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions);

    for (int i = runAutomata.size(); i <= maxDistance; i++) {
      Automaton a = builder.toAutomaton(i);
      //System.out.println("compute automaton n=" + i);
      // constant prefix
      if (realPrefixLength > 0) {
        Automaton prefix = BasicAutomata.makeString(
          UnicodeUtil.newString(termText, 0, realPrefixLength));
        a = BasicOperations.concatenate(prefix, a);
      }
      runAutomata.add(new CompiledAutomaton(a, true, false));
    }
  }
  return runAutomata;
}
项目:NYBC    文件:MultiTerms.java   
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<MultiTermsEnum.TermsEnumIndex>();
  for(int i=0;i<subs.length;i++) {
    final TermsEnum termsEnum = subs[i].intersect(compiled, startTerm);
    if (termsEnum != null) {
      termsEnums.add(new MultiTermsEnum.TermsEnumIndex(termsEnum, i));
    }
  }

  if (termsEnums.size() > 0) {
    return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
  } else {
    return TermsEnum.EMPTY;
  }
}
项目:NYBC    文件:Terms.java   
/** Returns a TermsEnum that iterates over all terms that
 *  are accepted by the provided {@link
 *  CompiledAutomaton}.  If the <code>startTerm</code> is
 *  provided then the returned enum will only accept terms
 *  > <code>startTerm</code>, but you still must call
 *  next() first to get to the first term.  Note that the
 *  provided <code>startTerm</code> must be accepted by
 *  the automaton.
 *
 * <p><b>NOTE</b>: the returned TermsEnum cannot
 * seek</p>. */
public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) throws IOException {
  // TODO: eventually we could support seekCeil/Exact on
  // the returned enum, instead of only being able to seek
  // at the start
  if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
  }
  if (startTerm == null) {
    return new AutomatonTermsEnum(iterator(null), compiled);
  } else {
    return new AutomatonTermsEnum(iterator(null), compiled) {
      @Override
      protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
        if (term == null) {
          term = startTerm;
        }
        return super.nextSeekTerm(term);
      }
    };
  }
}
项目:read-open-source-code    文件:FuzzyTermsEnum.java   
/** initialize levenshtein DFAs up to maxDistance, if possible */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
  //System.out.println("cached automata size: " + runAutomata.size());
  if (runAutomata.size() <= maxDistance && 
      maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    LevenshteinAutomata builder = 
      new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions);

    for (int i = runAutomata.size(); i <= maxDistance; i++) {
      Automaton a = builder.toAutomaton(i);
      //System.out.println("compute automaton n=" + i);
      // constant prefix
      if (realPrefixLength > 0) {
        Automaton prefix = BasicAutomata.makeString(
          UnicodeUtil.newString(termText, 0, realPrefixLength));
        a = BasicOperations.concatenate(prefix, a);
      }
      runAutomata.add(new CompiledAutomaton(a, true, false));
    }
  }
  return runAutomata;
}
项目:read-open-source-code    文件:MultiTerms.java   
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<MultiTermsEnum.TermsEnumIndex>();
  for(int i=0;i<subs.length;i++) {
    final TermsEnum termsEnum = subs[i].intersect(compiled, startTerm);
    if (termsEnum != null) {
      termsEnums.add(new MultiTermsEnum.TermsEnumIndex(termsEnum, i));
    }
  }

  if (termsEnums.size() > 0) {
    return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
  } else {
    return TermsEnum.EMPTY;
  }
}
项目:read-open-source-code    文件:Terms.java   
/** Returns a TermsEnum that iterates over all terms that
 *  are accepted by the provided {@link
 *  CompiledAutomaton}.  If the <code>startTerm</code> is
 *  provided then the returned enum will only accept terms
 *  > <code>startTerm</code>, but you still must call
 *  next() first to get to the first term.  Note that the
 *  provided <code>startTerm</code> must be accepted by
 *  the automaton.
 *
 * <p><b>NOTE</b>: the returned TermsEnum cannot
 * seek</p>. */
public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) throws IOException {
  // TODO: eventually we could support seekCeil/Exact on
  // the returned enum, instead of only being able to seek
  // at the start
  if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
  }
  if (startTerm == null) {
    return new AutomatonTermsEnum(iterator(null), compiled);
  } else {
    return new AutomatonTermsEnum(iterator(null), compiled) {
      @Override
      protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
        if (term == null) {
          term = startTerm;
        }
        return super.nextSeekTerm(term);
      }
    };
  }
}
项目:read-open-source-code    文件:FuzzyTermsEnum.java   
/** initialize levenshtein DFAs up to maxDistance, if possible */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
  //System.out.println("cached automata size: " + runAutomata.size());
  if (runAutomata.size() <= maxDistance && 
      maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    LevenshteinAutomata builder = 
      new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions);

    for (int i = runAutomata.size(); i <= maxDistance; i++) {
      Automaton a = builder.toAutomaton(i);
      //System.out.println("compute automaton n=" + i);
      // constant prefix
      if (realPrefixLength > 0) {
        Automaton prefix = BasicAutomata.makeString(
          UnicodeUtil.newString(termText, 0, realPrefixLength));
        a = BasicOperations.concatenate(prefix, a);
      }
      runAutomata.add(new CompiledAutomaton(a, true, false));
    }
  }
  return runAutomata;
}
项目:read-open-source-code    文件:MultiTerms.java   
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<MultiTermsEnum.TermsEnumIndex>();
  for(int i=0;i<subs.length;i++) {
    final TermsEnum termsEnum = subs[i].intersect(compiled, startTerm);
    if (termsEnum != null) {
      termsEnums.add(new MultiTermsEnum.TermsEnumIndex(termsEnum, i));
    }
  }

  if (termsEnums.size() > 0) {
    return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
  } else {
    return TermsEnum.EMPTY;
  }
}
项目:read-open-source-code    文件:Terms.java   
/** Returns a TermsEnum that iterates over all terms that
 *  are accepted by the provided {@link
 *  CompiledAutomaton}.  If the <code>startTerm</code> is
 *  provided then the returned enum will only accept terms
 *  > <code>startTerm</code>, but you still must call
 *  next() first to get to the first term.  Note that the
 *  provided <code>startTerm</code> must be accepted by
 *  the automaton.
 *
 * <p><b>NOTE</b>: the returned TermsEnum cannot
 * seek</p>. */
public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) throws IOException {
  // TODO: eventually we could support seekCeil/Exact on
  // the returned enum, instead of only being able to seek
  // at the start
  if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
  }
  if (startTerm == null) {
    return new AutomatonTermsEnum(iterator(null), compiled);
  } else {
    return new AutomatonTermsEnum(iterator(null), compiled) {
      @Override
      protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
        if (term == null) {
          term = startTerm;
        }
        return super.nextSeekTerm(term);
      }
    };
  }
}
项目:read-open-source-code    文件:FuzzyTermsEnum.java   
/** initialize levenshtein DFAs up to maxDistance, if possible */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
  //System.out.println("cached automata size: " + runAutomata.size());
  if (runAutomata.size() <= maxDistance &&
      maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    LevenshteinAutomata builder = 
      new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions);

    String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);
    for (int i = runAutomata.size(); i <= maxDistance; i++) {
      Automaton a = builder.toAutomaton(i, prefix);
      //System.out.println("compute automaton n=" + i);
      runAutomata.add(new CompiledAutomaton(a, true, false));
    }
  }
  return runAutomata;
}
项目:read-open-source-code    文件:MultiTerms.java   
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<>();
  for(int i=0;i<subs.length;i++) {
    final TermsEnum termsEnum = subs[i].intersect(compiled, startTerm);
    if (termsEnum != null) {
      termsEnums.add(new MultiTermsEnum.TermsEnumIndex(termsEnum, i));
    }
  }

  if (termsEnums.size() > 0) {
    return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
  } else {
    return TermsEnum.EMPTY;
  }
}
项目:read-open-source-code    文件:Terms.java   
/** Returns a TermsEnum that iterates over all terms that
 *  are accepted by the provided {@link
 *  CompiledAutomaton}.  If the <code>startTerm</code> is
 *  provided then the returned enum will only accept terms
 *  > <code>startTerm</code>, but you still must call
 *  next() first to get to the first term.  Note that the
 *  provided <code>startTerm</code> must be accepted by
 *  the automaton.
 *
 * <p><b>NOTE</b>: the returned TermsEnum cannot
 * seek</p>. */
public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) throws IOException {
  // TODO: eventually we could support seekCeil/Exact on
  // the returned enum, instead of only being able to seek
  // at the start
  if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
  }
  if (startTerm == null) {
    return new AutomatonTermsEnum(iterator(null), compiled);
  } else {
    return new AutomatonTermsEnum(iterator(null), compiled) {
      @Override
      protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
        if (term == null) {
          term = startTerm;
        }
        return super.nextSeekTerm(term);
      }
    };
  }
}
项目:Maskana-Gestor-de-Conocimiento    文件:FuzzyTermsEnum.java   
/** initialize levenshtein DFAs up to maxDistance, if possible */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
  //System.out.println("cached automata size: " + runAutomata.size());
  if (runAutomata.size() <= maxDistance && 
      maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    LevenshteinAutomata builder = 
      new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions);

    for (int i = runAutomata.size(); i <= maxDistance; i++) {
      Automaton a = builder.toAutomaton(i);
      //System.out.println("compute automaton n=" + i);
      // constant prefix
      if (realPrefixLength > 0) {
        Automaton prefix = BasicAutomata.makeString(
          UnicodeUtil.newString(termText, 0, realPrefixLength));
        a = BasicOperations.concatenate(prefix, a);
      }
      runAutomata.add(new CompiledAutomaton(a, true, false));
    }
  }
  return runAutomata;
}
项目:Maskana-Gestor-de-Conocimiento    文件:MultiTerms.java   
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<MultiTermsEnum.TermsEnumIndex>();
  for(int i=0;i<subs.length;i++) {
    final TermsEnum termsEnum = subs[i].intersect(compiled, startTerm);
    if (termsEnum != null) {
      termsEnums.add(new MultiTermsEnum.TermsEnumIndex(termsEnum, i));
    }
  }

  if (termsEnums.size() > 0) {
    return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
  } else {
    return TermsEnum.EMPTY;
  }
}
项目:Maskana-Gestor-de-Conocimiento    文件:Terms.java   
/** Returns a TermsEnum that iterates over all terms that
 *  are accepted by the provided {@link
 *  CompiledAutomaton}.  If the <code>startTerm</code> is
 *  provided then the returned enum will only accept terms
 *  > <code>startTerm</code>, but you still must call
 *  next() first to get to the first term.  Note that the
 *  provided <code>startTerm</code> must be accepted by
 *  the automaton.
 *
 * <p><b>NOTE</b>: the returned TermsEnum cannot
 * seek</p>. */
public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) throws IOException {
  // TODO: eventually we could support seekCeil/Exact on
  // the returned enum, instead of only being able to seek
  // at the start
  if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
  }
  if (startTerm == null) {
    return new AutomatonTermsEnum(iterator(null), compiled);
  } else {
    return new AutomatonTermsEnum(iterator(null), compiled) {
      @Override
      protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
        if (term == null) {
          term = startTerm;
        }
        return super.nextSeekTerm(term);
      }
    };
  }
}
项目:lams    文件:FieldReader.java   
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
  }
  return new IntersectTermsEnum(this, compiled, startTerm);
}
项目:lams    文件:FuzzyTermsEnum.java   
/**
 * return an automata-based enum for matching up to editDistance from
 * lastTerm, if possible
 */
protected TermsEnum getAutomatonEnum(int editDistance, BytesRef lastTerm)
    throws IOException {
  final List<CompiledAutomaton> runAutomata = initAutomata(editDistance);
  if (editDistance < runAutomata.size()) {
    //System.out.println("FuzzyTE.getAEnum: ed=" + editDistance + " lastTerm=" + (lastTerm==null ? "null" : lastTerm.utf8ToString()));
    final CompiledAutomaton compiled = runAutomata.get(editDistance);
    return new AutomatonFuzzyTermsEnum(terms.intersect(compiled, lastTerm == null ? null : compiled.floor(lastTerm, new BytesRefBuilder())),
                                       runAutomata.subList(0, editDistance + 1).toArray(new CompiledAutomaton[editDistance + 1]));
  } else {
    return null;
  }
}
项目:lams    文件:FuzzyTermsEnum.java   
public AutomatonFuzzyTermsEnum(TermsEnum tenum, CompiledAutomaton compiled[]) {
  super(tenum, false);
  this.matchers = new ByteRunAutomaton[compiled.length];
  for (int i = 0; i < compiled.length; i++)
    this.matchers[i] = compiled[i].runAutomaton;
  termRef = new BytesRef(term.text());
}
项目:lams    文件:FuzzyTermsEnum.java   
@Override
public void copyTo(AttributeImpl target) {
  final List<CompiledAutomaton> targetAutomata =
    ((LevenshteinAutomataAttribute) target).automata();
  targetAutomata.clear();
  targetAutomata.addAll(automata);
}
项目:lams    文件:AutomatonTermsEnum.java   
/**
 * Construct an enumerator based upon an automaton, enumerating the specified
 * field, working on a supplied TermsEnum
 * <p>
 * @lucene.experimental 
 * <p>
 * @param compiled CompiledAutomaton
 */
public AutomatonTermsEnum(TermsEnum tenum, CompiledAutomaton compiled) {
  super(tenum);
  this.finite = compiled.finite;
  this.runAutomaton = compiled.runAutomaton;
  assert this.runAutomaton != null;
  this.commonSuffixRef = compiled.commonSuffixRef;
  this.automaton = compiled.automaton;

  // used for path tracking, where each bit is a numbered state.
  visited = new long[runAutomaton.getSize()];

  termComp = getComparator();
}
项目:meltwater-elasticsearch-queries    文件:WildcardTermsProducer.java   
private TermsEnum wildcardEnumeration(final IndexReader reader)
        throws IOException {
    Terms terms = MultiFields.getTerms(reader, term.field());
    if(terms == null){
        return null;
    }
    return new AutomatonTermsEnum(
            terms.iterator(),
            new CompiledAutomaton(
                    WildcardQuery.toAutomaton(term),
                    false, false));
}
项目:search    文件:FSTTermsReader.java   
IntersectTermsEnum(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  super();
  //if (TEST) System.out.println("Enum init, startTerm=" + startTerm);
  this.fst = dict;
  this.fstReader = fst.getBytesReader();
  this.fstOutputs = dict.outputs;
  this.fsa = compiled.runAutomaton;
  this.level = -1;
  this.stack = new Frame[16];
  for (int i = 0 ; i < stack.length; i++) {
    this.stack[i] = new Frame();
  }

  Frame frame;
  frame = loadVirtualFrame(newFrame());
  this.level++;
  frame = loadFirstFrame(newFrame());
  pushFrame(frame);

  this.meta = null;
  this.metaUpto = 1;
  this.decoded = false;
  this.pending = false;

  if (startTerm == null) {
    pending = isAccept(topFrame());
  } else {
    doSeekCeil(startTerm);
    pending = (term == null || !startTerm.equals(term.get())) && isValid(topFrame()) && isAccept(topFrame());
  }
}
项目:search    文件:FSTOrdTermsReader.java   
IntersectTermsEnum(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  //if (TEST) System.out.println("Enum init, startTerm=" + startTerm);
  this.fst = index;
  this.fstReader = fst.getBytesReader();
  this.fstOutputs = index.outputs;
  this.fsa = compiled.runAutomaton;
  this.level = -1;
  this.stack = new Frame[16];
  for (int i = 0 ; i < stack.length; i++) {
    this.stack[i] = new Frame();
  }

  Frame frame;
  frame = loadVirtualFrame(newFrame());
  this.level++;
  frame = loadFirstFrame(newFrame());
  pushFrame(frame);

  this.decoded = false;
  this.pending = false;

  if (startTerm == null) {
    pending = isAccept(topFrame());
  } else {
    doSeekCeil(startTerm);
    pending = (term == null || !startTerm.equals(term.get())) && isValid(topFrame()) && isAccept(topFrame());
  }
}
项目:search    文件:OrdsFieldReader.java   
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
  }
  return new OrdsIntersectTermsEnum(this, compiled, startTerm);
}
项目:search    文件:AssertingAtomicReader.java   
@Override
public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws IOException {
  TermsEnum termsEnum = in.intersect(automaton, bytes);
  assert termsEnum != null;
  assert bytes == null || bytes.isValid();
  return new AssertingTermsEnum(termsEnum);
}
项目:search    文件:LuceneTestCase.java   
/** 
 * Terms api equivalency 
 */
public void assertTermsEquals(String info, IndexReader leftReader, Terms leftTerms, Terms rightTerms, boolean deep) throws IOException {
  if (leftTerms == null || rightTerms == null) {
    assertNull(info, leftTerms);
    assertNull(info, rightTerms);
    return;
  }
  assertTermsStatisticsEquals(info, leftTerms, rightTerms);
  assertEquals(leftTerms.hasOffsets(), rightTerms.hasOffsets());
  assertEquals(leftTerms.hasPositions(), rightTerms.hasPositions());
  assertEquals(leftTerms.hasPayloads(), rightTerms.hasPayloads());

  TermsEnum leftTermsEnum = leftTerms.iterator(null);
  TermsEnum rightTermsEnum = rightTerms.iterator(null);
  assertTermsEnumEquals(info, leftReader, leftTermsEnum, rightTermsEnum, true);

  assertTermsSeekingEquals(info, leftTerms, rightTerms);

  if (deep) {
    int numIntersections = atLeast(3);
    for (int i = 0; i < numIntersections; i++) {
      String re = AutomatonTestUtil.randomRegexp(random());
      CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton());
      if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
        // TODO: test start term too
        TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
        TermsEnum rightIntersection = rightTerms.intersect(automaton, null);
        assertTermsEnumEquals(info, leftReader, leftIntersection, rightIntersection, rarely());
      }
    }
  }
}
项目:search    文件:FieldReader.java   
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
  }
  return new IntersectTermsEnum(this, compiled, startTerm);
}
项目:search    文件:FuzzyTermsEnum.java   
/**
 * return an automata-based enum for matching up to editDistance from
 * lastTerm, if possible
 */
protected TermsEnum getAutomatonEnum(int editDistance, BytesRef lastTerm)
    throws IOException {
  final List<CompiledAutomaton> runAutomata = initAutomata(editDistance);
  if (editDistance < runAutomata.size()) {
    //System.out.println("FuzzyTE.getAEnum: ed=" + editDistance + " lastTerm=" + (lastTerm==null ? "null" : lastTerm.utf8ToString()));
    final CompiledAutomaton compiled = runAutomata.get(editDistance);
    return new AutomatonFuzzyTermsEnum(terms.intersect(compiled, lastTerm == null ? null : compiled.floor(lastTerm, new BytesRefBuilder())),
                                       runAutomata.subList(0, editDistance + 1).toArray(new CompiledAutomaton[editDistance + 1]));
  } else {
    return null;
  }
}
项目:search    文件:FuzzyTermsEnum.java   
public AutomatonFuzzyTermsEnum(TermsEnum tenum, CompiledAutomaton compiled[]) {
  super(tenum, false);
  this.matchers = new ByteRunAutomaton[compiled.length];
  for (int i = 0; i < compiled.length; i++)
    this.matchers[i] = compiled[i].runAutomaton;
  termRef = new BytesRef(term.text());
}
项目:search    文件:FuzzyTermsEnum.java   
@Override
public void copyTo(AttributeImpl target) {
  final List<CompiledAutomaton> targetAutomata =
    ((LevenshteinAutomataAttribute) target).automata();
  targetAutomata.clear();
  targetAutomata.addAll(automata);
}
项目:search    文件:AutomatonTermsEnum.java   
/**
 * Construct an enumerator based upon an automaton, enumerating the specified
 * field, working on a supplied TermsEnum
 * <p>
 * @lucene.experimental 
 * <p>
 * @param compiled CompiledAutomaton
 */
public AutomatonTermsEnum(TermsEnum tenum, CompiledAutomaton compiled) {
  super(tenum);
  this.finite = compiled.finite;
  this.runAutomaton = compiled.runAutomaton;
  assert this.runAutomaton != null;
  this.commonSuffixRef = compiled.commonSuffixRef;
  this.automaton = compiled.automaton;

  // used for path tracking, where each bit is a numbered state.
  visited = new long[runAutomaton.getSize()];

  termComp = getComparator();
}
项目:search    文件:TestBlockPostingsFormat3.java   
public void assertTerms(Terms leftTerms, Terms rightTerms, boolean deep) throws Exception {
  if (leftTerms == null || rightTerms == null) {
    assertNull(leftTerms);
    assertNull(rightTerms);
    return;
  }
  assertTermsStatistics(leftTerms, rightTerms);

  // NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be different

  TermsEnum leftTermsEnum = leftTerms.iterator(null);
  TermsEnum rightTermsEnum = rightTerms.iterator(null);
  assertTermsEnum(leftTermsEnum, rightTermsEnum, true);

  assertTermsSeeking(leftTerms, rightTerms);

  if (deep) {
    int numIntersections = atLeast(3);
    for (int i = 0; i < numIntersections; i++) {
      String re = AutomatonTestUtil.randomRegexp(random());
      CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton());
      if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
        // TODO: test start term too
        TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
        TermsEnum rightIntersection = rightTerms.intersect(automaton, null);
        assertTermsEnum(leftIntersection, rightIntersection, rarely());
      }
    }
  }
}
项目:search    文件:TestTermsEnum.java   
private boolean accepts(CompiledAutomaton c, BytesRef b) {
  int state = c.runAutomaton.getInitialState();
  for(int idx=0;idx<b.length;idx++) {
    assertTrue(state != -1);
    state = c.runAutomaton.step(state, b.bytes[b.offset+idx] & 0xff);
  }
  return c.runAutomaton.isAccept(state);
}
项目:NYBC    文件:AssertingAtomicReader.java   
@Override
public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws IOException {
  TermsEnum termsEnum = super.intersect(automaton, bytes);
  assert termsEnum != null;
  assert bytes == null || bytes.isValid();
  return new AssertingTermsEnum(termsEnum);
}