Java 类org.apache.lucene.analysis.TokenStreamToAutomaton 实例源码

项目:elasticsearch_my    文件:XAnalyzingSuggester.java   
final Automaton toAutomaton(TokenStream ts, final TokenStreamToAutomaton ts2a) throws IOException {
    // Build the token automaton: labels are the bytes of each analyzed
    // token, with byte 0 used as the separator between tokens.
    final Automaton tokenAutomaton = ts2a.toAutomaton(ts);

    // Rewrite separator labels, then apply the subclass-specific conversion.
    // The automaton may contain multiple paths if the analyzer produced a
    // graph (e.g. via SynFilter or WDF).
    // TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings
    // assert SpecialOperations.isFinite(automaton);
    return convertAutomaton(replaceSep(tokenAutomaton));
}
项目:Elasticsearch    文件:XAnalyzingSuggester.java   
final Automaton toAutomaton(TokenStream ts, final TokenStreamToAutomaton ts2a) throws IOException {
    // Map the analyzed tokens onto an automaton whose labels are token
    // bytes; byte 0 separates consecutive tokens.
    Automaton result = ts2a.toAutomaton(ts);

    // Normalize separators and run the subclass conversion hook. More than
    // one path may exist when the analyzer emitted a graph (SynFilter, WDF).
    result = replaceSep(result);
    result = convertAutomaton(result);

    // TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings
    // assert SpecialOperations.isFinite(automaton);
    return result;
}
项目:NYBC    文件:AnalyzingSuggester.java   
final Set<IntsRef> toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
   // Analyze the surface form and convert the resulting token stream into
   // an automaton whose labels are the bytes of each analyzed token, with
   // byte 0 used as the separator between tokens.
   Automaton automaton;
   TokenStream ts = indexAnalyzer.tokenStream("", new StringReader(surfaceForm.utf8ToString()));
   try {
     automaton = ts2a.toAutomaton(ts);
     ts.end();
   } finally {
     // FIX: the original called end()/close() with no try/finally, leaking
     // the TokenStream whenever toAutomaton() threw an exception.
     ts.close();
   }

   replaceSep(automaton);

   assert SpecialOperations.isFinite(automaton);

   // Get all paths from the automaton (there can be more than one path,
   // e.g. if the analyzer created a graph using SynFilter or WDF).
   // TODO: we could walk & add simultaneously, so we don't have to alloc
   // a [possibly biggish] intermediate HashSet in RAM:
   return SpecialOperations.getFiniteStrings(automaton, maxGraphExpansions);
 }
项目:elasticsearch_my    文件:XAnalyzingSuggester.java   
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
    // Convert the stream to an automaton; try-with-resources guarantees the
    // TokenStream is closed even if conversion fails.
    final TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();
    final Automaton automaton;
    try (TokenStream ts = stream) {
        automaton = toAutomaton(ts, ts2a);
    }
    // Enumerate at most maxGraphExpansions paths, deep-copying each IntsRef
    // (presumably the iterator reuses its buffer — copy kept from original).
    final Set<IntsRef> paths = new HashSet<>();
    final LimitedFiniteStringsIterator iterator =
            new LimitedFiniteStringsIterator(automaton, maxGraphExpansions);
    IntsRef path;
    while ((path = iterator.next()) != null) {
        paths.add(IntsRef.deepCopyOf(path));
    }
    return Collections.unmodifiableSet(paths);
}
项目:elasticsearch_my    文件:CompletionSuggestSearchIT.java   
public static boolean isReservedChar(char c) {
    // Reserved characters: unit separator (0x1F), the HOLE marker, NUL,
    // and the context separator used by context suggest fields.
    return c == '\u001F'
            || c == TokenStreamToAutomaton.HOLE
            || c == 0x0
            || c == ContextSuggestField.CONTEXT_SEPARATOR;
}
项目:Elasticsearch    文件:XAnalyzingSuggester.java   
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
    // Build the automaton for the stream, closing it deterministically.
    final TokenStreamToAutomaton converter = getTokenStreamToAutomaton();
    final Automaton automaton;
    try (TokenStream tokenStream = stream) {
        automaton = toAutomaton(tokenStream, converter);
    }
    // Collect up to maxGraphExpansions finite strings; each IntsRef is
    // deep-copied before storage (copy behavior kept from the original).
    final Set<IntsRef> result = new HashSet<>();
    final LimitedFiniteStringsIterator iterator =
            new LimitedFiniteStringsIterator(automaton, maxGraphExpansions);
    for (IntsRef current = iterator.next(); current != null; current = iterator.next()) {
        result.add(IntsRef.deepCopyOf(current));
    }
    return Collections.unmodifiableSet(result);
}
项目:search    文件:AnalyzingSuggester.java   
final Set<IntsRef> toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
  // Analyze the surface form into an automaton whose labels are the bytes
  // of each analyzed token; byte 0 separates tokens.
  final Automaton tokenAutomaton;
  TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString());
  try {
    tokenAutomaton = ts2a.toAutomaton(ts);
  } finally {
    IOUtils.closeWhileHandlingException(ts);
  }

  // Rewrite separators, then apply the subclass conversion hook.
  Automaton automaton = convertAutomaton(replaceSep(tokenAutomaton));

  // TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings
  // assert SpecialOperations.isFinite(automaton);

  // Multiple paths are possible when the analyzer produced a graph
  // (e.g. SynFilter or WDF).
  // TODO: we could walk & add simultaneously, avoiding the [possibly
  // biggish] intermediate HashSet in RAM.
  return Operations.getFiniteStrings(automaton, maxGraphExpansions);
}
项目:NYBC    文件:AnalyzingSuggester.java   
TokenStreamToAutomaton getTokenStreamToAutomaton() {
  // When separators are preserved the 0xff byte is stolen as an escape, so
  // the escaping converter is required; otherwise no escaping is needed.
  return preserveSep ? new EscapingTokenStreamToAutomaton() : new TokenStreamToAutomaton();
}
项目:read-open-source-code    文件:AnalyzingSuggester.java   
final Set<IntsRef> toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
  // Run the surface form through the index analyzer and convert the token
  // stream to an automaton: labels are token bytes, byte 0 separates tokens.
  Automaton automaton = null;
  TokenStream tokenStream = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString());
  try {
    automaton = ts2a.toAutomaton(tokenStream);
  } finally {
    IOUtils.closeWhileHandlingException(tokenStream);
  }

  // replaceSep presumably mutates in place here (its result is ignored in
  // this API version — kept as-is); convertAutomaton may return a new one.
  replaceSep(automaton);
  automaton = convertAutomaton(automaton);

  assert SpecialOperations.isFinite(automaton);

  // More than one path may exist when the analyzer emitted a graph
  // (SynFilter/WDF).
  // TODO: we could walk & add simultaneously, avoiding the [possibly
  // biggish] intermediate HashSet in RAM.
  return SpecialOperations.getFiniteStrings(automaton, maxGraphExpansions);
}
项目:read-open-source-code    文件:AnalyzingSuggester.java   
final Set<IntsRef> toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
  // Analyze the surface form; the converter produces an automaton with
  // byte labels from each analyzed token, byte 0 acting as separator.
  Automaton automaton = null;
  TokenStream analyzed = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString());
  try {
    automaton = ts2a.toAutomaton(analyzed);
  } finally {
    IOUtils.closeWhileHandlingException(analyzed);
  }

  // Separator rewrite (return value ignored in this API version — kept
  // as-is), followed by the subclass conversion hook.
  replaceSep(automaton);
  automaton = convertAutomaton(automaton);

  assert SpecialOperations.isFinite(automaton);

  // Collect every path; analyzer graphs (SynFilter/WDF) can yield several.
  // TODO: walk & add simultaneously to skip the intermediate HashSet.
  return SpecialOperations.getFiniteStrings(automaton, maxGraphExpansions);
}
项目:read-open-source-code    文件:AnalyzingSuggester.java   
final Set<IntsRef> toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
  // Analyze the surface form into a byte-labeled automaton (byte 0 is the
  // token separator), closing the stream even on failure.
  final Automaton tokenAutomaton;
  TokenStream tokenStream = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString());
  try {
    tokenAutomaton = ts2a.toAutomaton(tokenStream);
  } finally {
    IOUtils.closeWhileHandlingException(tokenStream);
  }

  // Rewrite separators, then apply the subclass conversion.
  final Automaton converted = convertAutomaton(replaceSep(tokenAutomaton));

  // TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings
  // assert SpecialOperations.isFinite(automaton);

  // Several paths may exist if the analyzer produced a graph (SynFilter,
  // WDF). TODO: walk & add simultaneously to avoid the [possibly biggish]
  // intermediate HashSet in RAM.
  return Operations.getFiniteStrings(converted, maxGraphExpansions);
}
项目:Maskana-Gestor-de-Conocimiento    文件:AnalyzingSuggester.java   
final Set<IntsRef> toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
  // Analyze the surface form and build the corresponding automaton whose
  // labels are bytes from each analyzed token; byte 0 separates tokens.
  Automaton automaton = null;
  TokenStream stream = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString());
  try {
    automaton = ts2a.toAutomaton(stream);
  } finally {
    IOUtils.closeWhileHandlingException(stream);
  }

  // Separator rewrite (result ignored in this API version — kept as-is),
  // then the subclass conversion hook.
  replaceSep(automaton);
  automaton = convertAutomaton(automaton);

  assert SpecialOperations.isFinite(automaton);

  // All paths are collected; analyzer graphs (SynFilter/WDF) may yield
  // more than one. TODO: walk & add simultaneously, avoiding the
  // [possibly biggish] intermediate HashSet in RAM.
  return SpecialOperations.getFiniteStrings(automaton, maxGraphExpansions);
}
项目:elasticsearch_my    文件:XFuzzySuggester.java   
@Override
public TokenStreamToAutomaton getTokenStreamToAutomaton() {
  // Reuse the parent's converter, toggling unicode arcs per configuration.
  final TokenStreamToAutomaton converter = super.getTokenStreamToAutomaton();
  converter.setUnicodeArcs(unicodeAware);
  return converter;
}
项目:elasticsearch_my    文件:XAnalyzingSuggester.java   
final Automaton toAutomaton(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
    // Analyze the UTF-8 surface form and delegate to the TokenStream
    // overload; try-with-resources ensures the stream is closed.
    try (TokenStream tokenStream = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString())) {
        return toAutomaton(tokenStream, ts2a);
    }
}
项目:Elasticsearch    文件:XFuzzySuggester.java   
@Override
public TokenStreamToAutomaton getTokenStreamToAutomaton() {
  // Obtain the base converter and enable/disable unicode arcs as configured.
  final TokenStreamToAutomaton base = super.getTokenStreamToAutomaton();
  base.setUnicodeArcs(unicodeAware);
  return base;
}
项目:Elasticsearch    文件:XAnalyzingSuggester.java   
final Automaton toAutomaton(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
    // Tokenize the surface form text and hand off to the stream-based
    // overload; the stream is closed automatically.
    try (TokenStream analyzed = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString())) {
        return toAutomaton(analyzed, ts2a);
    }
}
项目:search    文件:FuzzySuggester.java   
@Override
TokenStreamToAutomaton getTokenStreamToAutomaton() {
  // Start from the parent converter and apply the unicode-arcs setting.
  final TokenStreamToAutomaton converter = super.getTokenStreamToAutomaton();
  converter.setUnicodeArcs(unicodeAware);
  return converter;
}
项目:search    文件:AnalyzingSuggester.java   
TokenStreamToAutomaton getTokenStreamToAutomaton() {
  // Fresh converter carrying this suggester's position-increment policy.
  TokenStreamToAutomaton converter = new TokenStreamToAutomaton();
  converter.setPreservePositionIncrements(preservePositionIncrements);
  return converter;
}
项目:read-open-source-code    文件:FuzzySuggester.java   
@Override
TokenStreamToAutomaton getTokenStreamToAutomaton() {
  // Delegate to the superclass, then propagate the unicode-arcs flag.
  final TokenStreamToAutomaton base = super.getTokenStreamToAutomaton();
  base.setUnicodeArcs(unicodeAware);
  return base;
}
项目:read-open-source-code    文件:AnalyzingSuggester.java   
TokenStreamToAutomaton getTokenStreamToAutomaton() {
  // Build a new converter configured with the position-increment setting.
  TokenStreamToAutomaton result = new TokenStreamToAutomaton();
  result.setPreservePositionIncrements(preservePositionIncrements);
  return result;
}
项目:read-open-source-code    文件:FuzzySuggester.java   
@Override
TokenStreamToAutomaton getTokenStreamToAutomaton() {
  // Base converter plus this suggester's unicode-arcs configuration.
  final TokenStreamToAutomaton converter = super.getTokenStreamToAutomaton();
  converter.setUnicodeArcs(unicodeAware);
  return converter;
}
项目:read-open-source-code    文件:AnalyzingSuggester.java   
TokenStreamToAutomaton getTokenStreamToAutomaton() {
  // Converter honoring this suggester's position-increment preservation.
  TokenStreamToAutomaton converter = new TokenStreamToAutomaton();
  converter.setPreservePositionIncrements(preservePositionIncrements);
  return converter;
}
项目:read-open-source-code    文件:FuzzySuggester.java   
@Override
TokenStreamToAutomaton getTokenStreamToAutomaton() {
  // Fetch the parent converter, then set unicode arcs per configuration.
  final TokenStreamToAutomaton base = super.getTokenStreamToAutomaton();
  base.setUnicodeArcs(unicodeAware);
  return base;
}
项目:read-open-source-code    文件:AnalyzingSuggester.java   
TokenStreamToAutomaton getTokenStreamToAutomaton() {
  // New converter with the configured position-increment behavior applied.
  TokenStreamToAutomaton result = new TokenStreamToAutomaton();
  result.setPreservePositionIncrements(preservePositionIncrements);
  return result;
}
项目:Maskana-Gestor-de-Conocimiento    文件:FuzzySuggester.java   
@Override
TokenStreamToAutomaton getTokenStreamToAutomaton() {
  // Augment the parent's converter with the unicode-arcs setting.
  final TokenStreamToAutomaton converter = super.getTokenStreamToAutomaton();
  converter.setUnicodeArcs(unicodeAware);
  return converter;
}
项目:Maskana-Gestor-de-Conocimiento    文件:AnalyzingSuggester.java   
TokenStreamToAutomaton getTokenStreamToAutomaton() {
  // Instantiate a converter and carry over the position-increment policy.
  TokenStreamToAutomaton converter = new TokenStreamToAutomaton();
  converter.setPreservePositionIncrements(preservePositionIncrements);
  return converter;
}