Java class org.apache.lucene.util.AttributeFactory: example source code

Project: lams    File: PathHierarchyTokenizer.java
public PathHierarchyTokenizer
    (AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip) {
  super(factory, input);
  if (bufferSize < 0) {
    throw new IllegalArgumentException("bufferSize cannot be negative");
  }
  if (skip < 0) {
    throw new IllegalArgumentException("skip cannot be negative");
  }
  termAtt.resizeBuffer(bufferSize);

  this.delimiter = delimiter;
  this.replacement = replacement;
  this.skip = skip;
  resultToken = new StringBuilder(bufferSize);
}
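
A minimal usage sketch (not part of either project; it assumes the Lucene 4.x constructors shown above): PathHierarchyTokenizer emits one token per path prefix, so each call to incrementToken() yields a longer prefix of the input path.

import java.io.StringReader;
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class PathHierarchyDemo {
  public static void main(String[] args) throws Exception {
    // Emits "/usr", "/usr/local", "/usr/local/bin": one token per prefix.
    PathHierarchyTokenizer tok = new PathHierarchyTokenizer(
        new StringReader("/usr/local/bin"), '/', '/', 0);
    CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
    tok.reset();
    while (tok.incrementToken()) {
      System.out.println(term);
    }
    tok.end();
    tok.close();
  }
}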
Project: lams    File: ReversePathHierarchyTokenizer.java
public ReversePathHierarchyTokenizer
    (AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip) {
  super(factory, input);
  if (bufferSize < 0) {
    throw new IllegalArgumentException("bufferSize cannot be negative");
  }
  if (skip < 0) {
    throw new IllegalArgumentException("skip cannot be negative");
  }
  termAtt.resizeBuffer(bufferSize);
  this.delimiter = delimiter;
  this.replacement = replacement;
  this.skip = skip;
  resultToken = new StringBuilder(bufferSize);
  resultTokenBuffer = new char[bufferSize];
  delimiterPositions = new ArrayList<>(bufferSize/10);
}
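
For the reverse variant, a hedged sketch (class and constructor as listed above; the expected output follows the Lucene javadoc example for domain-like hierarchies):

import java.io.StringReader;
import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class ReversePathDemo {
  public static void main(String[] args) throws Exception {
    // Per the Lucene javadoc, "www.site.co.uk" should yield
    // "www.site.co.uk", "site.co.uk", "co.uk", "uk".
    ReversePathHierarchyTokenizer tok = new ReversePathHierarchyTokenizer(
        new StringReader("www.site.co.uk"), '.', '.', 0);
    CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
    tok.reset();
    while (tok.incrementToken()) {
      System.out.println(term);
    }
    tok.end();
    tok.close();
  }
}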
Project: search    File: PathHierarchyTokenizer.java
public PathHierarchyTokenizer
    (AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip) {
  super(factory, input);
  if (bufferSize < 0) {
    throw new IllegalArgumentException("bufferSize cannot be negative");
  }
  if (skip < 0) {
    throw new IllegalArgumentException("skip cannot be negative");
  }
  termAtt.resizeBuffer(bufferSize);

  this.delimiter = delimiter;
  this.replacement = replacement;
  this.skip = skip;
  resultToken = new StringBuilder(bufferSize);
}
Project: search    File: ReversePathHierarchyTokenizer.java
public ReversePathHierarchyTokenizer
    (AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip) {
  super(factory, input);
  if (bufferSize < 0) {
    throw new IllegalArgumentException("bufferSize cannot be negative");
  }
  if (skip < 0) {
    throw new IllegalArgumentException("skip cannot be negative");
  }
  termAtt.resizeBuffer(bufferSize);
  this.delimiter = delimiter;
  this.replacement = replacement;
  this.skip = skip;
  resultToken = new StringBuilder(bufferSize);
  resultTokenBuffer = new char[bufferSize];
  delimiterPositions = new ArrayList<>(bufferSize/10);
}
Project: IKAnalyzer    File: IKTokenizer.java
public IKTokenizer(AttributeFactory factory, boolean useSmart) {
    super(factory);
    offsetAtt = addAttribute(OffsetAttribute.class);
    termAtt = addAttribute(CharTermAttribute.class);
    typeAtt = addAttribute(TypeAttribute.class);
    _IKImplement = new IKSegmenter(input, useSmart);
}
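
A hedged usage sketch for the IK tokenizer above. The package name org.wltea.analyzer.lucene and the no-Reader construction style (setReader before reset, as in Lucene 5.x) are assumptions based on common IKAnalyzer distributions:

import java.io.StringReader;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.wltea.analyzer.lucene.IKTokenizer; // assumed package

public class IKTokenizerDemo {
  public static void main(String[] args) throws Exception {
    // useSmart = true selects coarse-grained segmentation;
    // false produces the finest-grained segmentation.
    IKTokenizer tok = new IKTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, true);
    tok.setReader(new StringReader("中华人民共和国"));
    CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
    tok.reset();
    while (tok.incrementToken()) {
      System.out.println(term);
    }
    tok.end();
    tok.close();
  }
}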
Project: lams    File: PatternTokenizer.java
/** Creates a new PatternTokenizer returning tokens from the given group (-1 for split functionality). */
public PatternTokenizer(AttributeFactory factory, Reader input, Pattern pattern, int group) {
  super(factory, input);
  this.group = group;

  // Use "" instead of str so don't consume chars
  // (fillBuffer) from the input on throwing IAE below:
  matcher = pattern.matcher("");

  // confusingly group count depends ENTIRELY on the pattern but is only accessible via matcher
  if (group >= 0 && group > matcher.groupCount()) {
    throw new IllegalArgumentException("invalid group specified: pattern only has: " + matcher.groupCount() + " capturing groups");
  }
}
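
A minimal sketch of the two group modes, assuming the Lucene 4.x constructor shown above. With group = -1 the pattern acts as a delimiter (split behavior, like String.split()); with group >= 0 each match of that capturing group becomes a token:

import java.io.StringReader;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.pattern.PatternTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class PatternSplitDemo {
  public static void main(String[] args) throws Exception {
    // group = -1: split on the pattern, emitting "a", "b", "c".
    PatternTokenizer tok = new PatternTokenizer(
        new StringReader("a; b; c"), Pattern.compile(";\\s*"), -1);
    CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
    tok.reset();
    while (tok.incrementToken()) {
      System.out.println(term);
    }
    tok.end();
    tok.close();
  }
}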
Project: lams    File: EdgeNGramTokenizerFactory.java
@Override
public Tokenizer create(AttributeFactory factory, Reader input) {
  if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) {
    if (!EdgeNGramTokenFilter.Side.FRONT.getLabel().equals(side)) {
      throw new IllegalArgumentException(EdgeNGramTokenizer.class.getSimpleName() + " does not support backward n-grams as of Lucene 4.4");
    }
    return new EdgeNGramTokenizer(input, minGramSize, maxGramSize);
  } else {
    return new Lucene43EdgeNGramTokenizer(luceneMatchVersion, input, side, minGramSize, maxGramSize);
  }
}
Project: lams    File: NGramTokenizerFactory.java
/** Creates the {@link TokenStream} of n-grams from the given {@link Reader} and {@link AttributeFactory}. */
@Override
public Tokenizer create(AttributeFactory factory, Reader input) {
  if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) {
    return new NGramTokenizer(luceneMatchVersion, factory, input, minGramSize, maxGramSize);
  } else {
    return new Lucene43NGramTokenizer(factory, input, minGramSize, maxGramSize);
  }
}
Project: lams    File: NumericTokenStream.java
/**
 * Expert: Creates a token stream for numeric values with the specified
 * <code>precisionStep</code> using the given
 * {@link org.apache.lucene.util.AttributeFactory}.
 * The stream is not yet initialized; before use, set a value
 * with one of the set<em>???</em>Value() methods.
 */
public NumericTokenStream(AttributeFactory factory, final int precisionStep) {
  super(new NumericAttributeFactory(factory));
  if (precisionStep < 1)
    throw new IllegalArgumentException("precisionStep must be >=1");
  this.precisionStep = precisionStep;
  numericAtt.setShift(-precisionStep);
}
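
The javadoc's initialization requirement in practice, as a short sketch (Lucene 4.x API; the no-factory constructor delegates to the default AttributeFactory):

import org.apache.lucene.analysis.NumericTokenStream;

public class NumericStreamDemo {
  public static void main(String[] args) throws Exception {
    // The stream is unusable until one of the set???Value() methods runs;
    // setIntValue() returns the stream itself, so the calls chain.
    NumericTokenStream stream = new NumericTokenStream(4).setIntValue(42);
    stream.reset();
    while (stream.incrementToken()) {
      // one trie-encoded token per precision level of the value 42
    }
    stream.end();
    stream.close();
  }
}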
Project: lams    File: StandardTokenizerFactory.java
@Override
public StandardTokenizer create(AttributeFactory factory, Reader input) {
  StandardTokenizer tokenizer;
  if (luceneMatchVersion == null) {
    tokenizer = new StandardTokenizer(factory, input);
  } else {
    tokenizer = new StandardTokenizer(luceneMatchVersion, factory, input);
  }
  tokenizer.setMaxTokenLength(maxTokenLength);
  return tokenizer;
}
Project: lams    File: ClassicTokenizerFactory.java
@Override
public ClassicTokenizer create(AttributeFactory factory, Reader input) {
  ClassicTokenizer tokenizer;
  if (luceneMatchVersion == null) {
    tokenizer = new ClassicTokenizer(factory, input);
  } else {
    tokenizer = new ClassicTokenizer(luceneMatchVersion, factory, input);
  }
  tokenizer.setMaxTokenLength(maxTokenLength);
  return tokenizer;
}
Project: lams    File: UAX29URLEmailTokenizerFactory.java
@Override
public UAX29URLEmailTokenizer create(AttributeFactory factory, Reader input) {
  UAX29URLEmailTokenizer tokenizer;
  if (luceneMatchVersion == null) {
    tokenizer = new UAX29URLEmailTokenizer(factory, input);
  } else {
    tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, factory, input);
  }
  tokenizer.setMaxTokenLength(maxTokenLength);
  return tokenizer;
}
Project: lams    File: Tokenizer.java
/** Construct a token stream processing the given input using the given AttributeFactory. */
protected Tokenizer(AttributeFactory factory, Reader input) {
  super(factory);
  if (input == null) {
    throw new NullPointerException("input must not be null");
  }
  this.inputPending = input;
}
Project: lams    File: ThaiTokenizer.java
/** Creates a new ThaiTokenizer, supplying the AttributeFactory */
public ThaiTokenizer(AttributeFactory factory, Reader reader) {
  super(factory, reader, (BreakIterator)sentenceProto.clone());
  if (!DBBI_AVAILABLE) {
    throw new UnsupportedOperationException("This JRE does not have support for Thai segmentation");
  }
  wordBreaker = (BreakIterator)proto.clone();
}
Project: lams    File: CharTokenizer.java
/**
 * @deprecated Use {@link #CharTokenizer(AttributeFactory, Reader)}
 */
@Deprecated
public CharTokenizer(Version matchVersion, AttributeFactory factory,
    Reader input) {
  super(factory, input);
  charUtils = CharacterUtils.getInstance(matchVersion);
}
Project: lams    File: WhitespaceTokenizerFactory.java
@Override
public WhitespaceTokenizer create(AttributeFactory factory, Reader input) {
  if (luceneMatchVersion == null) {
    return new WhitespaceTokenizer(factory, input);
  }
  return new WhitespaceTokenizer(luceneMatchVersion, factory, input);
}
Project: lams    File: KeywordTokenizer.java
public KeywordTokenizer(AttributeFactory factory, Reader input, int bufferSize) {
  super(factory, input);
  if (bufferSize <= 0) {
    throw new IllegalArgumentException("bufferSize must be > 0");
  }
  termAtt.resizeBuffer(bufferSize);
}
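
A brief sketch of what the buffer hint means in practice (Lucene 4.x API): KeywordTokenizer emits the whole input as a single token, and bufferSize merely pre-sizes the term buffer, which still grows for longer inputs.

import java.io.StringReader;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class KeywordDemo {
  public static void main(String[] args) throws Exception {
    KeywordTokenizer tok = new KeywordTokenizer(
        new StringReader("one single token"), 256);
    CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
    tok.reset();
    while (tok.incrementToken()) {
      System.out.println(term); // prints: one single token
    }
    tok.end();
    tok.close();
  }
}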
Project: lams    File: LetterTokenizerFactory.java
@Override
public LetterTokenizer create(AttributeFactory factory, Reader input) {
  if (luceneMatchVersion == null) {
    return new LetterTokenizer(factory, input);
  }
  return new LetterTokenizer(luceneMatchVersion, factory, input);
}
Project: lams    File: LowerCaseTokenizerFactory.java
@Override
public LowerCaseTokenizer create(AttributeFactory factory, Reader input) {
  if (luceneMatchVersion == null) {
    return new LowerCaseTokenizer(factory, input);
  }
  return new LowerCaseTokenizer(luceneMatchVersion, factory, input);
}
Project: lams    File: PathHierarchyTokenizerFactory.java
@Override
public Tokenizer create(AttributeFactory factory, Reader input) {
  if (reverse) {
    return new ReversePathHierarchyTokenizer(factory, input, delimiter, replacement, skip);
  }
  return new PathHierarchyTokenizer(factory, input, delimiter, replacement, skip);
}
Project: Elasticsearch    File: NumericTokenizer.java
/** Make this tokenizer get attributes from the delegate token stream. */
private static final AttributeFactory delegatingAttributeFactory(final AttributeSource source) {
    return new AttributeFactory() {
        @Override
        public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
            return (AttributeImpl) source.addAttribute(attClass);
        }
    };
}
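
A hedged illustration of what the delegation buys (the tokenizer choice and input are illustrative; constructors follow the Lucene 4.x style used elsewhere on this page). Every attribute the tokenizer requests is resolved against the outer AttributeSource, so both sides observe the same attribute instances:

import java.io.StringReader;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;

public class DelegatingFactoryDemo {
  public static void main(String[] args) throws Exception {
    final AttributeSource source = new AttributeSource();
    AttributeFactory delegating = new AttributeFactory() {
      @Override
      public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
        // Resolve every attribute against the outer source.
        return (AttributeImpl) source.addAttribute(attClass);
      }
    };
    KeywordTokenizer tok = new KeywordTokenizer(
        delegating, new StringReader("42"), 256);
    // The same CharTermAttribute instance is visible on both sides:
    System.out.println(source.addAttribute(CharTermAttribute.class)
        == tok.addAttribute(CharTermAttribute.class)); // true
  }
}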
Project: mecab-ko-lucene-analyzer    File: MeCabKoTokenizer.java
/**
 * MeCabKoTokenizer constructor.
 * Uses the default AttributeFactory.
 *
 * @param option Tokenizer options
 * @param appender PosAppender;
 * pass TokenGenerator.NO_DECOMPOUND if compound-noun decomposition is not needed.
 */
public MeCabKoTokenizer(
    TokenizerOption option,
    PosAppender appender) {
  this(
      AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY,
      option,
      appender);
}
Project: mecab-ko-lucene-analyzer    File: MeCabKoTokenizer.java
/**
 * MeCabKoTokenizer constructor.
 *
 * @param factory the AttributeFactory to use
 * @param option MeCabTokenizer options
 * @param appender PosAppender;
 * pass TokenGenerator.NO_DECOMPOUND if compound-noun decomposition is not needed.
 */
public MeCabKoTokenizer(
    AttributeFactory factory,
    TokenizerOption option,
    PosAppender appender) {
  super(factory);
  posAppender = appender;
  this.option = option;
  setMeCab();
  setAttributes();
}
Project: mecab-ko-lucene-analyzer    File: TokenizerFactoryBase.java
@Override
public Tokenizer create(AttributeFactory factory) {
  return new MeCabKoTokenizer(
      factory,
      option,
      new StandardPosAppender(option));
}
Project: elasticsearch-analysis-opennlp    File: MockTokenizer.java
public MockTokenizer(AttributeFactory factory, CharacterRunAutomaton runAutomaton, boolean lowerCase, int maxTokenLength) {
    super(factory);
    this.runAutomaton = runAutomaton;
    this.lowerCase = lowerCase;
    this.state = runAutomaton.getInitialState();
    this.maxTokenLength = maxTokenLength;
}
Project: AdSearch_Endpoints    File: QueryParserImpl.java
@Override
public List<String> parseQuery(String queryStr) {
  // Tokenize queryStr and remove stop words (no stemming is applied in this chain).
  List<String> tokens = new ArrayList<String>();
  AttributeFactory factory = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
  Tokenizer tokenizer = new StandardTokenizer(factory);
  tokenizer.setReader(new StringReader(queryStr));
  CharArraySet stopWords = EnglishAnalyzer.getDefaultStopSet();
  TokenStream tokenStream = new StopFilter(tokenizer, stopWords);
  // The filter shares the tokenizer's AttributeSource, so the term attribute
  // can be requested once from the end of the chain.
  CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
  try {
    tokenStream.reset();
    while (tokenStream.incrementToken()) {
      tokens.add(charTermAttribute.toString());
    }
    tokenStream.end();
    // Closing the filter also closes the wrapped tokenizer.
    tokenStream.close();
  } catch (IOException e) {
    e.printStackTrace();
  }
  return tokens;
}
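
A hypothetical call site for the method above (the no-arg constructor and the query string are assumptions for illustration):

import java.util.List;

public class ParseQueryDemo {
  public static void main(String[] args) {
    QueryParserImpl parser = new QueryParserImpl(); // assumed no-arg constructor
    List<String> terms = parser.parseQuery("the quick brown fox");
    System.out.println(terms); // the default English stop word "the" is dropped
  }
}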
Project: elasticsearch-icu    File: MockTokenizer.java
public MockTokenizer(AttributeFactory factory, CharacterRunAutomaton runAutomaton, boolean lowerCase, int maxTokenLength) {
    super(factory);
    this.runAutomaton = runAutomaton;
    this.lowerCase = lowerCase;
    this.state = runAutomaton.getInitialState();
    this.maxTokenLength = maxTokenLength;
    termAtt = addAttribute(CharTermAttribute.class);
    offsetAtt = addAttribute(OffsetAttribute.class);
}
Project: search    File: UIMAAnnotationsTokenizer.java
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters, 
                                AttributeFactory factory, Reader input) {
  super(factory, input, descriptorPath, configurationParameters);
  this.tokenTypeString = tokenType;
  this.termAttr = addAttribute(CharTermAttribute.class);
  this.offsetAttr = addAttribute(OffsetAttribute.class);
}
Project: search    File: UIMATypeAwareAnnotationsTokenizer.java
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, 
                                         Map<String, Object> configurationParameters, AttributeFactory factory, Reader input) {
  super(factory, input, descriptorPath, configurationParameters);
  this.tokenTypeString = tokenType;
  this.termAttr = addAttribute(CharTermAttribute.class);
  this.typeAttr = addAttribute(TypeAttribute.class);
  this.offsetAttr = addAttribute(OffsetAttribute.class);
  this.typeAttributeFeaturePath = typeAttributeFeaturePath;
}
Project: search    File: PatternTokenizer.java
/** Creates a new PatternTokenizer returning tokens from the given group (-1 for split functionality). */
public PatternTokenizer(AttributeFactory factory, Reader input, Pattern pattern, int group) {
  super(factory, input);
  this.group = group;

  // Use "" instead of str so don't consume chars
  // (fillBuffer) from the input on throwing IAE below:
  matcher = pattern.matcher("");

  // confusingly group count depends ENTIRELY on the pattern but is only accessible via matcher
  if (group >= 0 && group > matcher.groupCount()) {
    throw new IllegalArgumentException("invalid group specified: pattern only has: " + matcher.groupCount() + " capturing groups");
  }
}
Project: search    File: EdgeNGramTokenizerFactory.java
@Override
public Tokenizer create(AttributeFactory factory, Reader input) {
  if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) {
    if (!EdgeNGramTokenFilter.Side.FRONT.getLabel().equals(side)) {
      throw new IllegalArgumentException(EdgeNGramTokenizer.class.getSimpleName() + " does not support backward n-grams as of Lucene 4.4");
    }
    return new EdgeNGramTokenizer(input, minGramSize, maxGramSize);
  } else {
    return new Lucene43EdgeNGramTokenizer(luceneMatchVersion, input, side, minGramSize, maxGramSize);
  }
}
Project: search    File: NGramTokenizerFactory.java
/** Creates the {@link TokenStream} of n-grams from the given {@link Reader} and {@link AttributeFactory}. */
@Override
public Tokenizer create(AttributeFactory factory, Reader input) {
  if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) {
    return new NGramTokenizer(luceneMatchVersion, factory, input, minGramSize, maxGramSize);
  } else {
    return new Lucene43NGramTokenizer(factory, input, minGramSize, maxGramSize);
  }
}
Project: search    File: StandardTokenizerFactory.java
@Override
public StandardTokenizer create(AttributeFactory factory, Reader input) {
  StandardTokenizer tokenizer;
  if (luceneMatchVersion == null) {
    tokenizer = new StandardTokenizer(factory, input);
  } else {
    tokenizer = new StandardTokenizer(luceneMatchVersion, factory, input);
  }
  tokenizer.setMaxTokenLength(maxTokenLength);
  return tokenizer;
}
Project: search    File: ClassicTokenizerFactory.java
@Override
public ClassicTokenizer create(AttributeFactory factory, Reader input) {
  ClassicTokenizer tokenizer;
  if (luceneMatchVersion == null) {
    tokenizer = new ClassicTokenizer(factory, input);
  } else {
    tokenizer = new ClassicTokenizer(luceneMatchVersion, factory, input);
  }
  tokenizer.setMaxTokenLength(maxTokenLength);
  return tokenizer;
}
Project: search    File: UAX29URLEmailTokenizerFactory.java
@Override
public UAX29URLEmailTokenizer create(AttributeFactory factory, Reader input) {
  UAX29URLEmailTokenizer tokenizer;
  if (luceneMatchVersion == null) {
    tokenizer = new UAX29URLEmailTokenizer(factory, input);
  } else {
    tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, factory, input);
  }
  tokenizer.setMaxTokenLength(maxTokenLength);
  return tokenizer;
}
Project: search    File: ThaiTokenizer.java
/** Creates a new ThaiTokenizer, supplying the AttributeFactory */
public ThaiTokenizer(AttributeFactory factory, Reader reader) {
  super(factory, reader, (BreakIterator)sentenceProto.clone());
  if (!DBBI_AVAILABLE) {
    throw new UnsupportedOperationException("This JRE does not have support for Thai segmentation");
  }
  wordBreaker = (BreakIterator)proto.clone();
}
Project: search    File: CharTokenizer.java
/**
 * @deprecated Use {@link #CharTokenizer(AttributeFactory, Reader)}
 */
@Deprecated
public CharTokenizer(Version matchVersion, AttributeFactory factory,
    Reader input) {
  super(factory, input);
  charUtils = CharacterUtils.getInstance(matchVersion);
}
Project: search    File: WhitespaceTokenizerFactory.java
@Override
public WhitespaceTokenizer create(AttributeFactory factory, Reader input) {
  if (luceneMatchVersion == null) {
    return new WhitespaceTokenizer(factory, input);
  }
  return new WhitespaceTokenizer(luceneMatchVersion, factory, input);
}
Project: search    File: KeywordTokenizer.java
public KeywordTokenizer(AttributeFactory factory, Reader input, int bufferSize) {
  super(factory, input);
  if (bufferSize <= 0) {
    throw new IllegalArgumentException("bufferSize must be > 0");
  }
  termAtt.resizeBuffer(bufferSize);
}
Project: search    File: LetterTokenizerFactory.java
@Override
public LetterTokenizer create(AttributeFactory factory, Reader input) {
  if (luceneMatchVersion == null) {
    return new LetterTokenizer(factory, input);
  }
  return new LetterTokenizer(luceneMatchVersion, factory, input);
}