Java 类org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl 实例源码

项目:mmseg4j-solr    文件:AnalyzerTest.java   
/**
 * Tokenizes {@code txt} with the given analyzer and prints each token to stdout,
 * preceded by a separator line showing the input length and the input itself.
 *
 * @param txt      text to analyze
 * @param analyzer analyzer producing the token stream
 * @throws IOException if the token stream fails while reading
 */
public static void printlnToken(String txt, Analyzer analyzer) throws IOException {
    System.out.println("---------"+txt.length()+"\n"+txt);
    // try-with-resources guarantees the stream is closed even if iteration throws;
    // the old code leaked the stream on exception.
    try (TokenStream ts = analyzer.tokenStream("text", new StringReader(txt))) {
        ts.reset();
        for (PackedTokenAttributeImpl t = new PackedTokenAttributeImpl(); (t = TokenUtils.nextToken(ts, t)) != null; ) {
            System.out.println(t);
        }
        // TokenStream contract: end() must be called after the stream is exhausted,
        // before close(). (Removed long-dead pre-Lucene-2.9 commented-out code.)
        ts.end();
    }
}
项目:elasticsearch-plugin-bundle    文件:SymbolnameTokenFilter.java   
/**
 * Emits queued symbol-name expansions first (stacked at the same position as the
 * originating token), then advances the upstream stream and computes new expansions.
 *
 * @return {@code true} if a token was produced, {@code false} at end of stream
 * @throws IOException if the upstream stream fails
 */
@Override
public final boolean incrementToken() throws IOException {
    // Drain expansions queued by a previous call before consuming more input.
    if (!tokens.isEmpty()) {
        if (current == null) {
            throw new IllegalArgumentException("current is null");
        }
        final PackedTokenAttributeImpl queued = tokens.removeFirst();
        restoreState(current);
        termAtt.setEmpty().append(queued);
        posIncAtt.setPositionIncrement(0);
        return true;
    }
    // No pending expansions: pull the next token from upstream.
    if (!input.incrementToken()) {
        return false;
    }
    process();
    if (!tokens.isEmpty()) {
        // Remember this token's attribute state so expansions can restore it.
        current = captureState();
    }
    return true;
}
项目:elasticsearch-plugin-bundle    文件:StandardnumberTokenFilter.java   
/**
 * Returns queued standard-number variants (position increment 0, stacked on the
 * source token) before advancing the upstream stream and detecting new variants.
 *
 * @return {@code true} if a token was produced, {@code false} at end of stream
 * @throws IOException if the upstream stream fails
 */
@Override
public final boolean incrementToken() throws IOException {
    if (tokens.isEmpty()) {
        // Nothing queued: advance upstream and look for variants in the new term.
        final boolean hasNext = input.incrementToken();
        if (hasNext) {
            detect();
            if (!tokens.isEmpty()) {
                // Capture this token's state so each variant can be emitted from it.
                current = captureState();
            }
        }
        return hasNext;
    }
    if (current == null) {
        throw new IllegalArgumentException("current is null");
    }
    final PackedTokenAttributeImpl variant = tokens.removeFirst();
    restoreState(current);
    termAtt.setEmpty().append(variant);
    posIncAtt.setPositionIncrement(0);
    return true;
}
项目:information-retrieval-adventure    文件:SplitTokenByChar.java   
/**
 * Splits each incoming token into single-character tokens: each call emits one
 * character of the buffered term until it is exhausted, then pulls the next
 * token from the upstream stream.
 *
 * @return {@code true} while a character token was produced, {@code false} at end of stream
 * @throws IOException if the upstream stream fails
 */
@Override
public boolean incrementToken() throws IOException {
  while (true) {
    if (curTermBuffer == null) {
      if (!input.incrementToken()) {
        return false; // upstream exhausted
      }
      curTermBuffer = termAtt.buffer().clone();
      // FIX: bound the split by the term's own length. The previous code cast
      // termAtt to PackedTokenAttributeImpl and used endOffset - startOffset,
      // which (a) ties the filter to one attribute implementation and (b) can
      // disagree with the actual term length when earlier filters alter the
      // text (e.g. stemming), reading stale buffer chars or truncating.
      curLen = termAtt.length();
    } else {
      if (curPos < curLen) {
        termAtt.copyBuffer(curTermBuffer, curPos, 1);
        curPos++;
        return true;
      }
      // Term fully emitted: reset and loop around to fetch the next input token.
      curTermBuffer = null;
      curPos = 0;
    }
  }
}
项目:elasticsearch-analysis-phonetic-eudex    文件:EudexTokenFilter.java   
/**
 * Replaces each upstream term with its Eudex phonetic encoding.
 *
 * @return {@code true} if a token was produced, {@code false} at end of stream
 * @throws IOException if the upstream stream fails
 */
@Override
public final boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
        // Encode the current term text as a new token (see eudex()).
        PackedTokenAttributeImpl token = eudex();
        // On the very first call 'current' is still null; Lucene's restoreState(null) is a no-op.
        restoreState(current);
        termAtt.setEmpty().append(token);
        // NOTE(review): eudex() never sets offsets on the freshly created token, so this
        // appears to write (0, 0) every time — confirm whether the input token's original
        // offsets should be preserved instead.
        offsetAtt.setOffset(token.startOffset(), token.endOffset());
        // NOTE(review): increment 0 stacks this token on the previous position; since the
        // filter replaces (rather than adds to) the term, an increment of 1 may be intended.
        posIncAtt.setPositionIncrement(0);
        current = captureState();
        return true;
    } else {
        return false;
    }
}
项目:elasticsearch-analysis-phonetic-eudex    文件:EudexTokenFilter.java   
/**
 * Builds a new token whose text is the hexadecimal Eudex encoding of the
 * current term buffer. Offsets and type are left at their defaults.
 *
 * @return a freshly allocated token carrying the encoded term
 * @throws CharacterCodingException if encoding fails
 */
protected PackedTokenAttributeImpl  eudex() throws CharacterCodingException {
    final String term = String.valueOf(termAtt.buffer(), 0, termAtt.length());
    final PackedTokenAttributeImpl encoded = new PackedTokenAttributeImpl();
    encoded.append(Long.toHexString(eudex.encode(term)));
    return encoded;
}
项目:mmseg4j-solr    文件:CutLetterDigitFilter.java   
/**
 * Wraps {@code input} and registers the attributes this filter reads and writes.
 *
 * @param input upstream token stream whose mixed letter/digit tokens will be split
 */
public CutLetterDigitFilter(TokenStream input) {
    super(input);

    // Reusable scratch token handed to TokenUtils.nextToken() to avoid per-token allocation.
    reusableToken = new PackedTokenAttributeImpl();
    termAtt = addAttribute(CharTermAttribute.class);
    offsetAtt = addAttribute(OffsetAttribute.class);
    typeAtt = addAttribute(TypeAttribute.class);
}
项目:mmseg4j-solr    文件:CutLetterDigitFilter.java   
/**
 * Cuts a sub-token out of {@code oriToken}, tags it as digit or letter, and
 * queues it for later emission.
 *
 * @param oriToken         source token being split
 * @param termBufferOffset start of the sub-token within the source term buffer
 * @param termBufferLength length of the sub-token
 * @param type             {@link Character} category of the run's characters
 */
private void addToken(PackedTokenAttributeImpl oriToken, int termBufferOffset, int termBufferLength, byte type) {
    final PackedTokenAttributeImpl token = TokenUtils.subToken(oriToken, termBufferOffset, termBufferLength);
    // Digit runs get the digit type; every other category counts as a letter run.
    token.setType(type == Character.DECIMAL_DIGIT_NUMBER ? Word.TYPE_DIGIT : Word.TYPE_LETTER);
    tokenQueue.offer(token);
}
项目:mmseg4j-solr    文件:CutLetterDigitFilter.java   
/**
 * Copies the next (possibly split) token into this stream's attributes.
 *
 * @return {@code true} if a token was produced, {@code false} at end of stream
 * @throws IOException if the upstream stream fails
 */
public final boolean incrementToken() throws IOException {
    clearAttributes();
    final PackedTokenAttributeImpl token = nextToken(reusableToken);
    if (token == null) {
        return false;
    }
    termAtt.copyBuffer(token.buffer(), 0, token.length());
    offsetAtt.setOffset(token.startOffset(), token.endOffset());
    typeAtt.setType(token.type());
    return true;
}
项目:elasticsearch-plugin-bundle    文件:SymbolnameTokenFilter.java   
/**
 * Expands the current term via {@code process(String)} and queues one token per
 * non-null expansion for {@code incrementToken()} to emit.
 *
 * @throws CharacterCodingException if expansion fails
 */
protected void process() throws CharacterCodingException {
    final String term = String.valueOf(termAtt.buffer(), 0, termAtt.length());
    for (CharSequence expansion : process(term)) {
        if (expansion == null) {
            continue; // skip empty slots from the expander
        }
        final PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
        token.append(expansion);
        tokens.add(token);
    }
}
项目:elasticsearch-plugin-bundle    文件:StandardnumberTokenFilter.java   
/**
 * Looks up standard-number variants of the current term and queues one token per
 * non-null variant for later emission.
 *
 * @throws CharacterCodingException if the lookup fails
 */
private void detect() throws CharacterCodingException {
    final CharSequence term = String.valueOf(termAtt.buffer(), 0, termAtt.length());
    for (CharSequence variant : service.lookup(settings, term)) {
        if (variant == null) {
            continue;
        }
        final PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
        token.append(variant);
        tokens.add(token);
    }
}
项目:elasticsearch-analysis-baseform    文件:BaseformTokenFilter.java   
/**
 * Queues a token carrying the dictionary base form of the current term, if one exists.
 *
 * @throws CharacterCodingException if the lookup fails
 */
protected void baseform() throws CharacterCodingException {
    final CharSequence term = String.valueOf(termAtt.buffer(), 0, termAtt.length());
    final CharSequence base = dictionary.lookup(term);
    if (base == null || base.length() == 0) {
        return; // no dictionary entry: nothing to queue
    }
    final PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
    token.append(base);
    tokens.add(token);
}
项目:elasticsearch-analysis-german    文件:StandardNumberTokenFilter.java   
/**
 * Queues one token for every standard-number variant recognized in the current term.
 *
 * @throws CharacterCodingException if the lookup fails
 */
protected void detect() throws CharacterCodingException {
    final String surface = new String(termAtt.buffer(), 0, termAtt.length());
    for (final CharSequence variant : standardNumberService.lookup(surface)) {
        if (variant != null) {
            final PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
            token.append(variant);
            tokens.add(token);
        }
    }
}
项目:elasticsearch-analysis-german    文件:BaseformTokenFilter.java   
/**
 * Looks up the base form of the current term and, when present and non-empty,
 * queues it as a new token.
 *
 * @throws CharacterCodingException if the lookup fails
 */
protected void baseform() throws CharacterCodingException {
    final CharSequence surface = new String(termAtt.buffer(), 0, termAtt.length());
    final CharSequence base = dictionary.lookup(surface);
    final boolean found = base != null && base.length() > 0;
    if (found) {
        final PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
        token.append(base);
        tokens.add(token);
    }
}
项目:elasticsearch-analysis-standardnumber    文件:StandardNumberTokenFilter.java   
/**
 * Detects standard-number variants of the current term and queues a token for
 * each non-null variant.
 *
 * @throws CharacterCodingException if the lookup fails
 */
protected void detect() throws CharacterCodingException {
    final CharSequence surfaceForm = new String(termAtt.buffer(), 0, termAtt.length());
    final Collection<CharSequence> candidates = service.lookup(settings, surfaceForm);
    for (final CharSequence candidate : candidates) {
        if (candidate == null) {
            continue;
        }
        final PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
        token.append(candidate);
        tokens.add(token);
    }
}
项目:mmseg4j-solr    文件:CutLetterDigitFilter.java   
/**
 * Returns the next token, splitting mixed letter/digit tokens into homogeneous
 * runs. Sub-tokens produced by a split are queued and served on subsequent calls.
 *
 * @param reusableToken scratch token reused across calls; must not be null
 * @return the next token, or {@code null} at end of stream
 * @throws IOException if the upstream stream fails
 */
private PackedTokenAttributeImpl nextToken(PackedTokenAttributeImpl reusableToken) throws IOException {
    assert reusableToken != null;

    // Serve sub-tokens left over from a previous split first.
    PackedTokenAttributeImpl nextToken = tokenQueue.poll();
    if(nextToken != null) {
        return nextToken;
    }

    nextToken = TokenUtils.nextToken(input, reusableToken);

    if(nextToken != null &&
            (Word.TYPE_LETTER_OR_DIGIT.equalsIgnoreCase(nextToken.type())
                || Word.TYPE_DIGIT_OR_LETTER.equalsIgnoreCase(nextToken.type()))
            ) {
        final char[] buffer = nextToken.buffer();
        final int length = nextToken.length();
        // BUGFIX: normalize the initial category exactly like the loop does.
        // Previously the raw Character.getType(buffer[0]) was used, so an
        // uppercase/titlecase first character differed from its normalized
        // category on iteration 0 and enqueued a spurious zero-length sub-token.
        byte lastType = normalizedCharType(buffer[0]); // category of the current run
        int termBufferOffset = 0;
        int termBufferLength = 0;
        for(int i = 0; i < length; i++) {
            byte type = normalizedCharType(buffer[i]);
            if(type != lastType) {  // category changed: flush the finished run
                addToken(nextToken, termBufferOffset, termBufferLength, lastType);

                termBufferOffset += termBufferLength;
                termBufferLength = 0;

                lastType = type;
            }

            termBufferLength++;
        }
        if(termBufferLength > 0) {  // flush the final run
            addToken(nextToken, termBufferOffset, termBufferLength, lastType);
        }
        nextToken = tokenQueue.poll();
    }

    return nextToken;
}

/**
 * Collapses every letter category ({@code <= Character.MODIFIER_LETTER}) into
 * {@code LOWERCASE_LETTER} so case changes do not split a letter run.
 */
private static byte normalizedCharType(char c) {
    byte type = (byte) Character.getType(c);
    return type <= Character.MODIFIER_LETTER ? Character.LOWERCASE_LETTER : type;
}
项目:elasticsearch-analysis-german    文件:StandardNumberTokenFilter.java   
/**
 * Creates the filter over {@code input}.
 *
 * @param input                 upstream token stream
 * @param standardNumberService service used to look up standard-number variants
 */
protected StandardNumberTokenFilter(TokenStream input, StandardNumberService standardNumberService) {
    super(input);
    this.standardNumberService = standardNumberService;
    // FIFO queue of variant tokens produced by detect(), drained by incrementToken().
    this.tokens = new LinkedList<>();
}
项目:elasticsearch-analysis-german    文件:BaseformTokenFilter.java   
/**
 * Creates the filter over {@code input}.
 *
 * @param input      upstream token stream
 * @param dictionary dictionary used to look up base forms
 */
protected BaseformTokenFilter(TokenStream input, Dictionary dictionary) {
    super(input);
    this.dictionary = dictionary;
    // FIFO queue of base-form tokens produced by baseform(), drained by incrementToken().
    this.tokens = new LinkedList<>();
}