Java class org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute: example source code
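
This page collects real-world usages of PositionIncrementAttribute. All of the snippets follow the same consumer pattern: obtain the attribute from the TokenStream, call reset(), sum getPositionIncrement() across incrementToken() calls to recover absolute token positions (an increment of 0 means the token is stacked on the previous position, e.g. a synonym; an increment greater than 1 means a hole, e.g. a removed stopword), then call end() and close(). For orientation, here is a minimal self-contained sketch of that loop. It is not taken from any project below, and it assumes a recent Lucene where WhitespaceAnalyzer needs no Version argument; the analyzer and field name are illustrative placeholders.

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

public class PositionIncrementDemo {
    public static void main(String[] args) throws IOException {
        Analyzer analyzer = new WhitespaceAnalyzer(); // placeholder analyzer
        try (TokenStream ts = analyzer.tokenStream("f", "a quick brown fox")) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            PositionIncrementAttribute posIncr = ts.addAttribute(PositionIncrementAttribute.class);
            ts.reset();   // mandatory before the first incrementToken()
            int pos = -1; // absolute position = running sum of increments
            while (ts.incrementToken()) {
                pos += posIncr.getPositionIncrement();
                System.out.println(term + " @ " + pos);
            }
            ts.end();     // publishes end-of-stream attribute state
        } // close() is implicit via try-with-resources
    }
}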

Project: lucenelab    File: SynonymFilterExample.java
@SuppressWarnings("resource")
public static void main(String[] args) throws Exception {
    final Tokenizer tok = new WhitespaceTokenizer();
    tok.setReader(new StringReader("dark sea green sea green"));

    final SynonymMap.Builder builder = new SynonymMap.Builder(true);
    addSynonym("dark sea green", "color", builder);
    addSynonym("green", "color", builder);
    addSynonym("dark sea", "color", builder);
    addSynonym("sea green", "color", builder);
    final SynonymMap synMap = builder.build();
    final TokenStream ts = new SynonymFilter(tok, synMap, true);

    final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    final PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
    final PositionLengthAttribute posLengthAtt = ts.addAttribute(PositionLengthAttribute.class);

    ts.reset();
    int pos = -1;
    while (ts.incrementToken()) {
        pos += posIncrAtt.getPositionIncrement();
        System.out.println("term=" + termAtt + ", pos=" + pos + ", posLen=" + posLengthAtt.getPositionLength());
    }
    ts.end();
    ts.close();
}
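A note on the example above: SynonymFilter emits the injected "color" tokens stacked on top of the tokens they match, so they arrive with a position increment of 0 and the running pos value stays put, while PositionLengthAttribute reports how many positions a multi-word match such as "dark sea green" spans. The loop therefore prints several terms sharing one pos value but differing in posLen.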
Project: improved-journey    File: TestAnsj.java
public static void main(String[] args) throws IOException {
    List<Term> parse = ToAnalysis.parse("中华人民 共和国 成立了 ");
    System.out.println(parse);
    List<Term> parse1 = IndexAnalysis.parse("你吃过饭了没有!!!!!吃过无妨论文");


    //System.out.println(parse1);
    String text11="ZW321282050000000325";

    Tokenizer tokenizer = new AnsjTokenizer(new StringReader(text11), 0, true);
    CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = tokenizer.addAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAtt = tokenizer.addAttribute(PositionIncrementAttribute.class);

    tokenizer.reset();
    while (tokenizer.incrementToken()) {
        System.out.print(termAtt.toString() + " ");
        //System.out.print(offsetAtt.startOffset() + "-" + offsetAtt.endOffset() + "-");
        //System.out.print(positionIncrementAtt.getPositionIncrement() + "/");
    }
    tokenizer.end();
    tokenizer.close();
}
Project: elasticsearch-analysis-voikko    File: VoikkoTokenFilterTests.java
private List<TokenData> parse(String text) {
    NamedAnalyzer analyzer = getAnalysisService().indexAnalyzers.get("test");

    try {
        try (TokenStream ts = analyzer.tokenStream("test", new StringReader(text))) {
            List<TokenData> result = new ArrayList<>();
            CharTermAttribute charTerm = ts.addAttribute(CharTermAttribute.class);
            OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
            PositionIncrementAttribute position = ts.addAttribute(PositionIncrementAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                String original = text.substring(offset.startOffset(), offset.endOffset());
                result.add(token(original, charTerm.toString(), position.getPositionIncrement()));
            }
            ts.end();

            return result;
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Project: lams    File: PrefixAwareTokenFilter.java
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) {
  super(suffix);
  this.suffix = suffix;
  this.prefix = prefix;
  prefixExhausted = false;

  termAtt = addAttribute(CharTermAttribute.class);
  posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  payloadAtt = addAttribute(PayloadAttribute.class);
  offsetAtt = addAttribute(OffsetAttribute.class);
  typeAtt = addAttribute(TypeAttribute.class);
  flagsAtt = addAttribute(FlagsAttribute.class);

  p_termAtt = prefix.addAttribute(CharTermAttribute.class);
  p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
  p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
  p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
  p_typeAtt = prefix.addAttribute(TypeAttribute.class);
  p_flagsAtt = prefix.addAttribute(FlagsAttribute.class);
}
Project: elasticsearch-dynamic-synonym    File: SimpleSynonymMap.java
private Set<String> analyze(String text) throws IOException {
    Set<String> result = new HashSet<String>();
    Analyzer analyzer = configuration.getAnalyzer();
    try (TokenStream ts = analyzer.tokenStream("", text)) {
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            int length = termAtt.length();
            if (length == 0) {
                throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
            }
            if (posIncAtt.getPositionIncrement() != 1) {
                throw new IllegalArgumentException("term: " + text + " analyzed to a token with posinc != 1");
            }

            result.add(new String(termAtt.buffer(), 0, termAtt.length()));
        }

        ts.end();
        return result;
    }
}
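The two IllegalArgumentException branches above closely follow the checks in Lucene's own SynonymMap.Parser#analyze: each side of a synonym rule must analyze to a plain chain of non-empty tokens whose position increments are all exactly 1, because a SynonymMap rule cannot represent holes or stacked tokens.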
Project: elasticsearch-analysis-ltp    File: LTPTokenizer.java
/**
 * Lucene constructor
 *
 * @throws UnirestException
 * @throws JSONException
 * @throws IOException
 */
public LTPTokenizer(Set<String> filter)
        throws IOException, JSONException, UnirestException {
    super();
    logger.info("LTPTokenizer Initialize......");
    // Add token offset attribute
    offsetAttr = addAttribute(OffsetAttribute.class);
    // Add token content attribute
    charTermAttr = addAttribute(CharTermAttribute.class);
    // Add token type attribute
    typeAttr = addAttribute(TypeAttribute.class);
    // Add token position attribute
    piAttr = addAttribute(PositionIncrementAttribute.class);
    // Create a new word segmenter to get tokens
    LTPSeg = new LTPWordSegmenter(input);
    // Add filter words set
    this.filter = filter;
}
Project: elasticsearch-analysis-lc-pinyin    File: PinyinAnalysisTest.java
@Test
public void testSearch() throws IOException {
    LcPinyinAnalyzer analyzer = new LcPinyinAnalyzer(AnalysisSetting.search);
    TokenStream tokenStream = analyzer.tokenStream("lc", "重qing");

    CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttribute = tokenStream.getAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAttribute = tokenStream.getAttribute(PositionIncrementAttribute.class);

    tokenStream.reset();
    Assert.assertTrue(tokenStream.incrementToken());
    Assert.assertEquals(charTermAttribute.toString(), "重");
    Assert.assertEquals(offsetAttribute.startOffset(), 0);
    Assert.assertEquals(offsetAttribute.endOffset(), 1);
    Assert.assertEquals(positionIncrementAttribute.getPositionIncrement(), 1);

    Assert.assertTrue(tokenStream.incrementToken());
    Assert.assertEquals(charTermAttribute.toString(), "qing");
    Assert.assertEquals(offsetAttribute.startOffset(), 1);
    Assert.assertEquals(offsetAttribute.endOffset(), 5);
    Assert.assertEquals(positionIncrementAttribute.getPositionIncrement(), 1);

    tokenStream.close();
}
Project: elasticsearch-analysis-lc-pinyin    File: PinyinFilterTest.java
public void testFullPinyinFilter() throws IOException {

    LcPinyinAnalyzer analyzer = new LcPinyinAnalyzer(AnalysisSetting.search);
    TokenStream tokenStream = analyzer.tokenStream("lc", "作者 : 陈楠");

    LcPinyinTokenFilter lcPinyinTokenFilter = new LcPinyinTokenFilter(tokenStream, PinyinFilterSetting.full_pinyin);

    CharTermAttribute charTermAttribute = lcPinyinTokenFilter.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttribute = lcPinyinTokenFilter.getAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAttribute = lcPinyinTokenFilter.getAttribute(PositionIncrementAttribute.class);

    lcPinyinTokenFilter.reset();
    while (lcPinyinTokenFilter.incrementToken()) {
        System.out.println(charTermAttribute.toString() + ":" + offsetAttribute.startOffset() + "," + offsetAttribute.endOffset() + ":" + positionIncrementAttribute.getPositionIncrement());
    }
    lcPinyinTokenFilter.close();
}
Project: elasticsearch-analysis-lc-pinyin    File: PinyinFilterTest.java
public void testFirstLetterFilter() throws IOException {

    LcPinyinAnalyzer analyzer = new LcPinyinAnalyzer(AnalysisSetting.search);
    TokenStream tokenStream = analyzer.tokenStream("lc", "作者 : 陈楠");

    LcPinyinTokenFilter lcPinyinTokenFilter = new LcPinyinTokenFilter(tokenStream, PinyinFilterSetting.first_letter);

    CharTermAttribute charTermAttribute = lcPinyinTokenFilter.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttribute = lcPinyinTokenFilter.getAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAttribute = lcPinyinTokenFilter.getAttribute(PositionIncrementAttribute.class);

    lcPinyinTokenFilter.reset();
    while (lcPinyinTokenFilter.incrementToken()) {
        System.out.println(charTermAttribute.toString() + ":" + offsetAttribute.startOffset() + "," + offsetAttribute.endOffset() + ":" + positionIncrementAttribute.getPositionIncrement());
    }
    lcPinyinTokenFilter.close();
}
Project: fastcatsearch3    File: Token.java
@Override
public void copyTo(AttributeImpl target) {
  if (target instanceof Token) {
    final Token to = (Token) target;
    to.reinit(this);
    // reinit shares the payload, so clone it:
    if (payload !=null) {
      to.payload = payload.clone();
    }
  } else {
    super.copyTo(target);
    ((OffsetAttribute) target).setOffset(startOffset, endOffset);
    ((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
    ((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone());
    ((FlagsAttribute) target).setFlags(flags);
    ((TypeAttribute) target).setType(type);
  }
}
Project: solrplugins    File: JsonReferencePayloadTokenizerTest.java
@Test
public void testShorthand2() throws IOException {
  JsonReferencePayloadTokenizer tokenizer = new JsonReferencePayloadTokenizer();
  tokenizer.setReader(new StringReader("{\"filing\": \"something\", \"prefix\": \"The \"}"));
  tokenizer.reset();

  assertTrue(tokenizer.incrementToken());
  assertEquals("something", tokenizer.getAttribute(CharTermAttribute.class).toString());
  assertEquals(JsonReferencePayloadTokenizer.TYPE_FILING, tokenizer.getAttribute(TypeAttribute.class).type());
  assertEquals(1, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
  assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());

  assertTrue(tokenizer.incrementToken());
  assertEquals("The ", tokenizer.getAttribute(CharTermAttribute.class).toString());
  assertEquals(JsonReferencePayloadTokenizer.TYPE_PREFIX, tokenizer.getAttribute(TypeAttribute.class).type());
  assertEquals(0, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
  assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());

  assertFalse(tokenizer.incrementToken());
}
Project: solrplugins    File: JsonReferencePayloadTokenizerTest.java
@Test
public void testShorthand3() throws IOException {
  JsonReferencePayloadTokenizer tokenizer = new JsonReferencePayloadTokenizer();
  tokenizer.setReader(new StringReader("{\"prefix\": \"The \", \"filing\": \"something\"}"));
  tokenizer.reset();

  assertTrue(tokenizer.incrementToken());
  assertEquals("something", tokenizer.getAttribute(CharTermAttribute.class).toString());
  assertEquals(JsonReferencePayloadTokenizer.TYPE_FILING, tokenizer.getAttribute(TypeAttribute.class).type());
  assertEquals(1, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
  assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());

  assertTrue(tokenizer.incrementToken());
  assertEquals("The ", tokenizer.getAttribute(CharTermAttribute.class).toString());
  assertEquals(JsonReferencePayloadTokenizer.TYPE_PREFIX, tokenizer.getAttribute(TypeAttribute.class).type());
  assertEquals(0, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
  assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());

  assertFalse(tokenizer.incrementToken());
}
Project: Alix    File: Demo.java
public static MyToken[] tokensFromAnalysis(Analyzer analyzer, String text, String field) throws IOException
{
  TokenStream stream = analyzer.tokenStream(field, new StringReader(text));
  CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
  PositionIncrementAttribute positionIncrementAttr = stream.addAttribute(PositionIncrementAttribute.class);
  TypeAttribute typeAttr = stream.addAttribute(TypeAttribute.class);
  OffsetAttribute offsetAttr = stream.addAttribute(OffsetAttribute.class);

  stream.reset(); // required before the first incrementToken()
  ArrayList<MyToken> tokenList = new ArrayList<MyToken>();
  while (stream.incrementToken()) {
    tokenList.add(new MyToken(term.toString(), positionIncrementAttr.getPositionIncrement(), typeAttr.type(),
        offsetAttr.startOffset(), offsetAttr.endOffset()));
  }
  stream.end();
  stream.close();

  return tokenList.toArray(new MyToken[0]);
}
Project: hanlp-lucene-plugin    File: HanLPAnalyzerTest.java
public void testCreateComponents() throws Exception
{
    String text = "中华人民共和国很辽阔";
    for (int i = 0; i < text.length(); ++i)
    {
        System.out.print(text.charAt(i) + "" + i + " ");
    }
    System.out.println();
    Analyzer analyzer = new HanLPAnalyzer();
    TokenStream tokenStream = analyzer.tokenStream("field", text);
    tokenStream.reset();
    while (tokenStream.incrementToken())
    {
        CharTermAttribute attribute = tokenStream.getAttribute(CharTermAttribute.class);
        // offset
        OffsetAttribute offsetAtt = tokenStream.getAttribute(OffsetAttribute.class);
        // position increment
        PositionIncrementAttribute positionAttr = tokenStream.getAttribute(PositionIncrementAttribute.class);
        // part of speech
        TypeAttribute typeAttr = tokenStream.getAttribute(TypeAttribute.class);
        System.out.printf("[%d:%d %d] %s/%s\n", offsetAtt.startOffset(), offsetAtt.endOffset(), positionAttr.getPositionIncrement(), attribute, typeAttr.type());
    }
}
Project: hanlp-lucene-plugin    File: HanLPAnalyzerTest.java
public void testIssue() throws Exception
{
    Map<String, String> args = new TreeMap<>();
    args.put("enableTraditionalChineseMode", "true");
    args.put("enableNormalization", "true");
    HanLPTokenizerFactory factory = new HanLPTokenizerFactory(args);
    Tokenizer tokenizer = factory.create();
    String text = "會辦台星保證最低價的原因?";

    tokenizer.setReader(new StringReader(text));
    tokenizer.reset();
    while (tokenizer.incrementToken())
    {
        CharTermAttribute attribute = tokenizer.getAttribute(CharTermAttribute.class);
        // offset
        OffsetAttribute offsetAtt = tokenizer.getAttribute(OffsetAttribute.class);
        // position increment
        PositionIncrementAttribute positionAttr = tokenizer.getAttribute(PositionIncrementAttribute.class);
        // part of speech
        TypeAttribute typeAttr = tokenizer.getAttribute(TypeAttribute.class);
        System.out.printf("[%d:%d %d] %s/%s\n", offsetAtt.startOffset(), offsetAtt.endOffset(), positionAttr.getPositionIncrement(), attribute, typeAttr.type());
    }
}
Project: hanlp-lucene-plugin    File: HanLPIndexAnalyzerTest.java
public void testCreateComponents() throws Exception
{
    String text = "中华人民共和国很辽阔";
    for (int i = 0; i < text.length(); ++i)
    {
        System.out.print(text.charAt(i) + "" + i + " ");
    }
    System.out.println();
    Analyzer analyzer = new HanLPIndexAnalyzer();
    TokenStream tokenStream = analyzer.tokenStream("field", text);
    tokenStream.reset();
    while (tokenStream.incrementToken())
    {
        CharTermAttribute attribute = tokenStream.getAttribute(CharTermAttribute.class);
        // offset
        OffsetAttribute offsetAtt = tokenStream.getAttribute(OffsetAttribute.class);
        // position increment
        PositionIncrementAttribute positionAttr = tokenStream.getAttribute(PositionIncrementAttribute.class);
        // part of speech
        TypeAttribute typeAttr = tokenStream.getAttribute(TypeAttribute.class);
        System.out.printf("[%d:%d %d] %s/%s\n", offsetAtt.startOffset(), offsetAtt.endOffset(), positionAttr.getPositionIncrement(), attribute, typeAttr.type());
    }
}
Project: search    File: PrefixAwareTokenFilter.java
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) {
  super(suffix);
  this.suffix = suffix;
  this.prefix = prefix;
  prefixExhausted = false;

  termAtt = addAttribute(CharTermAttribute.class);
  posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  payloadAtt = addAttribute(PayloadAttribute.class);
  offsetAtt = addAttribute(OffsetAttribute.class);
  typeAtt = addAttribute(TypeAttribute.class);
  flagsAtt = addAttribute(FlagsAttribute.class);

  p_termAtt = prefix.addAttribute(CharTermAttribute.class);
  p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
  p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
  p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
  p_typeAtt = prefix.addAttribute(TypeAttribute.class);
  p_flagsAtt = prefix.addAttribute(FlagsAttribute.class);
}
Project: search    File: TestRemoveDuplicatesTokenFilter.java
public void testDups(final String expected, final Token... tokens)
  throws Exception {

  final Iterator<Token> toks = Arrays.asList(tokens).iterator();
  final TokenStream ts = new RemoveDuplicatesTokenFilter(
    (new TokenStream() {
        CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
        PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
        @Override
        public boolean incrementToken() {
          if (toks.hasNext()) {
            clearAttributes();
            Token tok = toks.next();
            termAtt.setEmpty().append(tok);
            offsetAtt.setOffset(tok.startOffset(), tok.endOffset());
            posIncAtt.setPositionIncrement(tok.getPositionIncrement());
            return true;
          } else {
            return false;
          }
        }
      }));

  assertTokenStreamContents(ts, expected.split("\\s"));   
}
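Two things worth noting in this test: the anonymous inline TokenStream is the standard trick for replaying a canned list of Tokens through a filter under test, and RemoveDuplicatesTokenFilter defines a duplicate as a token with the same term text at the same position, i.e. one that follows its twin with a position increment of 0.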
Project: search    File: TestDuelingAnalyzers.java
public void assertEquals(String s, TokenStream left, TokenStream right) throws Exception {
  left.reset();
  right.reset();
  CharTermAttribute leftTerm = left.addAttribute(CharTermAttribute.class);
  CharTermAttribute rightTerm = right.addAttribute(CharTermAttribute.class);
  OffsetAttribute leftOffset = left.addAttribute(OffsetAttribute.class);
  OffsetAttribute rightOffset = right.addAttribute(OffsetAttribute.class);
  PositionIncrementAttribute leftPos = left.addAttribute(PositionIncrementAttribute.class);
  PositionIncrementAttribute rightPos = right.addAttribute(PositionIncrementAttribute.class);

  while (left.incrementToken()) {
    assertTrue("wrong number of tokens for input: " + s, right.incrementToken());
    assertEquals("wrong term text for input: " + s, leftTerm.toString(), rightTerm.toString());
    assertEquals("wrong position for input: " + s, leftPos.getPositionIncrement(), rightPos.getPositionIncrement());
    assertEquals("wrong start offset for input: " + s, leftOffset.startOffset(), rightOffset.startOffset());
    assertEquals("wrong end offset for input: " + s, leftOffset.endOffset(), rightOffset.endOffset());
  }
  assertFalse("wrong number of tokens for input: " + s, right.incrementToken());
  left.end();
  right.end();
  assertEquals("wrong final offset for input: " + s, leftOffset.endOffset(), rightOffset.endOffset());
  left.close();
  right.close();
}
Project: search    File: TestStopFilter.java
private void doTestStopPositions(StopFilter stpf, boolean enableIncrements) throws IOException {
  log("---> test with enable-increments-" + (enableIncrements ? "enabled" : "disabled"));
  stpf.setEnablePositionIncrements(enableIncrements);
  CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
  PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
  stpf.reset();
  for (int i=0; i<20; i+=3) {
    assertTrue(stpf.incrementToken());
    log("Token "+i+": "+stpf);
    String w = English.intToEnglish(i).trim();
    assertEquals("expecting token "+i+" to be "+w,w,termAtt.toString());
    assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,posIncrAtt.getPositionIncrement());
  }
  assertFalse(stpf.incrementToken());
  stpf.end();
  stpf.close();
}
Project: search    File: TestStopAnalyzer.java
public void testStopListPositions() throws IOException {
  CharArraySet stopWordsSet = new CharArraySet(asSet("good", "test", "analyzer"), false);
  StopAnalyzer newStop = new StopAnalyzer(stopWordsSet);
  String s =             "This is a good test of the english stop analyzer with positions";
  int expectedIncr[] =  { 1,   1, 1,          3, 1,  1,      1,            2,   1};
  TokenStream stream = newStop.tokenStream("test", s);
  try {
    assertNotNull(stream);
    int i = 0;
    CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);

    stream.reset();
    while (stream.incrementToken()) {
      String text = termAtt.toString();
      assertFalse(stopWordsSet.contains(text));
      assertEquals(expectedIncr[i++],posIncrAtt.getPositionIncrement());
    }
    stream.end();
  } finally {
    IOUtils.closeWhileHandlingException(stream);
  }
}
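The expectedIncr array is easy to verify by hand: the sentence tokenizes to "this is a good test of the english stop analyzer with positions", and the stop set removes "good", "test" and "analyzer". Each surviving token carries 1 plus the number of positions skipped just before it, so "of" (after the dropped "good test") gets 3 and "with" (after the dropped "analyzer") gets 2; every other kept token gets 1.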
Project: search    File: ShingleAnalyzerWrapperTest.java
public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
  PhraseQuery q = new PhraseQuery();

  TokenStream ts = analyzer.tokenStream("content", "this sentence");
  try {
    int j = -1;

    PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

    ts.reset();
    while (ts.incrementToken()) {
      j += posIncrAtt.getPositionIncrement();
      String termText = termAtt.toString();
      q.add(new Term("content", termText), j);
    }
    ts.end();
  } finally {
    IOUtils.closeWhileHandlingException(ts);
  }

  ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
  int[] ranks = new int[] { 0 };
  compareRanks(hits, ranks);
}
Project: search    File: TestSnowball.java
public void testFilterTokens() throws Exception {
  SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
  CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
  OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class);
  TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class);
  PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class);
  PositionIncrementAttribute posIncAtt = filter.getAttribute(PositionIncrementAttribute.class);
  FlagsAttribute flagsAtt = filter.getAttribute(FlagsAttribute.class);

  filter.incrementToken();

  assertEquals("accent", termAtt.toString());
  assertEquals(2, offsetAtt.startOffset());
  assertEquals(7, offsetAtt.endOffset());
  assertEquals("wrd", typeAtt.type());
  assertEquals(3, posIncAtt.getPositionIncrement());
  assertEquals(77, flagsAtt.getFlags());
  assertEquals(new BytesRef(new byte[]{0,1,2,3}), payloadAtt.getPayload());
}
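This test is less about stemming than about attribute hygiene: a well-behaved TokenFilter such as SnowballFilter may rewrite the term text, but it must pass offsets, type, flags, payload and position increment through untouched, which is exactly what the assertions pin down.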
Project: search    File: SpellingQueryConverter.java
protected void analyze(Collection<Token> result, String text, int offset, int flagsAttValue) throws IOException {
  TokenStream stream = analyzer.tokenStream("", text);
  // TODO: support custom attributes
  CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
  TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
  PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
  PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
  OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
  stream.reset();
  while (stream.incrementToken()) {      
    Token token = new Token();
    token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
    token.setOffset(offset + offsetAtt.startOffset(), 
                    offset + offsetAtt.endOffset());
    token.setFlags(flagsAttValue); //overwriting any flags already set...
    token.setType(typeAtt.type());
    token.setPayload(payloadAtt.getPayload());
    token.setPositionIncrement(posIncAtt.getPositionIncrement());
    result.add(token);
  }
  stream.end();
  stream.close();
}
Project: cc-analysis    File: CcWordsFilterTest.java
private CharsRef analyze(Analyzer analyzer, String text) throws IOException {
    CharsRefBuilder charsRefBuilder = new CharsRefBuilder();
    try (TokenStream ts = analyzer.tokenStream("", text)) {
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            int length = termAtt.length();
            if (length == 0) {
                throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
            }
            charsRefBuilder.grow(charsRefBuilder.length() + length + 1); /* current + word + separator */
            if (charsRefBuilder.length() > 0) {
                charsRefBuilder.append(CcWordSet.WORD_SEPARATOR);
            }
            charsRefBuilder.append(termAtt);
        }
        ts.end();
    }
    if (charsRefBuilder.length() == 0) {
        return null;
    }
    charsRefBuilder.append(CcWordSet.WORD_END);
    return charsRefBuilder.get();
}
Project: solr-multilingual-analyzer    File: MultiLangTokenizer.java
private void handleTokenStream(Map<Integer, List<Token>> tokenPosMap, TokenStream tokenStream) throws IOException {
    tokenStream.reset();
    int pos = 0;

    CharTermAttribute charTermAttribute = getCharTermAttribute(tokenStream);
    OffsetAttribute offsetAttribute = getOffsetAttribute(tokenStream);
    TypeAttribute typeAttribute = getTypeAttribute(tokenStream);
    PositionIncrementAttribute positionIncrementAttribute = getPositionIncrementAttribute(tokenStream);

    while (tokenStream.incrementToken()) {
        if (null == charTermAttribute || null == offsetAttribute) {
            return;
        }
        Token token = new Token(charTermAttribute.buffer(), 0, charTermAttribute.length(),
                offsetAttribute.startOffset(), offsetAttribute.endOffset());
        if (null != typeAttribute) {
            token.setType(typeAttribute.type());
        }
        pos += null != positionIncrementAttribute ? positionIncrementAttribute.getPositionIncrement() : 1;
        if (!tokenPosMap.containsKey(pos)) {
            tokenPosMap.put(pos, new LinkedList<Token>());
        }
        tokenPosMap.get(pos).add(token);
    }
    tokenStream.close();
}
Project: resource-query-parser    File: QueryBuilder.java
/**
 * Creates complex boolean query from the cached tokenstream contents
 */
protected Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClause.Occur operator)
        throws IOException {
    BooleanQuery.Builder q = newBooleanQuery();
    List<Term> currentQuery = new ArrayList<>();

    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);

    stream.reset();
    while (stream.incrementToken()) {
        if (posIncrAtt.getPositionIncrement() != 0) {
            add(q, currentQuery, operator);
            currentQuery.clear();
        }
        currentQuery.add(new Term(field, termAtt.getBytesRef()));
    }
    add(q, currentQuery, operator);

    return q.build();
}
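The posIncrAtt.getPositionIncrement() != 0 test is what makes this work for synonym-expanded streams: tokens with increment 0 are stacked on the previous position, so they accumulate in currentQuery and are flushed together as a single clause (effectively an OR of the synonyms) the moment a token starts a new position.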
Project: resource-query-parser    File: QueryBuilder.java
/**
 * Creates simple phrase query from the cached tokenstream contents
 */
protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
    PhraseQuery.Builder builder = new PhraseQuery.Builder();
    builder.setSlop(slop);

    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
    int position = -1;

    stream.reset();
    while (stream.incrementToken()) {
        if (enablePositionIncrements) {
            position += posIncrAtt.getPositionIncrement();
        } else {
            position += 1;
        }
        builder.add(new Term(field, termAtt.getBytesRef()), position);
    }

    return builder.build();
}
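Here the increments matter for phrase-matching correctness: with enablePositionIncrements on, a stopword removed earlier in the chain leaves a gap, so the PhraseQuery.Builder receives terms at their true positions and the query only matches documents with the same gap. With the flag off, every term is treated as adjacent, which matches more loosely but is needed when the index itself was built without position increments.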
Project: community-edition-old    File: PathTokenFilterTest.java
public void testAttributesAfterStreamEnd() throws IOException
{
    final String path = "uri1:one";
    StringReader reader = new StringReader(path);
    PathTokenFilter ts = new PathTokenFilter(reader, PathTokenFilter.PATH_SEPARATOR,
            PathTokenFilter.SEPARATOR_TOKEN_TEXT, PathTokenFilter.NO_NS_TOKEN_TEXT,
            PathTokenFilter.NAMESPACE_START_DELIMITER, PathTokenFilter.NAMESPACE_END_DELIMITER, true);

    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
    OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);

    // PathTokenFilter.end() will be called after all tokens consumed.
    tokenise(ts, new String[]{"uri1", "one"});

    // Check attributes cleaned up
    assertEquals("", termAtt.toString());
    assertEquals("word", typeAtt.type()); // the default
    assertEquals(0, posIncAtt.getPositionIncrement());
    // Final offset...
    assertEquals(path.length(), offsetAtt.startOffset());
    assertEquals(path.length(), offsetAtt.endOffset());
}
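The assertions after tokenise() document the end() contract that consumers rely on: after end(), this filter leaves the position increment at 0 and sets both startOffset and endOffset to the final offset (the length of the consumed input), the values an indexer uses when stitching multivalued fields together.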
Project: pinyinTokenFilter    File: TestPinyinTransformTokenFilter.java
@Test
public void testFull() throws IOException {
    this.filter = new PinyinTransformTokenFilter(tokenizer);
    this.filter.reset();
    int position = 0;
    while (this.filter.incrementToken()) {
        CharTermAttribute termAtt = this.filter.getAttribute(CharTermAttribute.class);
        String token = termAtt.toString();
        int increment = this.filter.getAttribute(PositionIncrementAttribute.class).getPositionIncrement();
        position += increment;
        OffsetAttribute offset = this.filter.getAttribute(OffsetAttribute.class);
        TypeAttribute type = this.filter.getAttribute(TypeAttribute.class);
        System.out.println(position + "[" + offset.startOffset() + "," + offset.endOffset() + "} (" + type
                .type() + ") " + token);
    }
    assertTrue(position == 4);
}
Project: pinyinTokenFilter    File: TestPinyinTransformTokenFilter.java
@Test
public void testFullWithNoChineseOut() throws IOException {
    this.filter = new PinyinTransformTokenFilter(tokenizer, false, 1, false);
    this.filter.reset();
    int position = 0;
    while (this.filter.incrementToken()) {
        CharTermAttribute termAtt = this.filter.getAttribute(CharTermAttribute.class);
        String token = termAtt.toString();
        int increment = this.filter.getAttribute(PositionIncrementAttribute.class).getPositionIncrement();
        position += increment;
        OffsetAttribute offset = this.filter.getAttribute(OffsetAttribute.class);
        TypeAttribute type = this.filter.getAttribute(TypeAttribute.class);
        System.out.println(position + "[" + offset.startOffset() + "," + offset.endOffset() + "} (" + type
                .type() + ") " + token);
    }
    assertTrue(position == 3);
}
Project: pinyinTokenFilter    File: TestPinyinTransformTokenFilter.java
@Test
public void testShort() throws IOException {
    this.filter = new PinyinTransformTokenFilter(tokenizer, true);
    this.filter.reset();
    int position = 0;
    while (this.filter.incrementToken()) {
        CharTermAttribute termAtt = this.filter.getAttribute(CharTermAttribute.class);
        String token = termAtt.toString();
        int increment = this.filter.getAttribute(PositionIncrementAttribute.class).getPositionIncrement();
        position += increment;
        OffsetAttribute offset = this.filter.getAttribute(OffsetAttribute.class);
        TypeAttribute type = this.filter.getAttribute(TypeAttribute.class);
        System.out.println(position + "[" + offset.startOffset() + "," + offset.endOffset() + "} (" + type
                .type() + ") " + token);
    }
    assertTrue(position == 4);
}
Project: lucenelab    File: PreAnnotatedTokenFilterTest.java
private static void assertTokenInfos(TokenStream ts, TokenInfo... infos) throws IOException {
    ts.reset();
    final CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    final PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
    final PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
    final ByteArrayDataInput in = new ByteArrayDataInput();
    int pos = -1;
    for (final TokenInfo info : infos) {
        assertThat(ts.incrementToken()).isTrue();
        pos += posIncrAtt.getPositionIncrement();
        int len = -1;
        final BytesRef payload = payloadAtt.getPayload();
        if (info.len != -1) {
            assertThat(payload).isNotNull();
            in.reset(payload.bytes);
            len = in.readVInt();
        } else {
            assertThat(payload).isNull();
        }
        assertThat(new TokenInfo(term.toString(), pos, len)).isEqualTo(info);
    }
    assertThat(ts.incrementToken()).isFalse();
}
Project: auto-phrase-tokenfilter    File: AutoPhrasingTokenFilter.java
private void emit(char[] token) {
    System.out.println("emit: " + new String(token));
    if (replaceWhitespaceWith != null) {
        token = replaceWhiteSpace(token);
    }
    CharTermAttribute termAttr = getTermAttribute();
    termAttr.setEmpty();
    termAttr.append(new StringBuilder().append(token));

    OffsetAttribute offAttr = getOffsetAttribute();
    if (offAttr != null && offAttr.endOffset() >= token.length) {
        int start = offAttr.endOffset() - token.length;
        offAttr.setOffset(start, offAttr.endOffset());
    }

    PositionIncrementAttribute pia = getPositionIncrementAttribute();
    if (pia != null) {
        pia.setPositionIncrement(++positionIncr);
    }

    lastEmitted = token;
}
Project: lucene-korean    File: KoreanAnalyzerTest.java
public void testStandardTokenizer() throws Exception {

    String source = "우리나라라면에서부터 일본라면이 파생되었잖니?";
    source = "너는 너는 다시 내게 돌아 올거야. school is a good place 呵呵大笑 呵呵大笑";

    long start = System.currentTimeMillis();

    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    TokenStream stream = analyzer.tokenStream("s", new StringReader(source));
    TokenStream tok = new StandardFilter(Version.LUCENE_36, stream);

    tok.reset(); // the TokenStream contract requires reset() before incrementToken()
    while (tok.incrementToken()) {
        CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
        OffsetAttribute offAttr = stream.getAttribute(OffsetAttribute.class);
        PositionIncrementAttribute posAttr = stream.getAttribute(PositionIncrementAttribute.class);
        TypeAttribute typeAttr = stream.getAttribute(TypeAttribute.class);

        System.out.println(new String(termAttr.buffer(), 0, termAttr.length()));
    }
    tok.end();
    tok.close();

    System.out.println((System.currentTimeMillis() - start) + "ms");
}
Project: lucene-korean    File: KoreanAnalyzerTest.java
public void testHanjaConvert() throws Exception {

    String source = "呵呵大笑  落落長松 ";

    long start = System.currentTimeMillis();

    KoreanAnalyzer analyzer = new KoreanAnalyzer();
    TokenStream stream = analyzer.tokenStream("s", new StringReader(source));
    TokenStream tok = new KoreanFilter(stream);

    tok.reset(); // the TokenStream contract requires reset() before incrementToken()
    while (tok.incrementToken()) {
        CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
        OffsetAttribute offAttr = stream.getAttribute(OffsetAttribute.class);
        PositionIncrementAttribute posAttr = stream.getAttribute(PositionIncrementAttribute.class);
        TypeAttribute typeAttr = stream.getAttribute(TypeAttribute.class);

        // print only the valid region of the term buffer, not the whole backing array
        System.out.println(new String(termAttr.buffer(), 0, termAttr.length()));
    }
    tok.end();
    tok.close();

    System.out.println((System.currentTimeMillis() - start) + "ms");
}
Project: NYBC    File: PrefixAwareTokenFilter.java
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) {
  super(suffix);
  this.suffix = suffix;
  this.prefix = prefix;
  prefixExhausted = false;

  termAtt = addAttribute(CharTermAttribute.class);
  posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  payloadAtt = addAttribute(PayloadAttribute.class);
  offsetAtt = addAttribute(OffsetAttribute.class);
  typeAtt = addAttribute(TypeAttribute.class);
  flagsAtt = addAttribute(FlagsAttribute.class);

  p_termAtt = prefix.addAttribute(CharTermAttribute.class);
  p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
  p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
  p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
  p_typeAtt = prefix.addAttribute(TypeAttribute.class);
  p_flagsAtt = prefix.addAttribute(FlagsAttribute.class);
}
Project: NYBC    File: TestRemoveDuplicatesTokenFilter.java
public void testDups(final String expected, final Token... tokens)
  throws Exception {

  final Iterator<Token> toks = Arrays.asList(tokens).iterator();
  final TokenStream ts = new RemoveDuplicatesTokenFilter(
    (new TokenStream() {
        CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
        PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
        @Override
        public boolean incrementToken() {
          if (toks.hasNext()) {
            clearAttributes();
            Token tok = toks.next();
            termAtt.setEmpty().append(tok);
            offsetAtt.setOffset(tok.startOffset(), tok.endOffset());
            posIncAtt.setPositionIncrement(tok.getPositionIncrement());
            return true;
          } else {
            return false;
          }
        }
      }));

  assertTokenStreamContents(ts, expected.split("\\s"));   
}
Project: NYBC    File: TestDuelingAnalyzers.java
public void assertEquals(String s, TokenStream left, TokenStream right) throws Exception {
  left.reset();
  right.reset();
  CharTermAttribute leftTerm = left.addAttribute(CharTermAttribute.class);
  CharTermAttribute rightTerm = right.addAttribute(CharTermAttribute.class);
  OffsetAttribute leftOffset = left.addAttribute(OffsetAttribute.class);
  OffsetAttribute rightOffset = right.addAttribute(OffsetAttribute.class);
  PositionIncrementAttribute leftPos = left.addAttribute(PositionIncrementAttribute.class);
  PositionIncrementAttribute rightPos = right.addAttribute(PositionIncrementAttribute.class);

  while (left.incrementToken()) {
    assertTrue("wrong number of tokens for input: " + s, right.incrementToken());
    assertEquals("wrong term text for input: " + s, leftTerm.toString(), rightTerm.toString());
    assertEquals("wrong position for input: " + s, leftPos.getPositionIncrement(), rightPos.getPositionIncrement());
    assertEquals("wrong start offset for input: " + s, leftOffset.startOffset(), rightOffset.startOffset());
    assertEquals("wrong end offset for input: " + s, leftOffset.endOffset(), rightOffset.endOffset());
  }
  assertFalse("wrong number of tokens for input: " + s, right.incrementToken());
  left.end();
  right.end();
  assertEquals("wrong final offset for input: " + s, leftOffset.endOffset(), rightOffset.endOffset());
  left.close();
  right.close();
}