Example source code for the Java class org.apache.lucene.analysis.tokenattributes.CharTermAttribute
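
All of the snippets below follow the same TokenStream contract: obtain attribute views such as CharTermAttribute before consuming, call reset(), loop over incrementToken(), then call end() and close(). As a reference point, here is a minimal self-contained sketch of that pattern. It assumes a recent Lucene (5.x or later, where WhitespaceAnalyzer has a no-argument constructor); the field name and sample text are illustrative only.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class CharTermAttributeDemo {
    public static void main(String[] args) throws IOException {
        Analyzer analyzer = new WhitespaceAnalyzer();
        try (TokenStream ts = analyzer.tokenStream("body", "hello token stream")) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
            ts.reset();                  // mandatory before the first incrementToken()
            while (ts.incrementToken()) {
                System.out.println(term + " [" + offset.startOffset() + "," + offset.endOffset() + "]");
            }
            ts.end();                    // records the final offset state
        }                                // try-with-resources closes the stream
    }
}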

Project: elasticsearch-analysis-openkoreantext    File: TokenStreamAssertions.java
public static void assertTokenStream(TokenStream tokenStream, String[] expectedCharTerms, String[] expectedTypes, int[] expectedStartOffsets, int[] expectedEndOffsets) throws IOException {
    tokenStream.reset();
    int index = 0;
    while (tokenStream.incrementToken()) {
        assertEquals(expectedCharTerms[index], tokenStream.getAttribute(CharTermAttribute.class).toString());

        if(expectedTypes != null) {
            assertEquals(expectedTypes[index], tokenStream.getAttribute(TypeAttribute.class).type());
        }

        OffsetAttribute offsets = tokenStream.getAttribute(OffsetAttribute.class);

        if(expectedStartOffsets != null) {
            assertEquals(expectedStartOffsets[index], offsets.startOffset());
        }

        if(expectedEndOffsets != null) {
            assertEquals(expectedEndOffsets[index], offsets.endOffset());
        }

        index++;
    }
    tokenStream.end();
}
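
For reference, this helper could be invoked from a test roughly as follows; the analyzer choice, input text, and expected values are illustrative, not taken from the original project.

Analyzer analyzer = new WhitespaceAnalyzer(); // illustrative choice of analyzer
TokenStream ts = analyzer.tokenStream("field", "hello world");
assertTokenStream(ts,
        new String[]{"hello", "world"}, // expected terms
        null,                           // null skips the type assertions
        new int[]{0, 6},                // expected start offsets
        new int[]{5, 11});              // expected end offsets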
Project: improved-journey    File: TestAnsj.java
public static void main(String[] args) throws IOException {
    List<Term> parse = ToAnalysis.parse("中华人民 共和国 成立了 ");
    System.out.println(parse);
    List<Term> parse1 = IndexAnalysis.parse("你吃过饭了没有!!!!!吃过无妨论文");


    //System.out.println(parse1);
    String text11 = "ZW321282050000000325";

    Tokenizer tokenizer = new AnsjTokenizer(new StringReader(text11), 0, true);
    CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = tokenizer.addAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAtt = tokenizer.addAttribute(PositionIncrementAttribute.class);

    tokenizer.reset();
    while (tokenizer.incrementToken()) {
        System.out.print(termAtt.toString() + " ");
        // System.out.print(offsetAtt.startOffset() + "-" + offsetAtt.endOffset() + "-");
        // System.out.print(positionIncrementAtt.getPositionIncrement() + "/");
    }
    tokenizer.end();
    tokenizer.close();
}
Project: memory-graph    File: ElasticsearchSearchQueryBase.java
private String[] splitStringIntoTerms(String value) {
    try {
        List<String> results = new ArrayList<>();
        try (TokenStream tokens = analyzer.tokenStream("", value)) {
            CharTermAttribute term = tokens.getAttribute(CharTermAttribute.class);
            tokens.reset();
            while (tokens.incrementToken()) {
                String t = term.toString().trim();
                if (t.length() > 0) {
                    results.add(t);
                }
            }
        }
        return results.toArray(new String[results.size()]);
    } catch (IOException e) {
        throw new MemgraphException("Could not tokenize string: " + value, e);
    }
}
Project: elasticsearch-analysis-voikko    File: VoikkoTokenFilterTests.java
private List<TokenData> parse(String text) {
    NamedAnalyzer analyzer = getAnalysisService().indexAnalyzers.get("test");

    try {
        try (TokenStream ts = analyzer.tokenStream("test", new StringReader(text))) {
            List<TokenData> result = new ArrayList<>();
            CharTermAttribute charTerm = ts.addAttribute(CharTermAttribute.class);
            OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
            PositionIncrementAttribute position = ts.addAttribute(PositionIncrementAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                String original = text.substring(offset.startOffset(), offset.endOffset());
                result.add(token(original, charTerm.toString(), position.getPositionIncrement()));
            }
            ts.end();

            return result;
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Project: elasticsearch_my    File: CommonTermsQueryBuilder.java
private static Query parseQueryString(ExtendedCommonTermsQuery query, Object queryString, String field, Analyzer analyzer,
                                     String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {
    // Logic similar to QueryParser#getFieldQuery
    try (TokenStream source = analyzer.tokenStream(field, queryString.toString())) {
        source.reset();
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        BytesRefBuilder builder = new BytesRefBuilder();
        while (source.incrementToken()) {
            // UTF-8
            builder.copyChars(termAtt);
            query.add(new Term(field, builder.toBytesRef()));
        }
    }

    query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
    query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
    return query;
}
Project: elasticsearch_my    File: UniqueTokenFilterTests.java
public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new UniqueTokenFilter(t));
        }
    };

    TokenStream test = analyzer.tokenStream("test", "this test with test");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("this"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("test"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("with"));

    assertThat(test.incrementToken(), equalTo(false));
}
Project: elasticsearch_my    File: CompoundAnalysisTests.java
private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException {
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
    AnalysisModule analysisModule = new AnalysisModule(new Environment(settings), singletonList(new AnalysisPlugin() {
        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            return singletonMap("myfilter", MyFilterTokenFilterFactory::new);
        }
    }));
    IndexAnalyzers indexAnalyzers = analysisModule.getAnalysisRegistry().build(idxSettings);
    Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();

    AllEntries allEntries = new AllEntries();
    allEntries.addText("field1", text, 1.0f);

    TokenStream stream = AllTokenStream.allTokenStream("_all", text, 1.0f, analyzer);
    stream.reset();
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);

    List<String> terms = new ArrayList<>();
    while (stream.incrementToken()) {
        String tokText = termAtt.toString();
        terms.add(tokText);
    }
    return terms;
}
Project: elasticsearch_my    File: DocumentFieldMapperTests.java
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer tokenizer = new Tokenizer() {
        boolean incremented = false;
        CharTermAttribute term = addAttribute(CharTermAttribute.class);

        @Override
        public boolean incrementToken() throws IOException {
            if (incremented) {
                return false;
            }
            term.setLength(0).append(output);
            incremented = true;
            return true;
        }
    };
    return new TokenStreamComponents(tokenizer);
}
Project: elasticsearch_my    File: SimplePolishTokenFilterTests.java
private void testToken(String source, String expected) throws IOException {
    Index index = new Index("test", "_na_");
    Settings settings = Settings.builder()
            .put("index.analysis.filter.myStemmer.type", "polish_stem")
            .build();
    TestAnalysis analysis = createTestAnalysis(index, settings, new AnalysisStempelPlugin());

    TokenFilterFactory filterFactory = analysis.tokenFilter.get("myStemmer");

    Tokenizer tokenizer = new KeywordTokenizer();
    tokenizer.setReader(new StringReader(source));
    TokenStream ts = filterFactory.create(tokenizer);

    CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    assertThat(ts.incrementToken(), equalTo(true));

    assertThat(term1.toString(), equalTo(expected));
}
Project: elasticsearch_my    File: SimpleIcuCollationTokenFilterTests.java
private void assertCollation(TokenStream stream1, TokenStream stream2, int comparison) throws IOException {
    CharTermAttribute term1 = stream1.addAttribute(CharTermAttribute.class);
    CharTermAttribute term2 = stream2.addAttribute(CharTermAttribute.class);

    stream1.reset();
    stream2.reset();

    assertThat(stream1.incrementToken(), equalTo(true));
    assertThat(stream2.incrementToken(), equalTo(true));
    assertThat(Integer.signum(term1.toString().compareTo(term2.toString())), equalTo(Integer.signum(comparison)));
    assertThat(stream1.incrementToken(), equalTo(false));
    assertThat(stream2.incrementToken(), equalTo(false));

    stream1.end();
    stream2.end();

    stream1.close();
    stream2.close();
}
Project: vind    File: FieldAnalyzerService.java
/**
 * Analyzes a string with the query analyzer configured for the given field.
 *
 * @param core  the SolrCore whose schema supplies the analyzer
 * @param field the name of the field
 * @param value the string to analyze
 * @return the analyzed string, with tokens joined by single spaces
 */
public static String analyzeString(SolrCore core, String field, String value) {
    try {
        StringBuilder b = new StringBuilder();
        try (TokenStream ts = core.getLatestSchema().getFieldType(field).getQueryAnalyzer().tokenStream(field, new StringReader(value))) {
            ts.reset();
            while (ts.incrementToken()) {
                b.append(" ");
                CharTermAttribute attr = ts.getAttribute(CharTermAttribute.class);
                b.append(attr);
            }
        }

        return b.toString().trim();
    } catch (IOException e) {
        //FIXME: This error should be properly logged!
        e.printStackTrace();
        return value;
    }
}
Project: apache    File: AnalyzerTest.java
@After
public void after(){

    if(analyzer != null){
        try {
            TokenStream ts = analyzer.tokenStream("field", text);
            CharTermAttribute ch = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            int i = 0;
            while (ts.incrementToken()) {
                i++;
                System.out.print(ch.toString() + "\t");
                if(i % 7 == 0){
                    System.out.println();
                }
            }
            ts.end();
            ts.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Project: lams    File: SlowSynonymFilterFactory.java
private static List<String> splitByTokenizer(String source, TokenizerFactory tokFactory) throws IOException{
  StringReader reader = new StringReader( source );
  TokenStream ts = loadTokenizer(tokFactory, reader);
  List<String> tokList = new ArrayList<>();
  try {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()){
      if( termAtt.length() > 0 )
        tokList.add( termAtt.toString() );
    }
  } finally{
    reader.close();
  }
  return tokList;
}
Project: lams    File: DateRecognizerSinkFilter.java
@Override
public boolean accept(AttributeSource source) {
  if (termAtt == null) {
    termAtt = source.addAttribute(CharTermAttribute.class);
  }
  try {
    Date date = dateFormat.parse(termAtt.toString());//We don't care about the date, just that we can parse it as a date
    if (date != null) {
      return true;
    }
  } catch (ParseException e) {
    // Not parseable as a date: fall through and reject the token.
  }

  return false;
}
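
A SinkFilter such as this is typically handed to Lucene's TeeSinkTokenFilter, whose sink stream replays only the accepted tokens. A minimal hedged sketch follows (API details vary slightly across Lucene versions; the tokenizer, reader text, and date pattern are illustrative):

Tokenizer tokenizer = new WhitespaceTokenizer(); // illustrative upstream tokenizer
tokenizer.setReader(new StringReader("released 2014-01-02 and 2014-02-03"));
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(tokenizer);
TeeSinkTokenFilter.SinkTokenStream dates =
    tee.newSinkTokenStream(new DateRecognizerSinkFilter(new SimpleDateFormat("yyyy-MM-dd")));
// Consuming `tee` pushes every token through the chain; `dates` then replays
// only the tokens this filter accepted, i.e. those that parsed as dates.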
Project: lams    File: PrefixAwareTokenFilter.java
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) {
  super(suffix);
  this.suffix = suffix;
  this.prefix = prefix;
  prefixExhausted = false;

  termAtt = addAttribute(CharTermAttribute.class);
  posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  payloadAtt = addAttribute(PayloadAttribute.class);
  offsetAtt = addAttribute(OffsetAttribute.class);
  typeAtt = addAttribute(TypeAttribute.class);
  flagsAtt = addAttribute(FlagsAttribute.class);

  p_termAtt = prefix.addAttribute(CharTermAttribute.class);
  p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
  p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
  p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
  p_typeAtt = prefix.addAttribute(TypeAttribute.class);
  p_flagsAtt = prefix.addAttribute(FlagsAttribute.class);
}
Project: lucene-bo    File: TibetanAnalyzerTest.java
static private void assertTokenStream(TokenStream tokenStream, List<String> expected) {
    try {
        List<String> termList = new ArrayList<String>();
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        while (tokenStream.incrementToken()) {
            termList.add(charTermAttribute.toString());
        }
        System.out.println(String.join(" ", termList));
        assertThat(termList, is(expected));
    } catch (IOException e) {
        fail("could not consume token stream: " + e.getMessage());
    }
}
Project: Elasticsearch    File: CommonTermsQueryParser.java
private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String field, QueryParseContext parseContext,
        Analyzer analyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {
    // Logic similar to QueryParser#getFieldQuery
    int count = 0;
    try (TokenStream source = analyzer.tokenStream(field, queryString)) {
        source.reset();
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        BytesRefBuilder builder = new BytesRefBuilder();
        while (source.incrementToken()) {
            // UTF-8
            builder.copyChars(termAtt);
            query.add(new Term(field, builder.toBytesRef()));
            count++;
        }
    }

    if (count == 0) {
        return null;
    }
    query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
    query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
    return query;
}
Project: ontonethub    File: AbstractIndexingJob.java
protected String lemmatize(String query) {
    ItalianAnalyzer analyzer = new ItalianAnalyzer();
    StringBuilder sb = new StringBuilder();
    // try-with-resources ensures the stream is closed even on failure
    try (TokenStream tokenStream = analyzer.tokenStream("label", query)) {
        CharTermAttribute token = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            if (sb.length() > 0) {
                sb.append(" ");
            }
            sb.append(token.toString());
        }
        tokenStream.end();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return sb.toString();
}
Project: elasticsearch-dynamic-synonym    File: SimpleSynonymMap.java
private Set<String> analyze(String text) throws IOException {
    Set<String> result = new HashSet<String>();
    Analyzer analyzer = configuration.getAnalyzer();
    try (TokenStream ts = analyzer.tokenStream("", text)) {
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            int length = termAtt.length();
            if (length == 0) {
                throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
            }
            if (posIncAtt.getPositionIncrement() != 1) {
                throw new IllegalArgumentException("term: " + text + " analyzed to a token with posinc != 1");
            }

            result.add(new String(termAtt.buffer(), 0, termAtt.length()));
        }

        ts.end();
        return result;
    }
}
Project: hinglish-stemmer    File: HinglishTokenFilter.java
public static void stemHinglish(CharTermAttribute termAtt)
{
    String strInput = termAtt.toString();
    //System.out.println("Before " + strInput);
    for (Object regexEntry : lsRegexs)
    {
        List<Object> lsInputs = (List<Object>) regexEntry;
        Matcher matcher = ((Pattern) lsInputs.get(0)).matcher(strInput);
        if (matcher.matches())
        {
            Matcher replMatcher = ((Pattern) lsInputs.get(1)).matcher(strInput);
            strInput = replMatcher.replaceAll((String) lsInputs.get(2));
        }
    }
    // Write the rewritten term back. Copying char-by-char into termAtt.buffer()
    // could overflow when the replacement is longer than the original term;
    // setEmpty().append() resizes the attribute's buffer as needed.
    termAtt.setEmpty().append(strInput);
    //System.out.println("After " + strInput);
}
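
A hedged sketch of how a TokenFilter's incrementToken() might apply this helper to rewrite each term in place; everything except stemHinglish itself is illustrative.

@Override
public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
        return false;              // end of upstream tokens
    }
    stemHinglish(termAtt);         // rewrite the current term in place
    return true;
}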
Project: Sefaria-ElasticSearch    File: PrefixFilter.java
@Override
public boolean incrementToken() throws IOException {

    if (savePrevToken())
        return true;

    // Reached the end of the token stream being processed
    if ( ! this.input.incrementToken()) {
        return false;
    }

    // Get text of the current token and remove any leading/trailing whitespace.
    String currToken =
            this.input.getAttribute(CharTermAttribute.class).toString().trim();

    if (! hebTokAttribute.isExact()) {
        List<String> prefixedStrippedList = getPrefixStrippedList(currToken);
        for (String preStrip : prefixedStrippedList) {
            previousTokens.add(preStrip);
        }

        savePrevToken();
    }
    return true;
}
Project: Sefaria-ElasticSearch    File: StopLetterFilter.java
@Override
public boolean incrementToken() throws IOException {
    // Reached the end of the token stream being processed
    if ( ! this.input.incrementToken()) {
        return false;
    }

    String currToken =
            this.input.getAttribute(CharTermAttribute.class).toString().trim();
    if ( ! this.hebTokAttribute.isExact()) {
        this.charTermAttribute.setEmpty().append(this.slr.filterStopLetters(currToken));
        this.hebTokAttribute.setExact(false);
    }

    return true;
}
Project: Sefaria-ElasticSearch    File: SefariaNGramTokenFilter.java
@Override
public boolean incrementToken() throws IOException {

    if (savePrevToken())
        return true;

    // Reached the end of the token stream being processed
    if ( ! this.input.incrementToken()) {
        return false;
    }

    // Get text of the current token and remove any leading/trailing whitespace.
    String currToken =
            this.input.getAttribute(CharTermAttribute.class).toString().trim();

    if (! hebTokAttribute.isExact()) {
        List<String> ngrams = ngramizer.ngramize(currToken);
        for (String ngram : ngrams) {
            previousTokens.add(ngram);
        }

        savePrevToken();
    }

    return true;
}
Project: Sefaria-ElasticSearch    File: SofitLetterFilter.java
@Override
public boolean incrementToken() throws IOException {
    // Reached the end of the token stream being processed
    if ( ! this.input.incrementToken()) {
        return false;
    }

    String currToken =
            this.input.getAttribute(CharTermAttribute.class).toString().trim();
    if ( ! this.hebTokAttribute.isExact()) {
        this.charTermAttribute.setEmpty().append(replaceSofits(currToken));
        this.hebTokAttribute.setExact(false);
    }

    return true;
}
Project: voice-IT    File: DroneActionLookup.java
private String removeStopWords(String action) {
    StringBuilder builder = new StringBuilder();
    FrenchAnalyzer frenchAnalyzer = new FrenchAnalyzer();
    // try-with-resources ensures the stream is closed even on failure
    try (TokenStream tokenStream = frenchAnalyzer.tokenStream("contents", action)) {
        CharTermAttribute attribute = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            builder.append(attribute.toString()).append(' ');
        }
        tokenStream.end();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return builder.toString();
}
Project: STEM    File: LuceneDatabase.java
/**
 * Parses the query. Using this instead of a QueryParser in order
 * to avoid thread-safety issues with Lucene's query parser.
 *
 * @param fieldName the name of the field
 * @param value the value of the field
 * @return the parsed query
 */
private Query parseTokens(String fieldName, String value) {
  BooleanQuery searchQuery = new BooleanQuery();
  if (value != null) {
    Analyzer analyzer = new KeywordAnalyzer();

    try (TokenStream tokenStream =
           analyzer.tokenStream(fieldName, new StringReader(value))) {
      tokenStream.reset();
      CharTermAttribute attr =
        tokenStream.getAttribute(CharTermAttribute.class);

      while (tokenStream.incrementToken()) {
        String term = attr.toString();
        Query termQuery = new TermQuery(new Term(fieldName, term));
        searchQuery.add(termQuery, Occur.SHOULD);
      }
    } catch (IOException e) {
      throw new DukeException("Error parsing input string '" + value + "' " +
                              "in field " + fieldName);
    }
  }

  return searchQuery;
}
Project: elasticsearch-analysis-ltp    File: LTPTokenizer.java
/**
 * Lucene constructor
 *
 * @throws UnirestException
 * @throws JSONException
 * @throws IOException
 */
public LTPTokenizer(Set<String> filter)
        throws IOException, JSONException, UnirestException {
    super();
    logger.info("LTPTokenizer Initialize......");
    // Add token offset attribute
    offsetAttr = addAttribute(OffsetAttribute.class);
    // Add token content attribute
    charTermAttr = addAttribute(CharTermAttribute.class);
    // Add token type attribute
    typeAttr = addAttribute(TypeAttribute.class);
    // Add token position attribute
    piAttr = addAttribute(PositionIncrementAttribute.class);
    // Create a new word segmenter to get tokens
    LTPSeg = new LTPWordSegmenter(input);
    // Add filter words set
    this.filter = filter;
}
Project: elasticsearch-analysis-lc-pinyin    File: PinyinAnalysisTest.java
@Test
public void testSearch() throws IOException {
    LcPinyinAnalyzer analyzer = new LcPinyinAnalyzer(AnalysisSetting.search);
    TokenStream tokenStream = analyzer.tokenStream("lc", "重qing");

    CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttribute = tokenStream.getAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAttribute = tokenStream.getAttribute(PositionIncrementAttribute.class);

    tokenStream.reset();
    Assert.assertTrue(tokenStream.incrementToken());
    Assert.assertEquals(charTermAttribute.toString(), "重");
    Assert.assertEquals(offsetAttribute.startOffset(), 0);
    Assert.assertEquals(offsetAttribute.endOffset(), 1);
    Assert.assertEquals(positionIncrementAttribute.getPositionIncrement(), 1);

    Assert.assertTrue(tokenStream.incrementToken());
    Assert.assertEquals(charTermAttribute.toString(), "qing");
    Assert.assertEquals(offsetAttribute.startOffset(), 1);
    Assert.assertEquals(offsetAttribute.endOffset(), 5);
    Assert.assertEquals(positionIncrementAttribute.getPositionIncrement(), 1);

    tokenStream.close();
}
Project: elasticsearch-analysis-lc-pinyin    File: PinyinFilterTest.java
public void testFullPinyinFilter() throws IOException {
    LcPinyinAnalyzer analyzer = new LcPinyinAnalyzer(AnalysisSetting.search);
    TokenStream tokenStream = analyzer.tokenStream("lc", "作者 : 陈楠");

    LcPinyinTokenFilter lcPinyinTokenFilter = new LcPinyinTokenFilter(tokenStream, PinyinFilterSetting.full_pinyin);

    CharTermAttribute charTermAttribute = lcPinyinTokenFilter.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttribute = lcPinyinTokenFilter.getAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAttribute = lcPinyinTokenFilter.getAttribute(PositionIncrementAttribute.class);

    lcPinyinTokenFilter.reset();
    while (lcPinyinTokenFilter.incrementToken()) {
        System.out.println(charTermAttribute.toString() + ":" + offsetAttribute.startOffset() + "," + offsetAttribute.endOffset() + ":" + positionIncrementAttribute.getPositionIncrement());
    }
    lcPinyinTokenFilter.close();
}
Project: elasticsearch-analysis-lc-pinyin    File: PinyinFilterTest.java
public void testFirstLetterFilter() throws IOException {
    LcPinyinAnalyzer analyzer = new LcPinyinAnalyzer(AnalysisSetting.search);
    TokenStream tokenStream = analyzer.tokenStream("lc", "作者 : 陈楠");

    LcPinyinTokenFilter lcPinyinTokenFilter = new LcPinyinTokenFilter(tokenStream, PinyinFilterSetting.first_letter);

    CharTermAttribute charTermAttribute = lcPinyinTokenFilter.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttribute = lcPinyinTokenFilter.getAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAttribute = lcPinyinTokenFilter.getAttribute(PositionIncrementAttribute.class);

    lcPinyinTokenFilter.reset();
    while (lcPinyinTokenFilter.incrementToken()) {
        System.out.println(charTermAttribute.toString() + ":" + offsetAttribute.startOffset() + "," + offsetAttribute.endOffset() + ":" + positionIncrementAttribute.getPositionIncrement());
    }
    lcPinyinTokenFilter.close();
}
Project: fastcatsearch3    File: BasicHighlightAndSummary.java
public WrappedTokenStream(TokenStream tokenStream, String pText) {
    this.pText = pText;
    this.tokenStream = tokenStream;
    if(tokenStream.hasAttribute(CharTermAttribute.class)) {
        charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
    }
    if(tokenStream.hasAttribute(OffsetAttribute.class)) {
        offsetAttribute = tokenStream.getAttribute(OffsetAttribute.class);
    }
    if(tokenStream.hasAttribute(CharsRefTermAttribute.class)) {
        charsRefTermAttribute = tokenStream.getAttribute(CharsRefTermAttribute.class);
    }

    if(tokenStream.hasAttribute(AdditionalTermAttribute.class)) {
        additionalTermAttribute = tokenStream.getAttribute(AdditionalTermAttribute.class);
    }

    additionalTermAttributeLocal.init(this);
}
Project: fastcatsearch3    File: PrimaryWordAnalyzerTest.java
@Test
public void test() throws IOException {
    PrimaryWordAnalyzer analyzer = new PrimaryWordAnalyzer();
    String text = "서울 지하철(300만명)";
    TokenStream tokenStream = analyzer.tokenStream("", new StringReader(text));
    CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
    tokenStream.reset();
    assertTrue(tokenStream.incrementToken());
    assertEquals("서울", charTermAttribute.toString());

    assertTrue(tokenStream.incrementToken());
    assertEquals("지하철", charTermAttribute.toString());

    assertTrue(tokenStream.incrementToken());
    assertEquals("300", charTermAttribute.toString());

    assertTrue(tokenStream.incrementToken());
    assertEquals("만명", charTermAttribute.toString());

    assertFalse(tokenStream.incrementToken());

}
Project: fastcatsearch3    File: CSVAnalyzerTest.java
@Test
public void testBulk() throws IOException {
    String str = "SK,  하이닉스";
    //str = "하이닉스";

    StringReader input = new StringReader(str);
    CSVAnalyzer analyzer = new CSVAnalyzer();
    TokenStream tokenStream = analyzer.tokenStream("", input);
    tokenStream.reset();
    logger.debug("tokenStream:{}", tokenStream);
    CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttribute = tokenStream.getAttribute(OffsetAttribute.class);
    for(int inx=0;tokenStream.incrementToken();inx++) {
        String term = charTermAttribute.toString();
        logger.debug("[{}] \"{}\" {}~{}", inx, term, offsetAttribute.startOffset(), offsetAttribute.endOffset());
    }
    analyzer.close();
}