Java 类org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute 实例源码

项目：analyzers-ja 文件：PosConcatenationFilterTest.java

/**
 * Runs the Japanese tokenizer (SEARCH mode) through a {@code PosConcatenationFilter}
 * configured with an empty tag set and expects the sample sentence to come out as
 * six separate tokens with the listed offsets and position increments.
 *
 * @throws IOException if the analysis assertion fails while reading the token stream
 */
@Test
public void testNoPos() throws IOException {
    // Empty on purpose: no part-of-speech tag is registered for this test.
    final Set<String> posTags = new HashSet<>();

    final Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(final String fieldName) {
            final Tokenizer source = new JapaneseTokenizer(null, false, JapaneseTokenizer.Mode.SEARCH);
            final PartOfSpeechAttribute posAttribute = source.addAttribute(PartOfSpeechAttribute.class);

            // Hands the current token's part-of-speech tag to the filter on demand.
            final PosConcatenationFilter.PartOfSpeechSupplier supplier =
                    new PosConcatenationFilter.PartOfSpeechSupplier() {
                        @Override
                        public String get() {
                            return posAttribute.getPartOfSpeech();
                        }
                    };

            return new TokenStreamComponents(source, new PosConcatenationFilter(source, posTags, supplier));
        }
    };

    final String[] expectedTerms = { "明日", "は", "詳細", "設計", "です", "。" };
    final int[] expectedStartOffsets = { 0, 2, 3, 5, 7, 9 };
    final int[] expectedEndOffsets = { 2, 3, 5, 7, 9, 10 };
    final int[] expectedPositionIncrements = { 1, 1, 1, 1, 1, 1 };

    assertAnalyzesTo(analyzer, "明日は詳細設計です。",
            expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedPositionIncrements);
}

项目：analyzers-ja 文件：PosConcatenationFilterTest.java

/**
 * Runs the Japanese tokenizer (SEARCH mode) through a {@code PosConcatenationFilter}
 * configured with five noun part-of-speech tags and checks two sample inputs.
 * The expected output contains merged terms such as "歯科医院", "歯科衛生士" and
 * "詳細設計", each spanning the offsets of the text it covers.
 *
 * @throws IOException if the analysis assertion fails while reading the token stream
 */
@Test
public void testBasic() throws IOException {
    // Part-of-speech tags handed to the filter under test.
    final Set<String> posTags = new HashSet<>();
    posTags.add("名詞-副詞可能");
    posTags.add("名詞-形容動詞語幹");
    posTags.add("名詞-サ変接続");
    posTags.add("名詞-一般");
    posTags.add("名詞-接尾-一般");

    final Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(final String fieldName) {
            final Tokenizer source = new JapaneseTokenizer(null, false, JapaneseTokenizer.Mode.SEARCH);
            final PartOfSpeechAttribute posAttribute = source.addAttribute(PartOfSpeechAttribute.class);

            // Hands the current token's part-of-speech tag to the filter on demand.
            final PosConcatenationFilter.PartOfSpeechSupplier supplier =
                    new PosConcatenationFilter.PartOfSpeechSupplier() {
                        @Override
                        public String get() {
                            return posAttribute.getPartOfSpeech();
                        }
                    };

            return new TokenStreamComponents(source, new PosConcatenationFilter(source, posTags, supplier));
        }
    };

    // First input: two compound nouns joined by a particle.
    final String[] clinicTerms = { "歯科医院", "の", "歯科衛生士" };
    final int[] clinicStartOffsets = { 0, 4, 5 };
    final int[] clinicEndOffsets = { 4, 5, 10 };
    final int[] clinicPositionIncrements = { 1, 1, 1 };
    assertAnalyzesTo(analyzer, "歯科医院の歯科衛生士",
            clinicTerms, clinicStartOffsets, clinicEndOffsets, clinicPositionIncrements);

    // Second input: the same sentence used by testNoPos, now with "詳細設計" as one term.
    final String[] designTerms = { "明日", "は", "詳細設計", "です", "。" };
    final int[] designStartOffsets = { 0, 2, 3, 7, 9 };
    final int[] designEndOffsets = { 2, 3, 7, 9, 10 };
    final int[] designPositionIncrements = { 1, 1, 1, 1, 1 };
    assertAnalyzesTo(analyzer, "明日は詳細設計です。",
            designTerms, designStartOffsets, designEndOffsets, designPositionIncrements);
}

项目：easyjasub 文件：LuceneParser.java

/**
 * Registers the attributes this parser works with on the given token stream:
 * offset, reading, part of speech, inflection and base form, in that order.
 *
 * @param tokenStream the stream on which the attributes are added
 */
private void addAttributes(TokenStream tokenStream) {
    tokenStream.addAttribute(OffsetAttribute.class);
    tokenStream.addAttribute(ReadingAttribute.class);
    tokenStream.addAttribute(PartOfSpeechAttribute.class);
    tokenStream.addAttribute(InflectionAttribute.class);
    tokenStream.addAttribute(BaseFormAttribute.class);
}

项目：easyjasub 文件：LuceneParser.java

/**
 * Copies the current part-of-speech tag from the token stream onto the token,
 * after passing it through {@code LuceneUtil.translatePartOfSpeech}.
 * The token is left untouched when the stream yields no part-of-speech
 * attribute or the attribute holds no tag.
 *
 * @param tokenStream the stream positioned on the token being read
 * @param token the token that receives the translated part of speech
 */
private void readPartOfSpeech(TokenStream tokenStream, LuceneToken token) {
    final PartOfSpeechAttribute posAttribute = tokenStream.getAttribute(PartOfSpeechAttribute.class);
    if (posAttribute == null) {
        return;
    }

    final String rawTag = posAttribute.getPartOfSpeech();
    if (rawTag == null) {
        return;
    }

    token.setPartOfSpeech(LuceneUtil.translatePartOfSpeech(rawTag));
}

项目：elasticsearch-analysis-ja 文件：PosConcatenationFilterFactory.java

/**
 * Wraps the given token stream in a {@code PosConcatenationFilter} that uses this
 * factory's {@code posTags} and reads each token's part of speech from the
 * stream's {@code PartOfSpeechAttribute}.
 *
 * @param tokenStream the upstream token stream to wrap
 * @return the filter wrapping {@code tokenStream}
 */
@Override
public TokenStream create(final TokenStream tokenStream) {
    final PartOfSpeechAttribute partOfSpeech = tokenStream.addAttribute(PartOfSpeechAttribute.class);
    // The bound method reference supplies the current token's tag each time it is invoked.
    final TokenStream concatenated =
            new PosConcatenationFilter(tokenStream, posTags, partOfSpeech::getPartOfSpeech);
    return concatenated;
}