Java 类org.apache.lucene.analysis.cjk.CJKAnalyzer 实例源码

项目:elasticsearch_my    文件:CjkAnalyzerProvider.java   
/**
 * Creates the provider and builds the {@link CJKAnalyzer} it hands out.
 *
 * <p>The stop-word set comes from {@code Analysis.parseStopWords}, which is given
 * {@code CJKAnalyzer.getDefaultStopSet()} as its default set. The resulting
 * analyzer is stamped with this provider's {@code version}.
 *
 * @param indexSettings index-level settings; also supplies the index-created version
 * @param env           environment passed through to the stop-word parser
 * @param name          analyzer name, forwarded to the superclass
 * @param settings      analyzer settings, forwarded to the superclass and the stop-word parser
 */
public CjkAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);

    final CharArraySet defaultStopSet = CJKAnalyzer.getDefaultStopSet();
    final CharArraySet effectiveStopSet = Analysis.parseStopWords(
        env, indexSettings.getIndexVersionCreated(), settings, defaultStopSet);

    this.analyzer = new CJKAnalyzer(effectiveStopSet);
    this.analyzer.setVersion(version);
}
项目:Elasticsearch    文件:CjkAnalyzerProvider.java   
/**
 * Creates the provider and builds the {@link CJKAnalyzer} it hands out.
 *
 * <p>The stop-word set comes from {@code Analysis.parseStopWords}, which is given
 * {@code CJKAnalyzer.getDefaultStopSet()} as its default set. The resulting
 * analyzer is stamped with this provider's {@code version}.
 *
 * @param index                index this analyzer belongs to, forwarded to the superclass
 * @param indexSettingsService source of the index settings forwarded to the superclass
 * @param env                  environment passed through to the stop-word parser
 * @param name                 analyzer name, forwarded to the superclass
 * @param settings             analyzer settings, forwarded to the superclass and the stop-word parser
 */
@Inject
public CjkAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);

    final CharArraySet defaultStopSet = CJKAnalyzer.getDefaultStopSet();
    final CharArraySet effectiveStopSet = Analysis.parseStopWords(env, settings, defaultStopSet);

    this.analyzer = new CJKAnalyzer(effectiveStopSet);
    this.analyzer.setVersion(version);
}
项目:solr-researcher    文件:TestRemoveOneClauseHeuristic.java   
/**
 * Builds a single-entry, insertion-ordered map that associates the field
 * pattern {@code "cjk"} with a default-constructed {@link CJKAnalyzer}.
 *
 * @return a new mutable map containing exactly that one mapping
 */
private Map<Pattern, Analyzer> createCJKAnalyzer() {
  Map<Pattern, Analyzer> analyzersByFieldPattern = new LinkedHashMap<Pattern, Analyzer>();
  analyzersByFieldPattern.put(Pattern.compile("cjk"), new CJKAnalyzer());
  return analyzersByFieldPattern;
}
项目:CadalWorkspace    文件:IndexSearch.java   
/**
 * Creates the index for query topic {@code num} under
 * {@code E:/topicIndex/querytopic<num>}.
 *
 * <p>One document is added per entry of {@code arrQueryTopic}, with the fields
 * {@code ID} (not analyzed), {@code QueryWord} and {@code Times} (both analyzed
 * with a {@link CJKAnalyzer}). The writer is opened in create mode.
 *
 * <p>If anything fails after the writer has been opened, the writer is rolled
 * back so that it is closed and its write lock released instead of being leaked.
 *
 * @param num topic number used as the suffix of the index directory name
 * @return {@code true} if every document was added and the writer closed
 *         cleanly; {@code false} if any exception occurred (its stack trace is printed)
 */
@SuppressWarnings("deprecation")
public boolean createIndex(int num) {
    boolean signal = false;
    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_35);
    String path = "E:/topicIndex/querytopic" + num;
    //
    System.out.println("--------createIndex--------");
    //
    File file = new File(path);
    if(file.exists()){
        // File.delete() only removes an empty directory; the result is ignored
        // here because the writer below is opened in create mode anyway.
        file.delete();
    }
    file.mkdir();

    // Declared outside the try block so the finally block can release it.
    IndexWriter writer = null;
    try{
        Directory dir = FSDirectory.open(file);

        writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
        Document doc;

        for(int i = 0; i < this.arrQueryTopic.size(); ++i) {
            doc = new Document();
            // Note: the ID is stored as a String.
            doc.add(new Field("ID", this.arrQueryTopic.get(i).toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field("QueryWord", this.arrQueryWord.get(i), Field.Store.YES, Field.Index.ANALYZED));
            doc.add(new Field("Times", this.arrQueryWordTimes.get(i), Field.Store.YES, Field.Index.ANALYZED));
            writer.addDocument(doc);
        }
        writer.optimize();
        writer.close();
        writer = null;      // closed cleanly; nothing left for the finally block
        signal = true;
    }catch(Exception e) {
        signal = false;
        e.printStackTrace();
    }finally{
        if(writer != null) {
            // Failure path: discard uncommitted changes and close the writer.
            try{
                writer.rollback();
            }catch(Exception rollbackFailure) {
                rollbackFailure.printStackTrace();
            }
        }
    }

    return signal;
}
项目:CadalWorkspace    文件:IndexSearch.java   
/**
 * Searches the index of query topic {@code num} for {@code str} and prints the hits.
 *
 * <p>The query is parsed against the {@code QueryWord} field with a
 * {@link CJKAnalyzer}; at most {@code QUERYWORDNUM} hits are requested. For each
 * hit the stored {@code ID}, {@code QueryWord} and {@code Times} values are
 * written to standard output.
 *
 * <p>The searcher is closed in a {@code finally} block so it is released even
 * when parsing or searching throws.
 *
 * @param num topic number used as the suffix of the index directory name
 * @param str query string handed to the query parser
 * @return {@code true} if the search ran and the searcher closed without error;
 *         {@code false} if any exception occurred (its stack trace is printed)
 */
@SuppressWarnings({ "deprecation", "static-access" })
public boolean search(int num, String str) {
    boolean signal = false;
    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_35);

    String path = "E:/topicIndex/querytopic" + num;
    File file = new File(path);

    // Declared outside the try block so the finally block can release it.
    IndexSearcher indexSearcher = null;
    try{
        FSDirectory dir = FSDirectory.open(file);

        indexSearcher = new IndexSearcher(dir);

        QueryParser parse = new QueryParser(Version.LUCENE_35, "QueryWord", analyzer);
        Query query = parse.parse(str);

        TopDocs topDocs = indexSearcher.search(query, this.QUERYWORDNUM);
        ScoreDoc[] docs = topDocs.scoreDocs;

        // Print one line per hit: ID, query word, times.
        for(int i = 0; i < docs.length; ++i) {
            System.out.println(indexSearcher.doc(docs[i].doc).get("ID") + "   " 
                             + indexSearcher.doc(docs[i].doc).get("QueryWord") + "   "
                             + indexSearcher.doc(docs[i].doc).get("Times"));
        }

        signal = true;
    }catch(Exception e) {
        e.printStackTrace();
        signal = false;
    }finally{
        if(indexSearcher != null) {
            try{
                indexSearcher.close();
            }catch(Exception closeFailure) {
                // As before, a failing close makes the call report failure.
                closeFailure.printStackTrace();
                signal = false;
            }
        }
    }

    return signal;
}
项目:CadalWorkspace    文件:IndexSearch.java   
/**
 * Creates the index for query topic {@code num} under
 * {@code E:/topicIndex/querytopic<num>}.
 *
 * <p>One document is added per entry of {@code arrQueryTopic}, with the fields
 * {@code ID} (not analyzed), {@code QueryWord} and {@code Times} (both analyzed
 * with a {@link CJKAnalyzer}). The writer is opened in create mode.
 *
 * <p>If anything fails after the writer has been opened, the writer is rolled
 * back so that it is closed and its write lock released instead of being leaked.
 *
 * @param num topic number used as the suffix of the index directory name
 * @return {@code true} if every document was added and the writer closed
 *         cleanly; {@code false} if any exception occurred (its stack trace is printed)
 */
@SuppressWarnings("deprecation")
public boolean createIndex(int num) {
    boolean signal = false;
    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_35);
    String path = "E:/topicIndex/querytopic" + num;
    //
    System.out.println("--------createIndex--------");
    //
    File file = new File(path);
    if(file.exists()){
        // File.delete() only removes an empty directory; the result is ignored
        // here because the writer below is opened in create mode anyway.
        file.delete();
    }
    file.mkdir();

    // Declared outside the try block so the finally block can release it.
    IndexWriter writer = null;
    try{
        Directory dir = FSDirectory.open(file);

        writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
        Document doc;

        for(int i = 0; i < this.arrQueryTopic.size(); ++i) {
            doc = new Document();
            // Note: the ID is stored as a String.
            doc.add(new Field("ID", this.arrQueryTopic.get(i).toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field("QueryWord", this.arrQueryWord.get(i), Field.Store.YES, Field.Index.ANALYZED));
            doc.add(new Field("Times", this.arrQueryWordTimes.get(i), Field.Store.YES, Field.Index.ANALYZED));
            writer.addDocument(doc);
        }
        writer.optimize();
        writer.close();
        writer = null;      // closed cleanly; nothing left for the finally block
        signal = true;
    }catch(Exception e) {
        signal = false;
        e.printStackTrace();
    }finally{
        if(writer != null) {
            // Failure path: discard uncommitted changes and close the writer.
            try{
                writer.rollback();
            }catch(Exception rollbackFailure) {
                rollbackFailure.printStackTrace();
            }
        }
    }

    return signal;
}
项目:CadalWorkspace    文件:IndexSearch.java   
/**
 * Searches the index of query topic {@code num} for {@code str} and prints the hits.
 *
 * <p>The query is parsed against the {@code QueryWord} field with a
 * {@link CJKAnalyzer}; at most {@code QUERYWORDNUM} hits are requested. For each
 * hit the stored {@code ID}, {@code QueryWord} and {@code Times} values are
 * written to standard output.
 *
 * <p>The searcher is closed in a {@code finally} block so it is released even
 * when parsing or searching throws.
 *
 * @param num topic number used as the suffix of the index directory name
 * @param str query string handed to the query parser
 * @return {@code true} if the search ran and the searcher closed without error;
 *         {@code false} if any exception occurred (its stack trace is printed)
 */
@SuppressWarnings({ "deprecation", "static-access" })
public boolean search(int num, String str) {
    boolean signal = false;
    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_35);

    String path = "E:/topicIndex/querytopic" + num;
    File file = new File(path);

    // Declared outside the try block so the finally block can release it.
    IndexSearcher indexSearcher = null;
    try{
        FSDirectory dir = FSDirectory.open(file);

        indexSearcher = new IndexSearcher(dir);

        QueryParser parse = new QueryParser(Version.LUCENE_35, "QueryWord", analyzer);
        Query query = parse.parse(str);

        TopDocs topDocs = indexSearcher.search(query, this.QUERYWORDNUM);
        ScoreDoc[] docs = topDocs.scoreDocs;

        // Print one line per hit: ID, query word, times.
        for(int i = 0; i < docs.length; ++i) {
            System.out.println(indexSearcher.doc(docs[i].doc).get("ID") + "   " 
                             + indexSearcher.doc(docs[i].doc).get("QueryWord") + "   "
                             + indexSearcher.doc(docs[i].doc).get("Times"));
        }

        signal = true;
    }catch(Exception e) {
        e.printStackTrace();
        signal = false;
    }finally{
        if(indexSearcher != null) {
            try{
                indexSearcher.close();
            }catch(Exception closeFailure) {
                // As before, a failing close makes the call report failure.
                closeFailure.printStackTrace();
                signal = false;
            }
        }
    }

    return signal;
}
项目:hello_luence    文件:TxtFileIndexer.java   
public TxtFileIndexer(String dataDir_s, String indexDir_s) throws Exception {

        dataDir = new File(dataDir_s);

        indexDir = new File(indexDir_s);
        index_dir = FSDirectory.open(indexDir);// (indexDir,null);
        // public static FSDirectory open(File path)throws IOException
        // Creates an FSDirectory instance 创建一个FSDirectory实例

        indexDirCN = new File(indexDir_s + File.separator + "cn");
        index_dir_cn = FSDirectory.open(indexDirCN);

        // -----------初始化第二步:【分析器】和【索引写入器】---------------------
        luceneAnalyzer = new StandardAnalyzer(Version.LUCENE_47);
        // 在文档被索引前,先要对文档内容进行分词处理,由 Analyzer 来做的
        // Analyzer 类是一个抽象类,它有多个实现。针对不同的语言和应用需选择适合的 Analyzer
        // Analyzer 把分词后的内容交给 IndexWriter 来建立索引

        // StandardAnalyzer 标准分析
        // public StandardAnalyzer(Version matchVersion)
        // Builds an analyzer with the default stop words (STOP_WORDS_SET).
        // Parameters:matchVersion - Lucene version to match See above
        indexWriter = new IndexWriter(index_dir, new IndexWriterConfig(
                Version.LUCENE_47, luceneAnalyzer));

        chineseAnalyzer = new CJKAnalyzer(Version.LUCENE_47);// 中文分析器
        indexWriter_cn = new IndexWriter(index_dir_cn, new IndexWriterConfig(
                Version.LUCENE_47, chineseAnalyzer));
    }
项目:elasticsearch_my    文件:CjkAnalyzerProvider.java   
/**
 * Returns the {@link CJKAnalyzer} held by this provider.
 *
 * @return the analyzer stored in the {@code analyzer} field
 */
@Override
public CJKAnalyzer get() {
    return analyzer;
}
项目:Elasticsearch    文件:CjkAnalyzerProvider.java   
/**
 * Returns the {@link CJKAnalyzer} held by this provider.
 *
 * @return the analyzer stored in the {@code analyzer} field
 */
@Override
public CJKAnalyzer get() {
    return analyzer;
}
项目:CadalWorkspace    文件:IndexSearch.java   
/**
 * Looks up the topic at position {@code num} of {@code arrTopic}, runs it as a
 * query against the index in {@code E:/topicIndex/querytopic<num + 1>} and
 * stores the hits in {@code listResult}.
 *
 * <p>The query is parsed against the {@code QueryWord} field with a
 * {@link CJKAnalyzer}; at most {@code QUERYWORDNUM} hits are requested. The
 * number of hits is stored in {@code countTmp}, and hit {@code i} is written to
 * {@code listResult[i]}.
 *
 * <p>The searcher is closed in a {@code finally} block; previously it was never
 * closed, so each call leaked the underlying index reader.
 *
 * @param num zero-based position of the topic in {@code arrTopic}; the index
 *            directory suffix is {@code num + 1}
 * @return {@code true} if the search completed; {@code false} if any exception
 *         occurred (its stack trace is printed)
 */
@SuppressWarnings({ "deprecation", "static-access" })
public boolean search(int num) {
    boolean signal = false;
    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_35);

    String path = "E:/topicIndex/querytopic" + (num + 1);
    File file = new File(path);

    // Declared outside the try block so the finally block can release it.
    IndexSearcher indexSearcher = null;
    try{
        String topic = this.arrTopic.get(num);
        //
        System.out.println("----------" + topic + "--------" + num);
        //
        FSDirectory dir = FSDirectory.open(file);

        indexSearcher = new IndexSearcher(dir);

        QueryParser parse = new QueryParser(Version.LUCENE_35, "QueryWord", analyzer);
        Query query = parse.parse(topic);

        TopDocs topDocs = indexSearcher.search(query, this.QUERYWORDNUM);
        ScoreDoc[] docs = topDocs.scoreDocs;

        System.out.println(docs.length);

        this.countTmp = docs.length;

        System.out.println(countTmp);

        // Copy each hit into this.listResult.
        // NOTE(review): assumes listResult has at least QUERYWORDNUM slots - confirm.
        for(int i = 0; i < this.countTmp; ++i) {
            TopicSearchResult resultTmp = new TopicSearchResult();

            resultTmp.setID(Integer.parseInt(indexSearcher.doc(docs[i].doc).get("ID")));
            resultTmp.setQueryWord(indexSearcher.doc(docs[i].doc).get("QueryWord"));
            resultTmp.setTimes(indexSearcher.doc(docs[i].doc).get("Times"));

            this.listResult[i] = resultTmp;
        }

        System.out.println("---------------" + this.listResult.length + "----------------------");

        signal = true;
    }catch(Exception e)  {
        signal = false;
        e.printStackTrace();
    }finally{
        if(indexSearcher != null) {
            // All hit data has already been copied out, so closing is safe here.
            try{
                indexSearcher.close();
            }catch(Exception closeFailure) {
                closeFailure.printStackTrace();
            }
        }
    }

    return signal;
}
项目:CadalWorkspace    文件:IndexSearch.java   
/**
 * Looks up the topic at position {@code num} of {@code arrTopic}, runs it as a
 * query against the index in {@code C:/data/topicIndex/querytopic<num + 1>} and
 * stores the hits in {@code listResult}.
 *
 * <p>The query is parsed against the {@code QueryWord} field with a
 * {@link CJKAnalyzer}; at most {@code QUERYWORDNUM} hits are requested. The
 * number of hits is stored in {@code countTmp}, and hit {@code i} is written to
 * {@code listResult[i]}.
 *
 * <p>The searcher is closed in a {@code finally} block; previously it was never
 * closed, so each call leaked the underlying index reader.
 *
 * @param num zero-based position of the topic in {@code arrTopic}; the index
 *            directory suffix is {@code num + 1}
 * @return {@code true} if the search completed; {@code false} if any exception
 *         occurred (its stack trace is printed)
 */
@SuppressWarnings({ "deprecation", "static-access" })
public boolean search(int num) {
    boolean signal = false;
    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_35);

    String path = "C:/data/topicIndex/querytopic" + (num + 1);
    File file = new File(path);

    // Declared outside the try block so the finally block can release it.
    IndexSearcher indexSearcher = null;
    try{
        String topic = this.arrTopic.get(num);
        //
        System.out.println("----------" + topic + "--------" + num);
        //
        FSDirectory dir = FSDirectory.open(file);

        indexSearcher = new IndexSearcher(dir);

        QueryParser parse = new QueryParser(Version.LUCENE_35, "QueryWord", analyzer);
        Query query = parse.parse(topic);

        TopDocs topDocs = indexSearcher.search(query, this.QUERYWORDNUM);
        ScoreDoc[] docs = topDocs.scoreDocs;

        System.out.println(docs.length);

        this.countTmp = docs.length;

        System.out.println(countTmp);

        // Copy each hit into this.listResult.
        // NOTE(review): assumes listResult has at least QUERYWORDNUM slots - confirm.
        for(int i = 0; i < this.countTmp; ++i) {
            TopicSearchResult resultTmp = new TopicSearchResult();

            resultTmp.setID(Integer.parseInt(indexSearcher.doc(docs[i].doc).get("ID")));
            resultTmp.setQueryWord(indexSearcher.doc(docs[i].doc).get("QueryWord"));
            resultTmp.setTimes(indexSearcher.doc(docs[i].doc).get("Times"));

            this.listResult[i] = resultTmp;
        }

        System.out.println("---------------" + this.listResult.length + "----------------------");

        signal = true;
    }catch(Exception e)  {
        signal = false;
        e.printStackTrace();
    }finally{
        if(indexSearcher != null) {
            // All hit data has already been copied out, so closing is safe here.
            try{
                indexSearcher.close();
            }catch(Exception closeFailure) {
                closeFailure.printStackTrace();
            }
        }
    }

    return signal;
}
项目:hello_luence    文件:TxtFileIndexer.java   
/**
 * Prepares the analyzer, directory and writer configuration used for indexing.
 *
 * <p>Assigns {@code chineseAnalyzer}, {@code indexDir}, {@code nioD} and
 * {@code iwc}; no {@code IndexWriter} is created here.
 *
 * @param indexDir_s path of the index directory
 * @throws Exception if the directory cannot be opened
 */
public static void IndexerInAdvance(String indexDir_s) throws Exception {
    // Analyzer for Chinese text. Another tokenizer could be used instead,
    // e.g. Paoding (code.google.com/p/paoding/).
    chineseAnalyzer = new CJKAnalyzer(Version.LUCENE_47);

    indexDir = new File(indexDir_s);
    nioD = new NIOFSDirectory(indexDir);

    iwc = new IndexWriterConfig(Version.LUCENE_47, chineseAnalyzer);
    // CREATE_OR_APPEND: create a new index if none exists, otherwise open the
    // existing one and append documents to it.
    // 2048 is the RAM buffer cap in MB.
    // NOTE(review): Lucene's docs advise staying comfortably below 2048 MB - confirm this value.
    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND).setRAMBufferSizeMB(2048);

    // NOTE(review): this is a static default and is set after iwc was built, so
    // it presumably only affects configs created later - confirm intent.
    IndexWriterConfig.setDefaultWriteLockTimeout(10);

    // Reference: http://space.itpub.net/28624388/viewspace-766134
}