The following code examples, extracted from open-source Python projects, illustrate how to use jieba.cut_for_search().
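jieba.cut_for_search() first performs the standard (accurate-mode) segmentation and then re-splits longer words into shorter fragments, which improves recall when the tokens feed a search-engine index or query. It returns a generator and accepts an optional HMM flag, as several of the examples below show. A minimal, self-contained usage sketch (the sample sentence is arbitrary and used only for illustration):

# -*- coding: utf-8 -*-
# Minimal usage sketch for jieba.cut_for_search(); the sample sentence
# is arbitrary and chosen only for illustration.
import jieba

sentence = u"小明硕士毕业于中国科学院计算所"

# cut_for_search returns a generator of tokens.
tokens = list(jieba.cut_for_search(sentence))
print("/ ".join(tokens))

# HMM-based new-word discovery can be disabled explicitly.
tokens_no_hmm = list(jieba.cut_for_search(sentence, HMM=False))
print("/ ".join(tokens_no_hmm))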
def _index_docs(indexFile, writer):
    for line in indexFile:
        ind, ent_name, info, keywords, imgurl, filename, url = line.split('\t')
        print("adding %s" % ind)
        filename = "{:05d}".format(int(ind)) + '.jpg'
        keywords = keywords.replace('%', ' ')
        ent_name = " ".join(x.strip() for x in jieba.cut_for_search(ent_name))
        keywords = " ".join(x.strip() for x in jieba.cut_for_search(keywords))
        try:
            doc = Document()
            doc.add(Field('ind', ind, Field.Store.YES, Field.Index.NO))
            doc.add(Field('ent_name', ent_name, Field.Store.NO, Field.Index.ANALYZED))
            doc.add(Field('keywords', keywords, Field.Store.NO, Field.Index.ANALYZED))
            # doc.add(Field('n_colors', n_colors, Field.Store.NO, Field.Index.ANALYZED))
            writer.addDocument(doc)
        except Exception, e:
            print("Failed in indexDocs: %r" % e)
def GET(self):
    data = web.input()
    if data:
        searchword = data.searchword
    else:
        searchword = ''
    news_list = list()
    topic = list()
    if searchword:
        cut = jieba.cut_for_search(searchword)
        word_list = []
        for word in cut:
            if word not in punct and word not in Letters_and_numbers:
                word_list.append(word.encode("utf-8"))
        topK = query.calculate(word_list, config.query_return_numbers)
        for k in topK:
            data = dict()
            title, content, url = id_index.get_data(k)
            data['id'] = k
            data['content'] = content.decode("utf-8")[:config.query_return_snipper_size]
            data['title'] = title.decode("utf-8")
            data['url'] = url.decode("utf-8")
            news_list.append(data)
        del data, cut, word_list, word, topK, title, content, url
        # word2vec-based related-topic suggestions
        word2vec.cal(searchword.encode('utf-8'))
        print word2vec.result.length
        if word2vec.result.length == 0:  # no related words returned
            pass
        else:
            for i in range(config.recommand_topic_numbers):
                topic.append(word2vec.result.word[i].char)
    return render.index(searchword, news_list, topic)
def GET(self):
    data = web.input()
    if data:
        ID = data.id
        news = dict()
        title, content, url = id_index.get_data(int(ID))
        news['content'] = content.decode("utf-8")
        news['title'] = title.decode("utf-8")
        news['url'] = url.decode("utf-8")
        recomand = []
        # segment the article body in search-engine mode
        cut = jieba.cut_for_search(content)
        word_list = []
        for word in cut:
            if word not in punct and word not in Letters_and_numbers:
                # drop stop words before computing similarity
                if recommand.stopword.has_key(word.encode("utf-8")):
                    pass
                else:
                    word_list.append(word.encode("utf-8"))
        topk = recommand.calculate(word_list, config.recommand_numbers, 10)
        for i in topk:  # related articles
            # for i in recommand.dic[int(ID)]:
            if i != int(ID):
                title, content, url = id_index.get_data(i)
                recomand.append([title.decode('utf-8'), content.decode('utf-8'), url.decode('utf-8')])
        news['recommand'] = recomand
        del title, content, url, recomand
    else:
        ID = ''
        news = dict()
        news['title'] = "No Such News"
        news['content'] = "Oh No!"
        news['url'] = "#"
        news['recommand'] = [['', '', ''] for m in range(config.recommand_numbers)]
    return render.news(news)
def calculate(self, doc_id, Top_numbers=10, multiple=10):
    title, content, url = self.index.get_data(doc_id)
    cut = jieba.cut_for_search(content)
    word_list = []
    for word in cut:
        if word not in self.punct and word not in self.Letters_and_numbers:
            # skip stop words; keep the remaining tokens as UTF-8 terms
            if self.stopword.has_key(word.encode("utf-8")):
                pass
            else:
                word_list.append(word.encode("utf-8"))
    return self.FastCos.calculate(word_list, Top_numbers, multiple)
def cut_search(data):
    '''
    Segment the input text with jieba's search-engine mode and join the
    resulting tokens with '/'.
    '''
    temp_result = jieba.cut_for_search(data)
    temp_result = '/'.join(temp_result)
    return temp_result
def search_func_factory(analyzer, searcher, vm_env):
    """Search function factory"""
    def retrieve(doc):
        return doc.get('ind')

    def search(**kwargs):
        vm_env.attachCurrentThread()
        query = BooleanQuery()
        print("Searched keywords:")
        for field_name, keywords in kwargs.items():
            # assert field_name in SearchConfig.searchable_fields
            # keywords = list(filter(None, jieba.cut(keywords, cut_all=True)))
            keywords = list(filter(None, (k.strip() for k in jieba.cut_for_search(keywords))))
            for kw in keywords:
                print(kw)
            # construct query
            for kw in keywords:
                q = QueryParser(Version.LUCENE_CURRENT, field_name, analyzer).parse(kw)
                query.add(q, BooleanClause.Occur.SHOULD)
            if field_name == 'keywords':
                for kw in keywords:
                    q = QueryParser(Version.LUCENE_CURRENT, 'ent_name', analyzer).parse(kw)
                    query.add(q, BooleanClause.Occur.SHOULD)
        # search
        scoreDocs = searcher.search(query, 50).scoreDocs
        return [retrieve(searcher.doc(scoreDoc.doc)) for scoreDoc in scoreDocs]

    return search
def CUT(f):
    s = jieba.cut_for_search(f)
    return ' '.join(s)
def testCutForSearch(self):
    for content in test_contents:
        result = jieba.cut_for_search(content)
        assert isinstance(result, types.GeneratorType), "Test CutForSearch Generator error"
        result = list(result)
        assert isinstance(result, list), "Test CutForSearch error on content: %s" % content
        print(" , ".join(result), file=sys.stderr)
    print("testCutForSearch", file=sys.stderr)
def testCutForSearch_NOHMM(self):
    for content in test_contents:
        result = jieba.cut_for_search(content, HMM=False)
        assert isinstance(result, types.GeneratorType), "Test CutForSearch Generator error"
        result = list(result)
        assert isinstance(result, list), "Test CutForSearch error on content: %s" % content
        print(" , ".join(result), file=sys.stderr)
    print("testCutForSearch_NOHMM", file=sys.stderr)
def cuttest(test_sent):
    result = jieba.cut_for_search(test_sent)
    for word in result:
        print(word, "/", end=' ')
    print("")
def run(self):
    seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
    print("Full Mode:" + "/ ".join(seg_list))  # full mode
    seg_list = jieba.cut("我来到北京清华大学", cut_all=False)
    print("Default Mode:" + "/ ".join(seg_list))  # accurate mode
    seg_list = jieba.cut("他来到了网易杭研大厦")
    print(", ".join(seg_list))
    seg_list = jieba.cut_for_search("小明硕士毕业于中国科学院计算所，后在日本京都大学深造")  # search engine mode
    print(", ".join(seg_list))
def cut_for_search(self, text):
    '''
    @summary: segment text in search-engine mode and strip stop words
    ---------
    @param text: the text to segment
    ---------
    @result: list of tokens with stop words removed
    '''
    result = list(jieba.cut_for_search(text))
    result = self.__del_stop_key(result)
    return result
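The __del_stop_key helper used above is not shown in this snippet. As a rough sketch of the same stop-word filtering idea, assuming a hypothetical UTF-8 stop-word file named stopwords.txt (one word per line), not part of the original project:

# -*- coding: utf-8 -*-
# Hypothetical stop-word filter around jieba.cut_for_search();
# 'stopwords.txt' is an assumed input file, one stop word per line.
import io
import jieba

def load_stopwords(path='stopwords.txt'):
    # Read the stop-word list into a set for O(1) membership tests.
    with io.open(path, encoding='utf-8') as f:
        return set(line.strip() for line in f if line.strip())

def cut_for_search_filtered(text, stopwords):
    # Keep only search-mode tokens that are neither whitespace nor stop words.
    return [w for w in jieba.cut_for_search(text) if w.strip() and w not in stopwords]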
def extractSearchWords(self, sentence):
    return list(jieba.cut_for_search(sentence))
def seg_for_search(self, sentence):
    words = list()
    for item in jieba.cut_for_search(sentence):
        words.append(item)
    return words
def testCutForSearch(self):
    for content in test_contents:
        result = jieba.cut_for_search(content)
        assert isinstance(result, types.GeneratorType), "Test CutForSearch Generator error"
        result = list(result)
        assert isinstance(result, list), "Test CutForSearch error on content: %s" % content
        print >> sys.stderr, " , ".join(result)
    print >> sys.stderr, "testCutForSearch"
def cuttest(test_sent):
    result = jieba.cut_for_search(test_sent)
    for word in result:
        print word, "/",
    print ""
def run(self):
    seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
    print "Full Mode:" + "/ ".join(seg_list)  # full mode
    seg_list = jieba.cut("我来到北京清华大学", cut_all=False)
    print "Default Mode:" + "/ ".join(seg_list)  # accurate mode
    seg_list = jieba.cut("他来到了网易杭研大厦")
    print ", ".join(seg_list)
    seg_list = jieba.cut_for_search("小明硕士毕业于中国科学院计算所，后在日本京都大学深造")  # search engine mode
    print ", ".join(seg_list)
def test_demo1():
    text = "我来到北京清华大学"
    seg_list = jieba.cut(text, cut_all=True)
    print u"[Full mode]: ", "/ ".join(seg_list)
    seg_list = jieba.cut(text, cut_all=False)
    print u"[Accurate mode]: ", "/ ".join(seg_list)
    seg_list = jieba.cut(text)
    print u"[Default mode]: ", "/ ".join(seg_list)
    seg_list = jieba.cut("他来到了网易杭研大厦")
    print u"[New word recognition]: ", "/ ".join(seg_list)
    seg_list = jieba.cut_for_search(text)
    print u"[Search engine mode]: ", "/ ".join(seg_list)

# Read file and cut