我们从Python开源项目中,提取了以下14个代码示例,用于说明如何使用jieba.add_word()。
def getData(Mentions, S, E, contextMention, contextEntity, id):
    """Register mention names with jieba and collect candidate entities.

    Mutates its arguments in place:
      * S           -- gains one entry per mention name
      * id          -- gains '-' per mention, then one entry per candidate id
      * E           -- gains one list of candidate titles per mention
      * contextMention / contextEntity -- filled with name/title -> context
    """
    for mention in Mentions:
        jieba.add_word(mention.name)
        S.append(mention.name)
        id.append('-')  # placeholder id for the mention itself
        contextMention[mention.name] = mention.context
    for mention in Mentions:
        titles = []
        taken = 0
        for candidate in mention.candidates:
            # Cap the candidate list; note the original check runs before the
            # increment, so up to 101 candidates are kept — preserved as-is.
            if taken > 100:
                break
            taken += 1
            titles.append(candidate.title)
            contextEntity[candidate.title] = candidate.context
            id.append(candidate.id)
        E.append(titles)
def test(): x = u"??????????????????Nintendo???2006-11-02???????????????????????????3???????????????????????????????????????" x = u'???????????????' sentences = split_sentences(x) # jieba.add_word(u'????????', 5, 'baike') # jieba.add_word(u'Nintendo', 5, 'baike') # jieba.add_word(u'????', 5, 'baike') # jieba.add_word(u'???', 5, 'baike') # jieba.add_word(u'????', 5, 'baike') # name2fb_path = os.path.join(cache_dir, 'DatasetFinder.name2fb.sample.cache') # fb_ttls_path = os.path.join(cache_dir, 'DatasetFinder.fb_ttls.sample.cache') name2fb_path = os.path.join(cache_dir, 'DatasetFinder.name2fb.cache') fb_ttls_path = os.path.join(cache_dir, 'DatasetFinder.fb_ttls.cache') finder = DatasetFinder.load_from_cache(name2fb_path, fb_ttls_path) for x in gen_dataset(sentences[0], finder): print x print '-' * 50 for x in gen_dataset(sentences[1], finder): print x
def add_word_dict(word, freq=None, tag=None):
    """Add *word* to jieba's in-memory dictionary.

    Bug fix: the original passed the literal ``None`` for both ``freq`` and
    ``tag`` (``jieba.add_word(word, freq=None, tag=None)``), silently
    discarding whatever the caller supplied; the parameters are now
    forwarded.

    :param word: the word to register
    :param freq: optional frequency; jieba calculates one if ``None``
    :param tag:  optional part-of-speech tag
    """
    jieba.add_word(word, freq=freq, tag=tag)
def jiebaCustomSetting(self, dict_path, usr_dict_path):
    """Point jieba at a custom main dictionary, then register every word
    from the user dictionary file (one word per line, UTF-8)."""
    jieba.set_dictionary(dict_path)
    with open(usr_dict_path, 'r', encoding='utf-8') as user_dict:
        for line in user_dict:
            jieba.add_word(line.strip('\n'))
def TaibaCustomSetting(self, usr_dict):
    """Register every word from *usr_dict* (one word per line, UTF-8)
    with the Taiba segmenter."""
    with open(usr_dict, 'r', encoding='utf-8') as user_dict:
        for line in user_dict:
            Taiba.add_word(line.strip('\n'))
def __init():
    """Load the segmenter's user dictionary and tune word frequencies.

    Bug fix: ``jieba.suggest_freq(segment)`` only *computes and returns* the
    suggested frequency; without ``tune=True`` the dictionary is left
    untouched, so the original calls had no effect. ``tune=True`` is now
    passed so the adjustments actually apply.
    """
    user_dict_path = os.path.join(root_filepath, "f_seg/user_dict.txt")
    jieba.load_userdict(user_dict_path)
    jieba.add_word(u"??", 10000)
    jieba.suggest_freq((u"?", u"??"), tune=True)
    jieba.suggest_freq((u"??", u"??"), tune=True)
    jieba.suggest_freq((u"??", u"??"), tune=True)
    jieba.suggest_freq((u"??", u"?"), tune=True)
def __init():
    """Load the segmenter's user dictionary and tune word frequencies.

    Bug fix: ``jieba.suggest_freq(segment)`` only *computes and returns* the
    suggested frequency; without ``tune=True`` the dictionary is left
    untouched, so the original calls had no effect. ``tune=True`` is now
    passed so the adjustments actually apply.
    """
    user_dict_path = os.path.join(root_filepath, "f_seg/user_dict.txt")
    jieba.load_userdict(user_dict_path)
    jieba.add_word("??", 10000)
    jieba.suggest_freq(("?", "??"), tune=True)
    jieba.suggest_freq(("??", "??"), tune=True)
    jieba.suggest_freq(("??", "??"), tune=True)
    jieba.suggest_freq(("??", "?"), tune=True)
def cut(text, custom_words=('FLOAT', 'TIME', 'DATE', 'EOS')):
    """Tokenize *text* with jieba after registering placeholder tokens.

    Fix: the default for *custom_words* was a mutable list — the classic
    shared-mutable-default pitfall; replaced with an equivalent immutable
    tuple (the argument is only iterated, so callers are unaffected).

    :param text: the string to segment
    :param custom_words: iterable of tokens that must survive as whole words
    :return: list of tokens
    """
    jieba.enable_parallel(32)  # hard-coded worker count, kept from original
    for word in custom_words:
        jieba.add_word(word)
    return jieba.lcut(text)
def __init__(self, slack, custom):
    """Wire the food-bot plugin to Slack and initialise the segmenter.

    :param slack: Slack API client (must expose ``api_call``)
    :param custom: plugin config dict; reads 'data', 'colorPrint' and
        'food_channelname' — presumably injected by the bot framework
        (TODO confirm against caller).
    """
    self.slack = slack
    self.rundata = custom['data']          # persistent run data store
    self.colorPrint = custom['colorPrint']  # coloured console logger
    self.food_dir = "data/midnight.json"    # food database file
    self.food_dic = "data/dict.txt.big"     # jieba main dictionary
    # find midnight channel
    self.nochannel = False
    rep = self.slack.api_call("channels.list")
    self.channel_id = ""
    for c in rep['channels']:
        if c['name'].lower() == custom['food_channelname']:
            self.channel_id = c['id']
            break
    if not self.channel_id:
        # Without the channel the plugin is unusable; flag and bail out
        # before touching jieba.
        self.colorPrint(
            "No midnight channel",
            "Restart when midnight channel can use",
            color="FAIL")
        self.nochannel = True
        return
    jieba.set_dictionary(self.food_dic)
    jieba.initialize()
    # add and del words: replay the user's persisted dictionary edits
    for word in self.rundata.get('FOOD_addword'):
        jieba.add_word(word)
    for word in self.rundata.get('FOOD_delword'):
        jieba.del_word(word)
    self.init()
def __init__(self):
    """Set up sentiment-lexicon state and register user-defined words
    with jieba before running the main initialisation."""
    self.negative = []
    self.adverb = []
    self.questionMark = []
    # NOTE(review): hard-coded Windows path ("senitment" looks like a typo
    # for "sentiment") — confirm before reusing elsewhere.
    self.rootPath = "E:\workout\data\senitment_data"
    self.wordtypeDict, self.wordfreqDict = self.UserDefineLibrary()
    for entry in self.wordfreqDict:
        jieba.add_word(str(entry))
    self.initialize()
def main(self, datadict):
    """Dispatch one incoming Slack event for the food plugin.

    Handles: image uploads in the food channel, and the text commands
    ``food <query>``, ``foodadd <word>``, ``fooddel <word>``.

    Bug fix: the search fallback used ``except BaseException``, which also
    swallows ``KeyboardInterrupt``/``SystemExit``; narrowed to
    ``except Exception``. All runtime strings are unchanged.

    :param datadict: raw Slack event payload (dict)
    """
    if self.nochannel:
        return
    # Image shared in the food channel: index it.
    if datadict['type'] == 'message' and \
            datadict.get('subtype') == "file_share" and \
            datadict.get('channel') == self.channel_id:
        self.imageAdd(datadict['file'])
    # Only plain user messages past this point.
    if not datadict['type'] == 'message' or 'subtype' in datadict:
        return
    if datadict['text'].startswith("food "):
        # re.DOTALL so multi-line queries are captured whole.
        text = re.search(
            r"(?<=food ).*", datadict['text'], re.DOTALL).group().strip()
        payload = {
            "username": "?? Midnight",
            "icon_emoji": ":_e9_a3_9f:",
            "thread_ts": datadict.get("thread_ts") or '',
            "channel": datadict['channel']}
        try:
            ans = self.wordSearch(text)
            self.slack.api_call("chat.postMessage",
                                attachments=[self.wordParse(ans)],
                                **payload)
        except Exception:
            # Best-effort fallback: any lookup/parse failure reports
            # "not found" rather than crashing the bot.
            self.slack.api_call("chat.postMessage",
                                text="Sorry Not Found",
                                **payload)
    elif datadict['text'].startswith("foodadd "):
        text = re.search(r"(?<=foodadd ).*",
                         datadict['text']).group().strip()
        jieba.add_word(text)
        self.rundata.append("FOOD_addword", text)  # persist the edit
        self.init()
    elif datadict['text'].startswith("fooddel "):
        text = re.search(r"(?<=fooddel ).*",
                         datadict['text']).group().strip()
        jieba.del_word(text)
        self.rundata.append("FOOD_delword", text)  # persist the edit
        self.init()