我们从Python开源项目中,提取了以下14个代码示例,用于说明如何使用jieba.add_word()。
def getData(Mentions, S, E, contextMention, contextEntity, id):
    """Register mention names with jieba and collect candidate entities.

    Mutates its arguments in place:
      * S           -- gains one entry per mention name
      * id          -- gains '-' per mention, then one entry per candidate id
      * E           -- gains one list of candidate titles per mention
      * contextMention / contextEntity -- filled with name/title -> context
    """
    for mention in Mentions:
        jieba.add_word(mention.name)
        S.append(mention.name)
        id.append('-')  # placeholder id for the mention itself
        contextMention[mention.name] = mention.context
    for mention in Mentions:
        titles = []
        taken = 0
        for candidate in mention.candidates:
            # Cap the candidate list; note the original check runs before the
            # increment, so up to 101 candidates are kept — preserved as-is.
            if taken > 100:
                break
            taken += 1
            titles.append(candidate.title)
            contextEntity[candidate.title] = candidate.context
            id.append(candidate.id)
        E.append(titles)
def test(): x = u"??????????????????Nintendo???2006-11-02???????????????????????????3???????????????????????????????????????" x = u'???????????????' sentences = split_sentences(x) # jieba.add_word(u'????????', 5, 'baike') # jieba.add_word(u'Nintendo', 5, 'baike') # jieba.add_word(u'????', 5, 'baike') # jieba.add_word(u'???', 5, 'baike') # jieba.add_word(u'????', 5, 'baike') # name2fb_path = os.path.join(cache_dir, 'DatasetFinder.name2fb.sample.cache') # fb_ttls_path = os.path.join(cache_dir, 'DatasetFinder.fb_ttls.sample.cache') name2fb_path = os.path.join(cache_dir, 'DatasetFinder.name2fb.cache') fb_ttls_path = os.path.join(cache_dir, 'DatasetFinder.fb_ttls.cache') finder = DatasetFinder.load_from_cache(name2fb_path, fb_ttls_path) for x in gen_dataset(sentences[0], finder): print x print '-' * 50 for x in gen_dataset(sentences[1], finder): print x
def add_word_dict(word, freq=None, tag=None):
    """Add *word* to jieba's in-memory dictionary.

    Bug fix: the original passed the literal ``None`` for both ``freq`` and
    ``tag`` (``jieba.add_word(word, freq=None, tag=None)``), silently
    discarding whatever the caller supplied; the parameters are now
    forwarded.

    :param word: the word to register
    :param freq: optional frequency; jieba calculates one if ``None``
    :param tag:  optional part-of-speech tag
    """
    jieba.add_word(word, freq=freq, tag=tag)
def jiebaCustomSetting(self, dict_path, usr_dict_path):
    """Point jieba at a custom main dictionary, then register every word
    from the user dictionary file (one word per line, UTF-8)."""
    jieba.set_dictionary(dict_path)
    with open(usr_dict_path, 'r', encoding='utf-8') as user_dict:
        for line in user_dict:
            jieba.add_word(line.strip('\n'))
def TaibaCustomSetting(self, usr_dict):
    """Register every word from *usr_dict* (one word per line, UTF-8)
    with the Taiba segmenter."""
    with open(usr_dict, 'r', encoding='utf-8') as user_dict:
        for line in user_dict:
            Taiba.add_word(line.strip('\n'))
def __init():
    """Load the segmenter's user dictionary and tune word frequencies.

    Bug fix: ``jieba.suggest_freq(segment)`` only *computes and returns* the
    suggested frequency; without ``tune=True`` the dictionary is left
    untouched, so the original calls had no effect. ``tune=True`` is now
    passed so the adjustments actually apply.
    """
    user_dict_path = os.path.join(root_filepath, "f_seg/user_dict.txt")
    jieba.load_userdict(user_dict_path)
    jieba.add_word(u"??", 10000)
    jieba.suggest_freq((u"?", u"??"), tune=True)
    jieba.suggest_freq((u"??", u"??"), tune=True)
    jieba.suggest_freq((u"??", u"??"), tune=True)
    jieba.suggest_freq((u"??", u"?"), tune=True)
def __init():
    """Load the segmenter's user dictionary and tune word frequencies.

    Bug fix: ``jieba.suggest_freq(segment)`` only *computes and returns* the
    suggested frequency; without ``tune=True`` the dictionary is left
    untouched, so the original calls had no effect. ``tune=True`` is now
    passed so the adjustments actually apply.
    """
    user_dict_path = os.path.join(root_filepath, "f_seg/user_dict.txt")
    jieba.load_userdict(user_dict_path)
    jieba.add_word("??", 10000)
    jieba.suggest_freq(("?", "??"), tune=True)
    jieba.suggest_freq(("??", "??"), tune=True)
    jieba.suggest_freq(("??", "??"), tune=True)
    jieba.suggest_freq(("??", "?"), tune=True)
def cut(text, custom_words=('FLOAT', 'TIME', 'DATE', 'EOS')):
    """Tokenize *text* with jieba after registering placeholder tokens.

    Fix: the default for *custom_words* was a mutable list — the classic
    shared-mutable-default pitfall; replaced with an equivalent immutable
    tuple (the argument is only iterated, so callers are unaffected).

    :param text: the string to segment
    :param custom_words: iterable of tokens that must survive as whole words
    :return: list of tokens
    """
    jieba.enable_parallel(32)  # hard-coded worker count, kept from original
    for word in custom_words:
        jieba.add_word(word)
    return jieba.lcut(text)
def __init__(self, slack, custom):
    """Wire the food-bot plugin to Slack and initialise the segmenter.

    :param slack: Slack API client (must expose ``api_call``)
    :param custom: plugin config dict; reads 'data', 'colorPrint' and
        'food_channelname' — presumably injected by the bot framework
        (TODO confirm against caller).
    """
    self.slack = slack
    self.rundata = custom['data']          # persistent run data store
    self.colorPrint = custom['colorPrint']  # coloured console logger
    self.food_dir = "data/midnight.json"    # food database file
    self.food_dic = "data/dict.txt.big"     # jieba main dictionary
    # find midnight channel
    self.nochannel = False
    rep = self.slack.api_call("channels.list")
    self.channel_id = ""
    for c in rep['channels']:
        if c['name'].lower() == custom['food_channelname']:
            self.channel_id = c['id']
            break
    if not self.channel_id:
        # Without the channel the plugin is unusable; flag and bail out
        # before touching jieba.
        self.colorPrint(
            "No midnight channel",
            "Restart when midnight channel can use",
            color="FAIL")
        self.nochannel = True
        return
    jieba.set_dictionary(self.food_dic)
    jieba.initialize()
    # add and del words: replay the user's persisted dictionary edits
    for word in self.rundata.get('FOOD_addword'):
        jieba.add_word(word)
    for word in self.rundata.get('FOOD_delword'):
        jieba.del_word(word)
    self.init()
def __init__(self):
    """Set up sentiment-lexicon state and register user-defined words
    with jieba before running the main initialisation."""
    self.negative = []
    self.adverb = []
    self.questionMark = []
    # NOTE(review): hard-coded Windows path ("senitment" looks like a typo
    # for "sentiment") — confirm before reusing elsewhere.
    self.rootPath = "E:\workout\data\senitment_data"
    self.wordtypeDict, self.wordfreqDict = self.UserDefineLibrary()
    for entry in self.wordfreqDict:
        jieba.add_word(str(entry))
    self.initialize()
def main(self, datadict):
    """Dispatch one incoming Slack event for the food plugin.

    Handles: image uploads in the food channel, and the text commands
    ``food <query>``, ``foodadd <word>``, ``fooddel <word>``.

    Bug fix: the search fallback used ``except BaseException``, which also
    swallows ``KeyboardInterrupt``/``SystemExit``; narrowed to
    ``except Exception``. All runtime strings are unchanged.

    :param datadict: raw Slack event payload (dict)
    """
    if self.nochannel:
        return
    # Image shared in the food channel: index it.
    if datadict['type'] == 'message' and \
            datadict.get('subtype') == "file_share" and \
            datadict.get('channel') == self.channel_id:
        self.imageAdd(datadict['file'])
    # Only plain user messages past this point.
    if not datadict['type'] == 'message' or 'subtype' in datadict:
        return
    if datadict['text'].startswith("food "):
        # re.DOTALL so multi-line queries are captured whole.
        text = re.search(
            r"(?<=food ).*", datadict['text'], re.DOTALL).group().strip()
        payload = {
            "username": "?? Midnight",
            "icon_emoji": ":_e9_a3_9f:",
            "thread_ts": datadict.get("thread_ts") or '',
            "channel": datadict['channel']}
        try:
            ans = self.wordSearch(text)
            self.slack.api_call("chat.postMessage",
                                attachments=[self.wordParse(ans)],
                                **payload)
        except Exception:
            # Best-effort fallback: any lookup/parse failure reports
            # "not found" rather than crashing the bot.
            self.slack.api_call("chat.postMessage",
                                text="Sorry Not Found",
                                **payload)
    elif datadict['text'].startswith("foodadd "):
        text = re.search(r"(?<=foodadd ).*",
                         datadict['text']).group().strip()
        jieba.add_word(text)
        self.rundata.append("FOOD_addword", text)  # persist the edit
        self.init()
    elif datadict['text'].startswith("fooddel "):
        text = re.search(r"(?<=fooddel ).*",
                         datadict['text']).group().strip()
        jieba.del_word(text)
        self.rundata.append("FOOD_delword", text)  # persist the edit
        self.init()