我们从Python开源项目中，提取了以下5个代码示例，用于说明如何使用textblob.Word()。
def _suggest_synonyms(self, target_words, words):
    """Suggest target verbs that are close (but not identical) to given words.

    Each item of ``target_words`` and ``words`` is indexed with ``[0]`` to
    obtain the word text (presumably ``(word, tag)`` pairs -- confirm against
    the caller).  Only the first verb synset of each word is considered.

    Returns:
        A list of ``(word_name, target_name)`` tuples, where each name is the
        part of the synset name before the first ``"."``.  A pair is emitted
        when the path similarity between the two synsets is strictly between
        0.4 and 1.0.
    """
    # Resolve the targets once: keep the first verb synset of every target
    # that has one.  This is loop-invariant, so it is built outside the loop.
    target_synsets = [
        synsets[0]
        for synsets in (Word(w[0]).get_synsets(pos=VERB) for w in target_words)
        if synsets
    ]

    suggestions = []
    for w in words:
        synsets = Word(w[0]).get_synsets(pos=VERB)
        if not synsets:
            continue
        candidate = synsets[0]
        candidate_name = candidate.name().split(".")[0]

        # NOTE(review): an exact match only suppresses suggestions for targets
        # that come *after* it in ``target_words``; near matches seen earlier
        # are still suggested.  Kept as in the original -- confirm intent.
        found = False
        for target in target_synsets:
            similarity = candidate.path_similarity(target)
            if similarity is None:
                # No connecting path; comparing None with floats would raise
                # TypeError on Python 3.
                continue
            if similarity == 1.0:
                found = True
            if 1.0 > similarity > 0.4 and not found:
                suggestions.append((candidate_name, target.name().split(".")[0]))
    return suggestions
def resolution(surfaceName):
    """Return the normalized form of an entity surface name.

    The name is split on whitespace; each token is lowercased and then
    lemmatized with ``Word(...).lemmatize()``.  The resulting tokens are
    joined back together with single spaces.
    """
    lemmas = []
    for token in surfaceName.split():
        lowered = token.lower()
        lemmas.append(Word(lowered).lemmatize())
    return " ".join(lemmas)
def normalize(self, word, tag="N"):
    """Lemmatize ``word`` according to ``tag`` and return it lowercased.

    The tag prefix selects the part of speech passed to ``lemmatize``:
    ``"V"`` -> VERB, ``"RB"`` -> ADV, ``"J"`` -> ADJ.  Any other tag,
    including the default ``"N"``, falls back to NOUN.
    """
    # Checked in order; the first matching prefix wins.
    prefix_table = (
        ("V", VERB),
        ("RB", ADV),
        ("J", ADJ),
    )
    pos = next(
        (candidate for prefix, candidate in prefix_table if tag.startswith(prefix)),
        NOUN,
    )
    lemma = Word(word).lemmatize(pos)
    return lemma.lower()
def to_files_special_dict(dict_name):
    """Write every word in the module-level ``words_by_pos`` to a text file.

    The output path is ``"out_files/<dict_name>.txt"`` (``dict_name`` is passed
    through ``str``).  The file is opened in ``"w"`` mode, so existing content
    is overwritten; the ``out_files`` directory is not created here.  Each word
    is echoed to stdout and written on its own line.  Words that cannot be
    written (wrong type or encoding failure) are skipped and not counted.
    A summary line with the number of saved words is printed at the end.
    """
    count = 0
    # ``with`` guarantees the file is closed even if an error escapes the loop.
    with open("out_files/" + str(dict_name) + ".txt", "w") as writer:
        for t in words_by_pos:
            for w in words_by_pos[t]:
                # Single-argument print call: same output on Python 2 and 3.
                print("\t\t %s" % (w,))
                try:
                    writer.write(w)
                    writer.write("\n")
                except (TypeError, UnicodeError):
                    # Best effort: skip words that cannot be written, without
                    # swallowing KeyboardInterrupt/SystemExit like a bare
                    # ``except:`` would.
                    continue
                count += 1
    print("[+] saved to files. Saved %d words." % count)


# Disabled script, kept for reference (line breaks and indentation were
# reconstructed from a collapsed single line).
# rawjson = open("secret_societies_corpora.json").read() #puts the file as a big string into the variable rawjson
# data = json.loads(rawjson) #json.loads take a string and turns it into a data structure
# for elem in data:
#     w = elem["name"]
#     pos = tag(w)[-1][1]
#     # print "-"*20
#     # print w, pos
#     add_word(w,pos)
#     if pos.startswith("VB") and Word(w).lemmatize('v') is not w:
#         w = Word(w).lemmatize('v')
#         pos = tag("to " + w)[-1][1]
#         # print "-"*5
#         # print w, pos
#         add_word(w,pos)
#     if pos.startswith("NN") and Word(w).lemmatize('n') is not w:
#         w = Word(w).lemmatize('n')
#         pos = tag(w)[-1][1]
#         # print "-"*5
#         # print w, pos
#         add_word(w,pos)
#     if pos.startswith("JJ") and Word(w).lemmatize('a') is not w:
#         w = Word(w).lemmatize('a')
#         pos = tag("a " + w + " thing")[-2][1]
#         # print "-"*5
#         # print w, pos
#         add_word(w,pos)