我们从 Python 开源项目中提取了以下 4 个代码示例，用于说明如何使用 nltk.trigrams()。
def tokenize(text):
    """Split ``text`` into lower-cased, ASCII-only, stemmed tokens.

    The input may be a byte string or a text string. Non-ASCII characters
    (and undecodable bytes) are replaced with ``?`` before tokenizing, so
    the function never fails on encoding problems.

    Args:
        text: The raw text to tokenize (bytes or str).

    Returns:
        A list with one stem per token, as produced by the module-level
        ``porter`` stemmer (presumably an NLTK ``PorterStemmer`` -- confirm
        where it is defined). Punctuation marks are returned as their own
        single-character tokens.
    """
    # Bring byte input to text first; 'replace' keeps invalid UTF-8 from
    # raising instead of relying on a catch-all ``except``.
    if isinstance(text, bytes):
        text = text.decode('utf-8', 'replace')

    # Squash everything outside ASCII to '?'.
    text = text.encode('ascii', 'replace')
    # On Python 3 ``encode`` yields bytes; go back to ``str`` so the text
    # regex below can be applied. On Python 2 this is already ``str``.
    if not isinstance(text, str):
        text = text.decode('ascii')
    text = text.strip().lower()

    # Split off punctuation, but keep apostrophes and hyphens inside words
    # so that tokens like "don't" stay intact.
    tokens = re.findall(r"[\w'-]+|[^\s\w]", text)
    return [porter.stem(token) for token in tokens]
def posTrigramsScore(trigrams,category,pos_tags_trigrams,labels):
    """Build a mapping from each POS-tag trigram to its score for ``category``.

    Args:
        trigrams: Iterable of (hashable) trigrams to score.
        category: The category the scores are computed for.
        pos_tags_trigrams: The POS-tag trigrams passed to ``subList`` and
            ``score``.
        labels: Labels passed to ``subList`` together with
            ``pos_tags_trigrams`` (presumably one label per entry -- confirm
            against ``subList``).

    Returns:
        A dict whose keys are the items of ``trigrams`` and whose values are
        the corresponding results of ``score``.
    """
    # Keep only the POS-tag trigrams belonging to the requested category.
    in_category = subList(pos_tags_trigrams, labels, category)
    # Score every trigram against that category subset and the full set.
    return {
        tri: score(tri, category, in_category, pos_tags_trigrams)
        for tri in trigrams
    }
#calculate bigram's f1 score
def getBigrams(l):
    """Return the bigrams of every item of ``l``.

    Args:
        l: Iterable of sequences; each one is passed to ``bigrams``.

    Returns:
        A list with one entry per item of ``l``, each entry being the list
        of bigrams produced for that item.
    """
    return [list(bigrams(item)) for item in l]
#calculate trigrams of every item of the list l
def getTrigrams(l):
    """Return the trigrams of every item of ``l``.

    Args:
        l: Iterable of sequences; each one is passed to ``trigrams``.

    Returns:
        A list with one entry per item of ``l``, each entry being the list
        of trigrams produced for that item.
    """
    return [list(trigrams(item)) for item in l]
#calculate pos tag score