我们从Python开源项目中,提取了以下4个代码示例,用于说明如何使用nltk.RegexpTagger()。
def train(self): train_data = nltk.corpus.brown.tagged_sents(categories='news') regexp_tagger = nltk.RegexpTagger([ (r'^-?[0-9]+(.[0-9]+)?$', 'CD'), (r'(-|:|;)$', ':'), (r'\'*$', 'MD'), (r'(The|the|A|a|An|an)$', 'AT'), (r'.*able$', 'JJ'), (r'^[A-Z].*$', 'NNP'), (r'.*ness$', 'NN'), (r'.*ly$', 'RB'), (r'.*s$', 'NNS'), (r'.*ing$', 'VBG'), (r'.*ed$', 'VBD'), (r'.*', 'NN'), ]) unigram_tagger = nltk.UnigramTagger(train_data, backoff=regexp_tagger) self.tagger = nltk.BigramTagger(train_data, backoff=unigram_tagger) self._trained = True return None