我们从 Python 开源项目中提取了以下 1 个代码示例，用于说明如何使用 nltk.ChunkParserI()。
# Trained chunk parsers keyed by chunk type ("NP", "VP").  Training walks the
# whole CoNLL-2000 training split, so it is done once per process rather than
# on every chunker() call.
_CHUNKER_CACHE = {}


def _train_chunker(chunk_type):
    """Train a chunk parser for *chunk_type* on the CoNLL-2000 'train.txt' split."""

    class ChunkParser(nltk.ChunkParserI):
        """Chunk parser that predicts chunk tags from POS tags with a trigram tagger."""

        def __init__(self, train_sents):
            # Drop the words: the tagger is trained on (POS tag, chunk tag) pairs.
            train_data = [
                [(pos, chunktag)
                 for _word, pos, chunktag in nltk.chunk.tree2conlltags(tree)]
                for tree in train_sents
            ]
            # NOTE(review): no backoff tagger is configured, so contexts not
            # seen in training presumably receive a None chunk tag (i.e. end
            # up outside any chunk) -- consider a bigram/unigram backoff.
            self.tagger = nltk.TrigramTagger(train_data)

        def parse(self, sentence):
            """Return a chunk tree for *sentence*, a list of (word, POS) pairs."""
            pos_tags = [pos for _word, pos in sentence]
            chunktags = [chunktag for _pos, chunktag in self.tagger.tag(pos_tags)]
            conlltags = [
                (word, pos, chunktag)
                for (word, pos), chunktag in zip(sentence, chunktags)
            ]
            return nltk.chunk.util.conlltags2tree(conlltags)

    train_sents = conll2000.chunked_sents('train.txt', chunk_types=[chunk_type])
    return ChunkParser(train_sents)


def _get_chunker(chunk_type):
    """Return the cached chunk parser for *chunk_type*, training it on first use."""
    if chunk_type not in _CHUNKER_CACHE:
        _CHUNKER_CACHE[chunk_type] = _train_chunker(chunk_type)
    return _CHUNKER_CACHE[chunk_type]


def chunker(sent):
    """Extract noun phrases and verb phrases from a raw sentence.

    The sentence is tokenized and POS-tagged with NLTK, then chunked twice:
    once by a parser trained on NP chunks and once by a parser trained on
    VP chunks.

    Args:
        sent: The sentence to analyse, as a plain string.

    Returns:
        A list of single-key dicts.  Every noun phrase comes first, in
        sentence order, as ``{"NP": "<words>"}``; every verb phrase follows
        as ``{"VP": "<words>"}``.  The words of a phrase are joined by single
        spaces.  An input with no tokens yields an empty list.
    """
    tagged = nltk.pos_tag(nltk.word_tokenize(sent))
    if not tagged:
        # Nothing to chunk; skip (possibly first-time) model training.
        return []

    phrases = []
    for chunk_type in ("NP", "VP"):
        tree = _get_chunker(chunk_type).parse(tagged)
        # Top-level children are either (word, POS) tuples for tokens outside
        # any chunk, or Tree nodes holding the tokens of one chunk.
        phrases.extend(
            {chunk_type: " ".join(word for word, _pos in subtree)}
            for subtree in tree
            if isinstance(subtree, nltk.Tree)
        )
    return phrases