我们从 Python 开源项目中提取了以下 16 个代码示例，用于说明如何使用 nltk.induce_pcfg()。
def induce_pcfg(start, productions):
    r"""
    Induce a PCFG grammar from a list of productions.

    The probability of a production A -> B C in a PCFG is:

    |                count(A -> B C)
    |  P(B, C | A) = ---------------       where \* is any right hand side
    |                 count(A -> \*)

    :param start: The start symbol
    :type start: Nonterminal
    :param productions: The list of productions that defines the grammar
    :type productions: list(Production)
    :return: A grammar built from ``start`` and one probabilistic
        production per distinct input production, weighted by its
        relative frequency among productions with the same left-hand side.
    :rtype: PCFG
    """
    # Local import keeps this function self-contained.
    from collections import Counter

    # pcount: how many times each distinct production occurs.
    # lcount: how many times each left-hand side occurs.
    pcount = Counter()
    lcount = Counter()

    # Both tallies are filled in a single pass, so ``productions`` is
    # consumed only once.
    for prod in productions:
        lcount[prod.lhs()] += 1
        pcount[prod] += 1

    # Every production in pcount contributed to its lhs tally, so the
    # denominator is always at least 1.
    prods = [
        ProbabilisticProduction(p.lhs(), p.rhs(), prob=count / lcount[p.lhs()])
        for p, count in pcount.items()
    ]
    return PCFG(start, prods)


#################################################################
# Helper functions for reading productions
#################################################################
def induce_pcfg(start, productions):
    r"""
    Induce a PCFG grammar from a list of productions.

    The probability of a production A -> B C in a PCFG is:

    |                count(A -> B C)
    |  P(B, C | A) = ---------------       where \* is any right hand side
    |                 count(A -> \*)

    :param start: The start symbol
    :type start: Nonterminal
    :param productions: The list of productions that defines the grammar
    :type productions: list(Production)
    :return: A grammar built from ``start`` and one probabilistic
        production per distinct input production, weighted by its
        relative frequency among productions with the same left-hand side.
    :rtype: PCFG
    """
    # Local import keeps this function self-contained.
    from collections import Counter

    # Occurrences of each distinct production.
    pcount = Counter()
    # Occurrences of each left-hand side.
    lcount = Counter()

    # Both tallies are filled in a single pass, so ``productions`` is
    # consumed only once.
    for prod in productions:
        lcount[prod.lhs()] += 1
        pcount[prod] += 1

    prods = []
    for p, count in pcount.items():
        # float() forces true division even where ``/`` on two ints
        # would truncate (Python 2 without ``division`` imported).
        prob = float(count) / lcount[p.lhs()]
        prods.append(ProbabilisticProduction(p.lhs(), p.rhs(), prob=prob))
    return PCFG(start, prods)


#################################################################
# Helper functions for reading productions
#################################################################
def pcfg_demo():
    """
    A demonstration showing how a ``PCFG`` can be created and used.

    The demo prints one production of ``toy_pcfg1`` with its lhs, rhs and
    probability, then prints ``toy_pcfg2`` with its start symbol and
    productions. It then induces a PCFG from the first three parsed
    sentences of the first treebank file and parses the first of those
    sentences with an ``InsideChartParser`` built on the induced grammar,
    printing every parse found.
    """
    from nltk.corpus import treebank
    from nltk import induce_pcfg
    from nltk.parse import pchart

    pcfg_prods = toy_pcfg1.productions()

    pcfg_prod = pcfg_prods[2]
    print('A PCFG production:', repr(pcfg_prod))
    print(' pcfg_prod.lhs() =>', repr(pcfg_prod.lhs()))
    print(' pcfg_prod.rhs() =>', repr(pcfg_prod.rhs()))
    print(' pcfg_prod.prob() =>', repr(pcfg_prod.prob()))
    print()

    grammar = toy_pcfg2
    print('A PCFG grammar:', repr(grammar))
    print(' grammar.start() =>', repr(grammar.start()))
    print(' grammar.productions() =>', end=' ')
    # The .replace(...) call line-wraps the output: each comma is followed
    # by a newline and 26 spaces of indentation.
    print(repr(grammar.productions()).replace(',', ',\n' + ' ' * 26))
    print()

    # Extract productions from three trees and induce the PCFG.
    print("Induce PCFG grammar from treebank data:")

    productions = []
    # Use the public accessor rather than the private ``_fileids`` attribute.
    item = treebank.fileids()[0]
    for tree in treebank.parsed_sents(item)[:3]:
        # Perform optional tree transformations before collecting rules.
        tree.collapse_unary(collapsePOS=False)
        tree.chomsky_normal_form(horzMarkov=2)

        productions += tree.productions()

    S = Nonterminal('S')
    grammar = induce_pcfg(S, productions)
    print(grammar)
    print()

    print("Parse sentence using induced grammar:")

    parser = pchart.InsideChartParser(grammar)
    parser.trace(3)

    # The sentence is taken from the leaves of the first parsed tree.
    # The original author noted that the corpus's separately tokenized
    # text does not work here because its tokens are different.
    sent = treebank.parsed_sents(item)[0].leaves()
    print(sent)
    for parse in parser.parse(sent):
        print(parse)