We extracted the following 50 code examples from open-source Python projects to illustrate how to use nltk.Tree().
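Before the extracted examples, here is a minimal sketch (not taken from any of the projects below) of the two most common ways to build an nltk.Tree: directly from a label and a list of children, or by parsing a bracketed string with Tree.fromstring.

# Minimal sketch: constructing an nltk.Tree by hand and from a bracketed string.
from nltk import Tree

t = Tree('S', [Tree('NP', ['the', 'dog']), Tree('VP', ['barked'])])
print(t.label())    # 'S'
print(t.leaves())   # ['the', 'dog', 'barked']

t2 = Tree.fromstring('(S (NP the dog) (VP barked))')
print(t2 == t)      # True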
def __init__(self, chunk_struct, debug_level=1):
    """
    Construct a new ``ChunkString`` that encodes the chunking of
    the text ``tagged_tokens``.

    :type chunk_struct: Tree
    :param chunk_struct: The chunk structure to be further chunked.
    :type debug_level: int
    :param debug_level: The level of debugging which should be
        applied to transformations on the ``ChunkString``.  The
        valid levels are:
            - 0: no checks
            - 1: full check on to_chunkstruct
            - 2: full check on to_chunkstruct and cursory check after
              each transformation.
            - 3: full check on to_chunkstruct and full check after
              each transformation.
        We recommend you use at least level 1.  You should
        probably use level 3 if you use any non-standard
        subclasses of ``RegexpChunkRule``.
    """
    self._root_label = chunk_struct.label()
    self._pieces = chunk_struct[:]
    tags = [self._tag(tok) for tok in self._pieces]
    self._str = '<' + '><'.join(tags) + '>'
    self._debug = debug_level
def parse(self, chunk_struct, trace=None):
    """
    Apply the chunk parser to this input.

    :type chunk_struct: Tree
    :param chunk_struct: the chunk structure to be (further) chunked
        (this tree is modified, and is also returned)
    :type trace: int
    :param trace: The level of tracing that should be used when
        parsing a text.  ``0`` will generate no tracing output;
        ``1`` will generate normal tracing output; and ``2`` or
        higher will generate verbose tracing output.  This value
        overrides the trace level value that was given to the
        constructor.
    :return: the chunked output.
    :rtype: Tree
    """
    if trace is None:
        trace = self._trace
    for i in range(self._loop):
        for parser in self._stages:
            chunk_struct = parser.parse(chunk_struct, trace=trace)
    return chunk_struct
def to_nltk_tree_general(node, attr_list=("dep_", "pos_"), level=99999):
    """Transforms a Spacy dependency tree into an NLTK tree, with certain
    Spacy tree node attributes serving as parts of the NLTK tree node label
    content for uniqueness.

    Args:
        node: The starting node from the tree in which the transformation will occur.
        attr_list: Which attributes from the Spacy nodes will be included in the NLTK node label.
        level: The maximum depth of the tree.

    Returns:
        An NLTK Tree (nltk.tree)
    """

    # transforms attributes into a node representation
    value_list = [getattr(node, attr) for attr in attr_list]
    node_representation = "/".join(value_list)

    if level == 0:
        return node_representation

    if node.n_lefts + node.n_rights > 0:
        return Tree(node_representation,
                    [to_nltk_tree_general(child, attr_list, level - 1) for child in node.children])
    else:
        return node_representation
def get_node_representation(tetre_format, token):
    """Given a format and a SpaCy node (spacy.token), returns this node
    representation using the NLTK tree (nltk.tree). It recursively builds
    an NLTK tree and returns it, not only the node itself.

    Args:
        tetre_format: The attributes of this node that will be part of its string representation.
        token: The SpaCy node itself (spacy.token).

    Returns:
        An NLTK Tree (nltk.tree)
    """
    params = tetre_format.split(",")
    node_representation = token.pos_

    if token.n_lefts + token.n_rights > 0:
        tree = Tree(node_representation,
                    [to_nltk_tree_general(child, attr_list=params, level=0) for child in token.children])
    else:
        tree = Tree(node_representation, [])

    return tree
def nltk_tree_to_qtree(tree):
    """Transforms an NLTK Tree into a QTREE. A QTREE is a string
    representation of a tree.

    For details, please see: http://www.ling.upenn.edu/advice/latex/qtree/qtreenotes.pdf

    Args:
        tree: The NLTK Tree (nltk.tree).

    Returns:
        A string with the QTREE representation of the NLTK Tree (nltk.tree).
    """
    self_result = " [ "

    if isinstance(tree, Tree):
        self_result += " " + tree.label() + " "

        if len(tree) > 0:
            self_result += " ".join([nltk_tree_to_qtree(node) for node in sorted(tree)])
    else:
        self_result += " " + str(tree) + " "

    self_result += " ] "

    return self_result
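A quick, assumed usage of nltk_tree_to_qtree on a small hand-built tree; because of the sorted() call above, siblings are emitted in sorted order rather than surface order.

from nltk import Tree

t = Tree.fromstring("(S (NP (DT the) (NN dog)) (VP (VBD barked)))")
print(nltk_tree_to_qtree(t))
# prints a bracketed qtree-style string along the lines of
# " [  S  [  VP ...  ]  [  NP ...  ]  ] " (siblings in sorted order)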
def rightBinarize(tr):
    children = []
    for child in tr:
        children.append(child)

    tmpNode = children[-1]
    i = len(children) - 2
    while i > 0:
        tmpNode2 = nltk.Tree("(X)")
        tmpNode2.append(children[i])
        tmpNode2.append(tmpNode)
        tmpNode = tmpNode2
        i -= 1

    while len(tr) > 1:
        tr.pop()
    tr.append(tmpNode)
def leftBinarize(tr):
    children = []
    for child in tr:
        children.append(child)

    tmpNode = children[0]
    i = 1
    while i < len(children) - 1:
        tmpNode2 = nltk.Tree("(X)")
        tmpNode2.append(tmpNode)
        tmpNode2.append(children[i])
        tmpNode = tmpNode2
        i += 1

    while len(tr) > 1:
        tr.pop(0)
    tr.insert(0, tmpNode)
def vvBinarize(tr):
    children = []
    vvIndex = None
    for i, child in enumerate(tr):
        children.append(child)
        if child.node in vvTags:
            vvIndex = i

    if vvIndex == None:
        print >> sys.stderr, "no vv in the children!!!", output(tr)
        return

    tmpNode = nltk.Tree("(X)")
    for i in xrange(vvIndex, len(tr)):
        tmpNode.append(children[i])
    leftBinarize(tmpNode)

    while len(tr) > vvIndex:
        tr.pop()
    tr.append(tmpNode)

    rightBinarize(tr)
def match_rules_context(tree, rules, parent_context={}):
    """Recursively matches a Tree structure with rules and returns context

    Args:
        tree (Tree): Parsed tree structure
        rules (dict): See match_rules
        parent_context (dict): Context of parent call

    Returns:
        dict: Context matched dictionary of matched rules or
            None if no match
    """
    for template, match_rules in rules.items():
        context = parent_context.copy()
        if match_template(tree, template, context):
            for key, child_rules in match_rules.items():
                child_context = match_rules_context(context[key], child_rules, context)
                if child_context:
                    for k, v in child_context.items():
                        context[k] = v
                else:
                    return None
            return context
    return None
def match_rules_context_multi(tree, rules, parent_context={}):
    """Recursively matches a Tree structure with rules and returns context

    Args:
        tree (Tree): Parsed tree structure
        rules (dict): See match_rules
        parent_context (dict): Context of parent call

    Returns:
        dict: Context matched dictionary of matched rules or
            None if no match
    """
    all_contexts = []
    for template, match_rules in rules.items():
        context = parent_context.copy()
        if match_template(tree, template, context):
            child_contextss = []
            if not match_rules:
                all_contexts += [context]
            else:
                for key, child_rules in match_rules.items():
                    child_contextss.append(match_rules_context_multi(context[key], child_rules, context))
                all_contexts += cross_context(child_contextss)
    return all_contexts
def match_template(tree, template, args=None):
    """Check if match string matches Tree structure

    Args:
        tree (Tree): Parsed Tree structure of a sentence
        template (str): String template to match. Example: "( S ( NP ) )"

    Returns:
        bool: If they match or not
    """
    tokens = get_tokens(template.split())
    cur_args = {}
    if match_tokens(tree, tokens, cur_args):
        if args is not None:
            for k, v in cur_args.items():
                args[k] = v
        logger.debug('MATCHED: {0}'.format(template))
        return True
    else:
        return False
def get_object(tree):
    """Get the object in the tree object.

    Method should remove unnecessary letters and words::

        the
        a/an
        's

    Args:
        tree (Tree): Parsed tree structure

    Returns:
        Resulting string of tree ``(Ex: "red car")``
    """
    if isinstance(tree, Tree):
        if tree.label() == 'DT' or tree.label() == 'POS':
            return ''
        words = []
        for child in tree:
            words.append(get_object(child))
        return ' '.join([_f for _f in words if _f])
    else:
        return tree
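For illustration, a hypothetical call to get_object on a small constituency subtree built with Tree.fromstring; determiners (DT) and possessive markers (POS) are dropped as described in the docstring.

from nltk import Tree

np = Tree.fromstring("(NP (DT the) (JJ red) (NN car))")
print(get_object(np))   # 'red car'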
def get_continuous_chunks(self, text):
    chunked = nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(text)))
    prev = None
    continuous_chunk = []
    current_chunk = []

    for i in chunked:
        if type(i) == nltk.Tree:
            current_chunk.append(" ".join([token for token, pos in i.leaves()]))
        elif current_chunk:
            named_entity = " ".join(current_chunk)
            if named_entity not in continuous_chunk:
                continuous_chunk.append(named_entity)
            current_chunk = []
        else:
            continue

    # flush a trailing entity when the text ends with a named-entity chunk
    if current_chunk:
        named_entity = " ".join(current_chunk)
        if named_entity not in continuous_chunk:
            continuous_chunk.append(named_entity)

    return continuous_chunk
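A standalone sketch of the same idea outside the class; it assumes the NLTK data packages used by word_tokenize, pos_tag and ne_chunk (punkt, averaged_perceptron_tagger, maxent_ne_chunker, words) have been downloaded.

import nltk

text = "Barack Obama visited Paris last week."
chunked = nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(text)))
entities = [" ".join(token for token, pos in subtree.leaves())
            for subtree in chunked
            if isinstance(subtree, nltk.Tree)]
print(entities)   # e.g. ['Barack Obama', 'Paris']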
def _to_nltk_format(self):
    from nltk import Tree
    return Tree(self.parent_relation,
                [Tree(self.pos,
                      [self.word] + [c._to_nltk_format() for c in self.children])])

    # from nltk import Tree
    # label = "({0}) {1} ({2})".format(self.parent_relation, self.word, self.pos)
    # if not self.children:
    #     return label
    # return Tree(label, [c._to_nltk_format() for c in self.children])

# Feature functions, should conform to naming _(PREDICATE/ARGUMENT)_FEATURE_(feature_name)
# and return a tuple of (value, span)

# return the head of the
def find_tree_matches(tree, pat):
    """
    Get all subtrees matching pattern

    @type tree: DepTree
    @param tree: tree in which to search for matches
    @type pat: nltk.Tree
    @param pat: a pattern to match against tree
    @rtype: list [unification of pat]
    @return: all possible unifications of pat in tree
    """
    ret = []
    curMatch = tree.match(pat)
    if curMatch:
        ret.append(curMatch)
    for c in tree.children:
        ret.extend(find_tree_matches(c, pat))
    return ret
def bft(tree):
    """ Perform a breadth-first traversal of a tree.
    Return the nodes in a list in level-order.

    Args:
        tree: a tree node

    Returns:
        lst: a list of tree nodes in left-to-right level-order
    """
    lst = []
    queue = Queue.Queue()
    queue.put(tree)
    while not queue.empty():
        node = queue.get()
        lst.append(node)
        for child in node:
            if isinstance(child, nltk.Tree):
                queue.put(child)
    return lst
def traverse_tree(tree, pro):
    """ Traverse a tree in a left-to-right, breadth-first manner,
    proposing any NP encountered as an antecedent.
    Returns the tree and the position of the first possible antecedent.

    Args:
        tree: the tree being searched
        pro: the pronoun being resolved (string)
    """
    # Initialize a queue and enqueue the root of the tree
    queue = Queue.Queue()
    queue.put(tree)
    while not queue.empty():
        node = queue.get()
        # if the node is an NP, return it as a potential antecedent
        if "NP" in node.label() and match(tree, get_pos(tree, node), pro):
            return tree, get_pos(tree, node)
        for child in node:
            if isinstance(child, nltk.Tree):
                queue.put(child)
    # if no antecedent is found, return None
    return None, None
def calc(param):
    p = ["He", "he", "Him", "him", "She", "she", "Her", "her",
         "It", "it", "They", "they"]
    r = ["Himself", "himself", "Herself", "herself",
         "Itself", "itself", "Themselves", "themselves"]
    fname = param[1]
    pro = param[2]
    with open(fname) as f:
        sents = f.readlines()
    trees = [Tree.fromstring(s) for s in sents]
    pos = get_pos(trees[-1], pro)
    pos = pos[:-1]
    if pro in p:
        tree, pos = hobbs(trees, pos)
        #for t in trees:
        #    print t, '\n'
        #print "Proposed antecedent for '"+pro+"':", tree[pos]
        return tree, tree[pos]
    elif pro in r:
        tree, pos = resolve_reflexive(trees, pos)
        #for t in trees:
        #    print t, '\n'
        #print "Proposed antecedent for '"+pro+"':", tree[pos]
        return tree, tree[pos]
def create_xsvversion_of_tree(t):
    founded, sub_tree = find_deepleftfirst_verb(t)
    if sub_tree != None:
        t._label = "S1"
        temp = Tree("S", [sub_tree] + [t])
        return temp
    return t
def _tag(self, tok):
    if isinstance(tok, tuple):
        return tok[1]
    elif isinstance(tok, Tree):
        return tok.label()
    else:
        raise ValueError('chunk structures must contain tagged '
                         'tokens or trees')
def to_chunkstruct(self, chunk_label='CHUNK'):
    """
    Return the chunk structure encoded by this ``ChunkString``.

    :rtype: Tree
    :raise ValueError: If a transformation has generated an
        invalid chunkstring.
    """
    if self._debug > 0:
        self._verify(self._str, 1)

    # Use this alternating list to create the chunkstruct.
    pieces = []
    index = 0
    piece_in_chunk = 0
    for piece in re.split('[{}]', self._str):

        # Find the list of tokens contained in this piece.
        length = piece.count('<')
        subsequence = self._pieces[index:index+length]

        # Add this list of tokens to our pieces.
        if piece_in_chunk:
            pieces.append(Tree(chunk_label, subsequence))
        else:
            pieces += subsequence

        # Update index, piece_in_chunk
        index += length
        piece_in_chunk = not piece_in_chunk

    return Tree(self._root_label, pieces)
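A minimal sketch of the ChunkString round trip used by to_chunkstruct, assuming the classes above come from nltk.chunk.regexp: wrap a tagged sentence in a Tree, apply one ChunkRule, and read the chunk structure back.

from nltk import Tree
from nltk.chunk.regexp import ChunkString, ChunkRule

sent = Tree('S', [('the', 'DT'), ('dog', 'NN'), ('barked', 'VBD')])
cs = ChunkString(sent)
ChunkRule('<DT><NN>', 'chunk a determiner followed by a noun').apply(cs)
print(cs.to_chunkstruct('NP'))
# (S (NP the/DT dog/NN) barked/VBD)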
def parse(self, chunk_struct, trace=None):
    """
    :type chunk_struct: Tree
    :param chunk_struct: the chunk structure to be (further) chunked
    :type trace: int
    :param trace: The level of tracing that should be used when
        parsing a text.  ``0`` will generate no tracing output;
        ``1`` will generate normal tracing output; and ``2`` or
        higher will generate verbose tracing output.  This value
        overrides the trace level value that was given to the
        constructor.
    :rtype: Tree
    :return: a chunk structure that encodes the chunks in a given
        tagged sentence.  A chunk is a non-overlapping linguistic
        group, such as a noun phrase.  The set of chunks
        identified in the chunk structure depends on the rules
        used to define this ``RegexpChunkParser``.
    """
    if len(chunk_struct) == 0:
        print('Warning: parsing empty text')
        return Tree(self._root_label, [])

    try:
        chunk_struct.label()
    except AttributeError:
        chunk_struct = Tree(self._root_label, chunk_struct)

    # Use the default trace value?
    if trace is None:
        trace = self._trace

    chunkstr = ChunkString(chunk_struct)

    # Apply the sequence of rules to the chunkstring.
    if trace:
        verbose = (trace > 1)
        self._trace_apply(chunkstr, verbose)
    else:
        self._notrace_apply(chunkstr)

    # Use the chunkstring to create a chunk structure.
    return chunkstr.to_chunkstruct(self._chunk_label)
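In practice this parse method is usually reached through nltk.RegexpParser, which builds the RegexpChunkParser stages from a grammar string; a small assumed usage:

import nltk

grammar = "NP: {<DT>?<JJ>*<NN>}"
parser = nltk.RegexpParser(grammar)
sentence = [('the', 'DT'), ('little', 'JJ'), ('dog', 'NN'), ('barked', 'VBD')]
print(parser.parse(sentence))
# (S (NP the/DT little/JJ dog/NN) barked/VBD)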
def to_nltk_tree(node):
    """Creates a fixed representation of a Spacy dependency tree as an NLTK tree.
    This fixed representation will be formed by the Spacy node attributes:
    dep_, orth_ and pos_.

    Args:
        node: The starting node from the tree in which the transformation will occur.

    Returns:
        An NLTK Tree (nltk.tree)
    """
    if node.n_lefts + node.n_rights > 0:
        return Tree(node.dep_ + "/" + node.orth_ + "/" + node.pos_,
                    [to_nltk_tree(child) for child in node.children])
    else:
        return node.dep_ + "/" + node.orth_ + "/" + node.pos_
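A hedged usage sketch for to_nltk_tree: it assumes spaCy and the en_core_web_sm model are installed, and pretty-prints the dependency tree of each sentence (a bare string comes back for single-token sentences).

import spacy
from nltk import Tree

nlp = spacy.load("en_core_web_sm")
doc = nlp("The quick brown fox jumps over the lazy dog.")
for sent in doc.sents:
    result = to_nltk_tree(sent.root)
    if isinstance(result, Tree):
        result.pretty_print()   # draws the tree as ASCII art
    else:
        print(result)           # leaf-only case: "dep/orth/pos" string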
def isLegalTree(line, i):
    try:
        t = nltk.Tree(line)
        pt = nltk.ParentedTree(line)
    except ValueError:
        print >> sys.stderr, "illegal tree!!! #" + str(i)
        print >> sys.stderr, line
        exit(1)
def binarize(line, lan="en"):
    assert lan in ['en', 'ch'], "illegal language (en or ch): %s" % lan
    root = nltk.Tree(line)
    stack = [root]
    while stack:
        curNode = stack.pop()
        if len(curNode) > 2:
            if curNode.node == 'NP':
                rightBinarize(curNode)
            elif curNode.node == 'VP':
                if lan == 'en':
                    vvBinarize(curNode)
                elif lan == 'ch':
                    if curNode[0].node in vvTags:
                        leftBinarize(curNode)
                    elif curNode[-1].node in vvTags:
                        rightBinarize(curNode)
                    else:
                        vvBinarize(curNode)
        for child in curNode:
            #print >> sys.stderr, child
            if child.height() > 2:
                stack.append(child)
            continue
    return ' '.join(root.pprint().split()) + '\n'
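The binarizer above targets the old Python 2 / NLTK 2 API, where nltk.Tree(line) parsed a bracketed string and node labels lived in the .node attribute. A rough sketch of the corresponding entry points under NLTK 3, shown only to clarify the API shift (the bracketed example string is made up for illustration):

import nltk

# Under NLTK 3, the bracketed-string constructor and the .node attribute
# used above become Tree.fromstring() and .label() respectively.
line = "(ROOT (NP (DT the) (NN dog)) (VP (VBD barked)))"
root = nltk.Tree.fromstring(line)
print(root.label())                       # 'ROOT'
print(' '.join(root.pformat().split()))   # one-line bracketed form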