The following 43 code examples, extracted from open-source Python projects, illustrate how to use nltk.tree().
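As a quick orientation before the project snippets, here is a minimal sketch (not taken from any of the projects below) of building an nltk.tree.Tree by hand and reading one back from a bracketed string:

from nltk.tree import Tree

# Build a small chunk-style tree by hand: a label plus a list of children.
np_chunk = Tree('NP', [('the', 'DT'), ('dog', 'NN')])
sent = Tree('S', [np_chunk, ('barked', 'VBD')])

print(sent.label())    # 'S'
print(sent.leaves())   # [('the', 'DT'), ('dog', 'NN'), ('barked', 'VBD')]

# The same kind of structure can be parsed from a bracketed string.
parsed = Tree.fromstring("(S (NP (DT the) (NN dog)) (VP (VBD barked)))")
print(parsed.height(), parsed.leaves())   # 4 ['the', 'dog', 'barked']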
from nltk.tree import Tree

def tree2conlltags(t):
    """
    Return a list of 3-tuples containing ``(word, tag, IOB-tag)``.
    Convert a tree to the CoNLL IOB tag format.

    :param t: The tree to be converted.
    :type t: Tree
    :rtype: list(tuple)
    """
    tags = []
    for child in t:
        try:
            category = child.label()
            prefix = "B-"
            for contents in child:
                if isinstance(contents, Tree):
                    raise ValueError(
                        "Tree is too deeply nested to be printed in CoNLL format")
                tags.append((contents[0], contents[1], prefix + category))
                prefix = "I-"
        except AttributeError:
            tags.append((child[0], child[1], "O"))
    return tags
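A quick sketch of calling it on a small, invented chunk tree (the tree below is illustrative, not from the source project):

from nltk.tree import Tree

chunked = Tree('S', [
    Tree('NP', [('the', 'DT'), ('dog', 'NN')]),
    ('barked', 'VBD'),
])
print(tree2conlltags(chunked))
# [('the', 'DT', 'B-NP'), ('dog', 'NN', 'I-NP'), ('barked', 'VBD', 'O')]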
def ieer_headlines():
    from nltk.corpus import ieer
    from nltk.tree import Tree

    print("IEER: First 20 Headlines")
    print("=" * 45)

    trees = [(doc.docno, doc.headline)
             for file in ieer.fileids()
             for doc in ieer.parsed_docs(file)]
    for tree in trees[:20]:
        print()
        print("%s:\n%s" % tree)


#############################################
## Dutch CONLL2002: take_on_role(PER, ORG
#############################################
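If the ieer corpus has been downloaded (nltk.download('ieer')), the corpus reader used above can be exercised directly; a minimal sketch, assuming 'NYT_19980315' is one of the standard IEER fileids:

from nltk.corpus import ieer

doc = ieer.parsed_docs('NYT_19980315')[0]
print(doc.docno)
print(doc.headline)   # the headline as an nltk.tree.Tree with named-entity nodes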
def isNegationWord(token):
    # _stem_, NEGATION_VERBS and NEGATION_ADVERBS are helpers/constants defined
    # elsewhere in the source project.
    import nltk
    if not isinstance(token, nltk.tree.ParentedTree):
        print("something went terribly wrong with", token)
        return None
    if token.label().startswith("V") or token.label().startswith("J"):
        word = token[0]
        if not isinstance(word, str):
            return False
        word = word.lower()
        word = _stem_(word)
        stemmed_negation_verbs = [_stem_(verb) for verb in NEGATION_VERBS]
        return word in stemmed_negation_verbs
    word = token[0]
    if not isinstance(word, str):
        return False
    word = word.lower()
    word = _stem_(word)
    return word in NEGATION_ADVERBS
def findSentencePTreeToken(sentence, keyword):
    import nltk
    from nltk.tree import ParentedTree

    stemmed = _lemma_(keyword)
    tmp = proc.parse_doc(sentence)
    i = 0
    numSentences = len(tmp['sentences'])
    rs = []
    for i in range(0, numSentences):
        p = tmp['sentences'][i]['parse']
        ptree = ParentedTree.fromstring(p)
        # rs = []
        for i in range(0, len(ptree.leaves())):
            tree_position = ptree.leaf_treeposition(i)
            node = ptree[tree_position]
            if _stem_(node) == stemmed:
                tree_position = tree_position[0:len(tree_position) - 1]
                rs.append(ptree[tree_position])
        # if len(rs) > 0:
        #     return rs
    return rs
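The key nltk.tree calls above are ParentedTree.fromstring and leaf_treeposition; a self-contained sketch of that pattern, using an invented parse string in place of the project's parser output:

from nltk.tree import ParentedTree

ptree = ParentedTree.fromstring("(S (NP (DT the) (NN dog)) (VP (VBD barked)))")
for i in range(len(ptree.leaves())):
    pos = ptree.leaf_treeposition(i)   # index path from the root down to leaf i
    word = ptree[pos]                  # the leaf string itself, e.g. 'dog'
    preterminal = ptree[pos[:-1]]      # the POS subtree above it, e.g. (NN dog)
    print(word, preterminal.label())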
import numpy as np

def trees2label_sents(trees, only_root=False, pos_neg_label=False,
                      remove_double_count_sentence=False):
    # tree2label_sent and label_sents2uni_sent are helpers defined elsewhere in
    # the source project.
    # print 'trees2label_sents', flag_word_lower, flag_stemmer, flag_remove_double_count_sentence, only_root, pos_neg_label
    # sys.exit()
    lss = []
    for tree in trees:
        lss_tmp = tree2label_sent(tree)
        if pos_neg_label and lss_tmp[0][0] == 2:
            continue
        if pos_neg_label:
            lss_tmp2 = [[1 if ls[0] > 2 else 0, ls[1]] for ls in lss_tmp]
        else:
            lss_tmp2 = lss_tmp
        if len(lss_tmp2) > 0 and only_root:
            lss.append(lss_tmp2[0])
        elif len(lss_tmp2) > 0:
            lss.extend(lss_tmp2)
    if remove_double_count_sentence:
        uss = label_sents2uni_sent(lss)
        lss_new = [[np.mean([lss[id][0] for id in uss[s]]), lss[uss[s][0]][1]]
                   for s in uss]
        return lss_new
    else:
        return lss
def tree(self):
    """
    Starting with the ``root`` node, build a dependency tree using the NLTK
    ``Tree`` constructor. Dependency labels are omitted.
    """
    node = self.root
    word = node['word']
    deps = chain.from_iterable(node['deps'].values())
    return Tree(word, [self._tree(dep) for dep in deps])
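This method belongs to NLTK's DependencyGraph; a minimal sketch of reaching it through the public API, with an invented three-word sentence in 4-column CoNLL format:

from nltk.parse.dependencygraph import DependencyGraph

conll = """John   NNP   2   SBJ
loves  VBZ   0   ROOT
Mary   NNP   2   OBJ
"""
dg = DependencyGraph(conll)
print(dg.tree())   # (loves John Mary)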
from nltk.tree import Tree

def conlltags2tree(sentence, chunk_types=('NP', 'PP', 'VP'),
                   root_label='S', strict=False):
    """
    Convert the CoNLL IOB format to a tree.
    """
    tree = Tree(root_label, [])
    for (word, postag, chunktag) in sentence:
        if chunktag is None:
            if strict:
                raise ValueError("Bad conll tag sequence")
            else:
                # Treat as O
                tree.append((word, postag))
        elif chunktag.startswith('B-'):
            tree.append(Tree(chunktag[2:], [(word, postag)]))
        elif chunktag.startswith('I-'):
            if (len(tree) == 0 or not isinstance(tree[-1], Tree) or
                    tree[-1].label() != chunktag[2:]):
                if strict:
                    raise ValueError("Bad conll tag sequence")
                else:
                    # Treat as B-*
                    tree.append(Tree(chunktag[2:], [(word, postag)]))
            else:
                tree[-1].append((word, postag))
        elif chunktag == 'O':
            tree.append((word, postag))
        else:
            raise ValueError("Bad conll tag %r" % chunktag)
    return tree
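Fed the IOB triples produced by the tree2conlltags sketch earlier, this function rebuilds the chunk structure (invented data again):

tags = [('the', 'DT', 'B-NP'), ('dog', 'NN', 'I-NP'), ('barked', 'VBD', 'O')]
print(conlltags2tree(tags))
# roughly: (S (NP the/DT dog/NN) barked/VBD)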
def _untag(self, tree):
    for i, child in enumerate(tree):
        if isinstance(child, Tree):
            self._untag(child)
        elif isinstance(child, tuple):
            tree[i] = child[0]
        else:
            raise ValueError('expected child to be Tree or tuple')
    return tree
def tree2semi_rel(tree):
    """
    Group a chunk structure into a list of 'semi-relations' of the form
    (list(str), ``Tree``).

    In order to facilitate the construction of (``Tree``, string, ``Tree``)
    triples, this identifies pairs whose first member is a list (possibly
    empty) of terminal strings, and whose second member is a ``Tree`` of the
    form (NE_label, terminals).

    :param tree: a chunk tree
    :return: a list of pairs (list(str), ``Tree``)
    :rtype: list of tuple
    """
    from nltk.tree import Tree

    semi_rels = []
    semi_rel = [[], None]

    for dtr in tree:
        if not isinstance(dtr, Tree):
            semi_rel[0].append(dtr)
        else:
            # dtr is a Tree
            semi_rel[1] = dtr
            semi_rels.append(semi_rel)
            semi_rel = [[], None]

    return semi_rels
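A sketch of the grouping this produces on a small NE-chunked tree (the tree is invented):

from nltk.tree import Tree

chunked = Tree('S', [
    Tree('PERSON', [('John', 'NNP')]),
    ('works', 'VBZ'), ('for', 'IN'),
    Tree('ORGANIZATION', [('Acme', 'NNP')]),
])
for words, ne in tree2semi_rel(chunked):
    print(words, ne.label())
# [] PERSON
# [('works', 'VBZ'), ('for', 'IN')] ORGANIZATION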
def getLeaves(ptree):
    import nltk
    rs = []
    if isinstance(ptree, nltk.tree.ParentedTree):
        if len(ptree) > 0:
            if isinstance(ptree[0], str):
                rs.append(ptree)
        for node in ptree:
            if isinstance(node, nltk.tree.ParentedTree):
                if len(node) > 0:
                    if isinstance(node[0], str):
                        rs.append(node)
                    else:
                        rs.extend(getLeaves(node))
    return rs
from nltk.tree import Tree

def conlltags2tree(sentence, chunk_types=('NP', 'PP', 'VP'),
                   root_label='S', strict=False):
    """
    Convert the CoNLL IOB format to a tree.
    """
    tree = Tree(root_label, [])
    for (word, postag, chunktag) in sentence:
        if chunktag is None:
            if strict:
                raise ValueError("Bad conll tag sequence")
            else:
                # Treat as O
                tree.append((word, postag))
        elif chunktag.startswith('B-'):
            tree.append(Tree(chunktag[2:], [(word, postag)]))
        elif chunktag.startswith('I-'):
            if (len(tree) == 0 or not isinstance(tree[-1], Tree) or
                    tree[-1].label() != chunktag[2:]):
                if strict:
                    raise ValueError("Bad conll tag sequence")
                else:
                    # Treat as B-*
                    tree.append(Tree(chunktag[2:], [(word, postag)]))
            else:
                tree[-1].append((word, postag))
        elif chunktag == 'O':
            tree.append((word, postag))
        else:
            raise ValueError("Bad conll tag {0!r}".format(chunktag))
    return tree
import re
from nltk.tree import Tree

def check_if_atomic(sentence, parsed_sentence, tags):
    # not_atomic_list is defined elsewhere in the source project.
    counter = 0
    atomic_check = re.compile("|".join(not_atomic_list))
    sentence_lower = sentence.lower()
    tree = Tree('s', parsed_sentence)
    for child in tree:
        string = str(child)
        if string.startswith("(S"):
            counter += 1
    if atomic_check.search(sentence_lower):
        return False
    elif counter > 1:
        return False
    else:
        return True
import os
import sys
from subprocess import Popen, PIPE

def trainSVMTK(docs, pairs, dditype, model="svm_tk_classifier.model", excludesentences=[]):
    # ``relations``, ``get_svm_train_line`` and ``basedir`` are defined elsewhere
    # in the source project.
    if os.path.isfile("ddi_models/" + model):
        os.remove("ddi_models/" + model)
    if os.path.isfile("ddi_models/" + model + ".txt"):
        os.remove("ddi_models/" + model + ".txt")
    # docs = use_external_data(docs, excludesentences, dditype)
    xerrors = 0
    with open("ddi_models/" + model + ".txt", 'w') as train:
        # print pairs
        for p in pairs:
            if dditype != "all" and pairs[p][relations.PAIR_DDI] and pairs[p][relations.PAIR_TYPE] != dditype:
                continue
            sid = relations.getSentenceID(p)
            if sid not in excludesentences:
                tree = pairs[p][relations.PAIR_DEP_TREE][:]
                # print "tree1:", tree
                # if len(docs[sid][ddi.SENTENCE_ENTITIES]) > 20:
                #     # print line
                #     line = "1 |BT| (ROOT (NP (NN candidatedrug) (, ,) (NN candidatedrug))) |ET|"
                #     xerrors += 1
                # else:
                line = get_svm_train_line(tree, pairs[p], sid,
                                          docs[sid][relations.SENTENCE_PAIRS][p])
                if not pairs[p][relations.PAIR_DDI]:
                    line = '-' + line
                elif pairs[p][relations.PAIR_TYPE] != dditype and dditype != "all":
                    line = '-' + line
                train.write(line)
    # print "tree errors:", xerrors
    svmlightcall = Popen(["./svm-light-TK-1.2/svm-light-TK-1.2.1/svm_learn",
                          "-t", "5", "-L", "0.4", "-T", "2", "-S", "2",
                          "-g", "10", "-D", "0", "-C", "T",
                          basedir + model + ".txt", basedir + model],
                         stdout=PIPE, stderr=PIPE)
    res = svmlightcall.communicate()
    if not os.path.isfile("ddi_models/" + model):
        print("failed training model " + basedir + model)
        print(res)
        sys.exit()