We extracted the following 21 code examples from open-source Python projects to illustrate how to use tree.Tree().
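The snippets come from unrelated projects, so Tree is not one class: some examples build parse or sentiment trees node by node, others wrap a parse string or training data. For the read_tree() examples, a minimal node class along the following lines is enough to run the code. This is a hedged sketch of what those snippets appear to assume, not any project's actual implementation:

# Hypothetical stand-in for the Tree class used by the read_tree()
# examples below; the real projects define richer versions of this.
class Tree(object):
    def __init__(self):
        self.idx = None          # position of the node's token in the sentence
        self.gold_label = None   # optional per-node label
        self.children = []       # child Tree nodes

    def add_child(self, child):
        self.children.append(child)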
def init_estimator(self):
    # Resample the training set according to the current instance weights.
    indices = np.random.choice(self.X.shape[0], self.n_samples, p=self.weights)
    X_tree = np.array([self.X[i, :] for i in indices])
    y_tree = np.array([self.y[i] for i in indices])
    print("%s / %s" % (self.count, self.n_estimators))
    # Regrow the tree until it does better than chance on the training
    # set (an accuracy of exactly 0.50 means a degenerate tree).
    while True:
        t1 = time.time()
        tree = Tree(X_tree, y_tree)
        t2 = time.time()
        print("tree generation time: %s" % (t2 - t1))
        predictions = tree.predict(self.X)
        accuracy = accuracy_score(self.y, predictions)
        print("accuracy: %s" % accuracy)
        if accuracy != 0.50:
            self.estimators.append(tree)
            break
    return tree, predictions
def _get_dependencies(self, tree, sent_len):
    def rec(subtree):
        if isinstance(subtree, Tree):
            children = subtree.children
            if len(children) == 2:
                # Binary node: record the dependency from the non-head
                # child onto the head child.
                head = rec(children[0 if subtree.left_is_head else 1])
                dep = rec(children[1 if subtree.left_is_head else 0])
                res[dep] = head
            else:
                head = rec(children[0])
            return head
        else:
            return subtree.pos

    # res[i] holds the (0-based) head of token i; -1 marks the root.
    res = [-1 for _ in range(sent_len)]
    rec(tree)
    res = [i + 1 for i in res]  # shift to 1-based; the root becomes 0
    assert len([i for i in res if i == 0]) == 1  # exactly one root
    return res
def loadDataset(filename):
    """Load and return the dataset contained in the given file."""
    dataFile = open(filename, 'r')
    lines = dataFile.readlines()
    dataFile.close()
    dataset = []
    # Extract rows: create the tree for each sentence
    for line in lines:
        dataset.append(tree.Tree(line))
    return dataset
def read_tree(self, line):
    # The line is a list of 1-based parent indices: parents[i-1] is the
    # parent of token i, 0 marks the root, -1 an unattached token.
    parents = list(map(int, line.split()))
    trees = dict()
    root = None
    for i in range(1, len(parents) + 1):
        if i - 1 not in trees.keys() and parents[i - 1] != -1:
            # Walk up from token i to the first already-built ancestor
            # (or to the root), creating nodes along the way.
            idx = i
            prev = None
            while True:
                parent = parents[idx - 1]
                if parent == -1:
                    break
                tree = Tree()
                if prev is not None:
                    tree.add_child(prev)
                trees[idx - 1] = tree
                tree.idx = idx - 1
                if parent - 1 in trees.keys():
                    trees[parent - 1].add_child(tree)
                    break
                elif parent == 0:
                    root = tree
                    break
                else:
                    prev = tree
                    idx = parent
    return root
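A small hedged usage sketch of the parent-index format, assuming the minimal Tree class above and an instance reader of the surrounding dataset class (both names are illustrative):

# For the hypothetical input '2 0 2', token 2 is the root and
# tokens 1 and 3 attach to it.
root = reader.read_tree('2 0 2')
print(root.idx)                               # 1 (0-based index of token 2)
print([child.idx for child in root.children])  # [0, 2]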
def load(txt_fn):
    hrchy = tt.Tree()
    with open(txt_fn, 'rb') as fp:
        while True:
            node = readNode(fp)
            if not node:
                break
            if node['parent_id'] == 'root':
                hrchy.create_node({'desc': node['desc'], 'classes': node['classes']},
                                  node['id'])
            else:
                hrchy.create_node({'desc': node['desc'], 'classes': node['classes']},
                                  node['id'], node['parent_id'])
    return hrchy
def read_tree(self, line):
    parents = list(map(int, line.split()))
    trees = dict()
    root = None
    for i in range(1, len(parents) + 1):
        # if not trees[i-1] and parents[i-1] != -1:
        if i - 1 not in trees.keys() and parents[i - 1] != -1:
            idx = i
            prev = None
            while True:
                parent = parents[idx - 1]
                if parent == -1:
                    break
                tree = Tree()
                if prev is not None:
                    tree.add_child(prev)
                trees[idx - 1] = tree
                tree.idx = idx - 1
                # if trees[parent-1] is not None:
                if parent - 1 in trees.keys():
                    trees[parent - 1].add_child(tree)
                    break
                elif parent == 0:
                    root = tree
                    break
                else:
                    prev = tree
                    idx = parent
    return root
def load_data(data_dir, order='top_down'):
    '''construct vocab and load data with a specified traversal order'''
    general_predicate_dir = os.path.join(data_dir, "general_predicate")
    general_predicate = []
    with open(general_predicate_dir, 'r') as f:
        general_predicate = f.read().split('\n')
    word_vocab = Vocab()
    nt_vocab = Vocab()
    ter_vocab = Vocab()
    act_vocab = Vocab()
    word_tokens = collections.defaultdict(list)
    tree_tokens = collections.defaultdict(list)
    tran_actions = collections.defaultdict(list)
    for fname in ('train', 'valid', 'test'):
        print('reading', fname)
        pname = os.path.join(data_dir, fname)
        with codecs.open(pname, 'r', 'utf-8') as f:
            for line in f:
                sen, sexp = line.rstrip().split('\t')
                sen = sen.split(' ')
                word_vocab.feed_all(sen)
                word_tokens[fname].append(sen)
                # Build the parse tree from the s-expression, then collect
                # its non-terminals, terminals, and oracle actions.
                parse_tree = Tree()
                parse_tree.construct_from_sexp(sexp)
                nt, ter = parse_tree.get_nt_ter()
                nt_vocab.feed_all(nt)
                ter_vocab.feed_all(ter)
                tree_token, action = parse_tree.get_oracle(order, general_predicate)
                act_vocab.feed_all(action)
                tree_tokens[fname].append(tree_token)
                tran_actions[fname].append(action)
    return word_vocab, nt_vocab, ter_vocab, act_vocab, word_tokens, tree_tokens, tran_actions
def load_data(data_dir, order='pre_order'):
    '''construct vocab and load data with a specified traversal order'''
    word_vocab = Vocab()
    nt_vocab = Vocab()
    ter_vocab = Vocab()
    act_vocab = Vocab()
    act_vocab.feed_all(['NT', 'TER', 'ACT'])
    word_tokens = collections.defaultdict(list)
    tree_tokens = collections.defaultdict(list)
    tran_actions = collections.defaultdict(list)
    for fname in ('train', 'valid', 'test'):
        print('reading', fname)
        pname = os.path.join(data_dir, fname)
        with codecs.open(pname, 'r', 'utf-8') as f:
            for line in f:
                sen, sexp = line.rstrip().split('\t')
                sen = sen.split(' ')
                word_vocab.feed_all(sen)
                word_tokens[fname].append(sen)
                parse_tree = Tree()
                parse_tree.construct_from_sexp(sexp)
                nt, ter = parse_tree.get_nt_ter()
                nt_vocab.feed_all(nt)
                ter_vocab.feed_all(ter)
                # Traverse the tree with the method named by `order`
                # (e.g. pre_order), starting from the root.
                traverse_method = getattr(parse_tree, order)
                tree_token, action = traverse_method(_ROOT)
                tree_tokens[fname].append(tree_token)
                tran_actions[fname].append(action)
    return word_vocab, nt_vocab, ter_vocab, act_vocab, word_tokens, tree_tokens, tran_actions
def __init__(self, sent_id, parse_tree, dep_tree, words):
    self.leaves = []
    self.id = sent_id
    self.tree = Tree(parse_tree, sent_id)
    self.get_leaves()
    self.words = words
    self.begin_offset = words[0][1]['CharacterOffsetBegin']
    self.end_offset = words[-1][1]['CharacterOffsetEnd']
    self.word_ids = []
    self.true_connectives = []
    self.checked_connectives = []
    self.stem_leaf()
    self.depTree = DepTree(self, dep_tree)
    self.clauses = []
    self.break_clauses()
def __init__(self, sent_id, parse_tree, dep_tree, words):
    self.leaves = []
    self.id = sent_id
    self.tree = Tree(parse_tree, sent_id)
    self.get_leaves()
    self.words = words
    self.begin_offset = words[0][1]['CharacterOffsetBegin']
    self.end_offset = words[-1][1]['CharacterOffsetEnd']
    self.word_ids = []
    self.true_connectives = []
    self.checked_connectives = []
    self.depTree = DepTree(self, dep_tree)
    self.clauses = []
    self.break_clauses()
def build_forest(self):
    forest = {}
    for t in range(self.f_size):
        forest[t] = Tree(self, rho=self.rho)
        forest[t].tree_leaf_plots(fname='tree_opt%s.png' % t)
    path = os.getcwd() + '/plots/'
    mkdir_p(path)
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)
    color = ['lightcoral', 'dodgerblue', 'mediumseagreen', 'darkorange']
    # Shade the rectangular domain of every leaf node of every tree.
    for t in range(self.f_size):
        for c, n in enumerate(forest[t].leaf_nodes):
            [[i1, i2], [j1, j2]] = n.quad
            x1, x2 = self.grid[0][i1], self.grid[0][i2]
            y1, y2 = self.grid[1][j1], self.grid[1][j2]
            ax.fill_between([x1, x2], y1, y2, alpha=.15, color=color[c])
    pd.DataFrame(self.data, columns=['x', 'y']).plot(
        ax=ax, x='x', y='y', kind='scatter', lw=0, alpha=.6, s=20, c='k')
    plt.savefig(path + 'combined.png', format='png')
    plt.close()
    return forest

# TODO: implement online L-curve optimization (e.g. EWMA) to get rid of
# the depth input.
def tune_entropy_threshold(self, n=5, depth=6, plot_debug=False):
    """Compute the mean optimal entropy based on the L-curve elbow method."""
    e_arr = []
    for i in range(n):
        var = Tree(self, rho=.5, depth=depth)
        e_arr += [pair + [i] for pair in var.entropy_gain_evol]
        var.domain_splits_plots(subpath='%s/' % i)
    entropy_evol = pd.DataFrame(e_arr, columns=['depth', 'entropy', 'tree'])
    entropy_evol = (entropy_evol.groupby(['tree', 'depth'])[['entropy']]
                    .mean()
                    .reset_index()
                    .pivot(columns='tree', index='depth', values='entropy')
                    .fillna(0))
    entropy_elbow_cand = entropy_evol.apply(
        lambda x: opt_L_curve(np.array(x.index), np.array(x)))
    avg_opt_entropy = entropy_elbow_cand.mean()
    if plot_debug:
        fig = plt.figure(figsize=(10, 10))
        ax = fig.add_subplot(111)
        entropy_evol.plot(ax=ax, kind='line', alpha=.6, lw=3.,
                          title='Avg. Opt. Entropy = %.2f' % avg_opt_entropy)
        plt.savefig('evol.png', format='png')
        plt.close()
    return avg_opt_entropy
def validate(self, tree, key, value):
    if isinstance(value, Tree):
        p = '.'.join(tree._path + [key])
        raise ValidationError(p, 'This key must be a value, not a tree.') from None
def read_tree(self, line):
    parents = list(map(int, line.split()))
    trees = dict()
    root = None
    for i in range(1, len(parents) + 1):
        # if not trees[i-1] and parents[i-1] != -1:
        if i - 1 not in trees.keys() and parents[i - 1] != -1:
            idx = i
            prev = None
            while True:
                parent = parents[idx - 1]
                if parent == -1:
                    break
                tree = Tree()
                if prev is not None:
                    tree.add_child(prev)
                trees[idx - 1] = tree
                tree.idx = idx - 1
                # if trees[parent-1] is not None:
                if parent - 1 in trees.keys():
                    trees[parent - 1].add_child(tree)
                    break
                elif parent == 0:
                    root = tree
                    break
                else:
                    prev = tree
                    idx = parent
    return root
def read_tree(self, line, label_line):
    # FIXED: tree.idx and the trees dict are 1-based, as in the dataset;
    # parents and labels stay 0-based lists, so index them with idx - 1.
    parents = list(map(int, line.split()))  # one parent index per token
    trees = dict()
    root = None
    labels = list(map(self.parse_dlabel_token, label_line.split()))
    for i in range(1, len(parents) + 1):
        if i not in trees.keys() and parents[i - 1] != -1:
            idx = i
            prev = None
            while True:
                parent = parents[idx - 1]
                if parent == -1:
                    break
                tree = Tree()
                if prev is not None:
                    tree.add_child(prev)
                trees[idx] = tree
                # Keep idx 1-based here: it prevents embs[tree.idx - 1]
                # from being indexed with -1 when tree.idx would be 0.
                tree.idx = idx
                tree.gold_label = labels[idx - 1]  # attach the node label
                if parent in trees.keys():
                    trees[parent].add_child(tree)
                    break
                elif parent == 0:
                    root = tree
                    break
                else:
                    prev = tree
                    idx = parent
    return root
def testCheckGradient():
    """Gradient checking by comparing to an approximated value."""
    # Create an arbitrary sample
    sample = tree.Tree("(4 (2 (2 But) (2 (3 (3 (2 believe) (2 it)) (2 or)) (1 not))) (4 (2 ,) (4 (2 it) (4 (4 (2 's) (4 (2 one) (4 (2 of) (4 (4 (2 the) (4 (4 (2 most) (4 (4 beautiful) (3 (2 ,) (3 evocative)))) (2 works))) (2 (2 I) (2 (2 've) (2 seen))))))) (2 .)))))")
    # sample.printTree()  # check parsing and sample loading

    # Initialize the model
    model = rntnmodel.Model(
        randInitMaxValueNN=2.0,  # try bigger values for the initial values
        # regularisationTerm=0  # check without regularisation
        regularisationTerm=0.02  # check gradient with regularisation
    )

    # Compute the gradient using the direct (analytic) formula
    model.evaluateSample(sample)
    analyticGradient = model.backpropagate(sample)
    # Don't forget to add the regularisation
    analyticGradient = model.addRegularisation(analyticGradient, 1)

    # Compute the gradient using the numerical approximation
    numericalGradient = computeNumericalGradient(sample, model)

    # Show results (detailed values); for dV we plot a single layer
    # instead of the whole tensor
    print("Computed dV[3]=\n", analyticGradient.dV[3])
    print("Numerical dV[3]=\n", numericalGradient.dV[3])
    print("Computed dW=\n", analyticGradient.dW)
    print("Numerical dW=\n", numericalGradient.dW)
    print("Computed db=\n", analyticGradient.db)
    print("Numerical db=\n", numericalGradient.db)
    print("Computed dWs=\n", analyticGradient.dWs)
    print("Numerical dWs=\n", numericalGradient.dWs)
    print("Computed dbs=\n", analyticGradient.dbs)
    print("Numerical dbs=\n", numericalGradient.dbs)

    # Show results (relative distances)
    distV = np.linalg.norm(analyticGradient.dV - numericalGradient.dV) / np.linalg.norm(analyticGradient.dV + numericalGradient.dV)
    distW = np.linalg.norm(analyticGradient.dW - numericalGradient.dW) / np.linalg.norm(analyticGradient.dW + numericalGradient.dW)
    distb = np.linalg.norm(analyticGradient.db - numericalGradient.db) / np.linalg.norm(analyticGradient.db + numericalGradient.db)
    distWs = np.linalg.norm(analyticGradient.dWs - numericalGradient.dWs) / np.linalg.norm(analyticGradient.dWs + numericalGradient.dWs)
    distbs = np.linalg.norm(analyticGradient.dbs - numericalGradient.dbs) / np.linalg.norm(analyticGradient.dbs + numericalGradient.dbs)
    print("Distances: dV=", distV)
    print("Distances: dW=", distW)
    print("Distances: db=", distb)
    print("Distances: dWs=", distWs)
    print("Distances: dbs=", distbs)
def load_data(data_dir, order='top_down'):
    '''construct vocab and load data with a specified traversal order'''
    general_predicate_dir = os.path.join(data_dir, "general_nts")
    action_dir = os.path.join(data_dir, "actions")
    general_predicate = []
    word_vocab = Vocab()
    nt_vocab = Vocab()
    ter_vocab = Vocab()
    act_vocab = Vocab()
    with open(general_predicate_dir, 'r') as f:
        general_predicate = f.read().split('\n')
    nt_vocab.feed_all(general_predicate)
    with open(action_dir, 'r') as f:
        actions = f.read().split('\n')
    act_vocab.feed_all(actions)
    word_tokens = collections.defaultdict(list)
    tree_tokens = collections.defaultdict(list)
    tran_actions = collections.defaultdict(list)
    for fname in ('train', 'valid', 'test'):
        print('reading', fname)
        pname = os.path.join(data_dir, fname)
        with codecs.open(pname, 'r', 'utf-8') as f:
            for line in f:
                sen, sexp = line.rstrip().split('\t')
                sen = sen.split(' ')
                word_vocab.feed_all(sen)
                word_tokens[fname].append(sen)
                parse_tree = Tree()
                parse_tree.construct_from_sexp(sexp)
                nt, ter = parse_tree.get_nt_ter()
                nt_vocab.feed_all(nt)
                ter_vocab.feed_all(ter)
                tree_token, action = parse_tree.get_oracle(order, general_predicate)
                # print(tree_token, action)
                tree_tokens[fname].append(tree_token)
                tran_actions[fname].append(action)
    return word_vocab, nt_vocab, ter_vocab, act_vocab, word_tokens, tree_tokens, tran_actions
def convert_graph(data_dir):
    _allowed_error = 0.000001
    rname = os.path.join(data_dir, 'train_lf_spade')
    rf = open(rname, 'w')
    for fname in ['spades.bow.graphs.train.json']:
        print('reading', fname)
        pname = os.path.join(data_dir, fname)
        with codecs.open(pname, 'r', 'utf-8') as f:
            for line in f:
                try:
                    line = json.loads(line)
                except ValueError:
                    continue
                sen = line['words']
                sen = [x['word'] for x in sen]
                forest, answer = line['graphs'], line['answerString']
                if 'entities' not in line:
                    continue
                entity_list = line['entities']
                good_lf = []
                bad_lf = []
                if len(forest) == 0:
                    continue
                find_lf = 0
                for graph in forest:
                    lf = graph2lf(graph['graph'], entity_list)
                    if lf is None:
                        continue
                    parse_tree = Tree()
                    parse_tree.construct_from_sexp(lf)
                    find_lf = 1
                    nt, ter = parse_tree.get_nt_ter()
                    # A logical form is "good" if its denotation overlaps
                    # the gold answer set.
                    if set(graph['denotation']) & set(answer):
                        good_lf.append((lf, graph['denotation']))
                    else:
                        bad_lf.append((lf, graph['denotation']))
                if not find_lf:
                    continue
                json.dump(sen, rf)
                rf.write('\t')
                json.dump(answer, rf)
                rf.write('\t')
                json.dump(good_lf, rf)
                rf.write('\t')
                json.dump(bad_lf, rf)
                rf.write('\n')
def convert_graph(data_dir):
    _allowed_error = 0.000001
    rname = os.path.join(data_dir, 'train_lf')
    rf = open(rname, 'w')
    for fname in ('train.graph', 'valid.graph'):
        print('reading', fname)
        pname = os.path.join(data_dir, fname)
        with codecs.open(pname, 'r', 'utf-8') as f:
            for line in f:
                line = json.loads(line)
                sen = line['sentence']
                sen = sen.split(' ')
                forest, answer = line['forest'], line['answerF1']
                good_lf = []
                bad_lf = []
                for choice in forest:
                    entity_list = choice['entities']
                    for graph in choice['graphs']:
                        lf = graph2lf(graph['graph'], entity_list)
                        parse_tree = Tree()
                        parse_tree.construct_from_sexp(lf)
                        nt, ter = parse_tree.get_nt_ter()
                        if set(graph['denotation']) & set(answer):
                            good_lf.append((lf, graph['denotation']))
                        else:
                            bad_lf.append((lf, graph['denotation']))
                json.dump(sen, rf)
                rf.write('\t')
                json.dump(answer, rf)
                rf.write('\t')
                json.dump(good_lf, rf)
                rf.write('\t')
                json.dump(bad_lf, rf)
                rf.write('\n')
def load_data(data_dir, order='top_down'):
    '''construct vocab and load data with a specified traversal order'''
    general_predicate_dir = os.path.join(data_dir, "general_nts")
    action_dir = os.path.join(data_dir, "actions")
    general_predicate = []
    word_vocab = Vocab()
    nt_vocab = Vocab()
    ter_vocab = Vocab()
    act_vocab = Vocab()
    with codecs.open(general_predicate_dir, 'r', 'utf-8') as f:
        general_predicate = f.read().split('\n')
    nt_vocab.feed_all(general_predicate)
    with codecs.open(action_dir, 'r', 'utf-8') as f:
        actions = f.read().split('\n')
    act_vocab.feed_all(actions)
    word_tokens = collections.defaultdict(list)
    tree_tokens = collections.defaultdict(list)
    tran_actions = collections.defaultdict(list)
    for fname in ('train', 'valid', 'test'):
        print('reading', fname)
        pname = os.path.join(data_dir, fname)
        with codecs.open(pname, 'r', 'utf-8') as f:
            for line in f:
                sen, sexp = line.rstrip().split('\t')
                sen = sen.split(' ')
                word_vocab.feed_all(sen)
                word_tokens[fname].append(sen)
                parse_tree = Tree()
                parse_tree.construct_from_sexp(sexp)
                nt, ter = parse_tree.get_nt_ter()
                nt_vocab.feed_all(nt)
                ter_vocab.feed_all(ter)
                tree_token, action = parse_tree.get_oracle(order, general_predicate)
                act_vocab.feed_all(action)
                # print(tree_token, action)
                tree_tokens[fname].append(tree_token)
                tran_actions[fname].append(action)
    return word_vocab, nt_vocab, ter_vocab, act_vocab, word_tokens, tree_tokens, tran_actions
def __init__(self, world, x, y, w, h, tile):
    pygame.sprite.Sprite.__init__(self)
    self.world = world
    self.x = x
    self.y = y
    self.w = w
    self.h = h
    self.tile = tile
    self.max_food = 10
    self.fertility_mult = 0.5
    self.colour = (255, 0, 0)
    if self.tile.terrain == 'meadow':
        self.fertility_mult = 0.0025
        self.colour = (80, 180, 80)
        self.max_food = 2
    elif self.tile.terrain == 'lake':
        self.fertility_mult = 0
        self.colour = (0, 0, 215)
        self.max_food = 0
    elif self.tile.terrain == 'forest':
        self.fertility_mult = 0.005
        self.colour = (0, 120, 0)
        self.max_food = 10
    else:
        print('unknown terrain type: %r' % self.tile)
    self.image = pygame.Surface((self.w, self.h)).convert()
    self.image.fill((0, 0, 255))
    self.redraw = True
    self.rect = self.image.get_rect()
    self.rect.x = self.x * self.w
    self.rect.y = self.y * self.h
    self.alltrees = group.Group()
    self.allfood = group.Group()
    self.allcharacters = group.Group()
    if tile.terrain == 'forest':
        t = tree.Tree(self,
                      random.randint(4, 18),  # radius
                      random.randint(0, w),   # x
                      random.randint(0, h))   # y
        self.alltrees.add(t)
        world.alltrees.add(t)