The following code examples, extracted from open-source Python projects, demonstrate how to use sklearn.tree.export_graphviz().
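Before the project examples, here is a minimal, self-contained sketch of the pattern most of them share: fit a tree, export it as DOT text, and render a PDF. This is an illustrative sketch assuming scikit-learn, pydotplus, and Graphviz are installed; the output file name tree.pdf is arbitrary.

    from sklearn import tree
    from sklearn.datasets import load_iris
    import pydotplus

    iris = load_iris()
    clf = tree.DecisionTreeClassifier().fit(iris.data, iris.target)
    # out_file=None makes export_graphviz return the DOT source as a string
    dot_data = tree.export_graphviz(clf, out_file=None,
                                    feature_names=iris.feature_names,
                                    class_names=iris.target_names,
                                    filled=True, rounded=True,
                                    special_characters=True)
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_pdf("tree.pdf")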
def analyseReasonWithDecisonTree(anamolySample, normalSample, name):
    data = anamolySample
    target = []
    for i in range(0, len(anamolySample)):
        target.append(1)
    data.extend(normalSample)
    for i in range(0, len(normalSample)):
        target.append(0)
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(data, target)
    dot_data = tree.export_graphviz(clf, out_file=None, feature_names=name,
                                    filled=True, special_characters=True)
    graph = pydotplus.graph_from_dot_data(dot_data)
    s = str(time.time())
    graph.write_pdf(s + "DT.pdf")
def analyseReasonWithDecisonTree(anamolySample, normalSample, name):
    data = anamolySample
    target = []
    for i in range(0, len(anamolySample)):
        target.append(1)
    data = data.append(normalSample)
    for i in range(0, len(normalSample)):
        target.append(0)
    print(len(data))
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(data, target)
    dot_data = tree.export_graphviz(clf, out_file=None, feature_names=name,
                                    filled=True, special_characters=True)
    graph = pydotplus.graph_from_dot_data(dot_data)
    s = str(time.time())
    graph.write_pdf(s + "DT.pdf")
def analyseReasonWithDecisonTree(anamolySample, normalSample):
    data = anamolySample
    target = []
    for i in range(0, len(anamolySample)):
        target.append(1)
    data = data.append(normalSample)
    for i in range(0, len(normalSample)):
        target.append(0)
    print(len(data))
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(data, target)
    name = []
    for i in data.columns:
        name.append(i)
    dot_data = tree.export_graphviz(clf, out_file=None, feature_names=name,
                                    filled=True, special_characters=True)
    graph = pydotplus.graph_from_dot_data(dot_data)
    s = str(time.time())
    graph.write_pdf(s + "DT.pdf")
def train(self, training_set, training_target, fea_index):
    clf = tree.DecisionTreeClassifier(criterion="entropy", min_samples_split=30,
                                      class_weight="balanced")
    clf = clf.fit(training_set, training_target)
    class_names = np.unique([str(i) for i in training_target])
    feature_names = [attr_list[i] for i in fea_index]
    dot_data = tree.export_graphviz(clf, out_file=None,
                                    feature_names=feature_names,
                                    class_names=class_names,
                                    filled=True, rounded=True,
                                    special_characters=True)
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_pdf("output/tree-vis.pdf")
    joblib.dump(clf, 'output/CART.pkl')
def visualize_tree(clf, feature_names, class_names, output_file, method='pdf'):
    dot_data = StringIO()
    # Use the function's own arguments rather than the global iris dataset
    tree.export_graphviz(clf, out_file=dot_data,
                         feature_names=feature_names,
                         class_names=class_names,
                         filled=True, rounded=True,
                         special_characters=True, impurity=False)
    graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
    if method == 'pdf':
        graph.write_pdf(output_file + ".pdf")
    elif method == 'inline':
        Image(graph.create_png())
    return graph

# An example using the iris dataset
def visualize_tree(tree, feature_name, dot_file):
    """Create tree png using graphviz.

    tree -- scikit-learn DecisionTree.
    feature_name -- list of feature names.
    dot_file -- dot file name and path
    """
    # Write to the dot_file argument (the original hard-coded "tree.dot",
    # which the dot command below would never read)
    with open(dot_file, 'w') as f:
        export_graphviz(tree, out_file=f, feature_names=feature_name)
    dt_png = dot_file.replace('dot', 'png')
    command = ["dot", "-Tpng", dot_file, "-o", dt_png]
    try:
        subprocess.check_call(command)
    except Exception as e:
        print(e)
        exit("Could not run dot, ie graphviz, to produce visualization")
def test_friedman_mse_in_graphviz():
    clf = DecisionTreeRegressor(criterion="friedman_mse", random_state=0)
    clf.fit(X, y)
    dot_data = StringIO()
    export_graphviz(clf, out_file=dot_data)

    clf = GradientBoostingClassifier(n_estimators=2, random_state=0)
    clf.fit(X, y)
    for estimator in clf.estimators_:
        export_graphviz(estimator[0], out_file=dot_data)

    for finding in finditer(r"\[.*?samples.*?\]", dot_data.getvalue()):
        assert_in("friedman_mse", finding.group())
def scikitExportDecisionTree2Dot(fname):
    assert DT_MODEL is not None
    fdot = open(fname, "w")
    tree.export_graphviz(DT_MODEL, out_file=fdot)
    fdot.close()
def use_tree(X_data, y_data):
    tree = DecisionTreeClassifier(criterion='entropy', max_features='sqrt',
                                  max_depth=3, random_state=0)
    tree.fit(X_data, y_data)
    return tree

# plotting
# X_comined = np.vstack((X_data, X_valid))
# y_comined = np.hstack((y_data, y_valid))
# plot_decision_regions(X_comined, y_comined, classifier=tree, test_idx=range(105, 150))
# plt.show()
# export_graphviz(tree, out_file='tree.dot', feature_names=['petal length', 'petal width'])
def constructModel(corpus, classList, features, modelOutput):
    """
    Trains a Decision Tree model on the test corpus.

    Args:
        corpus: A list of lists, containing the GC content, coverage, and class number.
        classList: A list of class names.
        features: List of variables used by each contig.
        modelOutput: Location to save model as GraphViz DOT, or False to save no model.

    Returns:
        classifier: A DecisionTreeClassifier object that has been trained on the test corpus.
    """
    corpus.sort()  # just in case
    X = []
    Y = []
    for item in corpus:
        X.append(item[:-1])  # all but the last item
        Y.append(item[-1])   # only the last item
    X_train, X_test, Y_train, Y_test = mscv.train_test_split(X, Y, test_size=0.3, random_state=0)
    # TODO: implement classifier testing and comparison, now only baggingClassifier is used as per paper
    # treeClassifier = tree.DecisionTreeClassifier()
    # treeClassifier = treeClassifier.fit(X_train, Y_train)
    # click.echo("Decision tree classifier built, score is %s out of 1.00" % treeClassifier.score(X_test, Y_test))
    baggingClassifier = ensemble.BaggingClassifier()
    baggingClassifier = baggingClassifier.fit(X_train, Y_train)
    click.echo("Bagging classifier built, score is %s out of 1.00" % baggingClassifier.score(X_test, Y_test))
    # forestClassifier = ensemble.RandomForestClassifier(n_estimators=10)
    # forestClassifier = forestClassifier.fit(X_train, Y_train)
    # click.echo("Random forest classifier built, score is %s out of 1.00" % forestClassifier.score(X_test, Y_test))
    # adaClassifier = ensemble.AdaBoostClassifier(n_estimators=100)
    # adaClassifier = adaClassifier.fit(X_train, Y_train)
    # click.echo("AdaBoost classifier built, score is %s out of 1.00" % adaClassifier.score(X_test, Y_test))
    # gradientClassifier = ensemble.GradientBoostingClassifier(n_estimators=100)
    # gradientClassifier = gradientClassifier.fit(X_train, Y_train)
    # click.echo("Gradient tree boosting classifier built, score is %s out of 1.00" % gradientClassifier.score(X_test, Y_test))
    if modelOutput:
        with open(modelOutput, 'w') as dotfile:
            # NOTE: export_graphviz expects a single fitted decision tree;
            # passing the BaggingClassifier ensemble itself will raise an error.
            tree.export_graphviz(baggingClassifier, out_file=dotfile,
                                 feature_names=features, class_names=classList,
                                 filled=True, rounded=True, special_characters=True)
    return baggingClassifier
def export_model(self, IDcol):
    # Export the model into the model file as well as create a submission
    # with model index. This will be used for creating an ensemble.
    self.export_model_base(IDcol, 'decision_tree')

## UNDER DEVELOPMENT CODE FOR PRINTING TREES
    # def get_tree(self):
    #     return self.alg.tree_

    # Print the tree in visual format
    # Inputs:
    #     export_pdf - if True, a pdf will be exported with the
    #                  filename as specified in pdf_name argument
    #     pdf_name - name of the pdf file if export_pdf is True
    # def printTree(self, export_pdf=True, file_name="Decision_Tree.pdf"):
    #     dot_data = StringIO()
    #     export_graphviz(
    #         self.alg, out_file=dot_data, feature_names=self.predictors,
    #         filled=True, rounded=True, special_characters=True)
    #     export_graphviz(
    #         self.alg, out_file='data.dot', feature_names=self.predictors,
    #         filled=True, rounded=True, special_characters=True
    #     )
    #     graph = pydot.graph_from_dot_data(dot_data.getvalue())
    #     if export_pdf:
    #         graph.write_pdf(file_name)
    #     return graph

#####################################################################
##### RANDOM FOREST
#####################################################################
def save_decision_tree(treePath, model, fold_idx, featNames):
    if not os.path.exists(treePath):
        os.makedirs(treePath)
    export_graphviz(model, out_file=treePath + 'fold' + str(fold_idx) + '.dot',
                    feature_names=featNames, filled=True,
                    class_names=["absent", "mild", "moderate", "severe"],
                    proportion=True)
def classify(y, x, test_y, test_x):
    global data_df, factor_name, left, right, feature, ratio, threshold
    y_c = np.zeros(len(y))
    y_c[y > 0.02] = 1
    y_c[y < -0.02] = -1
    min_n = int(0.05 * len(y))
    clf = DecisionTreeClassifier(max_depth=4, min_samples_leaf=min_n)
    clf.fit(x, y_c)
    y_p = clf.predict(x)
    fname = "D:\\Cache\\tree.txt"
    test_y = y
    with open(fname, 'w') as f:
        tree.export_graphviz(clf, out_file=f)
    factor_exchange(factor_name, fname)
    left = clf.tree_.children_left
    right = clf.tree_.children_right
    feature = clf.tree_.feature
    threshold = clf.tree_.threshold
    disp_tree()
    # precision, recall, thresholds = precision_recall_curve(y_c, clf.predict(x))
    print("mean income is:", str(np.average(test_y)),
          "\nwin ratio is: ", str(np.sum(test_y > 0) / len(test_y)))
    print("after training\n"
          "mean class_1 is: ", str(np.average(test_y[y_p > 0])),
          "\nwin ratio is: ", str(np.sum(test_y[y_p > 0] > 0) / np.sum(y_p > 0)),
          "\ntotal class_1 is:", str(np.sum(np.sum(y_p > 0))),
          "\nmean class_0 is: ", str(np.average(test_y[y_p < 0])))
def DecisionTreeModel(self, dummy_x, dummy_y):
    clf = DecisionTreeClassifier(criterion='entropy')
    clf.fit(dummy_x, dummy_y)
    return clf
    # with open('dt_information_gain.dot', 'w') as f:
    #     f = export_graphviz(clf, feature_names=vec.get_feature_names(), out_file=f)
def create_graphviz_file(self, file_name):
    dot_data = tree.export_graphviz(
        self.clf, out_file=None,
        feature_names=self.feature_names,
        class_names=self.target_names,
        filled=True, rounded=True,
        special_characters=True)
    graph = pdp.graph_from_dot_data(dot_data)
    graph.write_pdf(file_name + ".pdf")
    print("Decision graph created")
def decisionTree(X,y,attributeNames,classNames,fileName,s="",X_train=None,y_train=None, X_test=None, y_test=None): print "Doing decision tree for: " print s if(X_train is None or X_test is None or y_train is None or y_test is None): X_train = X X_test = X y_train = y y_test = y # Fit regression tree classifier, Gini split criterion, pruning enabled dtc = tree.DecisionTreeClassifier(criterion='gini', min_samples_split=100) dtc = dtc.fit(X_train,y_train) # Export tree graph for visualization purposes: # (note: you can use i.e. Graphviz application to visualize the file) out = tree.export_graphviz(dtc, out_file=fileName, feature_names=attributeNames) out.close() correct = 0 wrong = 0 for i in range(0,len(X_test)): x = X_test[i,:] x_class = dtc.predict(x)[0] if((x_class < 0.5 and y_test[i] < 0.5) or (x_class > 0.5 and y_test[i] > 0.5)): correct += 1 else: wrong += 1 rate = double(wrong) / double(correct + wrong) print rate print '\n' return rate
def train_predictor(df, markov_blanket, p_train=0.6):
    # DecisionTreeClassifier(criterion='gini', splitter='best', max_depth=None,
    #                        min_samples_split=2, min_samples_leaf=1,
    #                        min_weight_fraction_leaf=0.0, max_features=None,
    #                        random_state=None, max_leaf_nodes=None,
    #                        min_impurity_split=1e-07, class_weight=None, presort=False)
    # RandomForestClassifier(n_estimators=10, criterion='gini', max_depth=None,
    #                        min_samples_split=2, min_samples_leaf=1,
    #                        min_weight_fraction_leaf=0.0, max_features='auto',
    #                        max_leaf_nodes=None, min_impurity_split=1e-07,
    #                        bootstrap=True, oob_score=False, n_jobs=1,
    #                        random_state=None, verbose=0, warm_start=False,
    #                        class_weight=None)
    rf = RandomForestClassifier(n_estimators=5)
    clf1 = tree.DecisionTreeClassifier(max_leaf_nodes=10, class_weight=None)
    x = df[list(markov_blanket)].values
    y = df["TAR10"].values
    n_samples = x.shape[0]
    n_train = int(np.round(p_train * n_samples))
    xt = x[:n_train, :]
    yt = y[:n_train]
    n_check = n_samples - n_train
    xc = x[n_train:, :]
    yc = y[n_train:]
    ynames = ["lateral", "alcista"]
    xnames = list(markov_blanket)
    clf1.fit(xt, yt)
    sys.stdout.write("Result INS is {}\n".format(clf1.score(xt, yt)))
    sys.stdout.write("Result OOS is {}\n".format(clf1.score(xc, yc)))
    scores = confusion_matrix(yt, clf1.predict(xt), labels=[0, 1])
    scores2 = confusion_matrix(yc, clf1.predict(xc), labels=[0, 1])
    print(scores)
    print(scores2)
    tree.export_graphviz(clf1, out_file=r'D:\MLmaster\Tree.dot',
                         class_names=ynames, feature_names=xnames)
    return rf
def iris_demo():
    clf = tree.DecisionTreeClassifier()
    iris = load_iris()
    # iris.data is 150x4; iris.target holds the class labels 0, 1, 2 (150x1)
    clf = clf.fit(iris.data, iris.target)
    dot_file = 'tree.dot'
    tree.export_graphviz(clf, out_file=dot_file)
    visualize_tree(clf, iris.feature_names, dot_file)
    # (graph,) = pydot.graph_from_dot_file('tree.dot')
    # graph.write_png('somefile.png')
def loan_demo():
    dt = tree.DecisionTreeClassifier()
    X, Y = get_loan_data_lh()
    dt = dt.fit(X, Y)
    dot_file = 'loan.dot'
    tree.export_graphviz(dt, out_file=dot_file)
    feature_names = ['age', 'has work', 'own house', 'loan level']
    visualize_tree(dt, feature_names, dot_file)
def test_graphviz_errors():
    # Check for errors of export_graphviz
    clf = DecisionTreeClassifier(max_depth=3, min_samples_split=2)
    clf.fit(X, y)

    # Check feature_names error
    out = StringIO()
    assert_raises(IndexError, export_graphviz, clf, out, feature_names=[])

    # Check class_names error
    out = StringIO()
    assert_raises(IndexError, export_graphviz, clf, out, class_names=[])
def classifyTree(Xtr, ytr, Xte, yte, splitCriterion="gini", maxDepth=0, visualizeTree=False): """ Classifies data using CART """ try: accuracyRate, probabilities, timing = 0.0, [], 0.0 # Perform classification cartClassifier = tree.DecisionTreeClassifier(criterion=splitCriterion, max_depth=maxDepth) startTime = time.time() prettyPrint("Training a CART tree for classification using \"%s\" and maximum depth of %s" % (splitCriterion, maxDepth), "debug") cartClassifier.fit(numpy.array(Xtr), numpy.array(ytr)) prettyPrint("Submitting the test samples", "debug") predicted = cartClassifier.predict(Xte) endTime = time.time() # Compare the predicted and ground truth and append result to list accuracyRate = round(metrics.accuracy_score(predicted, yte), 2) # Also append the probability estimates probs = cartClassifier.predict_proba(Xte) probabilities.append(probs) timing = endTime-startTime # Keep track of performance if visualizeTree: # Visualize the tree dot_data = StringIO() tree.export_graphviz(cartClassifier, out_file=dot_data) graph = pydot.graph_from_dot_data(dot_data.getvalue()) prettyPrint("Saving learned CART to \"tritonTree_%s.pdf\"" % getTimestamp(), "debug") graph.write_pdf("tree_%s.pdf" % getTimestamp()) except Exception as e: prettyPrint("Error encountered in \"classifyTree\": %s" % e, "error") return accuracyRate, timing, probabilities, predicted
def visualize_tree(clf, outname, headers):
    # NOTE: sklearn.externals.six was removed in scikit-learn 0.23;
    # on newer versions use io.StringIO instead.
    from sklearn.externals.six import StringIO
    import pydot
    dot_data = StringIO()
    tree.export_graphviz(clf, out_file=dot_data, feature_names=list(headers))
    graph = pydot.graph_from_dot_data(dot_data.getvalue().decode('latin1').encode('utf8'))
    graph.write_pdf(outname)
def decision_tree_classifier(all_feature_data):
    input_data = np.asarray(all_feature_data[0])
    label = np.asarray(all_feature_data[1])
    data = input_data[:, :]
    # data = sklearn.preprocessing.normalize(data, axis=0)
    # clf = DecisionTreeClassifier(criterion="gini",
    #                              splitter="best",
    #                              max_features=None,
    #                              max_depth=5,
    #                              min_samples_leaf=1,
    #                              min_samples_split=2,
    #                              class_weight=None)
    clf = DecisionTreeClassifier()
    fit_clf = clf.fit(data, label)
    result = fit_clf.predict(data)
    accuracy = float(np.sum(result == label)) / len(label)
    print("Training accuracy is " + str(accuracy))
    with open("cityscapes.dot", 'w') as f:
        f = tree.export_graphviz(clf, out_file=f)
    # dot_data = StringIO()
    # tree.export_graphviz(clf, out_file=dot_data)
    # graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
    # graph.write_pdf("cityscapes.pdf")
    # scores = cross_val_score(clf, data, label, cv=10)
    # print("Cross validation score is " + str(scores.mean()))
    return fit_clf
def makePrediction(para, rawData, totalNumRows, labels):
    traingSetSize = int(math.floor(totalNumRows * para['trainingSetPercent']))
    print('%d instances are selected as training dataset!' % traingSetSize)
    trainX = np.array(rawData[0:traingSetSize])
    trainY = np.array(labels[0:traingSetSize])
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(trainX, trainY)
    feaNames = ['event' + str(i) for i in range(1, 386)]
    classNames = trainY
    # generate the decision tree figure
    # dot_data = StringIO()
    # tree.export_graphviz(clf, out_file=dot_data, feature_names=feaNames,
    #                      class_names=classNames,
    #                      filled=True, rounded=True,
    #                      special_characters=True)
    # graph = pydot.graph_from_dot_data(dot_data.getvalue())
    # graph.write_png('sample_SOSP.png')
    testingX = rawData[traingSetSize:]
    testingY = labels[traingSetSize:]
    prediction = list(clf.predict(testingX))
    if len(prediction) != len(testingY):
        print('prediction and testingY have different length and SOMEWHERE WRONG!')
    sameLabelNum = 0
    sameFailureNum = 0
    for i in range(len(testingY)):
        if prediction[i] == testingY[i]:
            sameLabelNum += 1
            if prediction[i] == 1:
                sameFailureNum += 1
    accuracy = float(sameLabelNum) / len(testingY)
    print('accuracy is %.5f:' % accuracy)
    predictSuccess = 0
    predictFailure = 0
    for item in prediction:
        if item == 0:
            predictSuccess += 1
        elif item == 1:
            predictFailure += 1
    testSuccess = 0
    testFailure = 0
    for tt in testingY:
        if tt == 0:
            testSuccess += 1
        elif tt == 1:
            testFailure += 1
    print(predictSuccess, predictFailure, testSuccess, testFailure, sameFailureNum)
    if sameFailureNum == 0:
        print('precision is 0 and recall is 0')
        # define the metrics so the return statement below cannot raise a NameError
        precision = recall = F_measure = 0
    else:
        precision = float(sameFailureNum) / predictFailure
        print('precision is %.5f' % precision)
        recall = float(sameFailureNum) / testFailure
        print('recall is %.5f' % recall)
        F_measure = 2 * precision * recall / (precision + recall)
        print('F_measure is %.5f' % F_measure)
    return predictFailure, testFailure, sameFailureNum, precision, recall, F_measure
def do_training(self, speech_corpus, text_corpus):
    if self.model:  # if already trained...
        return

    ## 1) get data:
    #### [Added dump_features method to Utterance class, use that:]
    x_data = []
    y_data = []
    for utterance in speech_corpus:
        utt_feats = utterance.dump_features(self.target_nodes,
                                            self.context_list, return_dict=True)
        for example in utt_feats:
            assert 'response' in example, example
            y_data.append({'response': example['response']})
            del example['response']
            x_data.append(example)

    ## Handle categorical features (strings) but keep numerical ones
    ## as they are:
    x_vectoriser = DictVectorizer()
    x_data = x_vectoriser.fit_transform(x_data).toarray()
    y_vectoriser = DictVectorizer()
    y_data = y_vectoriser.fit_transform(y_data).toarray()

    if False:
        print(x_data)
        print(y_data)

    ## 2) train classifier:
    model = tree.DecisionTreeClassifier(min_samples_leaf=self.min_samples_leaf)
    model.fit(x_data, y_data)

    print('\n Trained classifier: ')
    print(model)
    print('\n Trained x vectoriser:')
    print(x_vectoriser)
    print('Feature names:')
    print(x_vectoriser.get_feature_names())
    print('\n Trained y vectoriser:')
    print(y_vectoriser)
    print('Feature names:')
    print(y_vectoriser.get_feature_names())

    ## 3) Save classifier by pickling:
    output = open(self.model_file, 'wb')
    pickle.dump([x_vectoriser, y_vectoriser, model], output)
    output.close()

    ## Write ASCII tree representation (which can be plotted):
    tree.export_graphviz(model, out_file=self.model_file + '.dot',
                         feature_names=x_vectoriser.get_feature_names())

    self.verify(self.voice_resources)

# ## reload -- get self.model etc
def classifyTreeKFold(X, y, kFold=2, splitCriterion="gini", maxDepth=0, visualizeTree=False): """ Classifies data using CART and K-Fold cross validation """ try: groundTruthLabels, predictedLabels = [], [] accuracyRates = [] # Meant to hold the accuracy rates # Split data into training and test datasets trainingDataset, testDataset = [], [] trainingLabels, testLabels = [], [] accuracyRates = [] probabilities = [] timings = [] kFoldValidator = KFold(n=len(X), n_folds=kFold, shuffle=False) currentFold = 1 for trainingIndices, testIndices in kFoldValidator: # Prepare the training and testing datasets for trIndex in trainingIndices: trainingDataset.append(X[trIndex]) trainingLabels.append(y[trIndex]) for teIndex in testIndices: testDataset.append(X[teIndex]) testLabels.append(y[teIndex]) # Perform classification startTime = time.time() cartClassifier = tree.DecisionTreeClassifier(criterion=splitCriterion, max_depth=maxDepth) prettyPrint("Training a CART tree for classification using \"%s\" and maximum depth of %s" % (splitCriterion, maxDepth), "debug") cartClassifier.fit(numpy.array(trainingDataset), numpy.array(trainingLabels)) prettyPrint("Submitting the test samples", "debug") predicted = cartClassifier.predict(testDataset) endTime = time.time() # Add that to the groundTruthLabels and predictedLabels matrices groundTruthLabels.append(testLabels) predictedLabels.append(predicted) # Compare the predicted and ground truth and append result to list accuracyRates.append(round(metrics.accuracy_score(predicted, testLabels), 2)) # Also append the probability estimates probs = cartClassifier.predict_proba(testDataset) probabilities.append(probs) timings.append(endTime-startTime) # Keep track of performance if visualizeTree: # Visualize the tree dot_data = StringIO() tree.export_graphviz(cartClassifier, out_file=dot_data) graph = pydot.graph_from_dot_data(dot_data.getvalue()) prettyPrint("Saving learned CART to \"tritonTree_%s.pdf\"" % currentFold, "debug") graph.write_pdf("tritonTree_%s.pdf" % currentFold) trainingDataset, trainingLabels = [], [] testDataset, testLabels = [], [] currentFold += 1 except Exception as e: prettyPrint("Error encountered in \"classifyTreeKFold\": %s" % e, "error") return [], [], [] return accuracyRates, probabilities, timings, groundTruthLabels, predictedLabels
def train_tree_classifer(features, labels, model_output_path):
    """
    train_tree_classifer will train a DecisionTree and write it out to a pdf file

    features: 2D array of each input feature for each sample
    labels: array of string labels classifying each sample
    model_output_path: path for storing the trained tree model
    """
    # save 20% of data for performance evaluation
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(features, labels, test_size=0.2)

    param = [
        {
            "max_depth": [None, 10, 100, 1000, 10000]
        }
    ]

    dtree = tree.DecisionTreeClassifier(random_state=0)

    # 10-fold cross validation with n_jobs workers, since each fold and each
    # parameter set can be trained in parallel
    clf = grid_search.GridSearchCV(dtree, param, cv=10, n_jobs=20, verbose=3)
    clf.fit(X_train, y_train)

    if os.path.exists(model_output_path):
        joblib.dump(clf.best_estimator_, model_output_path)
    else:
        print("Cannot save trained tree model to {0}.".format(model_output_path))

    dot_data = tree.export_graphviz(clf.best_estimator_, out_file=None)
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_pdf('best_tree.pdf')

    print("\nBest parameters set:")
    print(clf.best_params_)

    y_predict = clf.predict(X_test)

    labels = sorted(list(set(labels)))
    print("\nConfusion matrix:")
    print("Labels: {0}\n".format(",".join(labels)))
    print(confusion_matrix(y_test, y_predict, labels=labels))

    print("\nClassification report:")
    print(classification_report(y_test, y_predict))
def decision_tree_manual_classifier(all_feature_data):
    input_data = np.asarray(all_feature_data[0])
    label = np.asarray(all_feature_data[1])
    data_for_manual_tree = []
    for row_index in range(len(all_feature_data[0])):
        current_row = all_feature_data[0][row_index] + [all_feature_data[1][row_index]]
        data_for_manual_tree.append(current_row)

    # # splitting rule
    # set1, set2 = divideset(data_for_manual_tree, 1, 14)
    #
    # print(set1)
    # print(uniquecounts(set1))
    # print("")
    #
    # print(set2)
    # print(uniquecounts(set2))
    #
    # print entropy(set1)
    # print entropy(set2)
    # print entropy(data_for_manual_tree)

    # Use a distinct name so the sklearn `tree` module is not shadowed below
    manual_tree = buildtree(data_for_manual_tree)

    data = input_data[:, :]
    # data = sklearn.preprocessing.normalize(data, axis=0)
    # clf = DecisionTreeClassifier(criterion="gini",
    #                              splitter="best",
    #                              max_features=None,
    #                              max_depth=5,
    #                              min_samples_leaf=1,
    #                              min_samples_split=2,
    #                              class_weight=None)
    for row_index in range(len(all_feature_data[0])):
        to_be_predicted_data = all_feature_data[0][row_index]
        predicted_label = classify(to_be_predicted_data, manual_tree)
    clf = DecisionTreeClassifier()
    fit_clf = clf.fit(data, label)
    result = fit_clf.predict(data)
    accuracy = float(np.sum(result == label)) / len(label)
    print("Training accuracy is " + str(accuracy))
    with open("cityscapes.dot", 'w') as f:
        f = tree.export_graphviz(clf, out_file=f)
    return fit_clf