The following 36 code examples, extracted from open-source Python projects, illustrate how to use matplotlib.pyplot.barh().
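Before diving into the project code, here is a minimal, self-contained sketch of the basic call pattern; the category names and counts below are made up for illustration. barh() draws horizontal bars, so bar positions go on the y-axis and bar lengths along the x-axis, and yticks() attaches the labels:

import matplotlib.pyplot as plt
import numpy as np

# Hypothetical data: three categories and their counts.
labels = ['alpha', 'beta', 'gamma']
values = [12, 7, 3]
y_pos = np.arange(len(labels))  # one y position per bar

plt.barh(y_pos, values, align='center')  # bar lengths taken from `values`
plt.yticks(y_pos, labels)                # label each bar with its category
plt.xlabel('Count')
plt.show()

Most of the examples below follow this same pattern: compute positions with np.arange(), call barh(), then attach labels with yticks().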
def comparison_graph(N, freq1, freq2, new_list, user1, user2):
    ind = np.arange(N)  # the x locations for the groups
    width = 0.25        # the width of the bars

    fig, ax = plt.subplots(figsize=(25, 10))
    rects1 = ax.barh(ind, freq1, width, color='#9DB9AE')
    rects2 = ax.barh(ind + width, freq2, width, color='#645D56')

    t = ()
    for i in new_list:
        t = t + (i,)

    # add some text for labels, title and axes ticks
    ax.set_xlabel('Play Count')
    ax.set_title('Top common artists')
    ax.set_yticks(ind + width / 2)
    ax.set_yticklabels(t)

    ax.legend((rects1[0], rects2[0]), (user1, user2))
    plt.savefig('static/' + user1 + user2 + '.jpg')
def plot_preds(image, preds):
    """Displays image and the top-n predicted probabilities in a bar graph

    Args:
        image: PIL image
        preds: list of predicted labels and their probabilities
    """
    plt.imshow(image)
    plt.axis('off')

    plt.figure()
    labels = ("cat", "dog")
    plt.barh([0, 1], preds, alpha=0.5)
    plt.yticks([0, 1], labels)
    plt.xlabel('Probability')
    plt.xlim(0, 1.01)
    plt.tight_layout()
    plt.show()
def plot_preds(image, preds):
    """Displays image and the top-n predicted probabilities in a bar graph

    Args:
        image: PIL image
        preds: list of predicted labels and their probabilities
    """
    plt.imshow(image)
    plt.axis('off')

    plt.figure()
    order = list(reversed(range(len(preds))))
    bar_preds = [pr[2] for pr in preds]
    labels = [pr[1] for pr in preds]  # a list, not a one-shot generator
    plt.barh(order, bar_preds, alpha=0.5)
    plt.yticks(order, labels)
    plt.xlabel('Probability')
    plt.xlim(0, 1.01)
    plt.tight_layout()
    plt.show()
def horizontal_bar():
    """Simple demo of a horizontal bar chart."""
    plt.rcdefaults()

    # Example data
    people = ('Tom', 'Dick', 'Harry', 'Slim', 'Jim')
    y_pos = np.arange(len(people))
    performance = 3 + 10 * np.random.rand(len(people))
    error = np.random.rand(len(people))

    plt.barh(y_pos, performance, xerr=error, align='center')
    plt.yticks(y_pos, people)
    plt.xlabel('Performance')
    plt.title('How fast do you want to go today?')
    plt.show()
def pylot_show():
    count = []
    leixing = []
    leixing_number = {}
    with open("000000_0.txt", "r", encoding="utf-8") as fp:
        for line in fp.readlines():
            leixing_number[line.strip().split("\t")[0]] = int(line.strip().split("\t")[1])
            leixing.append(line.strip().split("\t")[0])
            count.append(int(line.strip().split("\t")[1]))
    y_pos = np.arange(len(leixing))  # y positions for the bars
    plt.barh(y_pos, count, align='center', alpha=0.4)  # alpha sets bar transparency (0~1)
    plt.yticks(y_pos, leixing)  # y-axis tick labels
    for count, y_pos in zip(count, y_pos):
        # annotate each bar with its value
        plt.text(count, y_pos, count,
                 horizontalalignment='center', verticalalignment='center',
                 weight='bold')
    plt.ylim(+40.0, -1.0)  # reversed limits invert the y-axis direction
    plt.title(u'??????')   # chart title
    plt.ylabel(u'????')    # y-axis label
    plt.subplots_adjust(bottom=0.15)
    plt.xlabel(u'????')    # x-axis label
    plt.savefig('Y_leixing.png')  # save the figure
    plt.show()
def plot_heater(ax, data):
    """plots deiced heater status i.e. ON/OFF"""
    if 'PRTAFT_deiced_temp_flag' not in data:
        return
    ax.text(0.05, 0.98, 'Heater', axes_title_style, transform=ax.transAxes)
    ax.grid(False)
    ax.set_ylim(0, 1)
    ax.yaxis.set_major_locator(plt.NullLocator())
    plt.setp(ax.get_xticklabels(), visible=False)
    heater_status = np.array(data['PRTAFT_deiced_temp_flag'], dtype=np.int8)
    toggle = np.diff(heater_status.ravel())
    time_periods = zip(list(np.where(toggle == 1)[0]),
                       list(np.where(toggle == -1)[0]))
    for t in time_periods:
        # plt.barh(0, data['mpl_timestamp'][0,1], left=data['mpl_timestamp'][0,0])
        width = data['mpl_timestamp'][t[1], 0] - data['mpl_timestamp'][t[0], 0]
        ax.add_patch(patches.Rectangle((data['mpl_timestamp'][t[0], 0], 0),
                                       width, 1, alpha=0.8, color='#ffaf4d'))
    return ax
def recommend_dishes2(result):
    if result is not None:
        # plt.clf()
        recommend_dishes = sorted(result["recommend_dishes"].items(),
                                  key=lambda dish: dish[1])[-30:]
        title = "?????(?30)"
        x_label = "??"
        labels = [dish[0] for dish in recommend_dishes]
        label_pos = tuple(range(len(labels)))
        heights = tuple([dish[1] for dish in recommend_dishes])
        plt.title(title, fontsize=20)
        plt.xlabel(x_label)
        plt.yticks(label_pos, labels)
        # note: bottom=/width= is the matplotlib 1.x barh() keyword form;
        # on matplotlib >= 2.0 this would be plt.barh(label_pos, heights, ...)
        rects = plt.barh(bottom=label_pos, width=heights, alpha=0.35, align="center")
        barh_auto_label(rects)
        plt.show()
def topic(result):
    from random import randint
    if result:
        fig, ax = plt.subplots()
        index = tuple(range(5))
        h1 = []
        h2 = []
        for i in range(5):
            h1.append(randint(20, 50))
            h2.append(randint(-10, -2))
        a = ax.barh(index, h1, color="r", alpha=.5)
        b = ax.barh(index, h2, color="b", alpha=.5)
        ax.set_yticks([i + 0.5 for i in index])
        ax.set_yticklabels(("??", "??", "??", "??", "??"))
        ax.margins(0.2)
        ax.legend((a[0], b[0]), ('??', '??'))
        plt.show()
def horizontal_hist(items, title=None, axis_label=None, color=None, height=10,
                    width=20, reverse=False):
    """
    Plots a histogram of values and frequencies.

    Arguments:
        items (iterable[any])      => Example, [1, 2, 3, 1, 2]
        title (Optional[str])      => Example, "Resulting histogram".
        axis_label (Optional[str]) => Example, "y-axis".
        color (Optional[str])      => Default: matplotlib's default plot color, a royal blue
        height (Optional[int])     => Default: 10
        width (Optional[int])      => Default: 20
        reverse (Optional[bool])   => From top to bottom in order of decreasing frequency or not.

    Returns:
        None, however a matplotlib figure should be produced.
    """
    unique_items, item_counts = np.unique(items, return_counts=True)
    item_counts, unique_items = zip(*sorted(zip(item_counts, unique_items),
                                            reverse=reverse))
    pos = np.arange(len(unique_items)) + 0.5
    plt.figure(figsize=(width, height))
    plt.barh(pos, item_counts, align='center', color=color)
    plt.yticks(pos, unique_items)
    plt.xlabel('Frequency')
    if axis_label:
        plt.ylabel(axis_label)
    if title:
        plt.title(title)
    plt.show()
def barHonGraphics(xLabel, yLabel, xValueList, yValueList, graphicTitle='??', xWidth=0.5):
    plt.barh(numpy.arange(len(xValueList)), yValueList, alpha=0.4)
    plt.yticks(numpy.arange(len(xValueList)), xValueList, fontproperties=font_set)
    plt.xlabel(yLabel, fontproperties=font_set)
    plt.ylabel(xLabel, fontproperties=font_set)
    plt.title(graphicTitle, fontproperties=font_set)
    plt.show()
def plot(counts):
    labels = [x[0] for x in counts]  # lists, not map objects, so len() works on Python 3
    values = [y[1] for y in counts]
    plt.barh(range(len(values)), values, color='green')
    plt.yticks(range(len(values)), labels)
    plt.show()
def chart_charge_count(traffic_by_court, filename):
    traffic_by_court.sort(key=lambda x: x['chargeCount'])
    plt.clf()
    plt.title('Tickets by Locality (2015)')
    plt.xlabel('Tickets')
    rects = plt.barh(
        range(len(traffic_by_court)),
        [x['chargeCount'] for x in traffic_by_court],
        tick_label=[x['localityNames'] for x in traffic_by_court])

    xlim_max = plt.gca().get_xlim()[1]
    base_unit = int(xlim_max * 0.005)
    under_margin = int(xlim_max * 0.1)
    for rect in rects:
        width = rect.get_width()
        position = width - base_unit
        horizontal_align = 'right'
        color = 'white'
        if width < under_margin:
            # Short bar: pad the value and place it outside the bar
            position = width + base_unit
            horizontal_align = 'left'
            color = 'gray'
        plt.text(position, rect.get_y(), '%d' % width,
                 va='bottom', ha=horizontal_align, color=color)

    plt.gca().set_ylim(-1, len(rects))
    plt.tight_layout()
    plt.savefig(filename)
def test_graph(config, filename, n):
    plt.rcdefaults()
    people = ('Tom', 'Dick', 'Harry', 'Slim', 'Jim')
    y_pos = np.arange(len(people))
    performance = 3 + 10 * np.random.rand(len(people))
    error = np.random.rand(len(people))

    plt.barh(y_pos, performance, xerr=error, align='center', alpha=0.4)
    plt.yticks(y_pos, people)
    plt.xlabel("Step " + str(n))
    plt.title('Are samples measurements working?')
    plt.savefig(filename)
def gen_graph(title, labels, frequency, filename, colour):
    x_pos = np.arange(len(labels))
    plt.figure(figsize=(25, 10))
    plt.barh(x_pos, frequency, align='center', alpha=0.5, height=0.5, color=colour)
    plt.yticks(x_pos, labels)
    plt.xlabel('Playcounts')
    plt.title(title)
    plt.savefig('static/' + filename)
def main():
    newdata = make_data()
    setup_plot()
    allticks = []
    for i, (oracle, data) in enumerate(zip(ORACLES, newdata)):
        pos = i * (max(1, Nsub) + 1) + np.array(range(max(1, Nsub) + 1))
        handles = []
        for j, dat in enumerate(data):
            h = plt.barh(pos[1] + j, dat, color=COLORS[j], edgecolor=COLORS[j])
            handles.append(h)
        allticks.extend(zip([x for x in pos], ['', oracle['name'], '']))
    finalize_plot(allticks, handles)
    plt.savefig(plotname + '.pdf')
    plt.show()
def plot_compare_feature_levels(data):
    # data: [(name, value), (name, value), ...]
    ra = range(len(data))
    plt.barh(ra, [x[1] for x in data], align='center', alpha=0.4)
    plt.yticks(ra, [x[0] for x in data])
def barh_plot():
    """barh plot"""
    # example data
    means_men = (20, 35, 30, 35, 27)
    means_women = (25, 32, 34, 20, 25)

    # bar positions and height
    index = np.arange(len(means_men))
    bar_height = 0.35

    # draw the two groups of horizontal bars
    plt.barh(index, means_men, height=bar_height, alpha=0.2, color="b", label="Men")
    plt.barh(index + bar_height, means_women, height=bar_height, alpha=0.8, color="r", label="Women")
    plt.legend(loc="upper right", shadow=True)

    # annotate each bar with its value
    for x, y in zip(index, means_men):
        plt.text(y + 0.3, x + (bar_height / 2), y, ha="left", va="center")
    for x, y in zip(index, means_women):
        plt.text(y + 0.3, x + bar_height + (bar_height / 2), y, ha="left", va="center")

    # axis limits, labels, and tick labels
    plt.xlim(0, 45)
    plt.xlabel("Scores")
    plt.ylabel("Group")
    plt.yticks(index + bar_height, ("A?", "B?", "C?", "D?", "E?"))

    # show the figure
    plt.show()
    return

# barh_plot()
def single_datapoint_bar_graph(http_metrics):
    requests = [x.route_id for x in http_metrics]  # list, so len() works on Python 3
    y_pos = np.arange(len(requests))
    aggregate = [x.aggregate for x in http_metrics]
    plt.barh(y_pos, aggregate, align='center', alpha=0.5)
    plt.yticks(y_pos, requests)
    plt.ylabel('HTTP Endpoints')
    plt.xlabel('Response time (ms)')
    plt.title('Aggregate Request Response Time')
    plt.tight_layout()
    plt.show()
def plotBarH(passNames, passData):
    plugins.core.sendOutput('Generating bar chart...', 'stderr')
    floatData = [float(x) for x in passData]
    xPos = numpy.arange(len(passData))
    plot.barh(xPos, floatData[::-1], align='center')
    plot.yticks(xPos, passNames[::-1])
def plot_silhouettes(X, y):
    cluster_labels = np.unique(y)
    n_clusters = cluster_labels.shape[0]
    silhouette_vals = silhouette_samples(X, y, metric='euclidean')
    y_ax_lower = 0
    y_ax_upper = 0
    yticks = []
    for i, c in enumerate(cluster_labels):
        c_silhouette_vals = silhouette_vals[y == c]
        c_silhouette_vals.sort()
        y_ax_upper += len(c_silhouette_vals)
        color = cm.jet(i / n_clusters)
        plt.barh(
            range(y_ax_lower, y_ax_upper),
            c_silhouette_vals,
            height=1.0,
            edgecolor='none',
            color=color,
        )
        yticks.append((y_ax_lower + y_ax_upper) / 2)
        y_ax_lower += len(c_silhouette_vals)

    silhouette_avg = np.mean(silhouette_vals)
    plt.axvline(silhouette_avg, color='red', linestyle='--')
    plt.yticks(yticks, cluster_labels + 1)
    plt.ylabel('Cluster')
    plt.xlabel('Silhouette coefficient')
    plt.show()
def get_feature_importance(model, fea_names):
    fea_imp = []
    tops = []
    fea = model.feature_importances_
    idx = np.argsort(fea)
    fea_imp.append(fea)
    tops.append(fea_names[idx][:3])
    # print fea_names[idx][:3]
    plt.figure()
    plt.title("Feature importances")
    plt.barh(range(len(fea)), fea[idx], align="center")
    plt.yticks(range(len(fea)), fea_names[idx])
    plt.ylim([-1, len(fea)])
    plt.show()
    return fea_imp, tops
def plot_fea_impor(fea, fea_names, idx):
    plt.figure()
    plt.title("Feature importances")
    plt.barh(range(len(fea)), fea[idx], color="r", align="center")
    # note: for a horizontal bar chart these were probably meant to be
    # yticks/ylim; as written, xticks/xlim label the value axis
    plt.xticks(range(len(fea)), fea_names[idx])
    plt.xlim([-1, len(fea)])
    # plt.show()
def barh_dic(f, title=None):
    import operator
    y = sorted(f.items(), key=operator.itemgetter(1))
    keys = [a[0] for a in y]
    vals = [a[1] for a in y]
    plt.barh(range(len(y)), vals, align='center')
    plt.yticks(range(len(y)), keys)
    if title:
        plt.title(title)
    plt.show()
def __save_feature_importances(self, importance, fig_name="feature_importance.png"):
    """Saves feature importances plot

    Parameters
    ----------
    importance : dict
        Dictionary with features (key) and importances (values)

    fig_name : str, default = "feature_importance.png"
        figure name

    Returns
    -------
    NoneType
        None
    """
    if (len(importance) > 0):
        # Generates plot of feature importances
        importance_sum = np.sum(list(importance.values()))
        tuples = [(k, np.round(importance[k] * 100. / importance_sum, 2))
                  for k in importance]
        tuples = sorted(tuples, key=lambda x: x[1])
        labels, values = zip(*tuples)
        plt.figure(figsize=(20, int(len(importance) * 0.3) + 1))
        ylocs = np.arange(len(values))
        plt.barh(ylocs, values, align='center')
        for x, y in zip(values, ylocs):
            plt.text(x + 1, y, x, va='center')
        plt.yticks(ylocs, labels)
        plt.title("Feature importance (%)")
        plt.grid(True)
        plt.savefig(fig_name)
        plt.close()

        # Leak Detection
        leak = sorted(dict(tuples).items(), key=operator.itemgetter(1))[-1]
        if ((leak[-1] > 70) & (len(importance) > 1)):
            warnings.warn("WARNING : " + str(leak[0]) + " is probably a leak ! "
                          "Please check and delete it...")
    else:
        pass
def __plot_feature_importances(self, importance, top=10):
    """Plots top feature importances

    Parameters
    ----------
    importance : dict
        Dictionary with features (key) and importances (values)

    top : int
        Number of top features to display.

    Returns
    -------
    NoneType
        None
    """
    if (len(importance) > 0):
        # Plot feature importances
        importance_sum = np.sum(list(importance.values()))
        tuples = [(k, np.round(importance[k] * 100. / importance_sum, 2))
                  for k in importance]
        tuples = sorted(tuples, key=lambda x: x[1])[-top:]
        labels, values = zip(*tuples)
        plt.figure(figsize=(20, top * 0.3 + 1))
        ylocs = np.arange(len(values))
        plt.barh(ylocs, values, align='center')
        for x, y in zip(values, ylocs):
            plt.text(x + 1, y, x, va='center')
        plt.yticks(ylocs, labels)
        plt.title("Top " + str(top) + " feature importance (%)")
        plt.grid(True)
        plt.show()
        plt.close()
    else:
        pass
def chart_miles_per_charge(traffic_by_court, filename):
    traffic_by_court.sort(key=lambda x: x['milesPerChargeStd'], reverse=True)
    plt.clf()
    title = 'Relative Frequency of Speeding Tickets in Virginia (2015)\n'
    title += '(miles driven / ticket)\n'
    title += 'More Tickets'
    title += ' ' * 100
    title += 'Fewer Tickets'
    plt.title(title)
    plt.xlabel('Standard Deviation')
    plt.ylabel('Rank')
    rects = plt.barh(
        range(len(traffic_by_court)),
        [x['milesPerChargeStd'] for x in traffic_by_court])

    base_unit = 0.025
    for rect, x in zip(rects, traffic_by_court):
        # Write the locality name
        horizontal_align = 'left' if rect.get_x() < 0 else 'right'
        position = base_unit if rect.get_x() < 0 else (base_unit * -1)
        plt.text(position, rect.get_y(), x['localityNames'],
                 va='bottom', ha=horizontal_align)

        # Write the data figure
        position = rect.get_x() if rect.get_x() < 0 else rect.get_width()
        horizontal_align = 'right' if rect.get_x() < 0 else 'left'
        color = 'gray'
        if rect.get_width() > 0.2:
            color = 'white'
            horizontal_align = 'left' if rect.get_x() < 0 else 'right'
            position += base_unit if horizontal_align == 'left' else base_unit * -1
        if position > 1.5:
            position = 1.6 - base_unit
        plt.text(position, rect.get_y(),
                 '%d K' % (int(x['milesPerCharge']) / 1000),
                 va='bottom', ha=horizontal_align, color=color)

    plt.gca().set_ylim(-1, len(rects))
    plt.gca().set_xlim(-1.6, 1.6)
    plt.yticks(range(0, len(traffic_by_court)),
               reversed(range(1, len(traffic_by_court) + 1)))
    plt.tight_layout()

    # Save the figure
    plt.savefig(filename)
def get_head_ngram_statistics(questions, correct_model1, correct_model2,
                              correct_model1_and_model2,
                              correct_model1_and_not_model2,
                              correct_model2_and_not_model1,
                              output_dir, num_grams=2, top_count=25):
    # Head ngram statistics
    head_ngrams = get_head_ngrams(questions, num_grams)

    # Get head_ngram_frequencies (hnf)
    hnf_all = get_head_ngram_frequencies(questions, head_ngrams, num_grams)
    hnf_correct_model1 = get_head_ngram_frequencies(
        {qid: questions[qid] for qid in correct_model1}, head_ngrams, num_grams)
    hnf_correct_model2 = get_head_ngram_frequencies(
        {qid: questions[qid] for qid in correct_model2}, head_ngrams, num_grams)
    hnf_correct_model1_and_model2 = get_head_ngram_frequencies(
        {qid: questions[qid] for qid in correct_model1_and_model2}, head_ngrams, num_grams)
    hnf_correct_model1_and_not_model2 = get_head_ngram_frequencies(
        {qid: questions[qid] for qid in correct_model1_and_not_model2}, head_ngrams, num_grams)
    hnf_correct_model2_and_not_model1 = get_head_ngram_frequencies(
        {qid: questions[qid] for qid in correct_model2_and_not_model1}, head_ngrams, num_grams)

    sorted_bigrams_all = sorted(hnf_all.items(), key=lambda x: x[1], reverse=True)
    top_bigrams = [x[0] for x in sorted_bigrams_all[0:top_count]]
    counts_total = [hnf_all[x] for x in top_bigrams]
    counts_model1 = [hnf_correct_model1[x] for x in top_bigrams]
    counts_model2 = [hnf_correct_model2[x] for x in top_bigrams]
    counts_model1_and_model2 = [hnf_correct_model1_and_model2[x] for x in top_bigrams]
    counts_model1_and_not_model2 = [hnf_correct_model1_and_not_model2[x] for x in top_bigrams]
    counts_model2_and_not_model1 = [hnf_correct_model2_and_not_model1[x] for x in top_bigrams]

    top_bigrams_with_counts = []
    for cc in range(len(top_bigrams)):
        top_bigrams_with_counts.append('{0} ({1})'.format(top_bigrams[cc], counts_total[cc]))

    plt.clf()
    fig, ax = plt.subplots(figsize=(6, 10))
    ylocs = list(range(top_count))
    counts_model1_percent = 100 * np.array(counts_model1) / np.array(counts_total)
    plt.barh([top_count - x for x in ylocs], counts_model1_percent,
             height=0.4, alpha=0.5, color='#EE3224', label=top_bigrams)
    counts_model2_percent = 100 * np.array(counts_model2) / np.array(counts_total)
    plt.barh([top_count - x + 0.4 for x in ylocs], counts_model2_percent,
             height=0.4, alpha=0.5, color='#2432EE', label=top_bigrams)
    ax.set_yticks([top_count - x + 0.4 for x in ylocs])
    ax.set_yticklabels(top_bigrams_with_counts)
    ax.set_ylim([0.5, top_count + 1])
    ax.set_xlim([0, 100])
    plt.subplots_adjust(left=0.28, right=0.9, top=0.9, bottom=0.1)
    plt.xlabel('Percentage of questions with correct answers')
    plt.ylabel('Top N-grams')
    plt.savefig(os.path.join(output_dir, 'ngram_stats_{0}.png'.format(num_grams)))
    plt.close()
def draw_bar_graph(final_top_5, final_top_5_prob, path, m):
    ######################################################################
    # Plot graphic bar with top 5 and the input image                    #
    # Red bar means that the ground truth label is in the top 5          #
    ######################################################################
    y_pos = np.arange(len(final_top_5))
    scores = final_top_5_prob
    truth_labels = open('ground_truth_labels_ilsvrc12.txt')
    for k in range(0, int(m)):
        ground_truth = truth_labels.readline()
    original = cv2.imread(path)

    # Swap Red and Blue color channels BGR -> RGB
    red = original[:, :, 2].copy()
    blue = original[:, :, 0].copy()
    original[:, :, 0] = red
    original[:, :, 2] = blue

    plt.figure(figsize=(5, 8))
    plt.subplot(2, 1, 1)
    fig = plt.imshow(original)
    fig.axes.get_yaxis().set_visible(False)  # Delete axes
    fig.axes.get_xaxis().set_visible(False)

    plt.subplot(2, 1, 2)
    barlist = plt.barh(y_pos, scores, 0.3, align='center', alpha=0.8)
    ground_truth = ground_truth.split(',')

    # If ground truth in Top 5 -> Red Bar
    for i in range(len(final_top_5)):
        if str(ground_truth[0].rstrip()) in str(final_top_5[i]):
            barlist[i].set_color('r')

    plt.yticks(y_pos, final_top_5)  # final_top_5
    plt.title(ground_truth[0], fontweight="bold")
    cur_axes = mplot.gca()
    # plt.xlabel('Score')
    # cur_axes.axes.get_yaxis().set_ticks([])
    cur_axes.axes.get_xaxis().set_ticks([])
    plt.savefig('bar_graph_' + str(m) + '.png')

#########################################################################################
#########################################################################################
# FUNCTION:   CALCULATE_OVERLAP                                                         #
# GOAL:       Calculate if any of predicted bbox overlap at least 50% with ground truth #
# INPUT:      Bbox, xmin, ymin, xmax, ymax, image_width, image_height, k                #
# OUTPUT:     Print overlap percentage                                                  #
# RETURN:     Overlap list                                                              #
#########################################################################################
def lin_reg(training_size):
    print('Extracting Features from txt')
    ds_features = np.loadtxt("features-google-new.txt",
                             usecols=(0, 1, 3, 5, 6, 7, 10, 11, 12, 14, 15, 16))
    ds_outputs = np.loadtxt("features-google-new.txt", usecols=(4,))
    ds_features_train, ds_features_test = ds_features[:training_size], ds_features[training_size:]
    ds_outputs_train, ds_outputs_test = ds_outputs[:training_size], ds_outputs[training_size:]

    # note: min_samples_split=1 is accepted only by older scikit-learn releases;
    # current versions require a value >= 2
    params = {'n_estimators': 1000, 'max_depth': 7, 'min_samples_split': 1,
              'learning_rate': 0.01, 'loss': 'ls'}
    clf = ensemble.GradientBoostingRegressor(**params)
    clf.fit(ds_features_train, ds_outputs_train)
    mse = mean_squared_error(ds_outputs_test, clf.predict(ds_features_test))
    print("MSE: %.4f" % mse)

    predictions = clf.predict(ds_features_test)
    errors = []
    for i in range(0, len(predictions)):
        if ds_outputs_test[i] > 0:
            errors.append(((predictions[i] - ds_outputs_test[i]) / ds_outputs_test[i]) * 100)
    print(str(np.mean(errors)))
    for i in range(0, 100):
        print(str(ds_outputs_test[i]) + "---" + str(predictions[i]))

    feature_names = ['Size', 'Priority', 'TotalAnFee', 'Children', 'Parents',
                     'Mempool', '#Block', 'NumTxInLastBlock',
                     'SecondsSinceLastBlock', 'IncomingTxRate', 'InputValue',
                     'OutputValue']

    # Plot feature importance
    feature_importance = clf.feature_importances_
    feature_importance = 100.0 * (feature_importance / feature_importance.max())
    sorted_idx = np.argsort(feature_importance)
    pos = np.arange(sorted_idx.shape[0]) + .5
    plt.subplot(1, 2, 2)
    plt.barh(pos, feature_importance[sorted_idx], align='center')
    plt.yticks(pos, list(map(lambda x: feature_names[x], sorted_idx)))
    plt.xlabel('Relative Importance')
    plt.title('Variable Importance')
    plt.show()
def lin_reg(training_size):
    print('Extracting Features from txt')
    ds_features = np.loadtxt("features-google-new.txt",
                             usecols=(0, 1, 3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16))
    ds_outputs = np.loadtxt("features-google-new.txt", usecols=(10,))
    ds_features_train, ds_features_test = ds_features[:training_size], ds_features[training_size:]
    ds_outputs_train, ds_outputs_test = ds_outputs[:training_size], ds_outputs[training_size:]

    # note: min_samples_split=1 is accepted only by older scikit-learn releases;
    # current versions require a value >= 2
    params = {'n_estimators': 50, 'max_depth': 6, 'min_samples_split': 1,
              'learning_rate': 0.01, 'loss': 'ls'}
    clf = ensemble.GradientBoostingRegressor(**params)
    clf.fit(ds_features_train, ds_outputs_train)
    mse = mean_squared_error(ds_outputs_test, clf.predict(ds_features_test))
    print("MSE: %.4f" % mse)

    feature_names = ['Size', 'Priority', 'TotalAnFee', 'FeePerKB', 'Children',
                     'Parents', 'Mempool', 'MempoolBytes', 'NumTxInLastBlock',
                     'SecondsSinceLastBlock', 'BlockDiff', 'IncomingTxRate',
                     'InputValue', 'OutputValue']

    ###########################################################################
    # Plot training deviance

    # compute test set deviance
    test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
    for i, y_pred in enumerate(clf.staged_predict(ds_features_test)):
        test_score[i] = clf.loss_(ds_outputs_test, y_pred)

    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.title('Deviance')
    plt.plot(np.arange(params['n_estimators']) + 1, clf.train_score_, 'b-',
             label='Training Set Deviance')
    plt.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',
             label='Test Set Deviance')
    plt.legend(loc='upper right')
    plt.xlabel('Boosting Iterations')
    plt.ylabel('Deviance')

    ###########################################################################
    # Plot feature importance
    feature_importance = clf.feature_importances_
    # make importances relative to max importance
    feature_importance = 100.0 * (feature_importance / feature_importance.max())
    sorted_idx = np.argsort(feature_importance)
    pos = np.arange(sorted_idx.shape[0]) + .5
    plt.subplot(1, 2, 2)
    plt.barh(pos, feature_importance[sorted_idx], align='center')
    plt.yticks(pos, [feature_names[x] for x in sorted_idx])  # list, not map, for Python 3
    plt.xlabel('Relative Importance')
    plt.title('Variable Importance')
    plt.show()