The following six code examples, extracted from open-source Python projects, illustrate how to use xgboost.plot_importance().
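Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below): it trains a small booster on synthetic data and plots its feature importances.

import matplotlib.pyplot as plt
import numpy as np
import xgboost as xgb

# Synthetic binary-classification data: only f0 and f1 carry signal.
rng = np.random.RandomState(0)
X = rng.rand(100, 5)
y = (X[:, 0] + 0.5 * X[:, 1] > 0.7).astype(int)

dtrain = xgb.DMatrix(X, label=y, feature_names=["f%d" % i for i in range(5)])
model = xgb.train({"objective": "binary:logistic"}, dtrain, num_boost_round=20)

xgb.plot_importance(model, importance_type="weight", max_num_features=10)
plt.show()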
# Imports these snippets rely on:
from operator import itemgetter

import matplotlib.pyplot as plt
import numpy as np
import xgboost as xgb


def fea_plot(xg_model, feature, label, type='weight', max_num_features=None):
    # Left panel: xgboost's importance plot; right panel: per-class means of
    # the selected features, normalized to [-1, 1].
    fig, AX = plt.subplots(nrows=1, ncols=2)
    xgb.plot_importance(xg_model, xlabel=type, importance_type='weight',
                        ax=AX[0], max_num_features=max_num_features)
    fscore = xg_model.get_score(importance_type=type)
    fscore = sorted(fscore.items(), key=itemgetter(1), reverse=True)  # sort scores
    # get_fea_index() is a project helper; a possible implementation is
    # sketched below this example.
    fea_index = get_fea_index(fscore, max_num_features)
    feature = feature[:, fea_index]
    dimension = len(fea_index)
    X = range(1, dimension + 1)
    Yp = np.mean(feature[np.where(label == 1)[0]], axis=0)
    Yn = np.mean(feature[np.where(label != 1)[0]], axis=0)
    for i in range(0, dimension):
        param = np.fmax(Yp[i], Yn[i])
        Yp[i] /= param
        Yn[i] /= param
    p1 = AX[1].bar(X, +Yp, facecolor='#ff9999', edgecolor='white')
    p2 = AX[1].bar(X, -Yn, facecolor='#9999ff', edgecolor='white')
    AX[1].legend((p1, p2), ('Malware', 'Normal'))
    AX[1].set_title('Comparison of selected features by their means')
    AX[1].set_xlabel('Feature Index')
    AX[1].set_ylabel('Mean Value')
    AX[1].set_ylim(-1.1, 1.1)
    plt.xticks(X, fea_index + 1, rotation=80)
    plt.suptitle('Feature Selection results')
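The get_fea_index() helper used above is project-specific and not shown on this page. A minimal sketch of what it plausibly does, assuming xgboost's default "f0", "f1", ... feature-name scheme:

def get_fea_index(fscore, max_num_features=None):
    # Hypothetical reconstruction: fscore is the sorted list of
    # (feature_name, score) pairs built above; map "f<N>" names back to
    # integer column indices, keeping at most max_num_features of them.
    fea_index = [int(name[1:]) for name, _ in fscore]
    if max_num_features is not None:
        fea_index = fea_index[:max_num_features]
    return np.array(fea_index)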
def run_train_validation(self):
    x_train, y_train, x_validation, y_validation = self.get_train_validationset()
    dtrain = xgb.DMatrix(x_train, label=y_train, feature_names=x_train.columns)
    dvalidation = xgb.DMatrix(x_validation, label=y_validation,
                              feature_names=x_validation.columns)
    self.set_xgb_parameters()
    evals = [(dtrain, 'train'), (dvalidation, 'eval')]
    model = xgb.train(self.xgb_params, dtrain, evals=evals,
                      **self.xgb_learning_params)
    xgb.plot_importance(model)
    plt.show()
    print("features used:\n {}".format(self.get_used_features()))
    return
def plot_importance(importance_type='weight'):
    """Plot importances for `model`, a booster defined elsewhere in the file.

    How the importance is calculated: either "weight", "gain", or "cover".
    - "weight" is the number of times a feature appears in a tree
    - "gain" is the average gain of splits which use the feature
    - "cover" is the average coverage of splits which use the feature,
      where coverage is defined as the number of samples affected by the split
    """
    xgb.plot_importance(model, importance_type=importance_type,
                        max_num_features=40)
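To inspect the raw numbers behind each variant rather than a plot, the scores can be read directly from the booster (a short sketch; `model` is assumed to be a trained Booster as in the snippet above):

# Print the per-feature score under each importance definition.
for imp_type in ("weight", "gain", "cover"):
    print(imp_type, model.get_score(importance_type=imp_type))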
def plot_importance(self):
    ax = xgb.plot_importance(self.model)
    self.save_topn_features()
    return ax
def save_topn_features(self, fname="XGBRegressor_topn_features.txt", topn=-1): ax = xgb.plot_importance(self.model) yticklabels = ax.get_yticklabels()[::-1] if topn == -1: topn = len(yticklabels) else: topn = min(topn, len(yticklabels)) with open(fname, "w") as f: for i in range(topn): f.write("%s\n"%yticklabels[i].get_text())
def save_topn_features(self, fname="XGBClassifier_topn_features.txt", topn=10): ax = xgb.plot_importance(self.model) yticklabels = ax.get_yticklabels()[::-1] if topn == -1: topn = len(yticklabels) else: topn = min(topn, len(yticklabels)) with open(fname, "w") as f: for i in range(topn): f.write("%s\n"%yticklabels[i].get_text())