The following 50 code examples, extracted from open-source Python projects, illustrate how to use sklearn.metrics.precision_recall_fscore_support().
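Before the project examples, here is a minimal self-contained sketch (not taken from any of the projects below; the label arrays are purely illustrative) showing the function's return signature: by default it returns four per-class arrays — precision, recall, F-score, and support — while passing an average argument collapses them to single values.

import numpy as np
from sklearn.metrics import precision_recall_fscore_support

# Toy binary labels; the values here are illustrative only.
y_true = np.array([0, 1, 1, 0, 1, 1])
y_pred = np.array([0, 1, 0, 0, 1, 1])

# Per-class scores: each return value is an array with one entry per class.
precision, recall, fscore, support = precision_recall_fscore_support(y_true, y_pred)
print(precision, recall, fscore, support)

# Averaged scores for the positive class (binary problems).
p, r, f, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
print(p, r, f)

Most of the examples below follow one of these two patterns: either they index the per-class arrays (e.g. prec[1] for the positive class) or they request average='binary', 'micro', 'macro', or 'weighted' to get scalar scores.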
def _update_tsg_metrics(self, y_true, y_pred, prob):
    self.tsg_gene_pred = pd.Series(y_pred, self.y.index)
    self.tsg_gene_score = pd.Series(prob, self.y.index)

    # compute metrics for classification
    self.tsg_gene_count[self.num_pred] = sum(y_pred)
    prec, recall, fscore, support = metrics.precision_recall_fscore_support(y_true, y_pred)
    tsg_col = 1  # column for metrics relate to tsg
    self.tsg_precision[self.num_pred] = prec[tsg_col]
    self.tsg_recall[self.num_pred] = recall[tsg_col]
    self.tsg_f1_score[self.num_pred] = fscore[tsg_col]
    self.logger.debug('Tsg Iter %d: Precission=%s, Recall=%s, f1_score=%s' % (
                      self.num_pred + 1, str(prec), str(recall), str(fscore)))

    # compute ROC curve metrics
    fpr, tpr, thresholds = metrics.roc_curve(y_true, prob)
    self.tsg_tpr_array[self.num_pred, :] = interp(self.tsg_fpr_array, fpr, tpr)
    #self.tsg_tpr_array[0] = 0.0

    # compute Precision-Recall curve metrics
    p, r, thresh = metrics.precision_recall_curve(y_true, prob)
    p, r, thresh = p[::-1], r[::-1], thresh[::-1]  # reverse order of results
    self.tsg_precision_array[self.num_pred, :] = interp(self.tsg_recall_array, r, p)
def addProbabilistFold(self, fold_id, true_labels, predicted_proba, threshold = None):
    if threshold is None:
        for threshold in self.thresholds:
            self.addProbabilistFold(fold_id, true_labels, predicted_proba, threshold = threshold)
    else:
        predicted_labels = np.array(predicted_proba) > threshold / 100
        precision, recall, f_score, _ = precision_recall_fscore_support(true_labels, predicted_labels,
                                                                        average = 'binary')
        if len(predicted_labels) == 0:
            fp = 0
            tn = 0
        else:
            conf_matrix = confusion_matrix(true_labels, predicted_labels, [True, False])
            fp = conf_matrix[1][0]
            tn = conf_matrix[1][1]
        fp_tn = fp + tn
        if fp_tn == 0:
            false_alarm_rate = 0
        else:
            false_alarm_rate = fp / (fp + tn)
        self.fold_perf[threshold][fold_id, :] = [precision, recall, false_alarm_rate, f_score]
def addNonProbabilistFold(self, fold_id, true_labels, predicted_labels):
    precision, recall, f_score, _ = precision_recall_fscore_support(true_labels, predicted_labels,
                                                                    average = 'binary')
    accuracy = accuracy_score(true_labels, predicted_labels)
    if len(predicted_labels) == 0:
        fp = 0
        tn = 0
    else:
        conf_matrix = confusion_matrix(true_labels, predicted_labels, [True, False])
        fp = conf_matrix[1][0]
        tn = conf_matrix[1][1]
    fp_tn = fp + tn
    if fp_tn == 0:
        false_alarm_rate = 0
    else:
        false_alarm_rate = fp / (fp + tn)
    self.fold_perf[fold_id, :] = [precision, recall, false_alarm_rate, f_score, accuracy]
def test_svm_estimator(estimator, notes, encodings_train, labels_train, encodings_test, labels_test):
    t0 = time()
    estimator.fit(encodings_train, labels_train)
    print("Time cons: %.2fs, type: %s" % (time() - t0, notes))
    predicted = estimator.predict(encodings_test)
    accuracy = metrics.accuracy_score(labels_test, predicted)
    print("Accuracy: %.5f" % accuracy)
    report = metrics.classification_report(labels_test, predicted)
    print(report)
    prec_recall_f_score = metrics.precision_recall_fscore_support(
        labels_test, predicted)
    print('-' * 10)
    prec_recall_f_score_dict = {
        'prec': np.mean(prec_recall_f_score[0]),
        'recall': np.mean(prec_recall_f_score[1]),
        'f_score': np.mean(prec_recall_f_score[2])
    }
    return accuracy, prec_recall_f_score_dict
def score(self, X, y=None, **kwargs):
    """
    Generates the Scikit-Learn classification_report

    Parameters
    ----------
    X : ndarray or DataFrame of shape n x m
        A matrix of n instances with m features

    y : ndarray or Series of length n
        An array or series of target or class values
    """
    y_pred = self.predict(X)
    keys = ('precision', 'recall', 'f1')
    self.scores = precision_recall_fscore_support(y, y_pred)
    self.scores = map(lambda s: dict(zip(self.classes_, s)), self.scores[0:3])
    self.scores = dict(zip(keys, self.scores))

    return self.draw(y, y_pred)
def score(self, X, y=None, **kwargs):
    """
    Generates the Scikit-Learn precision_recall_fscore_support

    Parameters
    ----------
    X : ndarray or DataFrame of shape n x m
        A matrix of n instances with m features

    y : ndarray or Series of length n
        An array or series of target or class values

    Returns
    -------
    ax : the axis with the plotted figure
    """
    y_pred = self.predict(X)
    self.scores = precision_recall_fscore_support(y, y_pred)
    self.support = dict(zip(self.classes_, self.scores[-1]))

    return self.draw()
def precision_recall_at_x_proportion(test_labels, test_predictions, x_proportion=0.01,
                                     return_cutoff=False):
    """Compute precision, recall, F1 for a specified fraction of the test set.

    :params list test_labels: true labels on test set
    :params list test_predicted: predicted labels on test set
    :params float x_proportion: proportion of the test set to flag
    :params bool return_cutoff: if True return the cutoff probablility
    :returns float precision: fraction correctly flagged
    :returns float recall: fraction of the positive class recovered
    :returns float f1:
    """
    cutoff_index = int(len(test_predictions) * x_proportion)
    cutoff_index = min(cutoff_index, len(test_predictions) - 1)

    sorted_by_probability = np.sort(test_predictions)[::-1]
    cutoff_probability = sorted_by_probability[cutoff_index]

    test_predictions_binary = [1 if x > cutoff_probability else 0 for x in test_predictions]

    precision, recall, f1, _ = metrics.precision_recall_fscore_support(
        test_labels, test_predictions_binary)

    # Only interested in metrics for label 1
    precision, recall, f1 = precision[1], recall[1], f1[1]

    if return_cutoff:
        return precision, recall, f1, cutoff_probability
    else:
        return precision, recall, f1
def calc(gr_truth, predicted):
    # precision, recall, fscore, _ = score(gr_truth, predicted, average='micro')
    # print('precision: {}'.format(precision))
    # print('recall: {}'.format(recall))
    # print('fscore: {}'.format(fscore))
    # print('jaccard: {}'.format(jaccard_similarity_score(gr_truth, predicted, normalize=True)))
    # print('mutual: {}'.format(mutual_info_score(gr_truth, predicted)))
    # print('mutual adj: {}'.format(adjusted_mutual_info_score(gr_truth, predicted)))
    # print('mutual norm: {}'.format(normalized_mutual_info_score(gr_truth, predicted)))
    return normalized_mutual_info_score(gr_truth, predicted)
def compute_f1(predictions, labels):
    """
    Compute the F1 for FAVOR and AGAINST classes, as well as the average of the two.
    """
    _, _, f1, _ = precision_recall_fscore_support(labels, predictions, warn_for=("f1"))
    f1_against = f1[0]
    f1_favor = f1[2]
    f1_overall = (f1_against + f1_favor) / 2
    return f1_against, f1_favor, f1_overall
def f1_score_wrapper(y_true, y_pred):
    # y_pred_2 = np.asarray([1 if b > a else 0 for [a, b] in y_pred])
    y_pred_2 = np.argmax(y_pred, axis=1)
    # print("F1 score inputs:")
    # print(y_true)
    # print(y_pred_2)
    # print("---")
    p, r, f1, s = precision_recall_fscore_support(y_true, y_pred_2)
    return accuracy_score(y_true, y_pred_2) if 0 in f1 else np.mean(f1)
def _update_metrics(self, y_true, y_pred, onco_prob, tsg_prob):
    # record which genes were predicted what
    self.driver_gene_pred = pd.Series(y_pred, self.y.index)
    self.driver_gene_score = pd.Series(onco_prob+tsg_prob, self.y.index)

    # evaluate performance
    prec, recall, fscore, support = metrics.precision_recall_fscore_support(y_true, y_pred,
                                                                            average='macro')
    cancer_gene_pred = ((onco_prob + tsg_prob)>.5).astype(int)
    self.cancer_gene_count[self.num_pred] = np.sum(cancer_gene_pred)
    self.precision[self.num_pred] = prec
    self.recall[self.num_pred] = recall
    self.f1_score[self.num_pred] = fscore

    # compute Precision-Recall curve metrics
    driver_prob = onco_prob + tsg_prob
    driver_true = (y_true > 0).astype(int)
    p, r, thresh = metrics.precision_recall_curve(driver_true, driver_prob)
    p, r, thresh = p[::-1], r[::-1], thresh[::-1]  # reverse order of results
    thresh = np.insert(thresh, 0, 1.0)
    self.driver_precision_array[self.num_pred, :] = interp(self.driver_recall_array, r, p)
    self.driver_threshold_array[self.num_pred, :] = interp(self.driver_recall_array, r, thresh)

    # calculate prediction summary statistics
    prec, recall, fscore, support = metrics.precision_recall_fscore_support(driver_true, cancer_gene_pred)
    self.driver_precision[self.num_pred] = prec[1]
    self.driver_recall[self.num_pred] = recall[1]

    # save driver metrics
    fpr, tpr, thresholds = metrics.roc_curve(driver_true, driver_prob)
    self.driver_tpr_array[self.num_pred, :] = interp(self.driver_fpr_array, fpr, tpr)
def _update_onco_metrics(self, y_true, y_pred, prob):
    self.onco_gene_pred = pd.Series(y_pred, self.y.index)
    self.onco_gene_score = pd.Series(prob, self.y.index)

    # compute metrics for classification
    self.onco_gene_count[self.num_pred] = sum(y_pred)
    prec, recall, fscore, support = metrics.precision_recall_fscore_support(y_true, y_pred)
    self.onco_precision[self.num_pred] = prec[self.onco_num]
    self.onco_recall[self.num_pred] = recall[self.onco_num]
    self.onco_f1_score[self.num_pred] = fscore[self.onco_num]
    self.logger.debug('Onco Iter %d: Precission=%s, Recall=%s, f1_score=%s' % (
                      self.num_pred + 1, str(prec), str(recall), str(fscore)))

    # compute ROC curve metrics
    fpr, tpr, thresholds = metrics.roc_curve(y_true, prob)
    self.onco_tpr_array[self.num_pred, :] = interp(self.onco_fpr_array, fpr, tpr)
    #self.onco_mean_tpr[0] = 0.0

    # compute Precision-Recall curve metrics
    p, r, thresh = metrics.precision_recall_curve(y_true, prob)
    p, r, thresh = p[::-1], r[::-1], thresh[::-1]  # reverse order of results
    thresh = np.insert(thresh, 0, 1.0)
    self.onco_precision_array[self.num_pred, :] = interp(self.onco_recall_array, r, p)
    self.onco_threshold_array[self.num_pred, :] = interp(self.onco_recall_array, r, thresh)
def arg_p_r_f(Y_true, Y_pred, labels, **kwargs):
    macro_p = []
    macro_r = []
    macro_f = []
    micro_true = []
    micro_pred = []
    for y_true, y_pred in zip(Y_true, Y_pred):
        p, r, f, _ = precision_recall_fscore_support(y_true, y_pred, **kwargs)
        macro_p.append(p)
        macro_r.append(r)
        macro_f.append(f)
        micro_true.extend(y_true)
        micro_pred.extend(y_pred)
    micro_p, micro_r, micro_f, _ = precision_recall_fscore_support(
        micro_true, micro_pred, **kwargs
    )
    kwargs.pop('average')
    per_class_fs = f1_score(micro_true, micro_pred, average=None, **kwargs)
    res = {
        'p_macro': np.mean(macro_p),
        'r_macro': np.mean(macro_r),
        'f_macro': np.mean(macro_f),
        'p_micro': micro_p,
        'r_micro': micro_r,
        'f_micro': micro_f
    }
    for label, per_class_f in zip(sorted(labels), per_class_fs):
        res['f_class_{}'.format(label)] = per_class_f
    return res
def save_results(y_test, y_pred, labels, fold_number=0):
    pickle.dump(y_test, open("y_test_fold{number}.plk".format(number=fold_number), "w"))
    pickle.dump(y_pred, open("y_pred_fold{number}.plk".format(number=fold_number), "w"))
    print classification_report(y_test, y_pred)
    print confusion_matrix(y_test, y_pred)
    print "Micro stats:"
    print precision_recall_fscore_support(y_test, y_pred, average='micro')
    print "Macro stats:"
    print precision_recall_fscore_support(y_test, y_pred, average='macro')
    try:
        visualization.plot_confusion_matrix(confusion_matrix(y_test, y_pred),
                                            title="Test CM fold{number}".format(number=fold_number),
                                            labels=labels)
    except:
        pass
def prediction(clf, X, y):
    y_pred = clf.predict(X)
    y_test = y
    print classification_report(y_test, y_pred)
    # print confusion_matrix(y_test, y_pred)
    print "Micro stats:"
    print precision_recall_fscore_support(y_test, y_pred, average='micro')
    print "Macro stats:"
    print precision_recall_fscore_support(y_test, y_pred, average='macro')
def write_score(name, gold_labels, pred_scores, classes, average_classes):
    classes, average_classes = np.array(classes), np.array(average_classes)
    gold_scores = LabelBinarizer().fit(classes).transform(gold_labels)
    pred_labels = classes[np.argmax(pred_scores, axis=1)]

    with closing(Tee('{}.txt'.format(name), 'w')):
        precision, recall, fscore, _ = precision_recall_fscore_support(gold_labels, pred_labels, labels=classes)
        for t in zip(classes, precision, recall, fscore):
            print('{}: P={:.2f}, R={:.2f}, F1={:.2f}'.format(*t))
        print('Accuracy: {:.4f}'.format(accuracy_score(gold_labels, pred_labels)))
        print('F1 average: {:.4f}'.format(np.mean(fscore[LabelEncoder().fit(classes).transform(average_classes)])))

    with PdfPages('{}.pdf'.format(name)) as pdf:
        fpr = {}
        tpr = {}
        roc_auc = {}
        for i in range(len(classes)):
            fpr[i], tpr[i], _ = roc_curve(gold_scores[:, i], pred_scores[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])
        fpr['micro'], tpr['micro'], _ = roc_curve(gold_scores.ravel(), pred_scores.ravel())
        roc_auc['micro'] = auc(fpr['micro'], tpr['micro'])
        plt.figure()
        plt.plot(fpr['micro'], tpr['micro'], label='micro-average (area = {:.2f})'.format(roc_auc['micro']))
        for i in range(len(classes)):
            plt.plot(fpr[i], tpr[i], label='{0} (area = {1:.2f})'.format(i, roc_auc[i]))
        plt.plot([0, 1], [0, 1], 'k--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curves')
        plt.legend(loc='lower right')
        pdf.savefig()
def f1(average_classes):
    # noinspection PyShadowingNames
    def f1(y_true, y_pred, theano=False):
        if theano:
            raise NotImplementedError
        else:
            return np.mean(precision_recall_fscore_support(y_true, np.argmax(y_pred, axis=-1))[2][average_classes])

    return f1
def GBDT_classify(train_dataSet_path, test_dataSet_path, train_one_and_two_result_as_proba_path):
    train_data = pd.read_csv(train_dataSet_path)
    train_data = train_data.as_matrix()
    X_train = train_data[:, 2:-1]  # select columns 0 through end-1
    y_train = train_data[:, -1]    # select column end

    test_data = pd.read_csv(test_dataSet_path)
    test_data = test_data.as_matrix()
    X_test = test_data[:, 2:-1]  # select columns 0 through end-1
    y_test = test_data[:, -1]    # select column end

    clf = GradientBoostingClassifier(n_estimators=200)
    clf.fit(X_train, y_train)
    pre_y_test = clf.predict_proba(X_test)
    print pre_y_test
    print("GBDT Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))

    print u'????.....'
    f_result = open(test_dataSet_prob_path, 'w')
    for i in range(0, len(pre_y_test)):
        if i==0:
            print str(pre_y_test[i][0])
        if i==len(pre_y_test)-1:
            print str(pre_y_test[i][0])
        f_result.write(str(pre_y_test[i][0]) + '\n')
    return clf
def _f1_score(self, y_pred, y_true):
    y_pred_2 = np.argmax(y_pred, axis=1)
    p, r, f1, s = precision_recall_fscore_support(y_true, y_pred_2)
    return accuracy_score(y_true, y_pred_2) if 0 in f1 else np.mean(f1)
def eval_performance(y_true, y_pred):
    '''
    Evaluate the performance of a multiclass classification model.
    :param y_true: the gold-standard labels
    :param y_pred: the predictions
    :return: mean F1
    '''
    pre, rec, f1, support = metrics.precision_recall_fscore_support(y_true, y_pred, average='weighted')

    print '=== Performance ==='
    print 'Mean precision: %.03f%%' % pre  # (100*sum(pre * support)/sum(support))
    print 'Mean recall: %.03f%%' % rec     # (100*sum(rec * support)/sum(support))
    print 'Mean F1: %.03f%%' % f1          # mean_f1
    return pre, rec, f1, support
def get_f1_pre_rec(self, labels, prediction):
    pre, rec, f1, _ = precision_recall_fscore_support(
        y_true = labels,
        y_pred = prediction,
        labels = [self.labels_dict[i] for i in self.labels_list])
    counts = np.zeros([2, 1])
    for i in labels:
        counts[i] += 1
    return np.expand_dims(pre,1), np.expand_dims(rec,1), np.expand_dims(f1,1), counts
def get_f1_pre_rec(self, labels, prediction):
    pre, rec, f1, _ = precision_recall_fscore_support(
        y_true = labels,
        y_pred = prediction,
        labels = [self.labels_dict[i] for i in self.labels_list])
    counts = np.zeros([6, 1])
    for i in labels:
        counts[i] += 1
    return np.expand_dims(pre,1), np.expand_dims(rec,1), np.expand_dims(f1,1), counts
def get_f1_pre_rec(self, labels, prediction):
    pre, rec, f1, _ = precision_recall_fscore_support(
        y_true = labels,
        y_pred = prediction,
        labels = [self.labels_dict[i] for i in self.labels_list])
    counts = np.zeros([len(self.labels_list), 1])
    for i in labels:
        counts[i] += 1
    return np.expand_dims(pre,1), np.expand_dims(rec,1), np.expand_dims(f1,1), counts
def multilabel_classification_report(y_true, y_pred, fmt='.3f', target_names=None):
    y_true = check_multilabel_array(y_true)
    y_pred = check_multilabel_array(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError('y_true and y_pred must have equal shapes')
    n_labels = y_true.shape[1]
    if target_names is not None and len(target_names) != n_labels:
        raise ValueError('target_names must specify a name for all %d labels' % n_labels)

    # Collect stats
    precision, recall, f1_score, support = precision_recall_fscore_support(y_true, y_pred)
    tp, fp, tn, fn = multilabel_tp_fp_tn_fn_scores(y_true, y_pred)
    accuracy = multilabel_accuracy(y_true, y_pred)

    # Generate data for table, where each row represents a label
    headers = ['', 'precision', 'recall', 'f1-score', 'accuracy', 'support', 'TP', 'TN', 'FP', 'FN']
    data = []
    for label_idx in range(n_labels):
        target_name = str(label_idx) if target_names is None else target_names[label_idx]
        row = [target_name, precision[label_idx], recall[label_idx], f1_score[label_idx],
               accuracy[label_idx], support[label_idx], tp[label_idx], tn[label_idx],
               fp[label_idx], fn[label_idx]]
        data.append(row)

    # Calculate summaries for all values
    summary = ['avg / total', np.average(precision), np.average(recall), np.average(f1_score),
               np.average(accuracy), np.sum(support), np.sum(tp), np.sum(tn), np.sum(fp), np.sum(fn)]
    data.append(summary)
    return tabulate(data, headers=headers, floatfmt=fmt)
def evaluate_prediction(y_true, y_pred):
    """
    evaluate prediction performance, given the ground truth
    :param y_true: correct target values
    :param y_pred: predicted values
    :return: confusion matrix, tp, tn, fp, fn precision, recall, F-score, and support
    """
    cm = confusion_matrix(y_true, y_pred)
    precision, recall, fscore, support = precision_recall_fscore_support(y_true, y_pred)
    return cm, tp_tn_fp_fn(cm), precision, recall, fscore, support
def test_metrics(self):
    Y = np.random.randint(0,2,size=(2,5,5))
    Yhat = np.random.randint(0,2,size=(2,5,5))

    C,acc,prec,recall,f1 = emlib.metrics(Y, Yhat, display=False)

    prec2, recall2, f12, supp = smetrics(np.reshape(Y, (Y.size,)),
                                         np.reshape(Yhat, (Yhat.size,)))

    self.assertAlmostEqual(prec, prec2[1])
    self.assertAlmostEqual(recall, recall2[1])
    self.assertAlmostEqual(f1, f12[1])
def print_data(y,y_):
    p,r,f,s = precision_recall_fscore_support(y,y_)
    print('precision:\t{}'.format(p[1]))
    print('recall:\t\t{}'.format(r[1]))
    print('f1 score:\t{}'.format(f[1]))
def report_raw(self):
    precision, recall, f1, support = precision_recall_fscore_support(self.test_labels, self.predict_labels,
                                                                     labels=self.categories)
    prec_average, rec_average, f1_average, _ = precision_recall_fscore_support(self.test_labels,
                                                                               self.predict_labels,
                                                                               average='macro',
                                                                               labels=self.categories)
    support_total = sum(support)
    matrix = [precision.tolist(), recall.tolist(), f1.tolist(), support.tolist()]
    matrix = [list(i) for i in zip(*matrix)]
    matrix.append([prec_average, rec_average, f1_average, support_total])
    return matrix
def compute_entities_f1(gold_graph, pred_graph):
    """
    Compute the agreement for the entity entailment graph, for each entity, and return the average
    :param gold_graph: the first annotator's graph
    :param pred_graph: the second annotator's graph
    :return: the entity edges' mean F1 score
    """
    # Get all the possible edges in the entity entailment graph
    all_edges = {str(entity): set([(str(m1), str(m2))
                                   for m1 in entity.mentions.values()
                                   for m2 in entity.mentions.values() if m1 != m2])
                 for entity in gold_graph.entities.values() if len(entity.mentions) > 1}

    # Get the binary predictions/gold for these edges
    str_entities_gold = { entity : str(entity) for entity in gold_graph.entities.values() }
    entity_entailments_gold = {str_entities_gold[entity]:
                                   [1 if (m1, m2) in set(entity.entailment_graph.mentions_graph) else 0
                                    for (m1, m2) in all_edges[str_entities_gold[entity]]]
                               for entity in gold_graph.entities.values()
                               if str_entities_gold[entity] in all_edges.keys()}

    str_entities_pred = { entity : str(entity) for entity in pred_graph.entities.values() }
    entity_entailments_pred = {str_entities_pred[entity]:
                                   [1 if (m1, m2) in set(entity.entailment_graph.mentions_graph) else 0
                                    for (m1, m2) in all_edges[str_entities_pred[entity]]]
                               for entity in pred_graph.entities.values()
                               if str_entities_pred[entity] in all_edges.keys()}

    mutual_entities = list(set(entity_entailments_gold.keys()).intersection(entity_entailments_pred.keys()))

    # If both graphs contain no entailments, the score should be one
    f1 = np.mean([precision_recall_fscore_support(entity_entailments_gold[entity],
                                                  entity_entailments_pred[entity],
                                                  average='binary')[2]
                  if np.sum(entity_entailments_gold[entity]) > 0 or np.sum(entity_entailments_pred[entity]) > 0
                  else 1.0
                  for entity in mutual_entities])

    return f1
def acc_f1_roc(gt, prob, pred):
    acc = accuracy_score(gt, pred)*100.
    acc_not_normed = accuracy_score(gt, pred, normalize=False)
    f1 = f1_score(gt, pred)*100.
    roc = roc_auc_score(gt, prob, average='macro')*100.
    p, r, _, _ = precision_recall_fscore_support(gt, pred, average='binary')
    # print p, r
    return acc, acc_not_normed, f1, roc, p, r
def getScores(clf, X, y):
    predictions = clf.predict(X)
    scores = precision_recall_fscore_support(y, predictions, average='binary')
    return scores
def getScores(clf, X, y):
    predictions = clf.predict(X)
    scores = precision_recall_fscore_support(y, predictions, average='binary')
    return scores

# Import data
def test_single(data, label, model):
    prediction = model.predict(data)
    #return float(np.sum(prediction == label)) / len(label)
    pre, rec, f1, support = metrics.precision_recall_fscore_support(label, prediction)
    f1 = (100*sum(f1[1:] * support[1:])/sum(support[1:]))
    return f1
def test_rating(data, label, model):
    prediction = model.predict(data)
    #return float(np.sum(prediction % len(loadFile.aspect_dic) == (label % len(loadFile.aspect_dic)))) / len(label)
    prediction = prediction % len(loadFile.aspect_dic)
    label = label % len(loadFile.aspect_dic)
    pre, rec, f1, support = metrics.precision_recall_fscore_support(label, prediction)
    f1 = (100*sum(f1[1:] * support[1:])/sum(support[1:]))
    return f1
def test_aspect(data, label, model):
    prediction = model.predict(data)
    #return float(np.sum(prediction // len(loadFile.aspect_dic) == (label // len(loadFile.aspect_dic)))) / len(label)
    prediction = prediction // len(loadFile.aspect_dic)
    label = label // len(loadFile.aspect_dic)
    pre, rec, f1, support = metrics.precision_recall_fscore_support(label, prediction)
    f1 = (100*sum(f1[1:] * support[1:])/sum(support[1:]))
    return f1
def test_mat(data, label, model):
    prediction1 = model[0].predict(data)
    prediction2 = model[1].predict(data)
    #return float(np.logical_and(prediction1 == label[:, 0], prediction2 == label[:, 1]).sum()) / len(label)
    label = label[:, 0] * 100 + label[:, 1]
    prediction = prediction1 * 100 + prediction2
    pre, rec, f1, support = metrics.precision_recall_fscore_support(label, prediction)
    f1 = (100*sum(f1[1:] * support[1:])/sum(support[1:]))
    return f1
def test(net_file, data_set, label_method, model='RNN', trees=None):
    if trees is None:
        trees = tree.load_all(data_set, label_method)
    assert net_file is not None, "Must give model to test"
    print "Testing netFile %s" % net_file

    with open(net_file, 'r') as fid:
        opts = pickle.load(fid)
        _ = pickle.load(fid)

        if model == 'RNTN':
            nn = RNTN(opts.wvec_dim, opts.output_dim, opts.num_words, opts.minibatch)
        elif model == 'RNN':
            nn = RNN(opts.wvec_dim, opts.output_dim, opts.num_words, opts.minibatch)
        elif opts.model == 'TreeLSTM':
            nn = TreeLSTM(opts.wvec_dim, opts.mem_dim, opts.output_dim, opts.num_words,
                          opts.minibatch, rho=opts.rho)
        elif opts.model == 'TreeTLSTM':
            nn = TreeTLSTM(opts.wvec_dim, opts.mem_dim, opts.output_dim, opts.num_words,
                           opts.minibatch, rho=opts.rho)
        else:
            raise '%s is not a valid neural network so far only RNTN, RNN, RNN2, RNN3, and DCNN' % opts.model

        nn.init_params()
        nn.from_file(fid)

    print "Testing %s..." % model

    cost, correct, guess = nn.cost_and_grad(trees, test=True)
    correct_sum = 0
    for i in xrange(0, len(correct)):
        correct_sum += (guess[i] == correct[i])

    confusion = [[0 for i in range(nn.output_dim)] for j in range(nn.output_dim)]
    for i, j in zip(correct, guess):
        confusion[i][j] += 1

    # makeconf(confusion)
    pre, rec, f1, support = metrics.precision_recall_fscore_support(correct, guess)
    #print "Cost %f, Acc %f" % (cost, correct_sum / float(len(correct)))
    #return correct_sum / float(len(correct))
    f1 = (100*sum(f1[1:] * support[1:])/sum(support[1:]))
    print "Cost %f, F1 %f, Acc %f" % (cost, f1, correct_sum / float(len(correct)))
    return f1
def test_precision_recall_f1_score_binary():
    # Test Precision Recall and F1 Score for binary classification task
    y_true, y_pred, _ = make_prediction(binary=True)

    # detailed measures for each class
    p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average=None)
    assert_array_almost_equal(p, [0.73, 0.85], 2)
    assert_array_almost_equal(r, [0.88, 0.68], 2)
    assert_array_almost_equal(f, [0.80, 0.76], 2)
    assert_array_equal(s, [25, 25])

    # individual scoring function that can be used for grid search: in the
    # binary class case the score is the value of the measure for the positive
    # class (e.g. label == 1). This is deprecated for average != 'binary'.
    assert_dep_warning = partial(assert_warns, DeprecationWarning)
    for kwargs, my_assert in [({}, assert_no_warnings),
                              ({'average': 'binary'}, assert_no_warnings),
                              ({'average': 'micro'}, assert_dep_warning)]:
        ps = my_assert(precision_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(ps, 0.85, 2)

        rs = my_assert(recall_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(rs, 0.68, 2)

        fs = my_assert(f1_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(fs, 0.76, 2)

        assert_almost_equal(my_assert(fbeta_score, y_true, y_pred, beta=2, **kwargs),
                            (1 + 2 ** 2) * ps * rs / (2 ** 2 * ps + rs), 2)
def test_precision_recall_fscore_support_errors():
    y_true, y_pred, _ = make_prediction(binary=True)

    # Bad beta
    assert_raises(ValueError, precision_recall_fscore_support,
                  y_true, y_pred, beta=0.0)

    # Bad pos_label
    assert_raises(ValueError, precision_recall_fscore_support,
                  y_true, y_pred, pos_label=2, average='macro')

    # Bad average option
    assert_raises(ValueError, precision_recall_fscore_support,
                  [0, 1, 2], [1, 2, 0], average='mega')
def test_precision_refcall_f1_score_multilabel_unordered_labels():
    # test that labels need not be sorted in the multilabel case
    y_true = np.array([[1, 1, 0, 0]])
    y_pred = np.array([[0, 0, 1, 1]])
    for average in ['samples', 'micro', 'macro', 'weighted', None]:
        p, r, f, s = precision_recall_fscore_support(
            y_true, y_pred, labels=[3, 0, 1, 2], warn_for=[], average=average)
        assert_array_equal(p, 0)
        assert_array_equal(r, 0)
        assert_array_equal(f, 0)
        if average is None:
            assert_array_equal(s, [0, 1, 1, 0])
def test_precision_recall_f1_score_multiclass_pos_label_none():
    # Test Precision Recall and F1 Score for multiclass classification task
    # GH Issue #1296
    # initialize data
    y_true = np.array([0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1])
    y_pred = np.array([1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1])

    # compute scores with default labels introspection
    p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                 pos_label=None,
                                                 average='weighted')
def test_precision_recall_f1_no_labels():
    y_true = np.zeros((20, 3))
    y_pred = np.zeros_like(y_true)

    # tp = [0, 0, 0]
    # fn = [0, 0, 0]
    # fp = [0, 0, 0]
    # support = [0, 0, 0]
    # |y_hat_i inter y_i | = [0, 0, 0]
    # |y_i| = [0, 0, 0]
    # |y_hat_i| = [0, 0, 0]

    for beta in [1]:
        p, r, f, s = assert_warns(UndefinedMetricWarning,
                                  precision_recall_fscore_support,
                                  y_true, y_pred, average=None, beta=beta)
        assert_array_almost_equal(p, [0, 0, 0], 2)
        assert_array_almost_equal(r, [0, 0, 0], 2)
        assert_array_almost_equal(f, [0, 0, 0], 2)
        assert_array_almost_equal(s, [0, 0, 0], 2)

        fbeta = assert_warns(UndefinedMetricWarning, fbeta_score,
                             y_true, y_pred, beta=beta, average=None)
        assert_array_almost_equal(fbeta, [0, 0, 0], 2)

        for average in ["macro", "micro", "weighted", "samples"]:
            p, r, f, s = assert_warns(UndefinedMetricWarning,
                                      precision_recall_fscore_support,
                                      y_true, y_pred, average=average,
                                      beta=beta)
            assert_almost_equal(p, 0)
            assert_almost_equal(r, 0)
            assert_almost_equal(f, 0)
            assert_equal(s, None)

            fbeta = assert_warns(UndefinedMetricWarning, fbeta_score,
                                 y_true, y_pred, beta=beta, average=average)
            assert_almost_equal(fbeta, 0)
def clf_metrics(p_train, p_test, y_train, y_test):
    """ Compute metrics on classifier predictions

    Parameters
    ----------
    p_train : np.array [n_samples]
        predicted probabilities for training set
    p_test : np.array [n_samples]
        predicted probabilities for testing set
    y_train : np.array [n_samples]
        Training labels.
    y_test : np.array [n_samples]
        Testing labels.

    Returns
    -------
    clf_scores : dict
        classifier scores for training set
    """
    y_pred_train = 1*(p_train >= 0.5)
    y_pred_test = 1*(p_test >= 0.5)

    train_scores = {}
    test_scores = {}

    train_scores['accuracy'] = metrics.accuracy_score(y_train, y_pred_train)
    test_scores['accuracy'] = metrics.accuracy_score(y_test, y_pred_test)

    train_scores['mcc'] = metrics.matthews_corrcoef(y_train, y_pred_train)
    test_scores['mcc'] = metrics.matthews_corrcoef(y_test, y_pred_test)

    (p, r, f, s) = metrics.precision_recall_fscore_support(y_train, y_pred_train)
    train_scores['precision'] = p
    train_scores['recall'] = r
    train_scores['f1'] = f
    train_scores['support'] = s

    (p, r, f, s) = metrics.precision_recall_fscore_support(y_test, y_pred_test)
    test_scores['precision'] = p
    test_scores['recall'] = r
    test_scores['f1'] = f
    test_scores['support'] = s

    train_scores['confusion matrix'] = \
        metrics.confusion_matrix(y_train, y_pred_train, labels=[0, 1])
    test_scores['confusion matrix'] = \
        metrics.confusion_matrix(y_test, y_pred_test, labels=[0, 1])

    train_scores['auc score'] = \
        metrics.roc_auc_score(y_train, p_train + 1, average='weighted')
    test_scores['auc score'] = \
        metrics.roc_auc_score(y_test, p_test + 1, average='weighted')

    clf_scores = {'train': train_scores, 'test': test_scores}

    return clf_scores
def melodiness_metrics(m_train, m_test, y_train, y_test):
    """ Compute metrics on melodiness score

    Parameters
    ----------
    m_train : np.array [n_samples]
        melodiness scores for training set
    m_test : np.array [n_samples]
        melodiness scores for testing set
    y_train : np.array [n_samples]
        Training labels.
    y_test : np.array [n_samples]
        Testing labels.

    Returns
    -------
    melodiness_scores : dict
        melodiness scores for training set
    """
    m_bin_train = 1*(m_train >= 1)
    m_bin_test = 1*(m_test >= 1)

    train_scores = {}
    test_scores = {}

    train_scores['accuracy'] = metrics.accuracy_score(y_train, m_bin_train)
    test_scores['accuracy'] = metrics.accuracy_score(y_test, m_bin_test)

    train_scores['mcc'] = metrics.matthews_corrcoef(y_train, m_bin_train)
    test_scores['mcc'] = metrics.matthews_corrcoef(y_test, m_bin_test)

    (p, r, f, s) = metrics.precision_recall_fscore_support(y_train, m_bin_train)
    train_scores['precision'] = p
    train_scores['recall'] = r
    train_scores['f1'] = f
    train_scores['support'] = s

    (p, r, f, s) = metrics.precision_recall_fscore_support(y_test, m_bin_test)
    test_scores['precision'] = p
    test_scores['recall'] = r
    test_scores['f1'] = f
    test_scores['support'] = s

    train_scores['confusion matrix'] = \
        metrics.confusion_matrix(y_train, m_bin_train, labels=[0, 1])
    test_scores['confusion matrix'] = \
        metrics.confusion_matrix(y_test, m_bin_test, labels=[0, 1])

    train_scores['auc score'] = \
        metrics.roc_auc_score(y_train, m_train + 1, average='weighted')
    test_scores['auc score'] = \
        metrics.roc_auc_score(y_test, m_test + 1, average='weighted')

    melodiness_scores = {'train': train_scores, 'test': test_scores}

    return melodiness_scores
def calc_and_append_scores(y_test, y_pred, metrics, featImportance):
    metrics['scores_mae'].append(mean_absolute_error(y_test, y_pred))
    _, score_off = mae(y_test, y_pred)
    metrics['scores_mae_official'].append(score_off)
    prec, rec, fmeasure, _ = precision_recall_fscore_support(y_test, y_pred, average='macro')
    metrics['scores_prec'].append(prec)
    metrics['scores_recall'].append(rec)
    metrics['scores_f1'].append(fmeasure)
    metrics['scores_accuracy'].append(accuracy_score(y_test, y_pred))
    metrics['feature_importance'].append(featImportance)

    # Getting class-individual metrics
    tTP = [0,0,0,0]
    tFP = [0,0,0,0]
    tTN = [0,0,0,0]
    tFN = [0,0,0,0]
    for act, pred in zip(y_test, y_pred):
        if act == pred:
            for i in range(0,4):
                if i == act:
                    # add to true positive
                    tTP[i] += 1
                else:
                    # add to true negative
                    tTN[i] += 1
        else:
            for i in range(0,4):
                if i == act:
                    # add to false negative
                    tFN[i] += 1
                else:
                    # add to false positive
                    tFP[i] += 1

    tpre = [0,0,0,0]
    trec = [0,0,0,0]
    tfm = [0,0,0,0]
    ttp = [0,0,0,0]
    for i in range(0,4):
        if (tTP[i] > 0.):
            tpre[i] = tTP[i] / (tTP[i] + tFP[i])
            trec[i] = tTP[i] / (tTP[i] + tFN[i])
        if ((trec[i] > 0.) | (tpre[i] > 0.)):
            tfm[i] = (2*(tpre[i] * trec[i])) / (tpre[i]+trec[i])
        ttp[i] = tTP[i]

    # for each label separately,
    # to see how well our model performs on separate labels
    metrics['indRec'].append(trec)
    metrics['indPrec'].append(tpre)
    metrics['indFmeasure'].append(tfm)
    metrics['indTP'].append(ttp)
def score(self, y_predicted, y_target, y_prob=None):
    """ Compute metrics on classifier predictions

    Parameters
    ----------
    y_predicted : np.array [n_samples]
        Predicted class labels
    y_target : np.array [n_samples]
        Target class labels
    y_prob : np.array [n_samples] or None, default=None
        predicted probabilties. If None, auc is not computed

    Returns
    -------
    scores : dict
        dictionary of scores for the following metrics:
        accuracy, matthews correlation coefficient, precision, recall, f1,
        support, confusion matrix, auc score
    """
    labels = set(y_target)
    labels.update(y_predicted)
    is_binary = len(labels) <= 2

    scores = {}
    scores['accuracy'] = metrics.accuracy_score(y_target, y_predicted)
    if is_binary:
        scores['mcc'] = metrics.matthews_corrcoef(y_target, y_predicted)
    else:
        scores['mcc'] = None
    (scores['precision'],
     scores['recall'],
     scores['f1'],
     scores['support']) = metrics.precision_recall_fscore_support(
         y_target, y_predicted
     )
    scores['confusion matrix'] = metrics.confusion_matrix(
        y_target, y_predicted, labels=list(labels)
    )
    if y_prob is not None:
        scores['auc score'] = metrics.roc_auc_score(
            y_target, y_prob + 1, average='weighted'
        )
    else:
        scores['auc score'] = None

    return scores


###############################################################################
def classification_report(y_true, y_pred, labels=None, sample_weight=None, digits=4, threshold=None):
    # this function is copied from https://github.com/scikit-learn/scikit-learn/blob/412996f/sklearn/metrics/classification.py#L1341 (c) respective authors
    # I pulled it here to fix formatting bug.
    from sklearn.metrics import precision_recall_fscore_support, accuracy_score

    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    if labels is None:
        from sklearn.utils.multiclass import unique_labels

        if threshold is not None:
            y_true = y_true > threshold
            y_pred = y_pred > threshold

        labels = unique_labels(y_true, y_pred)
    else:
        labels = np.asarray(labels)

    last_line_heading = 'avg / total'
    target_names = ['%s' % l for l in labels]

    results = [["", "precision", "recall", "f1-score", "support", "accuracy"]]

    p, r, f1, s = precision_recall_fscore_support(y_true, y_pred,
                                                  labels=labels,
                                                  average=None,
                                                  sample_weight=sample_weight)

    for i, label in enumerate(labels):
        values = [target_names[i]]
        for v in (p[i], r[i], f1[i]):
            values += ["{0:0.{1}f}".format(v, digits)]
        values += ["{0}".format(s[i])]
        accuracy = accuracy_score(y_true == label, y_pred == label, sample_weight=sample_weight)
        values += ["{0:0.{1}f}".format(accuracy, digits)]
        results.append(values)

    values = [last_line_heading]
    for v in (np.average(p, weights=s),
              np.average(r, weights=s),
              np.average(f1, weights=s)):
        values += ["{0:0.{1}f}".format(v, digits)]
    values += ['{0}'.format(np.sum(s))]
    accuracy = accuracy_score(y_true, y_pred, sample_weight=sample_weight)
    values += ["{0:0.{1}f}".format(accuracy, digits)]
    results.append(values)

    return results
def crossValidate(document_term_matrix,labels,classifier="SVM",nfold=2):
    clf = None
    precision = []
    recall = []
    fscore = []
    a_score = []  # accuracy per fold (initialized here so the appends below have a target)
    if classifier == "NN":
        clf = MLPClassifier(hidden_layer_sizes=(50), activation='relu', solver='sgd', alpha=1e-2, random_state=None)
    elif classifier == "LR":
        clf = linear_model.LogisticRegression(C=1e3)
        #clf = tree.DecisionTreeClassifier()
    if classifier == "RF":
        clf = RandomForestClassifier()
    elif classifier == "NB":
        clf = GaussianNB()
    elif classifier == "SVM":
        clf = LinearSVC()
    elif classifier == "KNN":
        clf = NearestCentroid()

    skf = StratifiedKFold(n_splits=nfold, shuffle=True)
    y_test_total = []
    y_pred_total = []

    for train_index, test_index in skf.split(document_term_matrix, labels):
        X_train, X_test = document_term_matrix[train_index], document_term_matrix[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        y_test_total.extend(y_test.tolist())

        model = clf.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_pred_total.extend(y_pred.tolist())

        p,r,f,s = precision_recall_fscore_support(y_test, y_pred, average='weighted')
        print accuracy_score(y_test, y_pred)
        a_score.append(accuracy_score(y_test, y_pred))
        precision.append(p)
        recall.append(r)
        fscore.append(f)

    plot_learning_curve(clf, "Learning Curves", document_term_matrix, labels, ylim=None, cv=skf,
                        n_jobs=1, train_sizes=np.linspace(.1, 1.0, 5))
    plt.savefig('lc.png')

    return pd.Series(y_test_total), pd.Series(y_pred_total), np.mean(precision), np.mean(recall), np.mean(fscore), np.mean(a_score)