The following 29 code examples, extracted from open-source Python projects, illustrate how to use sklearn.metrics.fbeta_score().
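As a quick orientation before the project excerpts, here is a minimal, self-contained sketch of the basic call (the labels and the choice of beta=2 are made up for illustration):

import numpy as np
from sklearn.metrics import fbeta_score

# Toy binary labels and predictions, purely illustrative.
y_true = np.array([0, 1, 1, 0, 1, 1])
y_pred = np.array([0, 1, 0, 0, 1, 1])

# beta=2 weights recall more heavily than precision; here precision is 1.0
# and recall is 0.75, so F2 = 5 * 1.0 * 0.75 / (4 * 1.0 + 0.75).
print(fbeta_score(y_true, y_pred, beta=2))  # ~0.789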
def optimise_f2_thresholds(y, p, verbose=True, resolution=100):
    def mf(x):
        p2 = np.zeros_like(p)
        for i in range(17):
            p2[:, i] = (p[:, i] > x[i]).astype(int)
        score = fbeta_score(y, p2, beta=2, average='samples')
        return score

    x = [0.2] * 17
    for i in range(17):
        best_i2 = 0
        best_score = 0
        for i2 in range(resolution):
            i2 /= resolution
            x[i] = i2
            score = mf(x)
            if score > best_score:
                best_i2 = i2
                best_score = score
        x[i] = best_i2
        if verbose:
            print(i, best_i2, best_score)

    return x
def validate(net, loader, criterion):
    net.eval()
    running_loss = 0
    running_accuracy = 0
    targets = torch.FloatTensor(0, 17)      # For fscore calculation
    predictions = torch.FloatTensor(0, 17)
    for i, (X, y) in enumerate(loader):
        if cuda:
            X, y = X.cuda(), y.cuda()
        X, y = Variable(X, volatile=True), Variable(y)
        output = net(X)
        loss = criterion(output, y)
        acc = utils.get_multilabel_accuracy(output, y)
        targets = torch.cat((targets, y.cpu().data), 0)
        predictions = torch.cat((predictions, output.cpu().data), 0)
        running_loss += loss.data[0]
        running_accuracy += acc
    fscore = fbeta_score(targets.numpy(), predictions.numpy() > 0.23,
                         beta=2, average='samples')
    return running_loss / len(loader), running_accuracy / len(loader), fscore
def f2_score(y_true, y_preds):
    return fbeta_score(y_true, y_preds, beta=2, average='samples')
def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    clf.fit(X_t_train, y_train)
    app = dict()
    score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average=None)
    avg_sample_score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average='samples')
    prec_score = precision_score(y_test, clf.predict(X_t_test), average='micro')
    rec_score = recall_score(y_test, clf.predict(X_t_test), average='micro')
    avg_prec = average_precision_score(y_test, clf.predict(X_t_test))
    metrics = [score, avg_sample_score, roc_auc_score(y_test, clf.predict_proba(X_t_test))]
    # app['Classwise Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    app['F2 Score'] = avg_sample_score
    app['ROC_AUC'] = roc_auc_score(y_test, clf.predict_proba(X_t_test))
    app['P_AUPR'] = avg_prec
    app['Precision'] = prec_score
    app['Recall'] = rec_score
    return app
def find_f_measure_threshold2(probs, labels, num_iters=100, seed=0.21):
    _, num_classes = labels.shape[0:2]
    best_thresholds = [seed] * num_classes
    best_scores = [0] * num_classes
    for t in range(num_classes):
        thresholds = list(best_thresholds)  # [seed]*num_classes
        for i in range(num_iters):
            th = i / float(num_iters)
            thresholds[t] = th
            f2 = fbeta_score(labels, probs > thresholds, beta=2, average='samples')
            if f2 > best_scores[t]:
                best_scores[t] = f2
                best_thresholds[t] = th
        print('\t(t, best_thresholds[t], best_scores[t])=%2d, %0.3f, %f'
              % (t, best_thresholds[t], best_scores[t]))
    print('')
    return best_thresholds, best_scores
def test_fscore_warnings():
    clean_warning_registry()
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter('always')

        for score in [f1_score, partial(fbeta_score, beta=2)]:
            score(np.array([[1, 1], [1, 1]]),
                  np.array([[0, 0], [0, 0]]),
                  average='micro')
            assert_equal(str(record.pop().message),
                         'F-score is ill-defined and '
                         'being set to 0.0 due to no predicted samples.')

            score(np.array([[0, 0], [0, 0]]),
                  np.array([[1, 1], [1, 1]]),
                  average='micro')
            assert_equal(str(record.pop().message),
                         'F-score is ill-defined and '
                         'being set to 0.0 due to no true samples.')
def fbeta(_, predictions_binary, labels, parameters):
    return metrics.fbeta_score(labels, predictions_binary, **parameters)
def fbeta(true_label, prediction):
    return fbeta_score(true_label, prediction, beta=2, average='samples')
def fscore(prediction):
    """Get the fscore of the validation set.
    Gives a good indication of score on public leaderboard."""
    target = torch.FloatTensor(0, 17)
    for i, (_, y) in enumerate(val_loader):
        target = torch.cat((target, y), 0)
    fscore = fbeta_score(target.numpy(), prediction.numpy() > 0.23,
                         beta=2, average='samples')
    return fscore
def fbeta_score(y_true, y_pred, beta=1):
    """Computes the F score.

    The F score is the weighted harmonic mean of precision and recall.
    Here it is only computed as a batch-wise average, not globally.

    This is useful for multi-label classification, where input samples can be
    classified as sets of labels. By only using accuracy (precision) a model
    would achieve a perfect score by simply assigning every class to every
    input. In order to avoid this, a metric should penalize incorrect class
    assignments as well (recall). The F-beta score (ranging from 0.0 to 1.0)
    computes this, as a weighted mean of the proportion of correct class
    assignments vs. the proportion of incorrect class assignments.

    With beta = 1, this is equivalent to the F-measure. With beta < 1,
    assigning correct classes becomes more important, and with beta > 1 the
    metric is instead weighted towards penalizing incorrect class assignments.
    """
    if beta < 0:
        raise ValueError('The lowest choosable beta is zero (only precision).')

    # If there are no true positives, fix the F score at 0 like sklearn.
    if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
        return 0

    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    bb = beta ** 2
    fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
    return fbeta_score
def fmeasure(y_true, y_pred):
    """Computes the f-measure, the harmonic mean of precision and recall.

    Here it is only computed as a batch-wise average, not globally.
    """
    return fbeta_score(y_true, y_pred, beta=1)
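The two Keras-style metrics above refer to a backend alias K and to precision/recall helpers that are not part of this excerpt. A minimal sketch of those helpers, assuming the classic batch-wise Keras 1.x definitions, could look like this:

from keras import backend as K

def precision(y_true, y_pred):
    # Batch-wise precision: true positives over all predicted positives.
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return true_positives / (predicted_positives + K.epsilon())

def recall(y_true, y_pred):
    # Batch-wise recall: true positives over all actual positives.
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_positives / (possible_positives + K.epsilon())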
def fbeta(model, X_valid, y_valid):
    p_valid = model.predict(X_valid)
    return fbeta_score(y_valid, np.array(p_valid) > 0.2, beta=2, average='samples')
def f2_score(output, target, threshold):
    output = (output > threshold)
    return fbeta_score(target, output, beta=2, average='samples')
def optimise_f2_thresholds(y, p, verbose=True, resolution=100):
    """Find optimal threshold values for f2 score. Thanks Anokas
    https://www.kaggle.com/c/planet-understanding-the-amazon-from-space/discussion/32475
    """
    size = y.shape[1]

    def mf(x):
        p2 = np.zeros_like(p)
        for i in range(size):
            p2[:, i] = (p[:, i] > x[i]).astype(int)
        score = fbeta_score(y, p2, beta=2, average='samples')
        return score

    x = [0.2] * size
    for i in range(size):
        best_i2 = 0
        best_score = 0
        for i2 in range(resolution):
            i2 /= resolution
            x[i] = i2
            score = mf(x)
            if score > best_score:
                best_i2 = i2
                best_score = score
        x[i] = best_i2
        if verbose:
            print(i, best_i2, best_score)

    return x, best_score
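The threshold-search function above expects a matrix of binary ground-truth labels and a matrix of per-class probabilities. A hypothetical call, with shapes and class count invented purely for illustration, might look like this:

import numpy as np

rng = np.random.RandomState(0)
y_true = (rng.rand(64, 17) > 0.5).astype(int)  # 64 samples, 17 binary labels (fake)
y_prob = rng.rand(64, 17)                      # per-class "probabilities" (fake)

# Greedily tune one threshold per class to maximise the sample-averaged F2 score.
thresholds, best_f2 = optimise_f2_thresholds(y_true, y_prob, verbose=False)
print(thresholds, best_f2)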
def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    clf.fit(X_t_train, y_train)
    y_score = clf.predict_proba(X_t_test)
    app = dict()
    score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average=None)
    # auc_score = roc_auc_score(y_test, clf.predict(X_t_test), average='samples')
    avg_sample_score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average='samples')
    prec_score = precision_score(y_test, clf.predict(X_t_test), average='micro')
    rec_score = recall_score(y_test, clf.predict(X_t_test), average='micro')
    avg_prec = average_precision_score(y_test, clf.predict(X_t_test))
    metrics = [score, avg_sample_score, roc_auc_score(y_test, clf.predict_proba(X_t_test))]
    # app['Classwise Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(len(mlb.classes_)):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[mlb.classes_[i]] = auc(fpr[i], tpr[i])
    app['F2 Score'] = avg_sample_score
    app['ROC_AUC'] = roc_auc_score(y_test, clf.predict_proba(X_t_test))
    app['Classwise F2 Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    app['P_AUPR'] = avg_prec
    app['Precision'] = prec_score
    app['Recall'] = rec_score
    app['ROC_AUC_samples'] = roc_auc
    return app
def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    clf.fit(X_t_train, y_train)
    app = dict()
    score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average=None)
    avg_sample_score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average='samples')
    avg_prec = average_precision_score(y_test, clf.predict(X_t_test))
    metrics = [score, avg_sample_score, roc_auc_score(y_test, clf.predict_proba(X_t_test))]
    app['Classwise Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    app['F2 Score'] = avg_sample_score
    app['ROC_AUC'] = roc_auc_score(y_test, clf.predict_proba(X_t_test))
    app['Precision Score Avg (PR Curve)'] = avg_prec
    return app
def test_precision_recall_f1_score_binary():
    # Test Precision Recall and F1 Score for binary classification task
    y_true, y_pred, _ = make_prediction(binary=True)

    # detailed measures for each class
    p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average=None)
    assert_array_almost_equal(p, [0.73, 0.85], 2)
    assert_array_almost_equal(r, [0.88, 0.68], 2)
    assert_array_almost_equal(f, [0.80, 0.76], 2)
    assert_array_equal(s, [25, 25])

    # individual scoring function that can be used for grid search: in the
    # binary class case the score is the value of the measure for the positive
    # class (e.g. label == 1). This is deprecated for average != 'binary'.
    assert_dep_warning = partial(assert_warns, DeprecationWarning)
    for kwargs, my_assert in [({}, assert_no_warnings),
                              ({'average': 'binary'}, assert_no_warnings),
                              ({'average': 'micro'}, assert_dep_warning)]:
        ps = my_assert(precision_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(ps, 0.85, 2)

        rs = my_assert(recall_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(rs, 0.68, 2)

        fs = my_assert(f1_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(fs, 0.76, 2)

        assert_almost_equal(my_assert(fbeta_score, y_true, y_pred, beta=2,
                                      **kwargs),
                            (1 + 2 ** 2) * ps * rs / (2 ** 2 * ps + rs), 2)
def test_precision_recall_f1_no_labels():
    y_true = np.zeros((20, 3))
    y_pred = np.zeros_like(y_true)

    # tp = [0, 0, 0]
    # fn = [0, 0, 0]
    # fp = [0, 0, 0]
    # support = [0, 0, 0]
    # |y_hat_i inter y_i | = [0, 0, 0]
    # |y_i| = [0, 0, 0]
    # |y_hat_i| = [0, 0, 0]

    for beta in [1]:
        p, r, f, s = assert_warns(UndefinedMetricWarning,
                                  precision_recall_fscore_support,
                                  y_true, y_pred, average=None, beta=beta)
        assert_array_almost_equal(p, [0, 0, 0], 2)
        assert_array_almost_equal(r, [0, 0, 0], 2)
        assert_array_almost_equal(f, [0, 0, 0], 2)
        assert_array_almost_equal(s, [0, 0, 0], 2)

        fbeta = assert_warns(UndefinedMetricWarning, fbeta_score,
                             y_true, y_pred, beta=beta, average=None)
        assert_array_almost_equal(fbeta, [0, 0, 0], 2)

        for average in ["macro", "micro", "weighted", "samples"]:
            p, r, f, s = assert_warns(UndefinedMetricWarning,
                                      precision_recall_fscore_support,
                                      y_true, y_pred, average=average,
                                      beta=beta)
            assert_almost_equal(p, 0)
            assert_almost_equal(r, 0)
            assert_almost_equal(f, 0)
            assert_equal(s, None)

            fbeta = assert_warns(UndefinedMetricWarning, fbeta_score,
                                 y_true, y_pred,
                                 beta=beta, average=average)
            assert_almost_equal(fbeta, 0)
def test_precision_recall_f1_score_multiclass():
    # Test Precision Recall and F1 Score for multiclass classification task
    y_true, y_pred, _ = make_prediction(binary=False)

    # compute scores with default labels introspection
    p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average=None)
    assert_array_almost_equal(p, [0.83, 0.33, 0.42], 2)
    assert_array_almost_equal(r, [0.79, 0.09, 0.90], 2)
    assert_array_almost_equal(f, [0.81, 0.15, 0.57], 2)
    assert_array_equal(s, [24, 31, 20])

    # averaging tests
    ps = precision_score(y_true, y_pred, pos_label=1, average='micro')
    assert_array_almost_equal(ps, 0.53, 2)

    rs = recall_score(y_true, y_pred, average='micro')
    assert_array_almost_equal(rs, 0.53, 2)

    fs = f1_score(y_true, y_pred, average='micro')
    assert_array_almost_equal(fs, 0.53, 2)

    ps = precision_score(y_true, y_pred, average='macro')
    assert_array_almost_equal(ps, 0.53, 2)

    rs = recall_score(y_true, y_pred, average='macro')
    assert_array_almost_equal(rs, 0.60, 2)

    fs = f1_score(y_true, y_pred, average='macro')
    assert_array_almost_equal(fs, 0.51, 2)

    ps = precision_score(y_true, y_pred, average='weighted')
    assert_array_almost_equal(ps, 0.51, 2)

    rs = recall_score(y_true, y_pred, average='weighted')
    assert_array_almost_equal(rs, 0.53, 2)

    fs = f1_score(y_true, y_pred, average='weighted')
    assert_array_almost_equal(fs, 0.47, 2)

    assert_raises(ValueError, precision_score, y_true, y_pred,
                  average="samples")
    assert_raises(ValueError, recall_score, y_true, y_pred, average="samples")
    assert_raises(ValueError, f1_score, y_true, y_pred, average="samples")
    assert_raises(ValueError, fbeta_score, y_true, y_pred, average="samples",
                  beta=0.5)

    # same prediction but with an explicit label ordering
    p, r, f, s = precision_recall_fscore_support(
        y_true, y_pred, labels=[0, 2, 1], average=None)
    assert_array_almost_equal(p, [0.83, 0.41, 0.33], 2)
    assert_array_almost_equal(r, [0.79, 0.90, 0.10], 2)
    assert_array_almost_equal(f, [0.81, 0.57, 0.15], 2)
    assert_array_equal(s, [24, 20, 31])
def test_precision_recall_f1_score_with_an_empty_prediction():
    y_true = np.array([[0, 1, 0, 0], [1, 0, 0, 0], [0, 1, 1, 0]])
    y_pred = np.array([[0, 0, 0, 0], [0, 0, 0, 1], [0, 1, 1, 0]])

    # true_pos = [ 0.  1.  1.  0.]
    # false_pos = [ 0.  0.  0.  1.]
    # false_neg = [ 1.  1.  0.  0.]
    p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                 average=None)
    assert_array_almost_equal(p, [0.0, 1.0, 1.0, 0.0], 2)
    assert_array_almost_equal(r, [0.0, 0.5, 1.0, 0.0], 2)
    assert_array_almost_equal(f, [0.0, 1 / 1.5, 1, 0.0], 2)
    assert_array_almost_equal(s, [1, 2, 1, 0], 2)

    f2 = fbeta_score(y_true, y_pred, beta=2, average=None)
    support = s
    assert_array_almost_equal(f2, [0, 0.55, 1, 0], 2)

    p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                 average="macro")
    assert_almost_equal(p, 0.5)
    assert_almost_equal(r, 1.5 / 4)
    assert_almost_equal(f, 2.5 / (4 * 1.5))
    assert_equal(s, None)
    assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="macro"),
                        np.mean(f2))

    p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                 average="micro")
    assert_almost_equal(p, 2 / 3)
    assert_almost_equal(r, 0.5)
    assert_almost_equal(f, 2 / 3 / (2 / 3 + 0.5))
    assert_equal(s, None)
    assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="micro"),
                        (1 + 4) * p * r / (4 * p + r))

    p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                 average="weighted")
    assert_almost_equal(p, 3 / 4)
    assert_almost_equal(r, 0.5)
    assert_almost_equal(f, (2 / 1.5 + 1) / 4)
    assert_equal(s, None)
    assert_almost_equal(fbeta_score(y_true, y_pred, beta=2,
                                    average="weighted"),
                        np.average(f2, weights=support))

    p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                 average="samples")
    # |h(x_i) inter y_i | = [0, 0, 2]
    # |y_i| = [1, 1, 2]
    # |h(x_i)| = [0, 1, 2]
    assert_almost_equal(p, 1 / 3)
    assert_almost_equal(r, 1 / 3)
    assert_almost_equal(f, 1 / 3)
    assert_equal(s, None)
    assert_almost_equal(fbeta_score(y_true, y_pred, beta=2,
                                    average="samples"),
                        0.333, 2)