The following 12 code examples, extracted from open-source Python projects, illustrate how to use sklearn.metrics.brier_score_loss().
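Before the project examples, here is a minimal, self-contained sketch of the basic call; the toy labels and probabilities below are made up purely for illustration.

from sklearn.metrics import brier_score_loss

# Toy binary labels and predicted probabilities of the positive class
# (values are illustrative only).
y_true = [0, 1, 1, 0, 1]
y_prob = [0.1, 0.9, 0.8, 0.3, 0.6]

# Brier score = mean squared difference between the predicted probability
# and the actual outcome; 0.0 is perfect, lower is better.
print(brier_score_loss(y_true, y_prob))  # 0.062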
def calculate_brier_score_loss(actuals, probas):
    return -1 * brier_score_loss(actuals, probas)
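The sign flip turns the loss (lower is better) into a score (higher is better), matching scikit-learn's scorer convention. As a rough equivalent, recent scikit-learn versions also ship a built-in 'neg_brier_score' scoring string; the sketch below assumes such a version and uses a toy dataset.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

X, y = make_classification(n_samples=200, random_state=0)

# 'neg_brier_score' negates brier_score_loss so that higher is better,
# mirroring the -1 * ... convention used in the example above.
scores = cross_val_score(LogisticRegression(max_iter=1000), X, y,
                         scoring='neg_brier_score', cv=3)
print(scores)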
def __init__(self, scoring_method=None):
    if scoring_method is None:
        scoring_method = 'brier_score_loss'

    self.scoring_method = scoring_method

    if callable(scoring_method):
        self.scoring_func = scoring_method
    else:
        self.scoring_func = scoring_name_function_map[scoring_method]
def score(self, estimator, X, y, advanced_scoring=False):
    X, y = utils.drop_missing_y_vals(X, y, output_column=None)

    if isinstance(estimator, GradientBoostingClassifier):
        X = X.toarray()

    predictions = estimator.predict_proba(X)

    if self.scoring_method == 'brier_score_loss':
        # At the moment, Microsoft's LightGBM can return probabilities > 1 and < 0,
        # which can break some scoring functions. So we clamp each predicted
        # probability into [0, 1] (min with 1, then max with 0).
        probas = [max(min(row[1], 1), 0) for row in predictions]
        predictions = probas

    try:
        score = self.scoring_func(y, predictions)
    except ValueError:
        bad_val_indices = []
        for idx, val in enumerate(y):
            if str(val) in bad_vals_as_strings:
                bad_val_indices.append(idx)

        predictions = [val for idx, val in enumerate(predictions) if idx not in bad_val_indices]
        y = [val for idx, val in enumerate(y) if idx not in bad_val_indices]

        print('Found ' + str(len(bad_val_indices)) + ' null or infinity values in the y values. We will ignore these, and report the score on the rest of the dataset')
        try:
            score = self.scoring_func(y, predictions)
        except ValueError:
            # Sometimes, particularly for a badly fit model using either too little data,
            # or a really bad set of hyperparameters during a grid search, we can predict
            # probas that are > 1 or < 0. We'll cap those here, while warning the user
            # about them, because they're unlikely to occur in a model that's properly
            # trained with enough data and reasonable params.
            predictions = self.clean_probas(predictions)
            score = self.scoring_func(y, predictions)

    if advanced_scoring:
        return (-1 * score, predictions)
    else:
        return -1 * score
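The clean_probas helper referenced above is not part of this excerpt. A minimal sketch of what such a clamping step might look like is shown below; the name and behavior are assumed for illustration and are not the project's actual implementation.

def clean_probas(probas):
    # Clamp each predicted probability into [0, 1]. Handles both flat lists of
    # positive-class probabilities and nested [p_negative, p_positive] rows.
    # (Illustrative only; the real helper may also warn or renormalize.)
    cleaned = []
    for row in probas:
        if isinstance(row, (list, tuple)):
            cleaned.append([max(min(p, 1), 0) for p in row])
        else:
            cleaned.append(max(min(row, 1), 0))
    return cleaned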
def __call__(self, y_true_proba, y_proba):
    return brier_score_loss(y_true_proba, y_proba)
def __call__(self, y_true_proba, y_proba):
    climo = np.ones(y_true_proba.size) * y_true_proba.mean()
    bs = brier_score_loss(y_true_proba, y_proba)
    bs_c = brier_score_loss(y_true_proba, climo)
    return 1 - bs / bs_c
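This example computes a Brier Skill Score: the forecast's Brier score is compared to that of a "climatology" reference that always predicts the base rate, giving 1 - BS/BS_ref (1 is perfect, 0 matches climatology, negative is worse than climatology). A small usage sketch with made-up data:

import numpy as np
from sklearn.metrics import brier_score_loss

y_true = np.array([0, 1, 1, 0, 1, 1, 0, 1])
y_prob = np.array([0.2, 0.9, 0.7, 0.1, 0.8, 0.6, 0.3, 0.9])

climo = np.full(y_true.size, y_true.mean())   # always predict the base rate
bs = brier_score_loss(y_true, y_prob)
bs_c = brier_score_loss(y_true, climo)
print(1 - bs / bs_c)  # positive means the forecast beats climatology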
def test_calibration_prefit():
    """Test calibration for prefitted classifiers"""
    n_samples = 50
    X, y = make_classification(n_samples=3 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=y.size)

    X -= X.min()  # MultinomialNB only allows positive X

    # split train and test
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_calib, y_calib, sw_calib = \
        X[n_samples:2 * n_samples], y[n_samples:2 * n_samples], \
        sample_weight[n_samples:2 * n_samples]
    X_test, y_test = X[2 * n_samples:], y[2 * n_samples:]

    # Naive-Bayes
    clf = MultinomialNB()
    clf.fit(X_train, y_train, sw_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    # Naive Bayes with calibration
    for this_X_calib, this_X_test in [(X_calib, X_test),
                                      (sparse.csr_matrix(X_calib),
                                       sparse.csr_matrix(X_test))]:
        for method in ['isotonic', 'sigmoid']:
            pc_clf = CalibratedClassifierCV(clf, method=method, cv="prefit")

            for sw in [sw_calib, None]:
                pc_clf.fit(this_X_calib, y_calib, sample_weight=sw)
                y_prob = pc_clf.predict_proba(this_X_test)
                y_pred = pc_clf.predict(this_X_test)
                prob_pos_pc_clf = y_prob[:, 1]
                assert_array_equal(y_pred,
                                   np.array([0, 1])[np.argmax(y_prob, axis=1)])

                assert_greater(brier_score_loss(y_test, prob_pos_clf),
                               brier_score_loss(y_test, prob_pos_pc_clf))
def test_brier_score_loss():
    # Check brier_score_loss function
    y_true = np.array([0, 1, 1, 0, 1, 1])
    y_pred = np.array([0.1, 0.8, 0.9, 0.3, 1., 0.95])
    true_score = linalg.norm(y_true - y_pred) ** 2 / len(y_true)

    assert_almost_equal(brier_score_loss(y_true, y_true), 0.0)
    assert_almost_equal(brier_score_loss(y_true, y_pred), true_score)
    assert_almost_equal(brier_score_loss(1. + y_true, y_pred), true_score)
    assert_almost_equal(brier_score_loss(2 * y_true - 1, y_pred), true_score)
    assert_raises(ValueError, brier_score_loss, y_true, y_pred[1:])
    assert_raises(ValueError, brier_score_loss, y_true, y_pred + 1.)
    assert_raises(ValueError, brier_score_loss, y_true, y_pred - 1.)
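The reference value in this test is simply the mean squared difference between the labels and the predicted probabilities. A short check of that identity, using the same toy numbers as the test:

import numpy as np
from sklearn.metrics import brier_score_loss

y_true = np.array([0, 1, 1, 0, 1, 1])
y_pred = np.array([0.1, 0.8, 0.9, 0.3, 1.0, 0.95])

# Brier score = mean of (probability - outcome)^2
manual = np.mean((y_true - y_pred) ** 2)
print(manual, brier_score_loss(y_true, y_pred))  # the two values agree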
def advanced_scoring_classifiers(probas, actuals, name=None):
    # pandas Series don't play nice here. Make sure our actuals list is indeed a list
    actuals = list(actuals)
    predictions = list(probas)

    print('Here is our brier-score-loss, which is the default value we optimized for while training, and is the value returned from .score() unless you requested a custom scoring metric')
    print('It is a measure of how close the PROBABILITY predictions are.')
    if name is not None:
        print(name)

    # Sometimes we will be given "flattened" probabilities (only the probability of our
    # positive label), while other times we might be given "nested" probabilities
    # (probabilities of both positive and negative, in a list, for each item).
    try:
        probas = [proba[1] for proba in probas]
    except:
        pass

    print(format(brier_score_loss(actuals, probas), '.4f'))

    print('\nHere is the trained estimator\'s overall accuracy (when it predicts a label, how frequently is that the correct label?)')
    predicted_labels = []
    for pred in probas:
        if pred >= 0.5:
            predicted_labels.append(1)
        else:
            predicted_labels.append(0)
    print(format(accuracy_score(y_true=actuals, y_pred=predicted_labels) * 100, '.1f') + '%')

    print('\nHere is a confusion matrix showing predictions and actuals by label')
    # it would make sense to use sklearn's confusion_matrix here but it apparently has no labels
    # took this idea instead from: http://stats.stackexchange.com/a/109015
    conf = pd.crosstab(pd.Series(actuals), pd.Series(predicted_labels),
                       rownames=['v Actual v'], colnames=['Predicted >'], margins=True)
    print(conf)

    print('Here is the accuracy of our trained estimator at each level of predicted probabilities')

    # create summary dict
    summary_dict = OrderedDict()
    for num in range(0, 110, 10):
        summary_dict[num] = []

    for idx, proba in enumerate(probas):
        proba = math.floor(int(proba * 100) / 10) * 10
        summary_dict[proba].append(actuals[idx])

    for k, v in summary_dict.items():
        if len(v) > 0:
            print('Predicted probability: ' + str(k) + '%')
            actual = sum(v) * 1.0 / len(v)

            # Format into a prettier number
            actual = round(actual * 100, 0)
            print('Actual: ' + str(actual) + '%')
            print('# preds: ' + str(len(v)) + '\n')

    print('\n\n')
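The per-bucket accuracy table at the end of this example is essentially a hand-rolled reliability (calibration) table. scikit-learn's calibration_curve performs the same kind of check; a brief sketch with made-up data (labels and probabilities are synthetic, only for illustration):

import numpy as np
from sklearn.calibration import calibration_curve

rng = np.random.RandomState(0)
y_true = rng.randint(0, 2, size=500)                     # toy labels
y_prob = y_true * 0.6 + rng.uniform(size=500) * 0.4      # toy probabilities in [0, 1]

# Fraction of positives observed in each predicted-probability bin,
# analogous to the summary_dict buckets printed above.
prob_true, prob_pred = calibration_curve(y_true, y_prob, n_bins=10)
for p_pred, p_true in zip(prob_pred, prob_true):
    print('predicted ~{:.2f} -> observed {:.2f}'.format(p_pred, p_true))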