The following 50 code examples, extracted from open-source Python projects, illustrate how to use sklearn.metrics.r2_score().
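Before the project examples, here is a minimal standalone sketch of the basic call pattern (the arrays below are made-up illustration data, not taken from any of the projects): ground-truth values go first, predictions second, and the optional multioutput argument controls how multi-target scores are aggregated.

import numpy as np
from sklearn.metrics import r2_score

# hypothetical data: true targets and model predictions
y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])

print(r2_score(y_true, y_pred))  # single-output R^2; 1.0 would be a perfect fit

# multi-target regression: average the per-output R^2 scores uniformly
y_true_mo = np.array([[0.5, 1.0], [-1.0, 1.0], [7.0, -6.0]])
y_pred_mo = np.array([[0.0, 2.0], [-1.0, 2.0], [8.0, -5.0]])
print(r2_score(y_true_mo, y_pred_mo, multioutput='uniform_average'))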
def neural_regression(X_train, Y_train, X_val, Y_val, X_test, Y_test, nb_epoch=200, batch_size=10, seed=7):
    reg = neural_network(X_train.shape[1])
    reg.fit(X_train, Y_train,
            nb_epoch=nb_epoch,
            batch_size=batch_size,
            shuffle=True,
            validation_data=(X_val, Y_val),
            callbacks=[
                ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.01),
                EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=5, verbose=0, mode='auto'),
            ])
    pred = reg.predict(X_test)
    pred = np.reshape(pred, pred.shape[0])
    r2 = r2_score(Y_test, pred)
    return r2
def _plot_old_pred_data(old_pred_data, show_pred_plot, save_pred_plot, show_clarke_plot, save_clarke_plot, id_str, algorithm_str, minutes_str):
    actual_bg_array = old_pred_data.result_actual_bg_array
    actual_bg_time_array = old_pred_data.result_actual_bg_time_array
    pred_array = old_pred_data.result_pred_array
    pred_time_array = old_pred_data.result_pred_time_array

    # Root mean squared error
    rms = math.sqrt(metrics.mean_squared_error(actual_bg_array, pred_array))
    print "            Root Mean Squared Error: " + str(rms)
    print "            Mean Absolute Error: " + str(metrics.mean_absolute_error(actual_bg_array, pred_array))
    print "            R^2 Coefficient of Determination: " + str(metrics.r2_score(actual_bg_array, pred_array))

    plot, zone = ClarkeErrorGrid.clarke_error_grid(actual_bg_array, pred_array, id_str + " " + algorithm_str + " " + minutes_str)
    print "            Percent A:{}".format(float(zone[0]) / (zone[0] + zone[1] + zone[2] + zone[3] + zone[4]))
    print "            Percent C, D, E:{}".format(float(zone[2] + zone[3] + zone[4]) / (zone[0] + zone[1] + zone[2] + zone[3] + zone[4]))
    print "            Zones are A:{}, B:{}, C:{}, D:{}, E:{}\n".format(zone[0], zone[1], zone[2], zone[3], zone[4])
    if save_clarke_plot:
        plt.savefig(id_str + algorithm_str.replace(" ", "") + minutes_str + "clarke.png")
    if show_clarke_plot:
        plot.show()

    plt.clf()
    plt.plot(actual_bg_time_array, actual_bg_array, label="Actual BG", color='black', linestyle='-')
    plt.plot(pred_time_array, pred_array, label="BG Prediction", color='black', linestyle=':')
    plt.title(id_str + " " + algorithm_str + " " + minutes_str + " BG Analysis")
    plt.ylabel("Blood Glucose Level (mg/dl)")
    plt.xlabel("Time (minutes)")
    plt.legend(loc='upper left')

    # SHOW/SAVE PLOT DEPENDING ON THE BOOLEAN PARAMETER
    if save_pred_plot:
        plt.savefig(id_str + algorithm_str.replace(" ", "") + minutes_str + "plot.png")
    if show_pred_plot:
        plt.show()

# Function to analyze the old OpenAPS data
def test_multicibist_mpi(mpisync):
    """
    run this with something like:
    "mpirun -np 4 py.test ../tests/test_cubist.py::test_multicubist_mpi"
    """
    predictor = MultiCubist(trees=10,
                            sampling=60,
                            seed=1,
                            neighbors=1,
                            committee_members=5,
                            parallel=True)
    predictor.fit(x, y)

    # Predict the output
    y_pred_p = predictor.predict(x)

    score = r2_score(y, y_pred_p)

    assert 0.7 < score < 0.8
def scatter_regresion_Plot(X, Y, testName):
    plt.scatter(X, Y, c='b', label='_nolegend_', s=1)

    X = X.reshape(-1, 1)
    Y = Y.reshape(-1, 1)

    R2 = r2_score(X, Y)

    regr = linear_model.LinearRegression()
    regr.fit(X, Y)
    plt.plot(X, regr.predict(X), "--", label='Regression', color='r')

    plt.title(testName + ' ($R^2$: ' + "{0:.3f}".format(R2) + ")", fontsize=14)
    plt.xlabel('True Values', fontsize=12, weight='bold')
    plt.ylabel('Predicted Values', fontsize=12, weight='bold')
    plt.legend(loc='upper left', bbox_to_anchor=(0, 1.0), fancybox=True, shadow=True, fontsize=10)
    plt.subplots_adjust(left=0.2, right=0.9, bottom=0.05, top=0.97, wspace=0.15, hspace=0.3)
def trendLine(self, axis_choose=None):
    stable_sec = int(self.record_sec_le.text())
    stable_count = int(stable_sec * (1 / 0.007))

    if axis_choose:
        axis = axis_choose
    else:
        axis = str(self.axis_combobox.currentText())

    x = self.raw_data['time'][:stable_count]
    y = self.raw_data[axis][:stable_count]
    coefficients = np.polyfit(x, y, 1)
    p = np.poly1d(coefficients)
    coefficient_of_dermination = r2_score(y, p(x))
    self.trendLine_content1_label.setText("Trendline: " + str(p))
    self.trendLine_content2_label.setText("R: " + str(coefficient_of_dermination))
    return coefficients
def r_squared_mse(y_true, y_pred, sample_weight=None, multioutput=None):
    r2 = r2_score(y_true, y_pred, sample_weight=sample_weight, multioutput=multioutput)
    mse = mean_squared_error(y_true, y_pred, sample_weight=sample_weight, multioutput=multioutput)
    bounds_check = np.min(y_pred) > MIN_MOISTURE_BOUND
    bounds_check = bounds_check & (np.max(y_pred) < MAX_MOISTURE_BOUND)

    print('Scoring - std', np.std(y_true), np.std(y_pred))
    print('Scoring - median', np.median(y_true), np.median(y_pred))
    print('Scoring - min', np.min(y_true), np.min(y_pred))
    print('Scoring - max', np.max(y_true), np.max(y_pred))
    print('Scoring - mean', np.mean(y_true), np.mean(y_pred))
    print('Scoring - MSE, R2, bounds', mse, r2, bounds_check)

    return (float(mse), float(r2), int(bounds_check))
def score_regression(y, y_hat, report=True):
    """
    Create regression score
    :param y:
    :param y_hat:
    :return:
    """
    r2 = r2_score(y, y_hat)
    rmse = sqrt(mean_squared_error(y, y_hat))
    mae = mean_absolute_error(y, y_hat)

    report_string = "---Regression Score--- \n"
    report_string += "R2 = " + str(r2) + "\n"
    report_string += "RMSE = " + str(rmse) + "\n"
    report_string += "MAE = " + str(mae) + "\n"

    if report:
        print(report_string)

    return mae, report_string
def learning(self):
    X = self.X
    y = self.y

    print("Shape of X and y are", X.shape, y.shape)

    X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.2, random_state=42)
    X_train, X_val, y_train, y_val = model_selection.train_test_split(X_train, y_train, test_size=0.2, random_state=42)

    val_monitor = skflow.monitors.ValidationMonitor(X_val, y_val, early_stopping_rounds=200)
    model = skflow.TensorFlowDNNRegressor(hidden_units=[100, 50, 10], steps=5000)
    model.fit(X_train, y_train, val_monitor)

    yP = model.predict(X_test)
    score_r2 = metrics.r2_score(y_test, yP)
    score_MedAE = metrics.median_absolute_error(y_test, yP)
    print('Accuracy')
    print('--------')
    print('R2: {0:f}, MedAE: {1:f}'.format(score_r2, score_MedAE))

    if self.graph:
        kutil.regress_show4(y_test, yP)
def calc_cost_r2(self, batch_xs_test, batch_ys_test_1d):
    """
    calculate cost and r2 under the current weights
    """
    sess = self.sess
    cost = self.cost
    pred = self.pred
    x, y = self.x, self.y

    batch_ys_test = batch_ys_test_1d.reshape((batch_ys_test_1d.shape[0], 1))

    # Fit training using batch data
    # sess.run(optimizer, feed_dict={x: batch_xs_test, y: batch_ys_test})
    test_avg_cost = sess.run(cost, feed_dict={x: batch_xs_test, y: batch_ys_test})
    # Display logs per epoch step
    # print("Test:", "cost=", "{:.9f}".format(test_avg_cost))

    pred_test = sess.run(pred, feed_dict={x: batch_xs_test})
    r2 = metrics.r2_score(batch_ys_test, pred_test)
    # print("R2 of test data:", r2)

    return test_avg_cost, r2
def test_Validate():
    """Test that Validate function works correctly"""
    accuracy = an.validate(testing=True)
    val = mean_squared_error(y, slr.predict(X))
    assert np.allclose(accuracy, val)

    accuracy = an.validate(testing=True, X=X, y=y, metric=mean_squared_error)
    assert np.allclose(accuracy, val)

    accuracy = an.validate(testing=True, metric=[mean_squared_error, r2_score])
    val = [mean_squared_error(y, slr.predict(X)), r2_score(y, slr.predict(X))]
    assert np.allclose(accuracy, val)

    with pytest.raises(ValueError):
        an.validate(X=[1, 2, 3])
def r2_score_vec(y_true, y_pred):
    """ returns non-aggregate version of r2 score.

    based on r2_score() function from sklearn (http://sklearn.org)
    """
    numerator = (y_true - y_pred) ** 2
    denominator = (y_true - np.average(y_true)) ** 2

    nonzero_denominator = denominator != 0
    nonzero_numerator = numerator != 0
    valid_score = nonzero_denominator & nonzero_numerator
    output_scores = np.ones([y_true.shape[0]])
    output_scores[valid_score] = 1 - (numerator[valid_score] / denominator[valid_score])
    # arbitrary set to zero to avoid -inf scores, having a constant
    # y_true is not interesting for scoring a regression anyway
    output_scores[nonzero_numerator & ~nonzero_denominator] = 0.

    return output_scores
def test_few_fit_shapes():
    """test_few.py: fit and predict return correct shapes """
    np.random.seed(202)
    # load example data
    boston = load_boston()
    d = pd.DataFrame(data=boston.data)
    print("feature shape:", boston.data.shape)

    learner = FEW(generations=1, population_size=5,
                  mutation_rate=0.2, crossover_rate=0.8,
                  ml=LassoLarsCV(), min_depth=1, max_depth=3,
                  sel='epsilon_lexicase', tourn_size=2,
                  random_state=0, verbosity=0,
                  disable_update_check=False, fit_choice='mse')

    score = learner.fit(boston.data[:300], boston.target[:300])
    print("learner:", learner._best_estimator)
    yhat_test = learner.predict(boston.data[300:])
    test_score = learner.score(boston.data[300:], boston.target[300:])
    print("train score:", score, "test score:", test_score,
          "test r2:", r2_score(boston.target[300:], yhat_test))

    assert yhat_test.shape == boston.target[300:].shape
def test_multioutput_regression():
    y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
    y_pred = np.array([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])

    error = mean_squared_error(y_true, y_pred)
    assert_almost_equal(error, (1. / 3 + 2. / 3 + 2. / 3) / 4.)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    error = mean_absolute_error(y_true, y_pred)
    assert_almost_equal(error, (1. / 3 + 2. / 3 + 2. / 3) / 4.)

    error = r2_score(y_true, y_pred, multioutput='variance_weighted')
    assert_almost_equal(error, 1. - 5. / 2)
    error = r2_score(y_true, y_pred, multioutput='uniform_average')
    assert_almost_equal(error, -.875)
def r2_score_(solution, prediction):
    return metrics.r2_score(solution, prediction)
def report_metrics(yhat, y):
    # report metrics of training set
    r2 = r2_score(y, yhat)
    var_exp = explained_variance_score(y, yhat)
    r = stats.pearsonr(yhat, y)[0]
    logger.info('Model metrics for training set: r2={:.2f}, Variance explained={:.2f}, Pearson\'r={:.2f}'.format(r2, var_exp, r))
def r_score(y_true, y_pred, sample_weight=None, multioutput=None):
    r2 = r2_score(y_true, y_pred, sample_weight=sample_weight, multioutput=multioutput)
    r = np.sign(r2) * np.sqrt(np.abs(r2))
    if r <= -1:
        return -1
    else:
        return r
def test_modelmap(linear_data, get_models):
    yt, Xt, ys, Xs = linear_data()
    mod = get_models()
    mod.fit(Xt, yt)
    Ey = mod.predict(Xs)
    assert r2_score(ys, Ey) > 0
def test_krige(linear_data, get_krige_method):
    yt, Xt, ys, Xs = linear_data()
    mod = Krige(method=get_krige_method)
    mod.fit(np.tile(Xt, (1, 2)), yt)
    Ey = mod.predict(np.tile(Xs, (1, 2)))
    assert r2_score(ys, Ey) > 0
def test_mlkrige(linear_data, models_supported, get_krige_method):
    """
    tests algos that can be used with MLKrige
    """
    yt2, Xt2, ys2, Xs2 = linear_data()
    yt3, Xt3, ys3, Xs3 = linear_data(seed=10)
    yt4, Xt4, ys4, Xs4 = linear_data(seed=5)

    mlk = MLKrige(ml_method=models_supported, method=get_krige_method)
    arr = np.random.rand(Xt2.shape[0], 2)
    np.random.shuffle(arr)
    mlk.fit(np.hstack((Xt2, Xt3, Xt4)), yt2, lon_lat=arr)

    Ey = mlk.predict(np.hstack((Xs2, Xs3, Xs4)),
                     lon_lat=np.random.rand(Xs2.shape[0], 2))
    assert r2_score(ys2, Ey) > 0


# def test_modelpersistance(make_fakedata):
#     X, y, _, mod_dir = make_fakedata
#
#     for model in models.modelmaps.keys():
#         mod = models.modelmaps[model]()
#         mod.fit(X, y)
#
#         with open(path.join(mod_dir, model + ".pk"), 'wb') as f:
#             pickle.dump(mod, f)
#
#         with open(path.join(mod_dir, model + ".pk"), 'rb') as f:
#             pmod = pickle.load(f)
#
#         Ey = pmod.predict(X)
#
#         assert Ey.shape == y.shape
def test_correct_range():
    # Fit the data
    predictor = Cubist(print_output=False, sampling=90, seed=0, committee_members=2)
    predictor.fit(x, y)

    # Predict the output
    y_pred = predictor.predict(x)

    # Assert that the true y is similar to the prediction
    score = r2_score(y, y_pred)
    assert 0.68 < score < 0.8
def test_correct_range_with_sampling():
    # Fit the data
    predictor = Cubist(print_output=False, sampling=90, seed=10, committee_members=2)
    predictor.fit(x, y)

    # Predict the output
    y_pred = predictor.predict(x)

    # Assert that the true y is similar to the prediction
    score = r2_score(y, y_pred)
    assert 0.68 < score < 0.73
def score(self, x, y, lon_lat, sample_weight=None):
    """
    Overloading default regression score method
    """
    return r2_score(y_pred=self.predict(x, lon_lat),
                    y_true=y,
                    sample_weight=sample_weight)
def score(self, X, y, *args, **kwargs):
    """
    This score is used by Scikit-learn GridSearchCV/RandomisedSearchCV
    by all models that inherit TransformMixin.

    This is the score as seen by the ML model in the transformed target
    values. The final cross-val score in the original coordinates can be
    obtained from uncoverml.validate.

    Parameters
    ----------
    X : array-like, shape = (n_samples, n_features)
        Test samples.

    y : array-like, shape = (n_samples) or (n_samples, n_outputs)
        True values for X.

    sample_weight : array-like, shape = [n_samples], optional
        Sample weights.

    Returns
    -------
    score : float
        R^2 of self.predict(X) wrt. y.

    Returns
    -------
    score : float
        R^2 of self._notransform_predict(X) wrt. y.
    """
    y_t = self.target_transform.transform(y)
    if hasattr(self, 'ml_score') and self.ml_score:
        log.info('Using custom score')
        return r2_score(y_true=y_t,
                        y_pred=self._notransform_predict(X, *args, **kwargs))
    else:
        return super().score(X, y, *args, **kwargs)
def rsquared(s, o):
    """
    coefficient of determination (r-squared) using the python sklearn module
    input:
        s: simulated
        o: observed
    output:
        r2: coefficient of determination
    """
    s, o = filter_nan(s, o)
    return r2_score(o, s)
def run_qq(job, context, name, compare_id):
    """
    some measure of qq consistency
    """
    if not have_sklearn:
        return "sklearn_not_installed"

    work_dir = job.fileStore.getLocalTempDir()

    compare_file = os.path.join(work_dir, '{}.compare.positions'.format(name))
    job.fileStore.readGlobalFile(compare_id, compare_file)

    try:
        data = np.loadtxt(compare_file, dtype=np.int, delimiter=', ', usecols=(1, 2))

        # this can surely be sped up if necessary
        correct = Counter()
        total = Counter()
        for row in data:
            correct[row[1]] += row[0]
            total[row[1]] += 1

        qual_scores = []
        qual_observed = []
        for qual, cor in correct.items():
            qual_scores.append(qual)
            p_err = max(1. - float(cor) / float(total[qual]), sys.float_info.epsilon)
            observed_score = -10. * math.log10(p_err)
            qual_observed.append(observed_score)

        # should do non-linear regression as well?
        r2 = r2_score(qual_observed, qual_scores)
    except:
        # will happen if file is empty
        r2 = 'fail'

    return r2
def evaluate(X_train, X_test, y_train, y_test, pipeline):
    y_train_pred = pipeline.predict(X_train)
    y_test_pred = pipeline.predict(X_test)

    # Mean squared error for the hell of it
    mse_train = mean_squared_error(y_train, y_train_pred)
    mse_test = mean_squared_error(y_test, y_test_pred)
    print('MSE train {:.3}, validation {:.3}'.format(mse_train, mse_test))

    # Coefficient of determination
    r2_train = r2_score(y_train, y_train_pred)
    r2_test = r2_score(y_test, y_test_pred)
    print('R^2 train {:.3}, validation {:.3}'.format(r2_train, r2_test))
def compute_score(self, conf, hy):
    conf['_r2'] = r2_score(self.test_y, hy)
    conf['_spearmanr'] = spearmanr(self.test_y, hy)[0]
    conf['_pearsonr'] = pearsonr(self.test_y, hy)[0]
    conf['_score'] = conf['_' + self.score]
    # print(conf)
def r2(predicted, test):
    # warn when the prediction and test vectors have different lengths
    if len(predicted) != len(test):
        print("Predicted values and output test instances do not match.")
    return r2_score(test, predicted)
def main():
    train, test = load_ml100k.get_train_test(random_state=12)
    predicted = predict(train)
    r2 = metrics.r2_score(test[test > 0], predicted[test > 0])
    print('R2 stacked: {:.2%}'.format(r2))
def main(transpose_inputs=False):
    from load_ml100k import get_train_test
    train, test = get_train_test(random_state=12)
    if transpose_inputs:
        train = train.T
        test = test.T
    filled = predict(train)
    r2 = metrics.r2_score(test[test > 0], filled[test > 0])
    print('R2 score ({} regression): {:.1%}'.format(
        ('movie' if transpose_inputs else 'user'),
        r2))
def main(transpose_inputs=False):
    train, test = get_train_test(random_state=12)
    if transpose_inputs:
        train = train.T
        test = test.T
    predicted = predict(train)
    r2 = metrics.r2_score(test[test > 0], predicted[test > 0])
    print('R2 score (binary {} neighbours): {:.1%}'.format(
        ('movie' if transpose_inputs else 'user'),
        r2))
def main(transpose_inputs=False):
    from load_ml100k import get_train_test
    from sklearn import metrics
    train, test = get_train_test(random_state=12)
    if transpose_inputs:
        train = train.T
        test = test.T
    predicted = predict(train)
    r2 = metrics.r2_score(test[test > 0], predicted[test > 0])
    print('R2 score ({} normalization): {:.1%}'.format(
        ('movie' if transpose_inputs else 'user'),
        r2))
def _print_r2_score(y, predicted_y):
    print 'R-Square: ', r2_score(y, predicted_y)
    print '-----------------------'
def xgb_r2_score(preds, dtrain):
    labels = dtrain.get_label()
    return 'r2', r2_score(labels, preds)

# form DMatrices for Xgboost training
def score_gbt(X, y, params):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

    model = GradientBoostingRegressor(**params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Chocolate minimizes the loss
    return -r2_score(y_test, y_pred)
def continuous_score(x, y):
    return r2_score(x, y, multioutput='uniform_average')
def Conf_Measure(RegModel, Train_Data, True_Labels, ModelType):
    Predictions = RegModel.predict(Train_Data)

    tau, p_value = stats.kendalltau(True_Labels, Predictions)
    R2_Measure = r2_score(True_Labels, Predictions)

    print('The Kendall coefficient of ', ModelType, ' model is ', tau, ' with a p-value of ', p_value)
    print('The R Square of ', ModelType, ' model is ', R2_Measure)
    print('')

    return (tau, p_value, R2_Measure)
def score(self, x, y=None, multioutput="uniform_average"):
    if y is not None:
        xdot = y
    else:
        xdot = self.derivative.transform(x)
    return r2_score(self.model.predict(x), xdot, multioutput=multioutput)
def estimate_accuracy(yEv, yEv_calc, disp=False):
    """
    It was originally located in jchem. However now it is allocated here
    since the functionality is more inline with jutil than jchem.
    """
    r_sqr = metrics.r2_score(yEv, yEv_calc)
    RMSE = np.sqrt(metrics.mean_squared_error(yEv, yEv_calc))
    MAE = metrics.mean_absolute_error(yEv, yEv_calc)
    DAE = metrics.median_absolute_error(yEv, yEv_calc)

    if disp:
        print("r^2={0:.2e}, RMSE={1:.2e}, MAE={2:.2e}, DAE={3:.2e}".format(r_sqr, RMSE, MAE, DAE))

    return r_sqr, RMSE, MAE, DAE
def cv_LinearRegression(xM, yV, n_splits=5, scoring='median_absolute_error', disp=False):
    """
    metrics.explained_variance_score(y_true, y_pred)   Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])  Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)      Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])            R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here. Hence, it should be treated as a vector.
        clf.fit(xM[train, :], yV[train])

        yVp_test = clf.predict(xM[test, :])
        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # Only when this flag is on is the output displayed.
        print('{}: mean, std -->'.format(scoring), np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l
def cv_LinearRegression_ci_pred(xM, yV, n_splits=5, scoring='median_absolute_error', disp=False):
    """
    metrics.explained_variance_score(y_true, y_pred)   Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])  Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)      Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])            R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here. Hence, it should be treated as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        coef = np.array(clf.coef_).tolist()
        intercept = np.array(clf.intercept_).tolist()
        ci_l.append((clf.coef_, clf.intercept_))

        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # Only when this flag is on is the output displayed.
        print('{}: mean, std -->'.format(scoring), np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l, ci_l, yVp.A1.tolist()