The following 30 code examples, extracted from open-source Python projects, illustrate how to use sklearn.metrics.explained_variance_score().
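Before the project examples, here is a minimal, self-contained sketch (the target and prediction values below are made up purely for illustration): explained_variance_score(y_true, y_pred) measures how much of the variance in y_true is accounted for by y_pred, with a best possible score of 1.0.

import numpy as np
from sklearn.metrics import explained_variance_score

y_true = np.array([3.0, -0.5, 2.0, 7.0])  # illustrative ground-truth targets
y_pred = np.array([2.5, 0.0, 2.0, 8.0])   # illustrative predictions
# Best possible score is 1.0; lower values indicate a worse fit.
print(explained_variance_score(y_true, y_pred))  # ~0.957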
def test_cross_val_score_with_score_func_regression():
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    reg = Ridge()

    # Default score of the Ridge regression estimator
    scores = cross_val_score(reg, X, y, cv=5)
    assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # R2 score (aka. determination coefficient) - should be the
    # same as the default estimator score
    r2_scores = cross_val_score(reg, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # Mean squared error; this is a loss function, so "scores" are negative
    mse_scores = cross_val_score(reg, X, y, cv=5,
                                 scoring="mean_squared_error")
    expected_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    assert_array_almost_equal(mse_scores, expected_mse, 2)

    # Explained variance
    scoring = make_scorer(explained_variance_score)
    ev_scores = cross_val_score(reg, X, y, cv=5, scoring=scoring)
    assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
def test_cross_val_score_with_score_func_regression():
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    reg = Ridge()

    # Default score of the Ridge regression estimator
    scores = cval.cross_val_score(reg, X, y, cv=5)
    assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # R2 score (aka. determination coefficient) - should be the
    # same as the default estimator score
    r2_scores = cval.cross_val_score(reg, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # Mean squared error; this is a loss function, so "scores" are negative
    mse_scores = cval.cross_val_score(reg, X, y, cv=5,
                                      scoring="mean_squared_error")
    expected_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    assert_array_almost_equal(mse_scores, expected_mse, 2)

    # Explained variance
    scoring = make_scorer(explained_variance_score)
    ev_scores = cval.cross_val_score(reg, X, y, cv=5, scoring=scoring)
    assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
def report_metrics(yhat, y):
    # Report metrics on the training set.
    r2 = r2_score(y, yhat)
    var_exp = explained_variance_score(y, yhat)
    r = stats.pearsonr(yhat, y)[0]
    logger.info('Model metrics for training set: r2={:.2f}, '
                'Variance explained={:.2f}, Pearson\'s r={:.2f}'.format(r2, var_exp, r))
def cv_LinearRegression(xM, yV, n_splits=5, scoring='median_absolute_error', disp=False):
    """
    metrics.explained_variance_score(y_true, y_pred)
        Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)
        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])
        Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)
        Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])
        R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here; hence it is indexed as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # The output is displayed only when this flag is on.
        print('{}: mean, std -->'.format(scoring),
              np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l
def cv_LinearRegression_ci_pred(xM, yV, n_splits=5, scoring='median_absolute_error', disp=False):
    """
    metrics.explained_variance_score(y_true, y_pred)
        Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)
        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])
        Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)
        Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])
        R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here; hence it is indexed as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        coef = np.array(clf.coef_).tolist()
        intercept = np.array(clf.intercept_).tolist()
        ci_l.append((clf.coef_, clf.intercept_))

        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # The output is displayed only when this flag is on.
        print('{}: mean, std -->'.format(scoring),
              np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l, ci_l, yVp.A1.tolist()
def cv_LinearRegression_ci_pred_full_Ridge(xM, yV, alpha, n_splits=5, shuffle=True, disp=False):
    """
    Note: scoring is not used here. It may be used later; for now this is kept
    for compatibility.

    metrics.explained_variance_score(y_true, y_pred)
        Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)
        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])
        Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)
        Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])
        R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    # print('alpha of Ridge is', alpha)
    clf = linear_model.Ridge(alpha)
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here; hence it is indexed as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))
        y_a = np.array(yV[test])[:, 0]
        yp_a = np.array(yVp_test)[:, 0]
        cv_score_l.extend(np.abs(y_a - yp_a).tolist())

    return cv_score_l, ci_l, yVp.A1.tolist()
def cv_LinearRegression_ci_pred_full(xM, yV, n_splits=5, shuffle=True, disp=False):
    """
    Note: scoring is not used here. It may be used later; for now this is kept
    for compatibility.

    metrics.explained_variance_score(y_true, y_pred)
        Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)
        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])
        Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)
        Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])
        R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here; hence it is indexed as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))
        y_a = np.array(yV[test])[:, 0]
        yp_a = np.array(yVp_test)[:, 0]
        cv_score_l.extend(np.abs(y_a - yp_a).tolist())

    return cv_score_l, ci_l, yVp.A1.tolist()
def cv_LinearRegression_ci(xM, yV, n_splits=5, scoring='median_absolute_error', disp=False):
    """
    metrics.explained_variance_score(y_true, y_pred)
        Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)
        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])
        Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)
        Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])
        R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here; hence it is indexed as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))

        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # The output is displayed only when this flag is on.
        print('{}: mean, std -->'.format(scoring),
              np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l, ci_l
def cv_LinearRegression_ci_pred_full_Ridge(xM, yV, alpha, n_folds=5, shuffle=True, disp=False):
    """
    Note: scoring is not used here. It may be used later; for now this is kept
    for compatibility.

    metrics.explained_variance_score(y_true, y_pred)
        Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)
        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])
        Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)
        Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])
        R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    # print('alpha of Ridge is', alpha)
    clf = linear_model.Ridge(alpha)
    kf5_c = model_selection.KFold(n_splits=n_folds, shuffle=shuffle)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here; hence it is indexed as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))
        y_a = np.array(yV[test])[:, 0]
        yp_a = np.array(yVp_test)[:, 0]
        cv_score_l.extend(np.abs(y_a - yp_a).tolist())

    return cv_score_l, ci_l, yVp.A1.tolist()
def cv_LinearRegression_ci_pred_full(xM, yV, n_folds=5, shuffle=True, disp=False):
    """
    Note: scoring is not used here. It may be used later; for now this is kept
    for compatibility.

    metrics.explained_variance_score(y_true, y_pred)
        Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)
        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])
        Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)
        Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])
        R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_folds, shuffle=shuffle)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here; hence it is indexed as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))
        y_a = np.array(yV[test])[:, 0]
        yp_a = np.array(yVp_test)[:, 0]
        cv_score_l.extend(np.abs(y_a - yp_a).tolist())

    return cv_score_l, ci_l, yVp.A1.tolist()
def cv_LinearRegression(xM, yV, n_folds=5, scoring='median_absolute_error', disp=False):
    """
    metrics.explained_variance_score(y_true, y_pred)
        Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)
        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])
        Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)
        Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])
        R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5 = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)

    cv_score_l = list()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here; hence it is indexed as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # The output is displayed only when this flag is on.
        print('{}: mean, std -->'.format(scoring),
              np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l
def cv_LinearRegression_ci(xM, yV, n_folds=5, scoring='median_absolute_error', disp=False):
    """
    metrics.explained_variance_score(y_true, y_pred)
        Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)
        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])
        Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)
        Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])
        R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5 = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)

    cv_score_l = list()
    ci_l = list()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here; hence it is indexed as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))

        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # The output is displayed only when this flag is on.
        print('{}: mean, std -->'.format(scoring),
              np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l, ci_l
def cv_LinearRegression_ci_pred_full_Ridge(xM, yV, alpha, n_folds=5, shuffle=True, disp=False):
    """
    Note: scoring is not used here. It may be used later; for now this is kept
    for compatibility.

    metrics.explained_variance_score(y_true, y_pred)
        Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)
        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])
        Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)
        Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])
        R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    # print('alpha of Ridge is', alpha)
    clf = linear_model.Ridge(alpha)
    kf5 = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=shuffle)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here; hence it is indexed as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))
        y_a = np.array(yV[test])[:, 0]
        yp_a = np.array(yVp_test)[:, 0]
        cv_score_l.extend(np.abs(y_a - yp_a).tolist())

    return cv_score_l, ci_l, yVp.A1.tolist()
def cv_LinearRegression_ci_pred_full(xM, yV, n_folds=5, shuffle=True, disp=False):
    """
    Note: scoring is not used here. It may be used later; for now this is kept
    for compatibility.

    metrics.explained_variance_score(y_true, y_pred)
        Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)
        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])
        Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)
        Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])
        R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5 = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=shuffle)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here; hence it is indexed as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))
        y_a = np.array(yV[test])[:, 0]
        yp_a = np.array(yVp_test)[:, 0]
        cv_score_l.extend(np.abs(y_a - yp_a).tolist())

    return cv_score_l, ci_l, yVp.A1.tolist()
def cv_LinearRegression_ci_pred(xM, yV, n_folds=5, scoring='median_absolute_error', disp=False):
    """
    metrics.explained_variance_score(y_true, y_pred)
        Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)
        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])
        Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)
        Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])
        R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5 = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here; hence it is indexed as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        coef = np.array(clf.coef_).tolist()
        intercept = np.array(clf.intercept_).tolist()
        ci_l.append((clf.coef_, clf.intercept_))

        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # The output is displayed only when this flag is on.
        print('{}: mean, std -->'.format(scoring),
              np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l, ci_l, yVp.A1.tolist()
def eval_metrics_on(predictions, labels, regression=True):
    '''
    Assuming this is a regression task (labels are continuous-valued floats),
    returns most regression-related scores for the given predictions/targets as
    a dictionary: r2, mean_abs_error, mse, rmse, median_absolute_error,
    explained_variance_score.
    '''
    if len(labels[0]) == 2:
        # labels is a list of data/label pairs
        labels = np.concatenate([l[1] for l in labels])

    if regression:
        predictions = predictions[:, 0]
        r2 = metrics.r2_score(labels, predictions)
        mean_abs_error = np.abs(predictions - labels).mean()
        mse = ((predictions - labels) ** 2).mean()
        rmse = np.sqrt(mse)
        median_absolute_error = metrics.median_absolute_error(labels, predictions)  # robust to outliers
        explained_variance_score = metrics.explained_variance_score(labels, predictions)  # best score = 1, lower is worse
        return {'r2': r2, 'mean_abs_error': mean_abs_error, 'mse': mse,
                'rmse': rmse, 'median_absolute_error': median_absolute_error,
                'explained_variance_score': explained_variance_score,
                'main_metric': rmse}
    else:
        predictions = predictions[:, 1]
        if labels.max() == 1:
            auc = metrics.auc(predictions, labels[:, 1], reorder=1)
            accuracy = np.mean((predictions > 0.5) == labels[:, 1])
            return {'auc': auc, 'accuracy': accuracy, 'main_metric': accuracy}
def main(fx, scale):
    logdir = '../data/fx/ann/tensorboard_models/%s%s%s' % (
        scale, fx, time.strftime(time_format, time.localtime()))

    # Load dataset
    path_f_final = ['%s/%s_%s_f.npy' % (FILE_PREX, fx, scale),
                    '%s/%s_%s_t.pkl.npy' % (FILE_PREX, fx, scale)]
    path_f_in = '%s/%s_H.pkl' % (FILE_PREX, fx)
    pd_data = pd.read_pickle(path_f_in)['close']
    fx_max = max(pd_data)
    fx_min = min(pd_data)
    data = np.load(path_f_final[0])
    data_s = np.load(path_f_final[1])
    data_train = data[:data.shape[0] - num_test]
    data_test = data[data.shape[0] - num_test:]
    data_s_train = data_s[:data.shape[0] - num_test]
    data_s_test = data_s[data.shape[0] - num_test:]

    regressor = learn.TensorFlowEstimator(
        model_fn=my_model, n_classes=0, optimizer='SGD',
        batch_size=len(data_train), steps=20000, learning_rate=0.2)

    # Fit
    regressor.fit(data_train, data_s_train, logdir=logdir)

    # Predict and score
    prediction = regressor.predict(data_test)
    data = {'close_price': [i * (fx_max - fx_min) + fx_min for i in data_s_test],
            'predict': [i * (fx_max - fx_min) + fx_min for i in prediction]}
    frame = pd.DataFrame(data)
    frame.to_pickle('%s/%sprediction.pkl' % (logdir, fx))
    score1 = metrics.explained_variance_score(data_s_test, prediction)
    score2 = metrics.mean_absolute_error(data_s_test, prediction)
    print(score1, score2)
    return score1, score2
def score():
    methods = ['cro_cnn', 'cro_knn', 'cro_svm',
               'mon_ann', 'mon_knn', 'mon_svm',
               'day_ann', 'day_knn', 'day_svm']
    result_tmp1 = np.empty(0)
    result_tmp2 = np.empty(0)
    for fx in FX_LIST:
        data = pd.read_pickle('%s/summary_%s.pkl' % (PREX, fx))
        for method in methods:
            score1 = metrics.mean_squared_error(data['real'], data[method])
            result_tmp1 = np.append(result_tmp1, score1)
            score2 = metrics.explained_variance_score(data['real'], data[method])
            result_tmp2 = np.append(result_tmp2, score2)
    result1 = pd.DataFrame(result_tmp1.reshape(-1, len(methods)),
                           index=FX_LIST, columns=methods)
    result2 = pd.DataFrame(result_tmp2.reshape(-1, len(methods)),
                           index=FX_LIST, columns=methods)
    result1.to_pickle('%s/summary_mse.pkl' % PREX)
    result2.to_pickle('%s/summary_evs.pkl' % PREX)
    return result1, result2
def test_regression_metrics(n_samples=50):
    y_true = np.arange(n_samples)
    y_pred = y_true + 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.)
    assert_almost_equal(mean_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(median_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(r2_score(y_true, y_pred), 0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.)
def test_regression_metrics_at_limits():
    assert_almost_equal(mean_squared_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(median_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)
def cv_LinearRegression(xM, yV, n_folds=5, scoring='median_absolute_error', disp=False):
    """
    metrics.explained_variance_score(y_true, y_pred)
        Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)
        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])
        Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)
        Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])
        R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here; hence it is indexed as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # The output is displayed only when this flag is on.
        print('{}: mean, std -->'.format(scoring),
              np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l
def cv_LinearRegression_ci_pred(xM, yV, n_folds=5, scoring='median_absolute_error', disp=False):
    """
    metrics.explained_variance_score(y_true, y_pred)
        Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)
        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])
        Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)
        Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])
        R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here; hence it is indexed as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        coef = np.array(clf.coef_).tolist()
        intercept = np.array(clf.intercept_).tolist()
        ci_l.append((clf.coef_, clf.intercept_))

        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # The output is displayed only when this flag is on.
        print('{}: mean, std -->'.format(scoring),
              np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l, ci_l, yVp.A1.tolist()
def test_regression_multioutput_array():
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    mse = mean_squared_error(y_true, y_pred, multioutput='raw_values')
    mae = mean_absolute_error(y_true, y_pred, multioutput='raw_values')
    r = r2_score(y_true, y_pred, multioutput='raw_values')
    evs = explained_variance_score(y_true, y_pred, multioutput='raw_values')

    assert_array_almost_equal(mse, [0.125, 0.5625], decimal=2)
    assert_array_almost_equal(mae, [0.25, 0.625], decimal=2)
    assert_array_almost_equal(r, [0.95, 0.93], decimal=2)
    assert_array_almost_equal(evs, [0.95, 0.93], decimal=2)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    y_true = [[0, 0]] * 4
    y_pred = [[1, 1]] * 4
    mse = mean_squared_error(y_true, y_pred, multioutput='raw_values')
    mae = mean_absolute_error(y_true, y_pred, multioutput='raw_values')
    r = r2_score(y_true, y_pred, multioutput='raw_values')
    assert_array_almost_equal(mse, [1., 1.], decimal=2)
    assert_array_almost_equal(mae, [1., 1.], decimal=2)
    assert_array_almost_equal(r, [0., 0.], decimal=2)

    r = r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], multioutput='raw_values')
    assert_array_almost_equal(r, [0, -3.5], decimal=2)
    assert_equal(np.mean(r), r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]],
                                      multioutput='uniform_average'))
    evs = explained_variance_score([[0, -1], [0, 1]], [[2, 2], [1, 1]],
                                   multioutput='raw_values')
    assert_array_almost_equal(evs, [0, -1.25], decimal=2)

    # Checking for the condition in which both numerator and denominator
    # are zero.
    y_true = [[1, 3], [-1, 2]]
    y_pred = [[1, 4], [-1, 1]]
    r2 = r2_score(y_true, y_pred, multioutput='raw_values')
    assert_array_almost_equal(r2, [1., -3.], decimal=2)
    assert_equal(np.mean(r2), r2_score(y_true, y_pred,
                                       multioutput='uniform_average'))
    evs = explained_variance_score(y_true, y_pred, multioutput='raw_values')
    assert_array_almost_equal(evs, [1., -3.], decimal=2)
    assert_equal(np.mean(evs), explained_variance_score(y_true, y_pred))