We collected the following 49 code examples from open-source Python projects to illustrate how to use sklearn.metrics.mean_absolute_error().
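Before the project snippets, here is a minimal standalone call to the function they all build on; the toy values are illustrative only.

from sklearn.metrics import mean_absolute_error

y_true = [3.0, -0.5, 2.0, 7.0]
y_pred = [2.5, 0.0, 2.0, 8.0]
print(mean_absolute_error(y_true, y_pred))  # 0.5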
def calculate_regression_metrics(trained_sklearn_estimator, x_test, y_test):
    """
    Given a trained estimator, calculate metrics.

    Args:
        trained_sklearn_estimator (sklearn.base.BaseEstimator): a scikit-learn estimator that has been `.fit()`
        y_test (numpy.ndarray): A 1d numpy array of the y_test set (predictions)
        x_test (numpy.ndarray): A 2d numpy array of the x_test set (features)

    Returns:
        dict: A dictionary of metrics objects
    """
    # Get predictions
    predictions = trained_sklearn_estimator.predict(x_test)

    # Calculate individual metrics
    mean_squared_error = skmetrics.mean_squared_error(y_test, predictions)
    mean_absolute_error = skmetrics.mean_absolute_error(y_test, predictions)

    result = {'mean_squared_error': mean_squared_error, 'mean_absolute_error': mean_absolute_error}

    return result
def _plot_old_pred_data(old_pred_data, show_pred_plot, save_pred_plot, show_clarke_plot, save_clarke_plot, id_str, algorithm_str, minutes_str):
    actual_bg_array = old_pred_data.result_actual_bg_array
    actual_bg_time_array = old_pred_data.result_actual_bg_time_array
    pred_array = old_pred_data.result_pred_array
    pred_time_array = old_pred_data.result_pred_time_array

    # Root mean squared error
    rms = math.sqrt(metrics.mean_squared_error(actual_bg_array, pred_array))
    print("    Root Mean Squared Error: " + str(rms))
    print("    Mean Absolute Error: " + str(metrics.mean_absolute_error(actual_bg_array, pred_array)))
    print("    R^2 Coefficient of Determination: " + str(metrics.r2_score(actual_bg_array, pred_array)))

    plot, zone = ClarkeErrorGrid.clarke_error_grid(actual_bg_array, pred_array, id_str + " " + algorithm_str + " " + minutes_str)
    print("    Percent A:{}".format(float(zone[0]) / (zone[0] + zone[1] + zone[2] + zone[3] + zone[4])))
    print("    Percent C, D, E:{}".format(float(zone[2] + zone[3] + zone[4]) / (zone[0] + zone[1] + zone[2] + zone[3] + zone[4])))
    print("    Zones are A:{}, B:{}, C:{}, D:{}, E:{}\n".format(zone[0], zone[1], zone[2], zone[3], zone[4]))
    if save_clarke_plot:
        plt.savefig(id_str + algorithm_str.replace(" ", "") + minutes_str + "clarke.png")
    if show_clarke_plot:
        plot.show()

    plt.clf()
    plt.plot(actual_bg_time_array, actual_bg_array, label="Actual BG", color='black', linestyle='-')
    plt.plot(pred_time_array, pred_array, label="BG Prediction", color='black', linestyle=':')
    plt.title(id_str + " " + algorithm_str + " " + minutes_str + " BG Analysis")
    plt.ylabel("Blood Glucose Level (mg/dl)")
    plt.xlabel("Time (minutes)")
    plt.legend(loc='upper left')

    # Show/save the prediction plot depending on the boolean parameters
    if save_pred_plot:
        plt.savefig(id_str + algorithm_str.replace(" ", "") + minutes_str + "plot.png")
    if show_pred_plot:
        plt.show()

# Function to analyze the old OpenAPS data
def score_regression(y, y_hat, report=True):
    """
    Create regression score
    :param y: observed values
    :param y_hat: predicted values
    :return: MAE and the report string
    """
    r2 = r2_score(y, y_hat)
    rmse = sqrt(mean_squared_error(y, y_hat))
    mae = mean_absolute_error(y, y_hat)

    report_string = "---Regression Score--- \n"
    report_string += "R2 = " + str(r2) + "\n"
    report_string += "RMSE = " + str(rmse) + "\n"
    report_string += "MAE = " + str(mae) + "\n"

    if report:
        print(report_string)

    return mae, report_string
def gs_Ridge(xM, yV, alphas_log=(1, -1, 9), n_folds=5, n_jobs=-1, scoring='r2'):
    """
    Parameters
    -------------
    scoring: mean_absolute_error, mean_squared_error, median_absolute_error, r2
    """
    print('If scoring is not r2 but an error metric, the output score is reversed (negated) for scoring!')
    print(xM.shape, yV.shape)

    clf = linear_model.Ridge()
    # params = {'alpha': np.logspace(1, -1, 9)}
    params = {'alpha': np.logspace(*alphas_log)}

    kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf_n = kf_n_c.split(xM)

    gs = model_selection.GridSearchCV(clf, params, scoring=scoring, cv=kf_n, n_jobs=n_jobs)
    gs.fit(xM, yV)

    return gs
def gs_Ridge(xM, yV, alphas_log=(1, -1, 9), n_folds=5, n_jobs=-1, scoring='r2'):
    """
    Parameters
    -------------
    scoring: mean_absolute_error, mean_squared_error, median_absolute_error, r2
    """
    print(xM.shape, yV.shape)

    clf = linear_model.Ridge()
    # params = {'alpha': np.logspace(1, -1, 9)}
    params = {'alpha': np.logspace(*alphas_log)}

    kf_n = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)
    gs = grid_search.GridSearchCV(clf, params, scoring=scoring, cv=kf_n, n_jobs=n_jobs)
    gs.fit(xM, yV)

    return gs
def test_multioutput_regression():
    y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
    y_pred = np.array([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])

    error = mean_squared_error(y_true, y_pred)
    assert_almost_equal(error, (1. / 3 + 2. / 3 + 2. / 3) / 4.)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    error = mean_absolute_error(y_true, y_pred)
    assert_almost_equal(error, (1. / 3 + 2. / 3 + 2. / 3) / 4.)

    error = r2_score(y_true, y_pred, multioutput='variance_weighted')
    assert_almost_equal(error, 1. - 5. / 2)
    error = r2_score(y_true, y_pred, multioutput='uniform_average')
    assert_almost_equal(error, -.875)
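The "binary problem" comment deserves one extra sentence: with 0/1 targets and 0/1 predictions, every elementwise error is either 0 or 1, so |e| equals e**2 and MAE coincides with MSE. A short check (not part of the original test) confirming this:

import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
y_pred = np.array([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])
# Each elementwise error is 0 or 1, so |e| == e**2 and the two metrics agree.
assert mean_absolute_error(y_true, y_pred) == mean_squared_error(y_true, y_pred)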
def a_score_(solution, prediction):
    mad = float(mvmean(abs(solution - mvmean(solution))))
    return 1 - metrics.mean_absolute_error(solution, prediction) / mad
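This score normalizes the MAE by the mean absolute deviation of the targets, so 1 means perfect predictions and 0 means no better than predicting the target mean; mvmean is the project's own mean helper. A minimal sketch using np.mean in place of mvmean (an assumption about that helper's behavior):

import numpy as np
from sklearn.metrics import mean_absolute_error

def a_score(solution, prediction):
    # Mean absolute deviation of the targets, used as the normalizer.
    mad = float(np.mean(np.abs(solution - np.mean(solution))))
    return 1.0 - mean_absolute_error(solution, prediction) / mad

y = np.array([1.0, 2.0, 3.0, 4.0])
print(a_score(y, y))                      # 1.0: perfect predictions
print(a_score(y, np.full_like(y, 2.5)))   # 0.0: as good as predicting the mean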
def train_and_eval_sklearn_regressor(clf, data):

    x_train = data['x_train']
    y_train = data['y_train']

    x_test = data['x_test']
    y_test = data['y_test']

    clf.fit(x_train, y_train)
    p = clf.predict(x_train)

    mse = MSE(y_train, p)
    rmse = sqrt(mse)
    mae = MAE(y_train, p)

    print("\n# training | RMSE: {:.4f}, MAE: {:.4f}".format(rmse, mae))

    #
    p = clf.predict(x_test)

    mse = MSE(y_test, p)
    rmse = sqrt(mse)
    mae = MAE(y_test, p)

    print("# testing  | RMSE: {:.4f}, MAE: {:.4f}".format(rmse, mae))

    return {'loss': rmse, 'rmse': rmse, 'mae': mae}
def base_model():
    model = Sequential()
    model.add(Dense(32, input_dim=7, init='normal', activation='relu'))
    model.add(Dense(64, init='normal', activation='relu'))
    model.add(Dense(128, init='normal', activation='relu'))
    model.add(Dense(32, init='normal', activation='relu'))
    model.add(Dense(6, init='normal'))
    model.compile(loss='mean_absolute_error', optimizer='adam')
    return model
def print_metrics_regression(y_true, predictions, verbose=1):
    predictions = np.array(predictions)
    predictions = np.maximum(predictions, 0).flatten()
    y_true = np.array(y_true)

    y_true_bins = [get_bin_custom(x, CustomBins.nbins) for x in y_true]
    prediction_bins = [get_bin_custom(x, CustomBins.nbins) for x in predictions]
    cf = metrics.confusion_matrix(y_true_bins, prediction_bins)
    if verbose:
        print("Custom bins confusion matrix:")
        print(cf)

    kappa = metrics.cohen_kappa_score(y_true_bins, prediction_bins, weights='linear')
    mad = metrics.mean_absolute_error(y_true, predictions)
    mse = metrics.mean_squared_error(y_true, predictions)
    mape = mean_absolute_percentage_error(y_true, predictions)

    if verbose:
        print("Mean absolute deviation (MAD) =", mad)
        print("Mean squared error (MSE) =", mse)
        print("Mean absolute percentage error (MAPE) =", mape)
        print("Cohen kappa score =", kappa)

    return {"mad": mad, "mse": mse, "mape": mape, "kappa": kappa}
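The mean_absolute_percentage_error call above is a project-local helper that is not shown here. A minimal sketch of what such a helper typically computes, offered as an assumption rather than the project's exact definition:

import numpy as np

def mean_absolute_percentage_error(y_true, y_pred):
    # Assumed definition: mean of |relative error|, reported as a percentage.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100.0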
def evaluate_waste(self, yhat, y):
    """
    Given predicted yhat, evaluate it against observed y, using the loss function.

    Args:
        yhat, y (array(float)): The predicted and observed values, respectively.

    Returns:
        loss: Evaluated loss as a float.
    """
    if isinstance(yhat, pd.DataFrame) and isinstance(y, pd.DataFrame):
        # print(yhat.shape)
        # yhat.to_csv("yhat.csv")
        # y.to_csv("y.csv")
        yhat, y = self.extract_vectors(yhat, y)
    # print(len(yhat))
    if self.loss_waste == "L2":
        evaluated_loss = skm.mean_squared_error(y, yhat)
        # evaluated_loss = (1.0 / len(yhat)) * np.linalg.norm(yhat - y, ord=2)
    elif self.loss_waste == "L1":
        # evaluated_loss = (1.0 / len(yhat)) * np.linalg.norm(np.asarray(yhat) - np.asarray(y), ord=1)
        evaluated_loss = skm.mean_absolute_error(y, yhat)
    else:
        evaluated_loss = skm.mean_squared_error(y, yhat)
        # evaluated_loss = (1.0 / len(yhat)) * np.linalg.norm(np.asarray(yhat) - np.asarray(y), ord=2)  # L2
    return evaluated_loss
def getScores(labels_true, labels_pred):
    str2 = "Average Precision: " + str(precision_score(labels_true, labels_pred, average='weighted')) + '\n'
    str2 += "Average Recall: " + str(recall_score(labels_true, labels_pred, average='weighted')) + '\n'
    str2 += "Average F1-measure: " + str(f1_score(labels_true, labels_pred, average='weighted')) + '\n'
    str2 += "Accuracy score: " + str(accuracy_score(labels_true, labels_pred)) + '\n'
    str2 += "Mean absolute error (sklearn) on the test set is: " + str(mean_absolute_error(labels_true, labels_pred)) + '\n'
    str2 += "Average Mean absolute error, and per class (official): " + str(mae(labels_true, labels_pred)) + '\n'
    str2 += "Average Mean absolute error (official): " + str(mae(labels_true, labels_pred)[1]) + '\n'
    print(str2)
    return str2
def MAE(gold, pred):
    assert gold.shape == pred.shape
    return mean_absolute_error(gold, pred)
def forecast_one(model, train, valid, test, train_scale, valid_scale, test_scale):
    # Make 1-step forecasts
    trained = model.predict(train[0])
    validated = model.predict(valid[0])
    predicted = model.predict(test[0])

    trained = np.array(trained).flatten()
    validated = np.array(validated).flatten()
    predicted = np.array(predicted).flatten()

    # Rescale forecasts and target data (scale[0] is mean, scale[1] is std_dev)
    trained = trained * train_scale[1] + train_scale[0]
    validated = validated * valid_scale[1] + valid_scale[0]
    predicted = predicted * test_scale[1] + test_scale[0]

    trainY = train[1].flatten() * train_scale[1] + train_scale[0]
    validY = valid[1].flatten() * valid_scale[1] + valid_scale[0]
    testY = test[1].flatten() * test_scale[1] + test_scale[0]

    # Calculate errors
    mae1 = mean_absolute_error(trainY, trained)
    mae2 = mean_absolute_error(validY, validated)
    mae3 = mean_absolute_error(testY, predicted)
    print("Mean Absolute Error (MAE) train: %f" % mae1)
    print("Mean Absolute Error (MAE) valid: %f" % mae2)
    print("Mean Absolute Error (MAE) test: %f" % mae3)

    return predicted, testY, (mae1, mae2, mae3)
def regression(filename):
    from sklearn.cross_validation import train_test_split
    print(filename)
    X, y = loadDataSet(filename)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

    from sklearn.linear_model import LinearRegression
    from sklearn import metrics
    linreg = LinearRegression()
    linreg.fit(X_train, y_train)
    # print(linreg.intercept_, linreg.coef_)
    # pair the feature names with the coefficients
    feature_cols = ['????', '????', '??????', '?????', '??????', '???????', '???????', '?????????', '??????']
    # print(feature_cols, linreg.coef_)
    # zip(feature_cols, linreg.coef_)
    y_pred = linreg.predict(X_test)

    print("MAE:", metrics.mean_absolute_error(y_test, y_pred))
    print("MSE:", metrics.mean_squared_error(y_test, y_pred))
    print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

    scores = cross_val_score(linreg, X, y, cv=5)
    # print(filename)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    res = pd.DataFrame(linreg.coef_, columns=feature_cols, index=[filename])
    return res

# files = ['?????3?.xlsx', '?????4?.xlsx', '?????5?.xlsx', '?????6?.xlsx']
def regression(filename):
    from sklearn.linear_model import LinearRegression
    from sklearn import metrics
    X, y = loadDataSet(filename)
    print(filename, X.shape)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, test_size=0.25)

    linreg = LinearRegression()
    linreg.fit(X_train, y_train)
    # print(linreg.intercept_, linreg.coef_)
    # pair the feature names with the coefficients
    feature_cols = ['????', '????', '??????', '?????', '??????', '???????', '???????', '?????????', '??????']
    # feature_cols = ['????', '??????', '?????', '??????', '???????', '???????', '?????????', '??????']
    # print(feature_cols, linreg.coef_)
    # zip(feature_cols, linreg.coef_)
    y_pred = linreg.predict(X_test)

    print("MAE:", metrics.mean_absolute_error(y_test, y_pred))
    print("MSE:", metrics.mean_squared_error(y_test, y_pred))
    print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

    scores = cross_val_score(linreg, X, y, cv=3)
    print('scores:', scores)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    res = pd.DataFrame(linreg.coef_.T[:len(feature_cols)].T, columns=feature_cols, index=[filename.split('.')[0]])
    # res = pd.DataFrame(linreg.coef_, index=[filename.split('.')[0]])
    return res

# files = ['201603.xlsx', '201604.xlsx', '201605.xlsx', '?????3?.xlsx', '?????4?.xlsx', '?????5?.xlsx', '?????6?.xlsx']
# files = ['?????3?.xlsx', '?????4?.xlsx', '?????5?.xlsx', '?????6?.xlsx', '201703_06.xlsx']
# files = ['201703_06.xlsx']
def test_optimizer():
    opt = Optimizer([model, model_2], scorer=mean_absolute_error)
    output = opt.minimize('SLSQP')
    assert output.shape[0] == 2
    assert_almost_equal(output.sum(), 1.0, decimal=5)
def test_report_score():
    report_score(np.array([1, 2, 3]), mean_absolute_error)
    report_score(np.array([1, 2, 3]), None)
def test_apply():
    output = pipeline.apply(lambda x: np.mean(x, axis=0)).execute()
    assert output.shape[0] == dataset.X_test.shape[0]

    output = pipeline.apply(lambda x: np.mean(x, axis=0)).validate(scorer=mean_absolute_error, k=10)
    assert len(output) == 10
def error_rate(self, folds):
    holdout = 1 / float(folds)
    errors = []
    for fold in range(folds):
        y_hat, y_true = self.__validation_data(holdout)
        # TODO: Take a look at sklearn.metrics to see if there is any other metric you might want to look at
        errors.append(mean_absolute_error(y_true, y_hat))
    return errors
def get_mae(pred, actual):
    # Only compute on non-zero terms
    pred = pred[actual.nonzero()].flatten()
    actual = actual[actual.nonzero()].flatten()
    return mean_absolute_error(pred, actual)
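Masking to the non-zero entries is the usual way to evaluate collaborative-filtering predictions against a sparse ratings matrix, where zero means "not rated". A small hypothetical usage example (the toy matrices are illustrative only):

import numpy as np
from sklearn.metrics import mean_absolute_error

actual = np.array([[5, 0, 3],
                   [0, 4, 0]])          # 0 means "not rated"
pred = np.array([[4.5, 2.0, 3.5],
                 [1.0, 4.0, 2.0]])

mask = actual.nonzero()                 # indices of rated entries only
print(mean_absolute_error(pred[mask].flatten(), actual[mask].flatten()))  # ~0.333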
def eval_mae(preds, dtrain):
    labels = dtrain.get_label()
    return 'mae', MAE(np.exp(labels), np.exp(preds))
def eval_mae(preds, dataset):
    labels = dataset.get_label()
    return 'mae', MAE(np.exp(labels), np.exp(preds)), False
def mae_loss_func(weights):
    """scipy minimize will pass the weights as a numpy array"""
    final_prediction = 0
    for weight, prediction in zip(weights, predictions):
        final_prediction += prediction * weight
    return mean_absolute_error(actual.loss, final_prediction)
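This loss closes over module-level names: predictions (a list of per-model prediction arrays) and actual (a DataFrame with a loss column). It is typically handed to scipy's minimizer to find blending weights. A hedged sketch of that step; the starting point, bounds, and sum-to-one constraint are assumptions for illustration, not taken from the original project:

import numpy as np
from scipy.optimize import minimize

n_models = len(predictions)
starting_weights = np.full(n_models, 1.0 / n_models)   # start from an equal blend
constraints = ({'type': 'eq', 'fun': lambda w: 1.0 - np.sum(w)},)  # weights sum to 1
bounds = [(0.0, 1.0)] * n_models                        # keep each weight in [0, 1]

result = minimize(mae_loss_func, starting_weights, method='SLSQP',
                  bounds=bounds, constraints=constraints)
print('Best weights:', result.x, 'MAE:', result.fun)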
def xg_eval_mae(yhat, dtrain):
    y = dtrain.get_label()
    return 'mae', mean_absolute_error(np.exp(y) - shift, np.exp(yhat) - shift)
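Here the model was trained on log(y + shift), a common trick for heavily skewed targets, so the eval metric undoes the transform before computing MAE; shift is a module-level constant. A short illustration of the round trip this metric relies on, with shift = 200 assumed for the example rather than taken from the original project:

import numpy as np

shift = 200
y_raw = np.array([10.0, 500.0, 2500.0])
y_trans = np.log(y_raw + shift)    # target used for training
y_back = np.exp(y_trans) - shift   # what xg_eval_mae reconstructs before scoring
print(np.allclose(y_back, y_raw))  # True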
def mean_absolute_error(self):
    return mean_absolute_error(y_true=self.y, y_pred=self.predict(self.X))
def estimate_accuracy(yEv, yEv_calc, disp=False):
    """
    It was originally located in jchem. It now lives here since the
    functionality is more in line with jutil than jchem.
    """
    r_sqr = metrics.r2_score(yEv, yEv_calc)
    RMSE = np.sqrt(metrics.mean_squared_error(yEv, yEv_calc))
    MAE = metrics.mean_absolute_error(yEv, yEv_calc)
    DAE = metrics.median_absolute_error(yEv, yEv_calc)

    if disp:
        print("r^2={0:.2e}, RMSE={1:.2e}, MAE={2:.2e}, DAE={3:.2e}".format(r_sqr, RMSE, MAE, DAE))

    return r_sqr, RMSE, MAE, DAE
def cv_LinearRegression(xM, yV, n_splits=5, scoring='median_absolute_error', disp=False):
    """
    metrics.explained_variance_score(y_true, y_pred) : Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred) : Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...]) : Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred) : Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...]) : R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here. Hence, it should be treated as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # Only when this flag is on will the output be displayed.
        print('{}: mean, std -->'.format(scoring), np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l
def cv_LinearRegression_ci_pred(xM, yV, n_splits=5, scoring='median_absolute_error', disp=False):
    """
    metrics.explained_variance_score(y_true, y_pred) : Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred) : Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...]) : Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred) : Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...]) : R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here. Hence, it should be treated as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        coef = np.array(clf.coef_).tolist()
        intercept = np.array(clf.intercept_).tolist()
        ci_l.append((clf.coef_, clf.intercept_))
        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # Only when this flag is on will the output be displayed.
        print('{}: mean, std -->'.format(scoring), np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l, ci_l, yVp.A1.tolist()
def cv_LinearRegression_ci_pred_full_Ridge(xM, yV, alpha, n_splits=5, shuffle=True, disp=False):
    """
    Note - scoring is not used. It may be used later; for now it is kept for compatibility.

    metrics.explained_variance_score(y_true, y_pred) : Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred) : Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...]) : Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred) : Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...]) : R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)
        # print('alpha of Ridge is', alpha)

    clf = linear_model.Ridge(alpha)
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here. Hence, it should be treated as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))
        y_a = np.array(yV[test])[:, 0]
        yp_a = np.array(yVp_test)[:, 0]
        cv_score_l.extend(np.abs(y_a - yp_a).tolist())

    return cv_score_l, ci_l, yVp.A1.tolist()
def estimate_accuracy4(yEv, yEv_calc, disp=False):
    """
    It was originally located in jchem. It now lives here since the
    functionality is more in line with jutil than jchem.
    """
    r_sqr = metrics.r2_score(yEv, yEv_calc)
    RMSE = np.sqrt(metrics.mean_squared_error(yEv, yEv_calc))
    MAE = metrics.mean_absolute_error(yEv, yEv_calc)
    DAE = metrics.median_absolute_error(yEv, yEv_calc)

    if disp:
        print("r^2={0:.2e}, RMSE={1:.2e}, MAE={2:.2e}, DAE={3:.2e}".format(r_sqr, RMSE, MAE, DAE))

    return r_sqr, RMSE, MAE, DAE
def cv_LinearRegression_ci(xM, yV, n_splits=5, scoring='median_absolute_error', disp=False):
    """
    metrics.explained_variance_score(y_true, y_pred) : Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred) : Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...]) : Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred) : Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...]) : R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here. Hence, it should be treated as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))
        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # Only when this flag is on will the output be displayed.
        print('{}: mean, std -->'.format(scoring), np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l, ci_l
def cv_LinearRegression_ci_pred_full(xM, yV, n_splits=5, shuffle=True, disp=False):
    """
    Note - scoring is not used. It may be used later; for now it is kept for compatibility.

    metrics.explained_variance_score(y_true, y_pred) : Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred) : Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...]) : Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred) : Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...]) : R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here. Hence, it should be treated as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))
        y_a = np.array(yV[test])[:, 0]
        yp_a = np.array(yVp_test)[:, 0]
        cv_score_l.extend(np.abs(y_a - yp_a).tolist())

    return cv_score_l, ci_l, yVp.A1.tolist()
def estimate_accuracy4(yEv, yEv_calc, disp=False):
    r_sqr = metrics.r2_score(yEv, yEv_calc)
    RMSE = np.sqrt(metrics.mean_squared_error(yEv, yEv_calc))
    MAE = metrics.mean_absolute_error(yEv, yEv_calc)
    DAE = metrics.median_absolute_error(yEv, yEv_calc)

    if disp:
        print("r^2={0:.2e}, RMSE={1:.2e}, MAE={2:.2e}, DAE={3:.2e}".format(r_sqr, RMSE, MAE, DAE))

    return r_sqr, RMSE, MAE, DAE
def cv_LinearRegression_ci(xM, yV, n_folds=5, scoring='median_absolute_error', disp=False):
    """
    metrics.explained_variance_score(y_true, y_pred) : Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred) : Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...]) : Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred) : Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...]) : R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here. Hence, it should be treated as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))
        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # Only when this flag is on will the output be displayed.
        print('{}: mean, std -->'.format(scoring), np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l, ci_l
def cv_LinearRegression_ci_pred_full_Ridge(xM, yV, alpha, n_folds=5, shuffle=True, disp=False):
    """
    Note - scoring is not used. It may be used later; for now it is kept for compatibility.

    metrics.explained_variance_score(y_true, y_pred) : Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred) : Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...]) : Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred) : Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...]) : R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)
        # print('alpha of Ridge is', alpha)

    clf = linear_model.Ridge(alpha)
    kf5_c = model_selection.KFold(n_splits=n_folds, shuffle=shuffle)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here. Hence, it should be treated as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))
        y_a = np.array(yV[test])[:, 0]
        yp_a = np.array(yVp_test)[:, 0]
        cv_score_l.extend(np.abs(y_a - yp_a).tolist())

    return cv_score_l, ci_l, yVp.A1.tolist()
def cv_LinearRegression_ci_pred_full(xM, yV, n_folds=5, shuffle=True, disp=False):
    """
    Note - scoring is not used. It may be used later; for now it is kept for compatibility.

    metrics.explained_variance_score(y_true, y_pred) : Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred) : Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...]) : Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred) : Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...]) : R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_folds, shuffle=shuffle)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here. Hence, it should be treated as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))
        y_a = np.array(yV[test])[:, 0]
        yp_a = np.array(yVp_test)[:, 0]
        cv_score_l.extend(np.abs(y_a - yp_a).tolist())

    return cv_score_l, ci_l, yVp.A1.tolist()
def make_scoring(scoring):
    """
    The score is reversed (negated) when greater_is_better is False.
    """
    if scoring == 'r2':
        return metrics.make_scorer(metrics.r2_score)
    elif scoring == 'mean_absolute_error':
        return metrics.make_scorer(metrics.mean_absolute_error, greater_is_better=False)
    elif scoring == 'mean_squared_error':
        return metrics.make_scorer(metrics.mean_squared_error, greater_is_better=False)
    elif scoring == 'median_absolute_error':
        return metrics.make_scorer(metrics.median_absolute_error, greater_is_better=False)
    else:
        raise ValueError("Not supported scoring")
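make_scorer with greater_is_better=False wraps an error metric so that cross-validation and grid search, which always maximize, can use it: the resulting scorer reports the negated error. A hypothetical usage sketch; the toy data and estimator are assumptions for illustration:

import numpy as np
from sklearn import linear_model, model_selection

X = np.random.rand(50, 3)
y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * np.random.rand(50)

scorer = make_scoring('mean_absolute_error')
scores = model_selection.cross_val_score(linear_model.LinearRegression(), X, y, scoring=scorer, cv=5)
print(scores)  # negated MAE per fold (higher, i.e. closer to 0, is better)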
def cv_LinearRegression(xM, yV, n_folds=5, scoring='median_absolute_error', disp=False):
    """
    metrics.explained_variance_score(y_true, y_pred) : Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred) : Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...]) : Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred) : Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...]) : R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5 = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)

    cv_score_l = list()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here. Hence, it should be treated as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # Only when this flag is on will the output be displayed.
        print('{}: mean, std -->'.format(scoring), np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l
def cv_LinearRegression_ci_pred(xM, yV, n_folds=5, scoring='median_absolute_error', disp=False):
    """
    metrics.explained_variance_score(y_true, y_pred) : Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred) : Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...]) : Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred) : Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...]) : R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5 = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here. Hence, it should be treated as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        coef = np.array(clf.coef_).tolist()
        intercept = np.array(clf.intercept_).tolist()
        ci_l.append((clf.coef_, clf.intercept_))
        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # Only when this flag is on will the output be displayed.
        print('{}: mean, std -->'.format(scoring), np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l, ci_l, yVp.A1.tolist()
def cv_LinearRegression_ci_pred_full_Ridge(xM, yV, alpha, n_folds=5, shuffle=True, disp=False):
    """
    Note - scoring is not used. It may be used later; for now it is kept for compatibility.

    metrics.explained_variance_score(y_true, y_pred) : Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred) : Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...]) : Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred) : Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...]) : R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)
        # print('alpha of Ridge is', alpha)

    clf = linear_model.Ridge(alpha)
    kf5 = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=shuffle)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here. Hence, it should be treated as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))
        y_a = np.array(yV[test])[:, 0]
        yp_a = np.array(yVp_test)[:, 0]
        cv_score_l.extend(np.abs(y_a - yp_a).tolist())

    return cv_score_l, ci_l, yVp.A1.tolist()
def cv_LinearRegression_ci_pred_full(xM, yV, n_folds=5, shuffle=True, disp=False):
    """
    Note - scoring is not used. It may be used later; for now it is kept for compatibility.

    metrics.explained_variance_score(y_true, y_pred) : Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred) : Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...]) : Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred) : Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...]) : R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5 = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=shuffle)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here. Hence, it should be treated as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))
        y_a = np.array(yV[test])[:, 0]
        yp_a = np.array(yVp_test)[:, 0]
        cv_score_l.extend(np.abs(y_a - yp_a).tolist())

    return cv_score_l, ci_l, yVp.A1.tolist()
def cv_LinearRegression_ci(xM, yV, n_folds=5, scoring='median_absolute_error', disp=False):
    """
    metrics.explained_variance_score(y_true, y_pred) : Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred) : Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...]) : Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred) : Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...]) : R^2 (coefficient of determination) regression score function.
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5 = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)

    cv_score_l = list()
    ci_l = list()
    for train, test in kf5:
        # clf.fit(xM[train, :], yV[train, :])
        # yV is a vector, not a matrix, here. Hence, it should be treated as a vector.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))
        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # Only when this flag is on will the output be displayed.
        print('{}: mean, std -->'.format(scoring), np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l, ci_l