The following code examples, extracted from open-source Python projects, illustrate how to use sklearn.metrics.median_absolute_error().
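Most of the snippets below are excerpts from larger modules and do not show their imports. As a minimal, self-contained sketch of the metric itself (the sample arrays are invented for illustration):

import numpy as np
from sklearn import metrics

# median_absolute_error is the median of |y_true - y_pred|, which makes it
# robust to a few large outlier errors.
y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])
print(metrics.median_absolute_error(y_true, y_pred))  # 0.5

The cross-validation helpers that follow additionally assume module-level imports along the lines of:

import numpy as np
from sklearn import linear_model, metrics, model_selection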
def cv_LinearRegression_It(xM, yV, n_splits=5, scoring='median_absolute_error',
                           N_it=10, disp=False, ldisp=False):
    """
    Cross-validation is repeated N_it times for further averaging.
    The 'disp' flag is turned off so that individual iterations are not shown.
    """
    cv_score_le = list()
    for ni in range(N_it):
        cv_score_l = cv_LinearRegression(xM, yV, n_splits=n_splits,
                                         scoring=scoring, disp=disp)
        cv_score_le.extend(cv_score_l)

    o_d = {'mean': np.mean(cv_score_le),
           'std': np.std(cv_score_le),
           'list': cv_score_le}
    if disp or ldisp:
        print('{0}: mean(+/-std) --> {1}(+/-{2})'.format(
            scoring, o_d['mean'], o_d['std']))

    return o_d
def cv_LinearRegression_ci_It(xM, yV, n_splits=5, scoring='median_absolute_error',
                              N_it=10, disp=False, ldisp=False):
    """
    Cross-validation is repeated N_it times for further averaging.
    The 'disp' flag is turned off so that individual iterations are not shown.
    """
    cv_score_le = list()
    ci_le = list()
    for ni in range(N_it):
        cv_score_l, ci_l = cv_LinearRegression_ci(xM, yV, n_splits=n_splits,
                                                  scoring=scoring, disp=disp)
        cv_score_le.extend(cv_score_l)
        ci_le.extend(ci_l)

    o_d = {'mean': np.mean(cv_score_le),
           'std': np.std(cv_score_le),
           'list': cv_score_le,
           'ci': ci_le}
    if disp or ldisp:
        print('{0}: mean(+/-std) --> {1}(+/-{2})'.format(
            scoring, o_d['mean'], o_d['std']))

    return o_d
def cv_LinearRegression_ci_pred_It(xM, yV, n_splits=5, scoring='median_absolute_error',
                                   N_it=10, disp=False, ldisp=False):
    """
    Cross-validation is repeated N_it times for further averaging.
    The 'disp' flag is turned off so that individual iterations are not shown.
    """
    cv_score_le = list()
    ci_le = list()
    yVp_ltype_l = list()  # yVp_ltype is the list form of yVp, not the matrix form
    for ni in range(N_it):
        cv_score_l, ci_l, yVp_ltype = cv_LinearRegression_ci_pred(
            xM, yV, n_splits=n_splits, scoring=scoring, disp=disp)
        cv_score_le.extend(cv_score_l)
        ci_le.extend(ci_l)
        yVp_ltype_l.append(yVp_ltype)

    o_d = {'mean': np.mean(cv_score_le),
           'std': np.std(cv_score_le),
           'list': cv_score_le,
           'ci': ci_le,
           'yVp': yVp_ltype_l}
    if disp or ldisp:
        print('{0}: mean(+/-std) --> {1}(+/-{2})'.format(
            scoring, o_d['mean'], o_d['std']))

    return o_d
def cv_LinearRegression_It(xM, yV, n_folds=5, scoring='median_absolute_error',
                           N_it=10, disp=False, ldisp=False):
    """
    Cross-validation is repeated N_it times for further averaging.
    The 'disp' flag is turned off so that individual iterations are not shown.
    """
    cv_score_le = list()
    for ni in range(N_it):
        cv_score_l = cv_LinearRegression(xM, yV, n_folds=n_folds,
                                         scoring=scoring, disp=disp)
        cv_score_le.extend(cv_score_l)

    o_d = {'mean': np.mean(cv_score_le),
           'std': np.std(cv_score_le),
           'list': cv_score_le}
    if disp or ldisp:
        print('{0}: mean(+/-std) --> {1}(+/-{2})'.format(
            scoring, o_d['mean'], o_d['std']))

    return o_d
def cv_LinearRegression_ci_It(xM, yV, n_folds=5, scoring='median_absolute_error',
                              N_it=10, disp=False, ldisp=False):
    """
    Cross-validation is repeated N_it times for further averaging.
    The 'disp' flag is turned off so that individual iterations are not shown.
    """
    cv_score_le = list()
    ci_le = list()
    for ni in range(N_it):
        cv_score_l, ci_l = cv_LinearRegression_ci(xM, yV, n_folds=n_folds,
                                                  scoring=scoring, disp=disp)
        cv_score_le.extend(cv_score_l)
        ci_le.extend(ci_l)

    o_d = {'mean': np.mean(cv_score_le),
           'std': np.std(cv_score_le),
           'list': cv_score_le,
           'ci': ci_le}
    if disp or ldisp:
        print('{0}: mean(+/-std) --> {1}(+/-{2})'.format(
            scoring, o_d['mean'], o_d['std']))

    return o_d
def gs_Ridge(xM, yV, alphas_log=(1, -1, 9), n_folds=5, n_jobs=-1, scoring='r2'):
    """
    Parameters
    ----------
    scoring: 'mean_absolute_error', 'mean_squared_error',
        'median_absolute_error', or 'r2'
    """
    print('If scoring is an error metric rather than r2, '
          'the sign of the output score is reversed by the scorer!')
    print(xM.shape, yV.shape)

    clf = linear_model.Ridge()
    # params = {'alpha': np.logspace(1, -1, 9)}
    params = {'alpha': np.logspace(*alphas_log)}
    kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf_n = kf_n_c.split(xM)
    gs = model_selection.GridSearchCV(clf, params, scoring=scoring,
                                      cv=kf_n, n_jobs=n_jobs)
    gs.fit(xM, yV)

    return gs
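A hypothetical call to the gs_Ridge helper above; the data, shapes, and alpha grid are invented for illustration:

import numpy as np

# Toy data: 100 samples, 4 features (shapes are arbitrary).
xM = np.random.rand(100, 4)
yV = np.random.rand(100)

gs = gs_Ridge(xM, yV, alphas_log=(2, -2, 9), n_folds=5, scoring='r2')
print(gs.best_params_, gs.best_score_)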
def gs_Ridge(xM, yV, alphas_log=(1, -1, 9), n_folds=5, n_jobs=-1, scoring='r2'):
    """
    Parameters
    ----------
    scoring: 'mean_absolute_error', 'mean_squared_error',
        'median_absolute_error', or 'r2'
    """
    print(xM.shape, yV.shape)

    clf = linear_model.Ridge()
    # params = {'alpha': np.logspace(1, -1, 9)}
    params = {'alpha': np.logspace(*alphas_log)}
    # Old API: sklearn.cross_validation and sklearn.grid_search were removed
    # in scikit-learn 0.20; this variant requires an older release.
    kf_n = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)
    gs = grid_search.GridSearchCV(clf, params, scoring=scoring, cv=kf_n,
                                  n_jobs=n_jobs)
    gs.fit(xM, yV)

    return gs
def estimate_accuracy(yEv, yEv_calc, disp=False):
    """
    Originally located in jchem; moved here since the functionality is more
    in line with jutil than jchem.
    """
    r_sqr = metrics.r2_score(yEv, yEv_calc)
    RMSE = np.sqrt(metrics.mean_squared_error(yEv, yEv_calc))
    MAE = metrics.mean_absolute_error(yEv, yEv_calc)
    DAE = metrics.median_absolute_error(yEv, yEv_calc)

    if disp:
        print("r^2={0:.2e}, RMSE={1:.2e}, MAE={2:.2e}, DAE={3:.2e}".format(
            r_sqr, RMSE, MAE, DAE))

    return r_sqr, RMSE, MAE, DAE
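For example, on two invented arrays the helper above reports all four scores at once:

import numpy as np

yEv = np.array([1.0, 2.0, 3.0, 4.0])
yEv_calc = np.array([1.1, 1.9, 3.2, 3.8])
r_sqr, RMSE, MAE, DAE = estimate_accuracy(yEv, yEv_calc, disp=True)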
def cv_LinearRegression(xM, yV, n_splits=5, scoring='median_absolute_error', disp=False):
    """
    Related regression metrics:

    metrics.explained_variance_score(y_true, y_pred)   Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])  Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)      Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])            R^2 (coefficient of determination) regression score function
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    for train, test in kf5:
        # yV is a vector here, not a matrix, so it is indexed without a column slice.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # Output is displayed only when this flag is on.
        print('{}: mean, std -->'.format(scoring),
              np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l
def cv_LinearRegression_ci_pred(xM, yV, n_splits=5, scoring='median_absolute_error', disp=False):
    """
    Related regression metrics:

    metrics.explained_variance_score(y_true, y_pred)   Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])  Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)      Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])            R^2 (coefficient of determination) regression score function
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # yV is a vector here, not a matrix, so it is indexed without a column slice.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        coef = np.array(clf.coef_).tolist()
        intercept = np.array(clf.intercept_).tolist()
        ci_l.append((clf.coef_, clf.intercept_))

        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # Output is displayed only when this flag is on.
        print('{}: mean, std -->'.format(scoring),
              np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l, ci_l, yVp.A1.tolist()
def cv_LinearRegression_ci_pred_full_Ridge(xM, yV, alpha, n_splits=5, shuffle=True, disp=False):
    """
    Note: scoring is not used in this variant; it may be used later and is
    noted here for compatibility.

    Related regression metrics:

    metrics.explained_variance_score(y_true, y_pred)   Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])  Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)      Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])            R^2 (coefficient of determination) regression score function
    """
    if disp:
        print(xM.shape, yV.shape)
        # print('alpha of Ridge is', alpha)

    clf = linear_model.Ridge(alpha)
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # yV is a vector here, not a matrix, so it is indexed without a column slice.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))

        y_a = np.array(yV[test])[:, 0]
        yp_a = np.array(yVp_test)[:, 0]
        cv_score_l.extend(np.abs(y_a - yp_a).tolist())

    return cv_score_l, ci_l, yVp.A1.tolist()
def estimate_accuracy4(yEv, yEv_calc, disp=False):
    """
    Originally located in jchem; moved here since the functionality is more
    in line with jutil than jchem.
    """
    r_sqr = metrics.r2_score(yEv, yEv_calc)
    RMSE = np.sqrt(metrics.mean_squared_error(yEv, yEv_calc))
    MAE = metrics.mean_absolute_error(yEv, yEv_calc)
    DAE = metrics.median_absolute_error(yEv, yEv_calc)

    if disp:
        print("r^2={0:.2e}, RMSE={1:.2e}, MAE={2:.2e}, DAE={3:.2e}".format(
            r_sqr, RMSE, MAE, DAE))

    return r_sqr, RMSE, MAE, DAE
def eval_score(model, X_test, y_test, string="Test", graph=False):
    print()
    print("Evaluation of", string)
    print('--------')
    yP = model.predict(X_test)
    score_r2 = metrics.r2_score(y_test, yP)
    score_MedAE = metrics.median_absolute_error(y_test, yP)
    print('Accuracy')
    print('R2: {0:f}, MedAE: {1:f}'.format(score_r2, score_MedAE))
    print()

    if graph:
        kutil.regress_show4(y_test, yP)
def cv_LinearRegression_ci(xM, yV, n_splits=5, scoring='median_absolute_error', disp=False):
    """
    Related regression metrics:

    metrics.explained_variance_score(y_true, y_pred)   Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])  Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)      Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])            R^2 (coefficient of determination) regression score function
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    for train, test in kf5:
        # yV is a vector here, not a matrix, so it is indexed without a column slice.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))

        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # Output is displayed only when this flag is on.
        print('{}: mean, std -->'.format(scoring),
              np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l, ci_l
def cv_LinearRegression_ci_pred_full(xM, yV, n_splits=5, shuffle=True, disp=False):
    """
    Note: scoring is not used in this variant; it may be used later and is
    noted here for compatibility.

    Related regression metrics:

    metrics.explained_variance_score(y_true, y_pred)   Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])  Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)      Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])            R^2 (coefficient of determination) regression score function
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # yV is a vector here, not a matrix, so it is indexed without a column slice.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))

        y_a = np.array(yV[test])[:, 0]
        yp_a = np.array(yVp_test)[:, 0]
        cv_score_l.extend(np.abs(y_a - yp_a).tolist())

    return cv_score_l, ci_l, yVp.A1.tolist()
def cv_LinearRegression_ci(xM, yV, n_folds=5, scoring='median_absolute_error', disp=False):
    """
    Related regression metrics:

    metrics.explained_variance_score(y_true, y_pred)   Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])  Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)      Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])            R^2 (coefficient of determination) regression score function
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    for train, test in kf5:
        # yV is a vector here, not a matrix, so it is indexed without a column slice.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))

        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # Output is displayed only when this flag is on.
        print('{}: mean, std -->'.format(scoring),
              np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l, ci_l
def cv_LinearRegression_ci_pred_full_Ridge(xM, yV, alpha, n_folds=5, shuffle=True, disp=False):
    """
    Note: scoring is not used in this variant; it may be used later and is
    noted here for compatibility.

    Related regression metrics:

    metrics.explained_variance_score(y_true, y_pred)   Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])  Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)      Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])            R^2 (coefficient of determination) regression score function
    """
    if disp:
        print(xM.shape, yV.shape)
        # print('alpha of Ridge is', alpha)

    clf = linear_model.Ridge(alpha)
    kf5_c = model_selection.KFold(n_splits=n_folds, shuffle=shuffle)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # yV is a vector here, not a matrix, so it is indexed without a column slice.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))

        y_a = np.array(yV[test])[:, 0]
        yp_a = np.array(yVp_test)[:, 0]
        cv_score_l.extend(np.abs(y_a - yp_a).tolist())

    return cv_score_l, ci_l, yVp.A1.tolist()
def cv_LinearRegression_ci_pred_full(xM, yV, n_folds=5, shuffle=True, disp=False):
    """
    Note: scoring is not used in this variant; it may be used later and is
    noted here for compatibility.

    Related regression metrics:

    metrics.explained_variance_score(y_true, y_pred)   Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])  Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)      Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])            R^2 (coefficient of determination) regression score function
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_folds, shuffle=shuffle)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # yV is a vector here, not a matrix, so it is indexed without a column slice.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))

        y_a = np.array(yV[test])[:, 0]
        yp_a = np.array(yVp_test)[:, 0]
        cv_score_l.extend(np.abs(y_a - yp_a).tolist())

    return cv_score_l, ci_l, yVp.A1.tolist()
def cv_LinearRegression_ci_pred_It(xM, yV, n_folds=5, scoring='median_absolute_error',
                                   N_it=10, disp=False, ldisp=False):
    """
    Cross-validation is repeated N_it times for further averaging.
    The 'disp' flag is turned off so that individual iterations are not shown.
    """
    cv_score_le = list()
    ci_le = list()
    yVp_ltype_l = list()  # yVp_ltype is the list form of yVp, not the matrix form
    for ni in range(N_it):
        cv_score_l, ci_l, yVp_ltype = cv_LinearRegression_ci_pred(
            xM, yV, n_folds=n_folds, scoring=scoring, disp=disp)
        cv_score_le.extend(cv_score_l)
        ci_le.extend(ci_l)
        yVp_ltype_l.append(yVp_ltype)

    o_d = {'mean': np.mean(cv_score_le),
           'std': np.std(cv_score_le),
           'list': cv_score_le,
           'ci': ci_le,
           'yVp': yVp_ltype_l}
    if disp or ldisp:
        print('{0}: mean(+/-std) --> {1}(+/-{2})'.format(
            scoring, o_d['mean'], o_d['std']))

    return o_d
def cv_LinearRegression(xM, yV, n_folds=5, scoring='median_absolute_error', disp=False):
    """
    Related regression metrics:

    metrics.explained_variance_score(y_true, y_pred)   Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])  Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)      Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])            R^2 (coefficient of determination) regression score function
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    # Old sklearn.cross_validation API; removed in scikit-learn 0.20
    # (model_selection.KFold is the modern equivalent).
    kf5 = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)

    cv_score_l = list()
    for train, test in kf5:
        # yV is a vector here, not a matrix, so it is indexed without a column slice.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # Output is displayed only when this flag is on.
        print('{}: mean, std -->'.format(scoring),
              np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l
def cv_LinearRegression_ci(xM, yV, n_folds=5, scoring='median_absolute_error', disp=False):
    """
    Related regression metrics:

    metrics.explained_variance_score(y_true, y_pred)   Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])  Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)      Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])            R^2 (coefficient of determination) regression score function
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    # Old sklearn.cross_validation API; removed in scikit-learn 0.20.
    kf5 = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)

    cv_score_l = list()
    ci_l = list()
    for train, test in kf5:
        # yV is a vector here, not a matrix, so it is indexed without a column slice.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))

        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # Output is displayed only when this flag is on.
        print('{}: mean, std -->'.format(scoring),
              np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l, ci_l
def cv_LinearRegression_ci_pred(xM, yV, n_folds=5, scoring='median_absolute_error', disp=False):
    """
    Related regression metrics:

    metrics.explained_variance_score(y_true, y_pred)   Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])  Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)      Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])            R^2 (coefficient of determination) regression score function
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    # Old sklearn.cross_validation API; removed in scikit-learn 0.20.
    kf5 = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # yV is a vector here, not a matrix, so it is indexed without a column slice.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        coef = np.array(clf.coef_).tolist()
        intercept = np.array(clf.intercept_).tolist()
        ci_l.append((clf.coef_, clf.intercept_))

        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # Output is displayed only when this flag is on.
        print('{}: mean, std -->'.format(scoring),
              np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l, ci_l, yVp.A1.tolist()
def cv_LinearRegression_ci_pred_full_Ridge(xM, yV, alpha, n_folds=5, shuffle=True, disp=False):
    """
    Note: scoring is not used in this variant; it may be used later and is
    noted here for compatibility.

    Related regression metrics:

    metrics.explained_variance_score(y_true, y_pred)   Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])  Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)      Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])            R^2 (coefficient of determination) regression score function
    """
    if disp:
        print(xM.shape, yV.shape)
        # print('alpha of Ridge is', alpha)

    clf = linear_model.Ridge(alpha)
    # Old sklearn.cross_validation API; removed in scikit-learn 0.20.
    kf5 = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=shuffle)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # yV is a vector here, not a matrix, so it is indexed without a column slice.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))

        y_a = np.array(yV[test])[:, 0]
        yp_a = np.array(yVp_test)[:, 0]
        cv_score_l.extend(np.abs(y_a - yp_a).tolist())

    return cv_score_l, ci_l, yVp.A1.tolist()
def cv_LinearRegression_ci_pred_full(xM, yV, n_folds=5, shuffle=True, disp=False):
    """
    Note: scoring is not used in this variant; it may be used later and is
    noted here for compatibility.

    Related regression metrics:

    metrics.explained_variance_score(y_true, y_pred)   Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])  Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)      Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])            R^2 (coefficient of determination) regression score function
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    # Old sklearn.cross_validation API; removed in scikit-learn 0.20.
    kf5 = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=shuffle)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # yV is a vector here, not a matrix, so it is indexed without a column slice.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))

        y_a = np.array(yV[test])[:, 0]
        yp_a = np.array(yVp_test)[:, 0]
        cv_score_l.extend(np.abs(y_a - yp_a).tolist())

    return cv_score_l, ci_l, yVp.A1.tolist()
def _make_scoring_r0(scoring):
    if scoring == 'r2':
        return metrics.make_scorer(metrics.r2_score)
    elif scoring == 'mean_absolute_error':
        return metrics.make_scorer(metrics.mean_absolute_error,
                                   greater_is_better=False)
    elif scoring == 'mean_squared_error':
        return metrics.make_scorer(metrics.mean_squared_error,
                                   greater_is_better=False)
    elif scoring == 'median_absolute_error':
        return metrics.make_scorer(metrics.median_absolute_error,
                                   greater_is_better=False)
    else:
        raise ValueError("Not supported scoring")
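Note that recent scikit-learn releases expose the same negated-error convention through built-in scorer strings, so a hand-rolled factory like the one above can usually be replaced by a string. A sketch with invented toy data, assuming scikit-learn >= 0.18:

import numpy as np
from sklearn import linear_model, model_selection

X = np.random.rand(50, 3)  # toy data
y = np.random.rand(50)
scores = model_selection.cross_val_score(
    linear_model.LinearRegression(), X, y, cv=5,
    scoring='neg_median_absolute_error')  # negated, so greater is better
print(-scores.mean())  # flip the sign back to an error value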
def eval_metrics_on(predictions, labels, regression=True):
    """
    Assuming this is a regression task (labels are continuous-valued floats),
    returns most regression-related scores for the given predictions/targets
    as a dictionary: r2, mean_abs_error, mse, rmse, median_absolute_error,
    explained_variance_score.
    """
    if len(labels[0]) == 2:  # labels is a list of data/label pairs
        labels = np.concatenate([l[1] for l in labels])
    if regression:
        predictions = predictions[:, 0]
        r2 = metrics.r2_score(labels, predictions)
        mean_abs_error = np.abs(predictions - labels).mean()
        mse = ((predictions - labels) ** 2).mean()
        rmse = np.sqrt(mse)
        # robust to outliers
        median_absolute_error = metrics.median_absolute_error(labels, predictions)
        # best score = 1, lower is worse
        explained_variance_score = metrics.explained_variance_score(labels, predictions)
        return {'r2': r2,
                'mean_abs_error': mean_abs_error,
                'mse': mse,
                'rmse': rmse,
                'median_absolute_error': median_absolute_error,
                'explained_variance_score': explained_variance_score,
                'main_metric': rmse}
    else:
        predictions = predictions[:, 1]
        if labels.max() == 1:
            # note: the 'reorder' argument existed only in older scikit-learn releases
            auc = metrics.auc(predictions, labels[:, 1], reorder=1)
            accuracy = np.mean((predictions > 0.5) == labels[:, 1])
            return {'auc': auc, 'accuracy': accuracy, 'main_metric': accuracy}
from numpy.testing import assert_almost_equal
from sklearn.metrics import (mean_squared_error, mean_absolute_error,
                             median_absolute_error, r2_score,
                             explained_variance_score)

def test_regression_metrics(n_samples=50):
    y_true = np.arange(n_samples)
    y_pred = y_true + 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.)
    assert_almost_equal(mean_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(median_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(r2_score(y_true, y_pred), 0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.)

def test_regression_metrics_at_limits():
    assert_almost_equal(mean_squared_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(median_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)
def process_linear_regression(self):
    """
    Linear Regression.

    Fit a Machine Learning Model to the data, where `input` is a matrix with:
      - rows: `n_samples`
      - columns: `n_features`
    and where `output` is:
      - an array of `n_samples` when predicting one output
      - a matrix of `n_samples` rows and `n_outputs` columns when predicting
        multiple outputs simultaneously

    Important note: given, say, a dataset with 400 rows and 10 columns, a
    matrix of 400 rows and 1 column must be passed in to predict 1 column.
    Prior to passing `input` to the Fit function, convert the
    Series/DataFrame objects to a NumPy matrix first so Scikit-Learn can
    convert the input to a NumPy object:
      - WRONG:   `df["mpg"].values.shape` obtains a NumPy array (400 elements)
        from the Series via the `values` attribute
      - CORRECT: `df[["mpg"]].values.shape` obtains a NumPy matrix object
        (400 rows, 1 col) from the DataFrame via the `values` attribute
    """
    print("Linear Regression in progress...")
    model = self.prediction_utils.generate_model(self.model_type, None, None, None)
    df = self.prediction_data.df_listings
    inputs = df[self.training_columns]

    if not len(inputs):
        print("No Training Columns to use for Logistic Regression. "
              "Perhaps they were all bad and removed.")
        return None

    # Check that inputs is a NumPy matrix, not a NumPy array
    print("Shape of inputs to Scikit-Learn Fit function: ", inputs.values.shape)

    output = df[self.target_column]
    model.fit(inputs, output)
    predictions = model.predict(inputs)
    df["predictions"] = predictions

    if self.prediction_config.PLOT_LINEAR_RELATIONSHIP_PREDICTION_VS_ACTUAL_FOR_TRAIN_FEATURES_VS_TARGET == True:
        self.plot_linear_relationships(predictions)

    print("Check predictions accuracy against 'known' Model Training Data:\n %r" % (df[[self.target_column, "predictions"]]))
    print("Predictions using Scikit-Learn Linear Regression: %r" % (predictions))

    # Note: despite the name 'mae', this is the *median* absolute error.
    mae = median_absolute_error(df[self.target_column], predictions)
    mse = mean_squared_error(df[self.target_column], predictions, multioutput='raw_values')
    rmse = math.sqrt(mse)
    print("MAE: %r" % (mae))
    print("MSE: %r" % (mse[0]))
    print("RMSE: %r" % (rmse))

    if mae and rmse:
        mae_rmse_ratio_prefix = mae / rmse
        print("MAE to RMSE Ratio using Linear Regression: %.2f:1" % (mae_rmse_ratio_prefix))

    if self.prediction_config.PLOT_INDIVIDUAL_TRAIN_FEATURES_VS_TARGET == True:
        for index, training_model_feature_name in enumerate(self.training_columns):
            self.prediction_utils.plot(training_model_feature_name, df)

    self.response["pre-hyperparameter_optimisation"] = {
        "model_type": self.model_type,
        "rmse": rmse
    }
    print("Linear Regression Pre-Hyperparameter k Optimisation results: %r" % (self.response))
def process_hyperparameter_fixed(self):
    """
    Step 1: Create an instance of the K-Nearest-Neighbors Machine Learning
    Model class, where p=2 is the Euclidean Distance.

    Step 2: Fit the Model by specifying the data for the K-Nearest-Neighbors
    Model to use:
      - X as Training data (i.e. DataFrame "feature" columns from Training data)
      - y as Target values (i.e. the DataFrame's Target column)
    The X argument of the `fit` function is a matrix-like object containing
    the columns of interest from the Training set (used to make predictions),
    and the y argument is a list-like object containing just the
    TARGET_COLUMN, `price`. X and y are passed into the `fit` method of
    Scikit-Learn. Warning: do NOT pass in data containing missing or
    non-numerical values, or an error occurs.

    Step 3: Scikit-Learn's `predict` function is called to make predictions
    on the columns of test_df. It returns a NumPy array of predicted "price"
    TARGET_COLUMN values.

    Step 4: Calculate MAE, MSE, and RMSE float values for each individual
    target, where the least-loss "best" value is 0.
    """
    print("Training features include: %r" % (self.training_columns))
    training_column_names = self.training_columns
    feature_combo = '__'.join(training_column_names)
    model = self.prediction_utils.generate_model(
        self.model_type, self.prediction_config.HYPERPARAMETER_FIXED, 'brute', 2)

    _temp_training_part = self.prediction_data.training_part
    X = _temp_training_part[self.training_columns]
    y = _temp_training_part[self.target_column]
    model.fit(X, y)

    _temp_testing_part = self.prediction_data.testing_part
    predictions = model.predict(_temp_testing_part[self.training_columns])
    print("Predictions using Scikit-Learn KNN Regression: %r" % (predictions))

    # Note: despite the name 'mae', this is the *median* absolute error.
    mae = median_absolute_error(_temp_testing_part[self.target_column], predictions)
    mse = mean_squared_error(_temp_testing_part[self.target_column], predictions,
                             multioutput='raw_values')
    rmse = math.sqrt(mse)
    print("MAE: %r" % (mae))
    print("MSE: %r" % (mse[0]))
    print("RMSE: %r" % (rmse))

    if mae and rmse:
        mae_rmse_ratio_prefix = mae / rmse
        print("MAE to RMSE Ratio: %.2f:1" % (mae_rmse_ratio_prefix))

    if self.prediction_config.PLOT_INDIVIDUAL_TRAIN_FEATURES_VS_TARGET == True:
        for index, training_model_feature_name in enumerate(self.training_columns):
            self.prediction_utils.plot(training_model_feature_name, _temp_testing_part)

    return {
        "feature_names": feature_combo,
        "rmse": rmse,
        "k_neighbors_qty": self.prediction_config.HYPERPARAMETER_FIXED,
        "k_folds_qty": None,
        "k_fold_cross_validation_toggle": False
    }
def cv_LinearRegression(xM, yV, n_folds=5, scoring='median_absolute_error', disp=False):
    """
    Related regression metrics:

    metrics.explained_variance_score(y_true, y_pred)   Explained variance regression score function
    metrics.mean_absolute_error(y_true, y_pred)        Mean absolute error regression loss
    metrics.mean_squared_error(y_true, y_pred[, ...])  Mean squared error regression loss
    metrics.median_absolute_error(y_true, y_pred)      Median absolute error regression loss
    metrics.r2_score(y_true, y_pred[, ...])            R^2 (coefficient of determination) regression score function
    """
    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf5 = kf5_c.split(xM)

    cv_score_l = list()
    for train, test in kf5:
        # yV is a vector here, not a matrix, so it is indexed without a column slice.
        clf.fit(xM[train, :], yV[train])
        yVp_test = clf.predict(xM[test, :])
        if scoring == 'median_absolute_error':
            cv_score_l.append(metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError("{} scoring is not supported.".format(scoring))

    if disp:  # Output is displayed only when this flag is on.
        print('{}: mean, std -->'.format(scoring),
              np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l