The following 50 code examples, extracted from open-source Python projects, illustrate how to use sklearn.metrics.mean_squared_error().
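Before the project examples, here is a minimal, self-contained sketch of the call itself; the toy arrays y_true and y_pred are made up purely for illustration:

import numpy as np
from sklearn.metrics import mean_squared_error

# Made-up ground truth and predictions, just to show the call signature.
y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])

mse = mean_squared_error(y_true, y_pred)   # mean of the squared residuals
rmse = np.sqrt(mse)                        # many examples below take the square root themselves
print(mse, rmse)                           # 0.375 0.612...
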
def test_stacked_regressor(self):
    bclf = LinearRegression()
    clfs = [RandomForestRegressor(n_estimators=50, random_state=1),
            GradientBoostingRegressor(n_estimators=25, random_state=1),
            Ridge(random_state=1)]

    # Friedman1
    X, y = datasets.make_friedman1(n_samples=1200,
                                   random_state=1,
                                   noise=1.0)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    sr = StackedRegressor(bclf,
                          clfs,
                          n_folds=3,
                          verbose=0,
                          oob_score_flag=True)
    sr.fit(X_train, y_train)
    mse = mean_squared_error(y_test, sr.predict(X_test))
    assert_less(mse, 6.0)

def test_fwls_regressor(self):
    feature_func = lambda x: np.ones(x.shape)
    bclf = LinearRegression()
    clfs = [RandomForestRegressor(n_estimators=50, random_state=1),
            GradientBoostingRegressor(n_estimators=25, random_state=1),
            Ridge(random_state=1)]

    # Friedman1
    X, y = datasets.make_friedman1(n_samples=1200,
                                   random_state=1,
                                   noise=1.0)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    sr = FWLSRegressor(bclf,
                       clfs,
                       feature_func,
                       n_folds=3,
                       verbose=0,
                       oob_score_flag=True)
    sr.fit(X_train, y_train)
    mse = mean_squared_error(y_test, sr.predict(X_test))
    assert_less(mse, 6.0)

def test_regressor(self):
    X, y = datasets.make_friedman1(n_samples=1200,
                                   random_state=1,
                                   noise=1.0)
    X_train, y_train = X[:200], y[:200]
    index = [i for i in range(200)]

    rf = RandomForestRegressor()
    jrf = JoblibedRegressor(rf, "rfr", cache_dir='')
    jrf.fit(X_train, y_train, index)
    prediction = jrf.predict(X_train, index)
    mse = mean_squared_error(y_train, prediction)
    assert_less(mse, 6.0)

    rf = RandomForestRegressor(n_estimators=20)
    jrf = JoblibedRegressor(rf, "rfr", cache_dir='')
    jrf.fit(X_train, y_train, index)
    prediction2 = jrf.predict(X_train, index)
    assert_allclose(prediction, prediction2)

def parameterChoosing(self):
    # Set the parameters by cross-validation
    tuned_parameters = [{'max_depth': range(20, 60),
                         'n_estimators': range(10, 40),
                         'max_features': ['sqrt', 'log2', None]
                         }
                        ]

    clf = GridSearchCV(RandomForestRegressor(n_estimators=30), tuned_parameters, cv=5, scoring='mean_squared_error')
    clf.fit(self.X_train, self.y_train.ravel())

    print "Best parameters set found on development set:\n"
    print clf.best_params_

    print "Grid scores on development set:\n"
    for params, mean_score, scores in clf.grid_scores_:
        print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

    print "MSE for test data set:\n"
    y_true, y_pred = self.y_test, clf.predict(self.X_test)
    print mean_squared_error(y_true, y_pred)

def calculate_regression_metrics(trained_sklearn_estimator, x_test, y_test):
    """
    Given a trained estimator, calculate metrics.

    Args:
        trained_sklearn_estimator (sklearn.base.BaseEstimator): a scikit-learn estimator that has been `.fit()`
        y_test (numpy.ndarray): A 1d numpy array of the y_test set (predictions)
        x_test (numpy.ndarray): A 2d numpy array of the x_test set (features)

    Returns:
        dict: A dictionary of metrics objects
    """
    # Get predictions
    predictions = trained_sklearn_estimator.predict(x_test)

    # Calculate individual metrics
    mean_squared_error = skmetrics.mean_squared_error(y_test, predictions)
    mean_absolute_error = skmetrics.mean_absolute_error(y_test, predictions)

    result = {'mean_squared_error': mean_squared_error, 'mean_absolute_error': mean_absolute_error}

    return result

def score(train_labels, train_features, test_labels, test_features, save_file, use_tree=False):
    if use_tree:
        train_clf = Classifier(tree.DecisionTreeClassifier())
    else:
        train_clf = Classifier()
    print train_clf.clf
    print ''

    t_start = time.clock()
    train_clf.learn(train_features, train_labels)
    t_end = time.clock()

    if save_file:
        train_clf.save_to_file(open(save_file, 'w'))

    p_start = time.clock()
    predicted = train_clf.clf.predict(test_features)
    p_end = time.clock()

    test_labels_t = train_clf.labels.transform(test_labels)
    print classification_report(test_labels_t, predicted, target_names=train_clf.labels.classes_)
    print 'Training time: %fs' % (t_end - t_start)
    print 'Predicting time: %fs' % (p_end - p_start)
    print 'Mean squared error: %f' % mean_squared_error(test_labels_t, predicted)

    return train_clf.score(test_features, test_labels)

def _plot_old_pred_data(old_pred_data, show_pred_plot, save_pred_plot, show_clarke_plot, save_clarke_plot, id_str, algorithm_str, minutes_str):
    actual_bg_array = old_pred_data.result_actual_bg_array
    actual_bg_time_array = old_pred_data.result_actual_bg_time_array
    pred_array = old_pred_data.result_pred_array
    pred_time_array = old_pred_data.result_pred_time_array

    # Root mean squared error
    rms = math.sqrt(metrics.mean_squared_error(actual_bg_array, pred_array))
    print "    Root Mean Squared Error: " + str(rms)
    print "    Mean Absolute Error: " + str(metrics.mean_absolute_error(actual_bg_array, pred_array))
    print "    R^2 Coefficient of Determination: " + str(metrics.r2_score(actual_bg_array, pred_array))

    plot, zone = ClarkeErrorGrid.clarke_error_grid(actual_bg_array, pred_array, id_str + " " + algorithm_str + " " + minutes_str)
    print "    Percent A:{}".format(float(zone[0]) / (zone[0] + zone[1] + zone[2] + zone[3] + zone[4]))
    print "    Percent C, D, E:{}".format(float(zone[2] + zone[3] + zone[4]) / (zone[0] + zone[1] + zone[2] + zone[3] + zone[4]))
    print "    Zones are A:{}, B:{}, C:{}, D:{}, E:{}\n".format(zone[0], zone[1], zone[2], zone[3], zone[4])
    if save_clarke_plot:
        plt.savefig(id_str + algorithm_str.replace(" ", "") + minutes_str + "clarke.png")
    if show_clarke_plot:
        plot.show()

    plt.clf()
    plt.plot(actual_bg_time_array, actual_bg_array, label="Actual BG", color='black', linestyle='-')
    plt.plot(pred_time_array, pred_array, label="BG Prediction", color='black', linestyle=':')
    plt.title(id_str + " " + algorithm_str + " " + minutes_str + " BG Analysis")
    plt.ylabel("Blood Glucose Level (mg/dl)")
    plt.xlabel("Time (minutes)")
    plt.legend(loc='upper left')

    # SHOW/SAVE PLOT DEPENDING ON THE BOOLEAN PARAMETER
    if save_pred_plot:
        plt.savefig(id_str + algorithm_str.replace(" ", "") + minutes_str + "plot.png")
    if show_pred_plot:
        plt.show()

# Function to analyze the old OpenAPS data

def build_model(look_back: int, batch_size: int=1) -> Sequential:
    """
    The function builds a keras Sequential model
    :param look_back: number of previous time steps as int
    :param batch_size: batch_size as int, defaults to 1
    :return: keras Sequential model
    """
    model = Sequential()
    model.add(LSTM(64,
                   activation='relu',
                   batch_input_shape=(batch_size, look_back, 1),
                   stateful=True,
                   return_sequences=False))
    model.add(Dense(1, activation='linear'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

def setUp(self):
    os.putenv("KMP_DUPLICATE_LIB_OK", "TRUE")
    self.X_class, self.y_class = datasets.make_classification(random_state=42)
    self.X_reg, self.y_reg = datasets.make_regression(random_state=42)
    self.classification_optimizers = [XGBoostOptimizer, RandomForestOptimizer]
    self.regression_optimizers = [XGBoostOptimizer, RandomForestOptimizer]
    self.class_scorer = Scorer("auc_error", lambda y_pred, y_true: 1 - metrics.roc_auc_score(y_pred, y_true))
    self.reg_scorer = Scorer("mse", metrics.mean_squared_error)

    self.classification_task_split = \
        Task("class_split", self.X_class, self.y_class, "classification", test_size=0.1, random_state=42)
    self.regression_task_split = \
        Task("reg_split", self.X_class, self.y_class, "regression", test_size=0.1, random_state=42)

    self.classification_task_cv = \
        Task("class_cv", self.X_reg, self.y_reg, "classification", cv=5, random_state=42)
    self.regression_task_cv = \
        Task("reg_cv", self.X_reg, self.y_reg, "regression", cv=5, random_state=42)

def after_test(self):
    # scores_test = []
    # scores_train = []
    # scores_test_mse = []
    # scores_train_mse = []
    # for i, y_pred in enumerate(self.clf.staged_predict(self.X_test)):
    #     scores_test.append(mean_absolute_percentage_error(self.y_test, y_pred))
    #     scores_test_mse.append(mean_squared_error(self.y_test, y_pred))
    #
    # for i, y_pred in enumerate(self.clf.staged_predict(self.X_train)):
    #     scores_train.append(mean_absolute_percentage_error(self.y_train, y_pred))
    #     scores_train_mse.append(mean_squared_error(self.y_train, y_pred))
    #
    # pd.DataFrame({'scores_train': scores_train, 'scores_test': scores_test,
    #               'scores_train_mse': scores_train_mse, 'scores_test_mse': scores_test_mse}).to_csv('temp/trend.csv')

    # df = pd.DataFrame({'scores_train': scores_train, 'scores_test': scores_test})
    # print "Test set MAPE minimum: {}".format(np.array(scores_test).min())
    # df.plot()
    # plt.show()
    return

def after_test(self):
    scores_test = []
    scores_train = []
    scores_test_mse = []
    scores_train_mse = []
    for i, y_pred in enumerate(self.clf.staged_predict(self.X_test)):
        scores_test.append(mean_absolute_percentage_error(self.y_test, y_pred))
        scores_test_mse.append(mean_squared_error(self.y_test, y_pred))

    for i, y_pred in enumerate(self.clf.staged_predict(self.X_train)):
        scores_train.append(mean_absolute_percentage_error(self.y_train, y_pred))
        scores_train_mse.append(mean_squared_error(self.y_train, y_pred))

    pd.DataFrame({'scores_train': scores_train, 'scores_test': scores_test,
                  'scores_train_mse': scores_train_mse, 'scores_test_mse': scores_test_mse}).to_csv('temp/trend.csv')

    df = pd.DataFrame({'scores_train': scores_train, 'scores_test': scores_test})
    print "Test set MAPE minimum: {}".format(np.array(scores_test).min())
    # df.plot()
    # plt.show()
    return

def _mse_converged(self): """Check convergence based on mean squared difference between prior and posterior Returns ------- converged : boolean Whether the parameter estimation converged. mse : float Mean squared error between prior and posterior. """ prior = self.global_prior_[0:self.prior_size] posterior = self.global_posterior_[0:self.prior_size] mse = mean_squared_error(prior, posterior, multioutput='uniform_average') if mse > self.threshold: return False, mse else: return True, mse
def _mse_converged(self): """Check convergence based on mean squared error Returns ------- converged : boolean Whether the parameter estimation converged. mse : float Mean squared error between prior and posterior. """ mse = mean_squared_error(self.local_prior, self.local_posterior_, multioutput='uniform_average') if mse > self.threshold: return False, mse else: return True, mse
def __init__(self,
             bclf,
             clfs,
             n_folds=3,
             oob_score_flag=False,
             oob_metrics=mean_squared_error,
             Kfold=None,
             verbose=0,
             save_stage0=False,
             save_dir=''):
    self.n_folds = n_folds
    self.clfs = clfs
    self.bclf = bclf
    self.all_learner = OrderedDict()
    self.oob_score_flag = oob_score_flag
    self.oob_metrics = oob_metrics
    self.verbose = verbose
    self.stack_by_proba = False
    self.save_stage0 = save_stage0
    self.save_dir = save_dir
    self.MyKfold = Kfold

def __init__(self,
             bclf,
             clfs,
             feature_func,
             n_folds=3,
             oob_score_flag=False,
             oob_metrics=mean_squared_error,
             Kfold=None,
             verbose=0,
             save_stage0=False,
             save_dir=''):
    super(FWLSRegressor, self).__init__(bclf,
                                        clfs,
                                        n_folds,
                                        oob_score_flag,
                                        oob_metrics,
                                        Kfold,
                                        verbose,
                                        save_stage0,
                                        save_dir)
    self.feature_func = feature_func

def test():
    y = []
    yp = []
    fi = open(sys.argv[1], 'r')
    for line in fi:
        data = ints(line.replace(":1", "").split())
        clk = data[1]
        mp = data[2]
        fsid = 3  # feature start id
        pred = 0.0
        for i in range(fsid, len(data)):
            feat = data[i]
            if feat in featWeight:
                pred += featWeight[feat]
        pred = sigmoid(pred)
        y.append(clk)
        yp.append(pred)
    fi.close()
    auc = roc_auc_score(y, yp)
    rmse = math.sqrt(mean_squared_error(y, yp))
    print str(round) + '\t' + str(auc) + '\t' + str(rmse)

def PlotLearn(R, A, Y):
    intA = [BinVecToInt(j) for j in A]
    intY = [BinVecToInt(j) for j in Y]
    fig, ax = mpl.subplots(figsize=(20, 10))
    ax.plot(intA, intY, label='Orig')
    l, = ax.plot(intA, intY, label='Pred')
    ax.legend(loc='upper left')

    # Updates the plot in ax as model learns data
    def UpdateF(i):
        R.fit(A, Y)
        YH = R.predict(A)
        S = MSE(Y, YH)
        intYH = [BinVecToInt(j) for j in YH]
        l.set_ydata(intYH)
        ax.set_title('Iteration: ' + str(i * 64) + ' - MSE: ' + str(S))
        return l,

    ani = mpla.FuncAnimation(fig, UpdateF, frames=2000, interval=128, repeat=False)
    #ani.save('foo.gif')
    mpl.show()
    return ani

def parameterChoosing(self):
    # Set the parameters by cross-validation
    tuned_parameters = [{'max_features': ['sqrt', 'log2', None],
                         'max_depth': range(2, 1000),
                         }
                        ]

    reg = GridSearchCV(DecisionTreeRegressor(), tuned_parameters, cv=5, scoring='mean_squared_error')
    reg.fit(self.X_train, self.y_train)

    print "Best parameters set found on development set:\n"
    print reg.best_params_

    print "Grid scores on development set:\n"
    for params, mean_score, scores in reg.grid_scores_:
        print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

    print "MSE for test data set:\n"
    y_true, y_pred = self.y_test, reg.predict(self.X_test)
    print mean_squared_error(y_true, y_pred)

def predict(self):
    # predict the test data
    y_pred1 = self.net1.predict(self.X_test)
    y_pred1 = y_pred1.reshape((y_pred1.shape[0], 1))

    y_pred2 = self.linRegr.predict(self.X_test)
    y_pred2 = y_pred2.reshape((y_pred2.shape[0], 1))

    y_pred3 = self.knn.predict(self.X_test)
    y_pred3 = y_pred3.reshape((y_pred3.shape[0], 1))

    y_pred4 = self.decisionTree.predict(self.X_test)
    y_pred4 = y_pred4.reshape((y_pred4.shape[0], 1))

    y_pred5 = self.adaReg.predict(self.X_test)
    y_pred5 = y_pred5.reshape((y_pred5.shape[0], 1))

    self.y_pred = (y_pred1 + y_pred2 + y_pred3 + y_pred4 + y_pred5) / 5

    # print MSE
    mse = mean_squared_error(self.y_pred, self.y_test)
    print "MSE: {}".format(mse)

def parameterChoosing(self):
    # Set the parameters by cross-validation
    tuned_parameters = [{'weights': ['uniform', 'distance'],
                         'n_neighbors': range(2, 100)
                         }
                        ]

    reg = GridSearchCV(neighbors.KNeighborsRegressor(), tuned_parameters, cv=5, scoring='mean_squared_error')
    reg.fit(self.X_train, self.y_train)

    print "Best parameters set found on development set:\n"
    print reg.best_params_

    print "Grid scores on development set:\n"
    for params, mean_score, scores in reg.grid_scores_:
        print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)
    print reg.scorer_

    print "MSE for test data set:"
    y_true, y_pred = self.y_test, reg.predict(self.X_test)
    print mean_squared_error(y_pred, y_true)

def parameterChoosing(self):
    # Set the parameters by cross-validation
    tuned_parameters = [{'alpha': np.logspace(-5, 5)
                         }
                        ]

    reg = GridSearchCV(linear_model.Ridge(alpha=0.5), tuned_parameters, cv=5, scoring='mean_squared_error')
    reg.fit(self.X_train, self.y_train)

    print "Best parameters set found on development set:\n"
    print reg.best_params_

    print "Grid scores on development set:\n"
    for params, mean_score, scores in reg.grid_scores_:
        print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)
    print reg.scorer_

    print "MSE for test data set:"
    y_true, y_pred = self.y_test, reg.predict(self.X_test)
    print mean_squared_error(y_pred, y_true)

def eval_sts(ycat, y, name, quiet=False):
    """ Evaluate given STS regression-classification predictions and print results. """
    if ycat.ndim == 1:
        ypred = ycat
    else:
        ypred = loader.sts_categorical2labels(ycat)
    if y.ndim == 1:
        ygold = y
    else:
        ygold = loader.sts_categorical2labels(y)
    pr = pearsonr(ypred, ygold)[0]
    sr = spearmanr(ypred, ygold)[0]
    e = mse(ypred, ygold)
    if not quiet:
        print('%s Pearson: %f' % (name, pr,))
        print('%s Spearman: %f' % (name, sr,))
        print('%s MSE: %f' % (name, e,))
    return STSRes(pr, sr, e)

def r_squared_mse(y_true, y_pred, sample_weight=None, multioutput=None):
    r2 = r2_score(y_true, y_pred, sample_weight=sample_weight, multioutput=multioutput)
    mse = mean_squared_error(y_true, y_pred, sample_weight=sample_weight, multioutput=multioutput)

    bounds_check = np.min(y_pred) > MIN_MOISTURE_BOUND
    bounds_check = bounds_check & (np.max(y_pred) < MAX_MOISTURE_BOUND)

    print('Scoring - std', np.std(y_true), np.std(y_pred))
    print('Scoring - median', np.median(y_true), np.median(y_pred))
    print('Scoring - min', np.min(y_true), np.min(y_pred))
    print('Scoring - max', np.max(y_true), np.max(y_pred))
    print('Scoring - mean', np.mean(y_true), np.mean(y_pred))
    print('Scoring - MSE, R2, bounds', mse, r2, bounds_check)
    return (float(mse), float(r2), int(bounds_check))

def fastLapModel(xList, labels, names, multiple=0, full_set=0):
    X = numpy.array(xList)
    y = numpy.array(labels)
    featureNames = []
    featureNames = numpy.array(names)
    # take fixed holdout set 30% of data rows
    xTrain, xTest, yTrain, yTest = train_test_split(
        X, y, test_size=0.30, random_state=531)
    # for final model (no CV)
    if full_set:
        xTrain = X
        yTrain = y
    check_set(xTrain, xTest, yTrain, yTest)
    print "Fitting the model to the data set..."
    # train random forest at a range of ensemble sizes in order to see how the
    # mse changes
    mseOos = []
    m = 10 ** multiple
    nTreeList = range(500 * m, 1000 * m, 100 * m)
    # iTrees = 10000
    for iTrees in nTreeList:
        depth = None
        maxFeat = int(np.sqrt(np.shape(xTrain)[1])) + 1  # try tweaking
        RFmd = ensemble.RandomForestRegressor(n_estimators=iTrees, max_depth=depth,
                                              max_features=maxFeat,
                                              oob_score=False, random_state=531, n_jobs=-1)
        # RFmd.n_features = 5
        RFmd.fit(xTrain, yTrain)
        # Accumulate mse on test set
        prediction = RFmd.predict(xTest)
        mseOos.append(mean_squared_error(yTest, prediction))
    # plot training and test errors vs number of trees in ensemble
    plot.plot(nTreeList, mseOos)
    plot.xlabel('Number of Trees in Ensemble')
    plot.ylabel('Mean Squared Error')
    #plot.ylim([0.0, 1.1*max(mseOob)])
    plot.show()
    print("MSE")
    print(mseOos[-1])
    return xTrain, xTest, yTrain, yTest, RFmd

def check_improve(degree):
    y = _lifted_predict(U[:degree], X)

    common_settings = dict(degree=degree, n_components=n_components,
                           beta=1e-10, tol=0, random_state=0)
    est_5 = PolynomialNetworkRegressor(max_iter=5, **common_settings)
    est_10 = PolynomialNetworkRegressor(max_iter=10, **common_settings)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        est_5.fit(X, y)
        est_10.fit(X, y)
    y_pred_5 = est_5.predict(X)
    y_pred_10 = est_10.predict(X)

    assert_less_equal(mean_squared_error(y, y_pred_10),
                      mean_squared_error(y, y_pred_5),
                      msg="More iterations do not improve fit.")

def test_random_starts():
    # not as strong a test as the direct case!
    # using training error here, and a higher threshold.
    # We observe the lifted solver reaches rather diff. solutions.
    degree = 3
    noisy_y = _lifted_predict(U[:degree], X)
    noisy_y += 5. * rng.randn(noisy_y.shape[0])

    common_settings = dict(degree=degree, n_components=n_components,
                           beta=0.01, tol=0.01)
    scores = []
    for k in range(5):
        est = PolynomialNetworkRegressor(random_state=k, **common_settings)
        y_pred = est.fit(X, noisy_y).predict(X)
        scores.append(mean_squared_error(noisy_y, y_pred))

    assert_less_equal(np.std(scores), 1e-4)

def check_improve(degree):
    y = _poly_predict(X, P, lams, kernel="anova", degree=degree)

    est = FactorizationMachineRegressor(degree=degree, n_components=5,
                                        fit_lower=None, fit_linear=False,
                                        beta=0.0001, max_iter=5, tol=0,
                                        random_state=0)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        y_pred_5 = est.fit(X, y).predict(X)
        est.set_params(max_iter=10)
        y_pred_10 = est.fit(X, y).predict(X)

    assert_less_equal(mean_squared_error(y, y_pred_10),
                      mean_squared_error(y, y_pred_5),
                      msg="More iterations do not improve fit.")

def test_random_starts():
    noisy_y = _poly_predict(X, P, lams, kernel="anova", degree=2)
    noisy_y += 5. * rng.randn(noisy_y.shape[0])
    X_train, X_test = X[:10], X[10:]
    y_train, y_test = noisy_y[:10], noisy_y[10:]

    scores = []
    # init_lambdas='ones' is important to reduce variance here
    reg = FactorizationMachineRegressor(degree=2, n_components=n_components,
                                        beta=5, fit_lower=None,
                                        fit_linear=False, max_iter=2000,
                                        init_lambdas='ones', tol=0.001)
    for k in range(10):
        reg.set_params(random_state=k)
        y_pred = reg.fit(X_train, y_train).predict(X_test)
        scores.append(mean_squared_error(y_test, y_pred))

    assert_less_equal(np.std(scores), 0.001)

def rmse(Y_true, Y_pred):
    # https://www.kaggle.com/wiki/RootMeanSquaredError
    from sklearn.metrics import mean_squared_error
    print('shape:', Y_true.shape, Y_pred.shape)
    print("===RMSE===")
    # in
    RMSE = mean_squared_error(Y_true[:, 0].flatten(), Y_pred[:, 0].flatten()) ** 0.5
    print('inflow: ', RMSE)
    # out
    if Y_true.shape[1] > 1:
        RMSE = mean_squared_error(Y_true[:, 1].flatten(), Y_pred[:, 1].flatten()) ** 0.5
        print('outflow: ', RMSE)
    # new
    if Y_true.shape[1] > 2:
        RMSE = mean_squared_error(Y_true[:, 2].flatten(), Y_pred[:, 2].flatten()) ** 0.5
        print('newflow: ', RMSE)
    # end
    if Y_true.shape[1] > 3:
        RMSE = mean_squared_error(Y_true[:, 3].flatten(), Y_pred[:, 3].flatten()) ** 0.5
        print('endflow: ', RMSE)
    RMSE = mean_squared_error(Y_true.flatten(), Y_pred.flatten()) ** 0.5
    print("total rmse: ", RMSE)
    print("===RMSE===")
    return RMSE

def k_vs_rms(START_K, END_K, STEP_K, training_data, labels, test_data, expected_labels, weights='distance'):
    num_points = int((END_K - START_K) / STEP_K) + 1
    points = np.zeros([num_points, 2])
    index = -1
    for K in range(START_K, END_K, STEP_K):
        print "k = " + str(K)
        index += 1
        output = knn_regression(K, training_data, labels, test_data, weights)
        v = np.column_stack((output, expected_labels))
        v = v[~np.isnan(v[:, 0]), :]
        RMSE = mean_squared_error(v[:, 0], v[:, 1]) ** 0.5
        points[index, 0] = K
        points[index, 1] = RMSE
    if points[-1, 0] == 0 and points[-1, 1] == 0:
        points = points[:-1, :]
    return points

# Test parameters

def train(df_train, df_test):
    train_x, train_y = extract_feature_and_y(df_train)
    print("train x and y shape: {0} and {1}".format(
        train_x.shape, train_y.shape))
    test_x, test_y = extract_feature_and_y(df_test)
    print("test x and y shape: {0} and {1}".format(
        test_x.shape, test_y.shape))

    # print("train x nan:", np.isfinite(train_x).any())
    # print("train y nan:", np.isfinite(train_y).any())
    # print("test x nan:", np.isfinite(test_x).any())

    info = train_ridge_linear_model(train_x, train_y, test_x)
    #info = train_lasso_model(train_x, train_y, test_x)
    #info = train_EN_model(train_x, train_y, test_x)

    _mse = mean_squared_error(test_y, info["y"])
    _std = np.std(test_y - info["y"])
    print("MSE on test data: %f" % _mse)
    print("std of error on test data: %f" % _std)

    plot_y(train_y, info["train_y"], test_y, info["y"])

def train(X_train, y_train):
    model = Sequential()
    model.add(LSTM(
        lstm_neurons,
        batch_input_shape=(batch_size, X_train.shape[1], X_train.shape[2]),
        stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    for i in range(epochs):
        print 'batch', i + 1
        model.fit(
            X_train,
            y_train,
            epochs=1,
            batch_size=batch_size,
            verbose=2,
            shuffle=False,
            validation_split=0.33)
        model.reset_states()
    return model

def run_model(model, dtrain, predictor_var, target, scoring_method='mean_squared_error'):
    cv_method = KFold(len(dtrain), 5)
    cv_scores = cross_val_score(model, dtrain[predictor_var], dtrain[target], cv=cv_method, scoring=scoring_method)
    #print cv_scores, np.mean(cv_scores), np.sqrt((-1)*np.mean(cv_scores))

    dtrain_for_val = dtrain[dtrain['Year'] < 2000]
    dtest_for_val = dtrain[dtrain['Year'] > 1999]
    #cv_method = KFold(len(dtrain_for_val), 5)
    #cv_scores_2 = cross_val_score(model, dtrain_for_val[predictor_var], dtrain_for_val[target], cv=cv_method, scoring=scoring_method)
    #print cv_scores_2, np.mean(cv_scores_2)

    dtrain_for_val_ini = dtrain_for_val[predictor_var]
    dtest_for_val_ini = dtest_for_val[predictor_var]
    model.fit(dtrain_for_val_ini, dtrain_for_val[target])
    pred_for_val = model.predict(dtest_for_val_ini)
    #print math.sqrt(mean_squared_error(dtest_for_val['Footfall'], pred_for_val))

def arima(series, durations, order):
    X = series.values
    size = int(len(X) * 0.99)
    train, test = X[0:size], X[size:len(X)]
    history = [x for x in train]
    predictions = list()
    for t in range(len(test)):
        model = ARIMA(history, order=(5, 1, 0))
        model_fit = model.fit(disp=0)
        output = model_fit.forecast()
        yhat = output[0]
        predictions.append(yhat)
        obs = test[t]
        history.append(obs)
        print('predicted=%f, expected=%f' % (yhat, obs))
    error = mean_squared_error(test, predictions)
    print('Test MSE: %.3f' % error)
    return predictions

# plot

def prediction_curve(dmat, vals, steps, radius):
    """Return MSE from predicting values from neighbors at radial steps."""
    # Set null distances (greater than some threshold) to 0.
    # Not in general a great idea, but fine here because we don't
    # do anything with identity edges, and sums will be faster
    # if we don't have to worry about nans
    dmat = np.nan_to_num(dmat)

    error_vals = []
    for step in steps:
        neighbors = (np.abs(dmat - step) < radius).astype(np.float)
        neighbors /= neighbors.sum(axis=1, keepdims=True)
        predicted = neighbors.dot(vals)
        m = ~np.isnan(predicted)
        error_vals.append(mean_squared_error(vals[m], predicted[m]))

    return np.array(error_vals)

def _raw_rank(self, x, y, network):
    impt = np.zeros(x.shape[1])
    for i in range(x.shape[1]):
        hold = np.array(x[:, i])
        np.random.shuffle(x[:, i])

        # Handle both TensorFlow and SK-Learn models.
        if 'tensorflow' in str(type(network)).lower():
            pred = list(network.predict(x, as_iterable=True))
        else:
            pred = network.predict(x)

        rmse = metrics.mean_squared_error(y, pred)
        impt[i] = rmse
        x[:, i] = hold
    return impt

def score_regression(y, y_hat, report=True):
    """
    Create regression score
    :param y:
    :param y_hat:
    :return:
    """
    r2 = r2_score(y, y_hat)
    rmse = sqrt(mean_squared_error(y, y_hat))
    mae = mean_absolute_error(y, y_hat)

    report_string = "---Regression Score--- \n"
    report_string += "R2 = " + str(r2) + "\n"
    report_string += "RMSE = " + str(rmse) + "\n"
    report_string += "MAE = " + str(mae) + "\n"

    if report:
        print(report_string)

    return mae, report_string

def _cross_val_score_loo_r0(lm, X, y):
    """
    mean_square_error metric is used from sklearn.metric.

    Return
    --------
    The mean squared error values are returned.
    """
    if len(y.shape) == 1:
        y = np.array([y]).T

    kf = cross_validation.LeaveOneOut(y.shape[0])
    score_l = list()
    for tr, te in kf:
        lm.fit(X[tr, :], y[tr, :])
        yp = lm.predict(X[te, :])
        score_l.append(metrics.mean_squared_error(y[te, :], yp))

    return score_l

def gs_Ridge(xM, yV, alphas_log=(1, -1, 9), n_folds=5, n_jobs=-1, scoring='r2'):
    """
    Parameters
    -------------
    scoring: mean_absolute_error, mean_squared_error, median_absolute_error, r2
    """
    print('If scoring is not r2 but an error metric, the output score is reversed for scoring!')
    print(xM.shape, yV.shape)

    clf = linear_model.Ridge()
    #parmas = {'alpha': np.logspace(1, -1, 9)}
    parmas = {'alpha': np.logspace(*alphas_log)}

    kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf_n = kf_n_c.split(xM)

    gs = model_selection.GridSearchCV(
        clf, parmas, scoring=scoring, cv=kf_n, n_jobs=n_jobs)

    gs.fit(xM, yV)

    return gs

def gs_Ridge(xM, yV, alphas_log=(1, -1, 9), n_folds=5, n_jobs=-1, scoring='r2'):
    """
    Parameters
    -------------
    scoring: mean_absolute_error, mean_squared_error, median_absolute_error, r2
    """
    print(xM.shape, yV.shape)

    clf = linear_model.Ridge()
    #parmas = {'alpha': np.logspace(1, -1, 9)}
    parmas = {'alpha': np.logspace(*alphas_log)}

    kf_n = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)

    gs = grid_search.GridSearchCV(clf, parmas, scoring=scoring, cv=kf_n, n_jobs=n_jobs)

    gs.fit(xM, yV)

    return gs

def test_Validate():
    """Test that Validate function works correctly"""
    accuracy = an.validate(testing=True)
    val = mean_squared_error(y, slr.predict(X))
    assert np.allclose(accuracy, val)

    accuracy = an.validate(testing=True, X=X, y=y, metric=mean_squared_error)
    assert np.allclose(accuracy, val)

    accuracy = an.validate(testing=True, metric=[mean_squared_error, r2_score])
    val = [mean_squared_error(y, slr.predict(X)), r2_score(y, slr.predict(X))]
    assert np.allclose(accuracy, val)

    with pytest.raises(ValueError):
        an.validate(X=[1, 2, 3])

def eval_pred(y_true, y_pred, eval_type):
    if eval_type == 'logloss':
        loss = ll(y_true, y_pred)
        print "logloss: ", loss
        return loss

    elif eval_type == 'auc':
        loss = AUC(y_true, y_pred)
        print "AUC: ", loss
        return loss

    elif eval_type == 'rmse':
        loss = np.sqrt(mean_squared_error(y_true, y_pred))
        print "rmse: ", loss
        return loss

######### BaseModel Class #########

def K_FoldValidation(estimator, XMatrix, yVector, numFolds):
    numTrainingExamples = len(XMatrix)
    K = numFolds
    if K < 2:
        print("Error, K must be greater than or equal to 2")
        exit(-10)
    elif K > numTrainingExamples:
        print("Error, K must be less than or equal to the number of training examples")
        exit(-11)

    K_folds = model_selection.KFold(numTrainingExamples, K)

    for k, (train_index, test_index) in enumerate(K_folds):
        X_train, X_test = XMatrix[train_index], XMatrix[test_index]
        y_train, y_test = yVector[train_index], yVector[test_index]
        # Fit
        estimator.fit(X_train, y_train, logdir='')
        # Predict and score
        score = metrics.mean_squared_error(estimator.predict(X_test), y_test)
        print('Iteration {0:f} MSE: {1:f}'.format(k + 1, score))

def test_input_data_continuous(self, learner, filename):
    # Load data
    data = Orange.data.Table(filename)

    # Train recommender
    recommender = learner(data)
    print(str(recommender) + ' trained')

    # Compute predictions
    y_pred = recommender(data)

    # Compute RMSE
    rmse = math.sqrt(mean_squared_error(data.Y, y_pred))
    print('-> RMSE (input data; continuous): %.3f' % rmse)

    # Check correctness
    self.assertGreaterEqual(rmse, 0)

def test_input_data_discrete(self, learner, filename):
    # Load data
    data = Orange.data.Table(filename)

    # Train recommender
    recommender = learner(data)
    print(str(recommender) + ' trained')

    # Compute predictions
    y_pred = recommender(data)

    # Compute RMSE
    rmse = math.sqrt(mean_squared_error(data.Y, y_pred))
    print('-> RMSE (input data; discrete): %.3f' % rmse)

    # Check correctness
    self.assertGreaterEqual(rmse, 0)

def mean_squared_error_(ground_truth, predictions):
    return mean_squared_error(ground_truth, predictions) ** 0.5

def train_and_eval_sklearn_regressor(clf, data):

    x_train = data['x_train']
    y_train = data['y_train']
    x_test = data['x_test']
    y_test = data['y_test']

    clf.fit(x_train, y_train)
    p = clf.predict(x_train)

    mse = MSE(y_train, p)
    rmse = sqrt(mse)
    mae = MAE(y_train, p)

    print "\n# training | RMSE: {:.4f}, MAE: {:.4f}".format(rmse, mae)

    #

    p = clf.predict(x_test)

    mse = MSE(y_test, p)
    rmse = sqrt(mse)
    mae = MAE(y_test, p)

    print "# testing | RMSE: {:.4f}, MAE: {:.4f}".format(rmse, mae)

    return {'loss': rmse, 'rmse': rmse, 'mae': mae}