Python sklearn.metrics module, mean_squared_error() example source code

We have extracted the following 50 code examples from open-source Python projects to illustrate how to use sklearn.metrics.mean_squared_error().
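Before the project excerpts, here is a minimal self-contained sketch of the call itself (the numbers follow the scikit-learn documentation example). MSE is the mean of the squared residuals; its square root is the RMSE that many of the excerpts below compute by hand.

import numpy as np
from sklearn.metrics import mean_squared_error

y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])

mse = mean_squared_error(y_true, y_pred)  # (0.25 + 0.25 + 0.0 + 1.0) / 4
print(mse)         # 0.375
print(mse ** 0.5)  # 0.6124..., the RMSE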

Project: stacked_generalization | Author: fukatani
def test_stacked_regressor(self):
        bclf = LinearRegression()
        clfs = [RandomForestRegressor(n_estimators=50, random_state=1),
                GradientBoostingRegressor(n_estimators=25, random_state=1),
                Ridge(random_state=1)]

        # Friedman1
        X, y = datasets.make_friedman1(n_samples=1200,
                                       random_state=1,
                                       noise=1.0)
        X_train, y_train = X[:200], y[:200]
        X_test, y_test = X[200:], y[200:]

        sr = StackedRegressor(bclf,
                              clfs,
                              n_folds=3,
                              verbose=0,
                              oob_score_flag=True)
        sr.fit(X_train, y_train)
        mse = mean_squared_error(y_test, sr.predict(X_test))
        assert_less(mse, 6.0)
Project: stacked_generalization | Author: fukatani
def test_fwls_regressor(self):
        feature_func = lambda x: np.ones(x.shape)
        bclf = LinearRegression()
        clfs = [RandomForestRegressor(n_estimators=50, random_state=1),
                GradientBoostingRegressor(n_estimators=25, random_state=1),
                Ridge(random_state=1)]

        # Friedman1
        X, y = datasets.make_friedman1(n_samples=1200,
                                       random_state=1,
                                       noise=1.0)
        X_train, y_train = X[:200], y[:200]
        X_test, y_test = X[200:], y[200:]

        sr = FWLSRegressor(bclf,
                           clfs,
                           feature_func,
                           n_folds=3,
                           verbose=0,
                           oob_score_flag=True)
        sr.fit(X_train, y_train)
        mse = mean_squared_error(y_test, sr.predict(X_test))
        assert_less(mse, 6.0)
Project: stacked_generalization | Author: fukatani
def test_regressor(self):
        X, y = datasets.make_friedman1(n_samples=1200,
                                       random_state=1,
                                       noise=1.0)
        X_train, y_train = X[:200], y[:200]
        index = [i for i in range(200)]

        rf = RandomForestRegressor()
        jrf = JoblibedRegressor(rf, "rfr", cache_dir='')
        jrf.fit(X_train, y_train, index)
        prediction = jrf.predict(X_train, index)
        mse = mean_squared_error(y_train, prediction)
        assert_less(mse, 6.0)

        rf = RandomForestRegressor(n_estimators=20)
        jrf = JoblibedRegressor(rf, "rfr", cache_dir='')
        jrf.fit(X_train, y_train, index)
        prediction2 = jrf.predict(X_train, index)
        assert_allclose(prediction, prediction2)
Project: AirTicketPredicting | Author: junlulocky
def parameterChoosing(self):
        #Set the parameters by cross-validation
        tuned_parameters = [{'max_depth': range(20,60),
                             'n_estimators': range(10,40),
                             'max_features': ['sqrt', 'log2', None]
                             }
                            ]

        clf = GridSearchCV(RandomForestRegressor(n_estimators=30), tuned_parameters, cv=5, scoring='mean_squared_error')
        clf.fit(self.X_train, self.y_train.ravel())

        print("Best parameters set found on development set:\n")
        print(clf.best_params_)

        print("Grid scores on development set:\n")
        # grid_scores_ is the pre-0.18 scikit-learn API; see the note after this excerpt
        for params, mean_score, scores in clf.grid_scores_:
            print("%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params))

        print("MSE for test data set:\n")
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print(mean_squared_error(y_true, y_pred))
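A portability note for excerpts like the one above: the scoring string 'mean_squared_error' and the grid_scores_ attribute are pre-0.18 scikit-learn APIs that were later removed; current releases use 'neg_mean_squared_error' (negated so that greater is better) and cv_results_. A minimal sketch of the modern equivalent on synthetic data:

from sklearn.datasets import make_friedman1
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

X, y = make_friedman1(n_samples=200, random_state=1)
param_grid = {'max_depth': [4, 8, None], 'max_features': ['sqrt', 'log2', None]}
search = GridSearchCV(RandomForestRegressor(n_estimators=30, random_state=1),
                      param_grid, cv=5, scoring='neg_mean_squared_error')
search.fit(X, y)
print(search.best_params_)
print(-search.best_score_)  # scores are negated MSEs, so flip the sign back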
Project: healthcareai-py | Author: HealthCatalyst
def calculate_regression_metrics(trained_sklearn_estimator, x_test, y_test):
    """
    Given a trained estimator, calculate metrics.

    Args:
        trained_sklearn_estimator (sklearn.base.BaseEstimator): a scikit-learn estimator that has been `.fit()`
        y_test (numpy.ndarray): A 1d numpy array of the y_test set (true target values)
        x_test (numpy.ndarray): A 2d numpy array of the x_test set (features)

    Returns:
        dict: A dictionary of metrics objects
    """
    # Get predictions
    predictions = trained_sklearn_estimator.predict(x_test)

    # Calculate individual metrics
    mean_squared_error = skmetrics.mean_squared_error(y_test, predictions)
    mean_absolute_error = skmetrics.mean_absolute_error(y_test, predictions)

    result = {'mean_squared_error': mean_squared_error, 'mean_absolute_error': mean_absolute_error}

    return result
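A hypothetical call to calculate_regression_metrics; the model and train/test split below are illustrative stand-ins, not part of the project, and the function assumes sklearn.metrics was imported as skmetrics.

import numpy as np
import sklearn.metrics as skmetrics  # alias assumed by the function above
from sklearn.linear_model import LinearRegression

rng = np.random.RandomState(0)
x = rng.rand(100, 3)
y = x @ np.array([1.0, 2.0, 3.0]) + 0.1 * rng.randn(100)
x_train, x_test, y_train, y_test = x[:80], x[80:], y[:80], y[80:]

model = LinearRegression().fit(x_train, y_train)
print(calculate_regression_metrics(model, x_test, y_test))
# -> {'mean_squared_error': ..., 'mean_absolute_error': ...}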
Project: TrackToTrip | Author: ruipgil
def score(train_labels, train_features, test_labels, test_features, save_file, use_tree=False):
    if use_tree:
        train_clf = Classifier(tree.DecisionTreeClassifier())
    else:
        train_clf = Classifier()

    print(train_clf.clf)
    print('')

    t_start = time.perf_counter()  # time.clock() was removed in Python 3.8
    train_clf.learn(train_features, train_labels)
    t_end = time.perf_counter()
    if save_file:
        train_clf.save_to_file(open(save_file, 'w'))

    p_start = time.perf_counter()
    predicted = train_clf.clf.predict(test_features)
    p_end = time.perf_counter()

    test_labels_t = train_clf.labels.transform(test_labels)
    print(classification_report(test_labels_t, predicted, target_names=train_clf.labels.classes_))
    print('Training time: %fs' % (t_end - t_start))
    print('Predicting time: %fs' % (p_end - p_start))
    print('Mean squared error: %f' % mean_squared_error(test_labels_t, predicted))
    return train_clf.score(test_features, test_labels)
Project: OpenAPS | Author: medicinexlab
def _plot_old_pred_data(old_pred_data, show_pred_plot, save_pred_plot, show_clarke_plot, save_clarke_plot, id_str, algorithm_str, minutes_str):
    actual_bg_array = old_pred_data.result_actual_bg_array
    actual_bg_time_array = old_pred_data.result_actual_bg_time_array
    pred_array = old_pred_data.result_pred_array
    pred_time_array = old_pred_data.result_pred_time_array

    #Root mean squared error
    rms = math.sqrt(metrics.mean_squared_error(actual_bg_array, pred_array))
    print("                Root Mean Squared Error: " + str(rms))
    print("                Mean Absolute Error: " + str(metrics.mean_absolute_error(actual_bg_array, pred_array)))
    print("                R^2 Coefficient of Determination: " + str(metrics.r2_score(actual_bg_array, pred_array)))

    plot, zone = ClarkeErrorGrid.clarke_error_grid(actual_bg_array, pred_array, id_str + " " + algorithm_str + " " + minutes_str)
    print("                Percent A:{}".format(float(zone[0]) / (zone[0] + zone[1] + zone[2] + zone[3] + zone[4])))
    print("                Percent C, D, E:{}".format(float(zone[2] + zone[3] + zone[4]) / (zone[0] + zone[1] + zone[2] + zone[3] + zone[4])))
    print("                Zones are A:{}, B:{}, C:{}, D:{}, E:{}\n".format(zone[0], zone[1], zone[2], zone[3], zone[4]))
    if save_clarke_plot: plt.savefig(id_str + algorithm_str.replace(" ", "") + minutes_str + "clarke.png")
    if show_clarke_plot: plot.show()

    plt.clf()
    plt.plot(actual_bg_time_array, actual_bg_array, label="Actual BG", color='black', linestyle='-')
    plt.plot(pred_time_array, pred_array, label="BG Prediction", color='black', linestyle=':')
    plt.title(id_str + " " + algorithm_str + " " + minutes_str + " BG Analysis")
    plt.ylabel("Blood Glucose Level (mg/dl)")
    plt.xlabel("Time (minutes)")
    plt.legend(loc='upper left')

    # SHOW/SAVE PLOT DEPENDING ON THE BOOLEAN PARAMETER
    if save_pred_plot: plt.savefig(id_str + algorithm_str.replace(" ","") + minutes_str + "plot.png")
    if show_pred_plot: plt.show()


#Function to analyze the old OpenAPS data
Project: keras-timeseries-prediction | Author: gcarq
def build_model(look_back: int, batch_size: int=1) -> Sequential:
    """
    The function builds a keras Sequential model
    :param look_back: number of previous time steps as int
    :param batch_size: batch_size as int, defaults to 1
    :return: keras Sequential model
    """
    model = Sequential()
    model.add(LSTM(64,
                   activation='relu',
                   batch_input_shape=(batch_size, look_back, 1),
                   stateful=True,
                   return_sequences=False))
    model.add(Dense(1, activation='linear'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
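A hypothetical smoke test for build_model, with random stand-in data (assumes Keras with a working backend is installed):

import numpy as np

look_back, batch_size = 3, 1
model = build_model(look_back, batch_size)

# eight windows of `look_back` steps, each mapped to a single next value
X = np.random.rand(8, look_back, 1).astype('float32')
y = np.random.rand(8, 1).astype('float32')

model.fit(X, y, epochs=2, batch_size=batch_size, shuffle=False, verbose=0)
model.reset_states()  # a stateful LSTM keeps state until it is reset explicitly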
Project: stacker | Author: bamine
def setUp(self):
        os.putenv("KMP_DUPLICATE_LIB_OK", "TRUE")
        self.X_class, self.y_class = datasets.make_classification(random_state=42)
        self.X_reg, self.y_reg = datasets.make_regression(random_state=42)
        self.classification_optimizers = [XGBoostOptimizer, RandomForestOptimizer]
        self.regression_optimizers = [XGBoostOptimizer, RandomForestOptimizer]
        self.class_scorer = Scorer("auc_error", lambda y_pred, y_true: 1 - metrics.roc_auc_score(y_pred, y_true))
        self.reg_scorer = Scorer("mse", metrics.mean_squared_error)

        self.classification_task_split = \
            Task("class_split", self.X_class, self.y_class, "classification", test_size=0.1, random_state=42)
        self.regression_task_split = \
            Task("reg_split", self.X_reg, self.y_reg, "regression", test_size=0.1, random_state=42)

        self.classification_task_cv = \
            Task("class_cv", self.X_class, self.y_class, "classification", cv=5, random_state=42)
        self.regression_task_cv = \
            Task("reg_cv", self.X_reg, self.y_reg, "regression", cv=5, random_state=42)
Project: Supply-demand-forecasting | Author: LevinJ
def after_test(self):
#         scores_test=[]
#         scores_train=[]
#         scores_test_mse = []
#         scores_train_mse = []
#         for i, y_pred in enumerate(self.clf.staged_predict(self.X_test)):
#             scores_test.append(mean_absolute_percentage_error(self.y_test, y_pred))
#             scores_test_mse.append(mean_squared_error(self.y_test, y_pred))
#         
#         for i, y_pred in enumerate(self.clf.staged_predict(self.X_train)):
#             scores_train.append(mean_absolute_percentage_error(self.y_train, y_pred))
#             scores_train_mse.append(mean_squared_error(self.y_train, y_pred))
#         
#         pd.DataFrame({'scores_train': scores_train, 'scores_test': scores_test,'scores_train_mse': scores_train_mse, 'scores_test_mse': scores_test_mse}).to_csv('temp/trend.csv')
#         df = pd.DataFrame({'scores_train': scores_train, 'scores_test': scores_test})
#         print "Test set MAPE minimum: {}".format(np.array(scores_test).min())
#         df.plot()
#         plt.show()
        return
Project: Supply-demand-forecasting | Author: LevinJ
def after_test(self):
        scores_test=[]
        scores_train=[]
        scores_test_mse = []
        scores_train_mse = []
        for i, y_pred in enumerate(self.clf.staged_predict(self.X_test)):
            scores_test.append(mean_absolute_percentage_error(self.y_test, y_pred))
            scores_test_mse.append(mean_squared_error(self.y_test, y_pred))

        for i, y_pred in enumerate(self.clf.staged_predict(self.X_train)):
            scores_train.append(mean_absolute_percentage_error(self.y_train, y_pred))
            scores_train_mse.append(mean_squared_error(self.y_train, y_pred))

        pd.DataFrame({'scores_train': scores_train, 'scores_test': scores_test,'scores_train_mse': scores_train_mse, 'scores_test_mse': scores_test_mse}).to_csv('temp/trend.csv')
        df = pd.DataFrame({'scores_train': scores_train, 'scores_test': scores_test})
        print("Test set MAPE minimum: {}".format(np.array(scores_test).min()))
#         df.plot()
#         plt.show()
        return
Project: brainiak | Author: brainiak
def _mse_converged(self):
        """Check convergence based on mean squared difference between
            prior and posterior

        Returns
        -------

        converged : boolean
            Whether the parameter estimation converged.

        mse : float
            Mean squared error between prior and posterior.

        """

        prior = self.global_prior_[0:self.prior_size]
        posterior = self.global_posterior_[0:self.prior_size]
        mse = mean_squared_error(prior, posterior,
                                 multioutput='uniform_average')
        if mse > self.threshold:
            return False, mse
        else:
            return True, mse
Project: brainiak | Author: brainiak
def _mse_converged(self):
        """Check convergence based on mean squared error

        Returns
        -------

        converged : boolean
            Whether the parameter estimation converged.

        mse : float
            Mean squared error between prior and posterior.

        """

        mse = mean_squared_error(self.local_prior, self.local_posterior_,
                                 multioutput='uniform_average')
        if mse > self.threshold:
            return False, mse
        else:
            return True, mse
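The convergence idiom shared by the two methods above, in isolation; the threshold value here is assumed for illustration and is not brainiak's default.

import numpy as np
from sklearn.metrics import mean_squared_error

threshold = 1e-6  # assumed value, for illustration only
prior = np.zeros(100)
posterior = prior + 1e-4 * np.random.randn(100)

mse = mean_squared_error(prior, posterior, multioutput='uniform_average')
print(mse <= threshold, mse)  # (converged?, current MSE)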
Project: stacked_generalization | Author: fukatani
def __init__(self,
                 bclf,
                 clfs,
                 n_folds=3,
                 oob_score_flag=False,
                 oob_metrics=mean_squared_error,
                 Kfold=None,
                 verbose=0,
                 save_stage0=False,
                 save_dir=''):
        self.n_folds = n_folds
        self.clfs = clfs
        self.bclf = bclf
        self.all_learner = OrderedDict()
        self.oob_score_flag = oob_score_flag
        self.oob_metrics = oob_metrics
        self.verbose = verbose
        self.stack_by_proba = False
        self.save_stage0 = save_stage0
        self.save_dir = save_dir
        self.MyKfold = Kfold
Project: stacked_generalization | Author: fukatani
def __init__(self,
                 bclf,
                 clfs,
                 feature_func,
                 n_folds=3,
                 oob_score_flag=False,
                 oob_metrics=mean_squared_error,
                 Kfold=None,
                 verbose=0,
                 save_stage0=False,
                 save_dir=''):
        super(FWLSRegressor, self).__init__(bclf,
                                            clfs,
                                            n_folds,
                                            oob_score_flag,
                                            oob_metrics,
                                            Kfold,
                                            verbose,
                                            save_stage0,
                                            save_dir)

        self.feature_func = feature_func
Project: rtb-unbiased-learning | Author: wnzhang
def test():
    y = []
    yp = []
    fi = open(sys.argv[1], 'r')
    for line in fi:
        data = ints(line.replace(":1", "").split())
        clk = data[1]
        mp = data[2]
        fsid = 3 # feature start id
        pred = 0.0
        for i in range(fsid, len(data)):
            feat = data[i]
            if feat in featWeight:
                pred += featWeight[feat]
        pred = sigmoid(pred)
        y.append(clk)
        yp.append(pred)
    fi.close()
    auc = roc_auc_score(y, yp)
    rmse = math.sqrt(mean_squared_error(y, yp))
    # `round` shadows the builtin here; in the original script it is presumably a global training-round counter
    print(str(round) + '\t' + str(auc) + '\t' + str(rmse))
Project: pythonml | Author: nicholastoddsmith
def PlotLearn(R, A, Y):
    intA = [BinVecToInt(j) for j in A]
    intY = [BinVecToInt(j) for j in Y]
    fig, ax = mpl.subplots(figsize=(20, 10))
    ax.plot(intA, intY, label='Orig')
    l, = ax.plot(intA, intY, label='Pred')
    ax.legend(loc='upper left')
    #Updates the plot in ax as model learns data
    def UpdateF(i):
        R.fit(A, Y)
        YH = R.predict(A)
        S = MSE(Y, YH)
        intYH = [BinVecToInt(j) for j in YH]
        l.set_ydata(intYH)
        ax.set_title('Iteration: ' + str(i * 64) + ' - MSE: ' + str(S))
        return l,

    ani = mpla.FuncAnimation(fig, UpdateF, frames = 2000, interval = 128, repeat = False)
    #ani.save('foo.gif')
    mpl.show()
    return ani
Project: AirTicketPredicting | Author: junlulocky
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'max_features': ['sqrt', 'log2', None],
                             'max_depth': range(2,1000),
                             }
                            ]


        reg = GridSearchCV(DecisionTreeRegressor(), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        print("Best parameters set found on development set:\n")
        print(reg.best_params_)

        print("Grid scores on development set:\n")
        for params, mean_score, scores in reg.grid_scores_:
            print("%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params))

        print("MSE for test data set:\n")
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print(mean_squared_error(y_true, y_pred))
Project: AirTicketPredicting | Author: junlulocky
def predict(self):
        # predict the test data
        y_pred1 = self.net1.predict(self.X_test)
        y_pred1 = y_pred1.reshape((y_pred1.shape[0], 1))

        y_pred2 = self.linRegr.predict(self.X_test)
        y_pred2 = y_pred2.reshape((y_pred2.shape[0], 1))

        y_pred3 = self.knn.predict(self.X_test)
        y_pred3 = y_pred3.reshape((y_pred3.shape[0], 1))

        y_pred4 = self.decisionTree.predict(self.X_test)
        y_pred4 = y_pred4.reshape((y_pred4.shape[0], 1))

        y_pred5 = self.adaReg.predict(self.X_test)
        y_pred5 = y_pred5.reshape((y_pred5.shape[0], 1))

        self.y_pred = (y_pred1+y_pred2+y_pred3+y_pred4+y_pred5)/5

        # print the MSE; the canonical argument order is (y_true, y_pred),
        # though plain MSE is symmetric in its arguments
        mse = mean_squared_error(self.y_test, self.y_pred)
        print("MSE: {}".format(mse))
Project: AirTicketPredicting | Author: junlulocky
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'weights': ['uniform', 'distance'],
                             'n_neighbors': range(2,100)
                             }
                            ]


        reg = GridSearchCV(neighbors.KNeighborsRegressor(), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        print("Best parameters set found on development set:\n")
        print(reg.best_params_)

        print("Grid scores on development set:\n")
        for params, mean_score, scores in reg.grid_scores_:
            print("%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params))

        print(reg.scorer_)

        print("MSE for test data set:")
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print(mean_squared_error(y_true, y_pred))  # symmetric, but (y_true, y_pred) is the documented order
Project: AirTicketPredicting | Author: junlulocky
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'alpha': np.logspace(-5,5)
                             }
                            ]


        reg = GridSearchCV(linear_model.Ridge(alpha = 0.5), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        print("Best parameters set found on development set:\n")
        print(reg.best_params_)

        print("Grid scores on development set:\n")
        for params, mean_score, scores in reg.grid_scores_:
            print("%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params))

        print(reg.scorer_)

        print("MSE for test data set:")
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print(mean_squared_error(y_true, y_pred))  # symmetric, but (y_true, y_pred) is the documented order
Project: Hotpot | Author: Liang-Qiu
def eval_sts(ycat, y, name, quiet=False):
    """ Evaluate given STS regression-classification predictions and print results. """
    if ycat.ndim == 1:
        ypred = ycat
    else:
        ypred = loader.sts_categorical2labels(ycat)
    if y.ndim == 1:
        ygold = y
    else:
        ygold = loader.sts_categorical2labels(y)
    pr = pearsonr(ypred, ygold)[0]
    sr = spearmanr(ypred, ygold)[0]
    e = mse(ypred, ygold)
    if not quiet:
        print('%s Pearson: %f' % (name, pr,))
        print('%s Spearman: %f' % (name, sr,))
        print('%s MSE: %f' % (name, e,))
    return STSRes(pr, sr, e)
Project: elm | Author: ContinuumIO
def r_squared_mse(y_true, y_pred, sample_weight=None, multioutput=None):

    r2 = r2_score(y_true, y_pred,
                  sample_weight=sample_weight, multioutput=multioutput)
    mse = mean_squared_error(y_true, y_pred,
                             sample_weight=sample_weight,
                             multioutput=multioutput)
    # MIN_/MAX_MOISTURE_BOUND are module-level constants in the original project
    bounds_check = np.min(y_pred) > MIN_MOISTURE_BOUND
    bounds_check = bounds_check & (np.max(y_pred) < MAX_MOISTURE_BOUND)
    print('Scoring - std', np.std(y_true), np.std(y_pred))
    print('Scoring - median', np.median(y_true), np.median(y_pred))
    print('Scoring - min', np.min(y_true), np.min(y_pred))
    print('Scoring - max', np.max(y_true), np.max(y_pred))
    print('Scoring - mean', np.mean(y_true), np.mean(y_pred))
    print('Scoring - MSE, R2, bounds', mse, r2, bounds_check)
    return (float(mse),
            float(r2),
            int(bounds_check))
Project: f1_2017 | Author: aflaisler
def fastLapModel(xList, labels, names, multiple=0, full_set=0):
    X = np.array(xList)  # unified on the np alias; the original mixed `numpy` and `np`
    y = np.array(labels)
    featureNames = np.array(names)
    # take fixed holdout set 30% of data rows
    xTrain, xTest, yTrain, yTest = train_test_split(
        X, y, test_size=0.30, random_state=531)
    # for final model (no CV)
    if full_set:
        xTrain = X
        yTrain = y
    check_set(xTrain, xTest, yTrain, yTest)
    print("Fitting the model to the data set...")
    # train random forest at a range of ensemble sizes in order to see how the
    # mse changes
    mseOos = []
    m = 10 ** multiple
    nTreeList = range(500 * m, 1000 * m, 100 * m)
    # iTrees = 10000
    for iTrees in nTreeList:
        depth = None
        maxFeat = int(np.sqrt(np.shape(xTrain)[1])) + 1  # try tweaking
        RFmd = ensemble.RandomForestRegressor(n_estimators=iTrees, max_depth=depth, max_features=maxFeat,
                                              oob_score=False, random_state=531, n_jobs=-1)
        # RFmd.n_features = 5
        RFmd.fit(xTrain, yTrain)

        # Accumulate mse on test set
        prediction = RFmd.predict(xTest)
        mseOos.append(mean_squared_error(yTest, prediction))
    # plot training and test errors vs number of trees in ensemble
    plot.plot(nTreeList, mseOos)
    plot.xlabel('Number of Trees in Ensemble')
    plot.ylabel('Mean Squared Error')
    #plot.ylim([0.0, 1.1*max(mseOob)])
    plot.show()
    print("MSE")
    print(mseOos[-1])
    return xTrain, xTest, yTrain, yTest, RFmd
Project: polylearn | Author: scikit-learn-contrib
def check_improve(degree):
    y = _lifted_predict(U[:degree], X)

    common_settings = dict(degree=degree, n_components=n_components,
                           beta=1e-10, tol=0, random_state=0)

    est_5 = PolynomialNetworkRegressor(max_iter=5, **common_settings)
    est_10 = PolynomialNetworkRegressor(max_iter=10, **common_settings)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        est_5.fit(X, y)
        est_10.fit(X, y)

    y_pred_5 = est_5.predict(X)
    y_pred_10 = est_10.predict(X)

    assert_less_equal(mean_squared_error(y, y_pred_10),
                      mean_squared_error(y, y_pred_5),
                      msg="More iterations do not improve fit.")
Project: polylearn | Author: scikit-learn-contrib
def test_random_starts():
    # not as strong a test as the direct case!
    # using training error here, and a higher threshold.
    # We observe the lifted solver reaches rather diff. solutions.
    degree = 3
    noisy_y = _lifted_predict(U[:degree], X)
    noisy_y += 5. * rng.randn(noisy_y.shape[0])

    common_settings = dict(degree=degree, n_components=n_components,
                           beta=0.01, tol=0.01)
    scores = []
    for k in range(5):
        est = PolynomialNetworkRegressor(random_state=k, **common_settings)
        y_pred = est.fit(X, noisy_y).predict(X)
        scores.append(mean_squared_error(noisy_y, y_pred))

    assert_less_equal(np.std(scores), 1e-4)
Project: polylearn | Author: scikit-learn-contrib
def check_improve(degree):
    y = _poly_predict(X, P, lams, kernel="anova", degree=degree)

    est = FactorizationMachineRegressor(degree=degree, n_components=5,
                                        fit_lower=None, fit_linear=False,
                                        beta=0.0001, max_iter=5, tol=0,
                                        random_state=0)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        y_pred_5 = est.fit(X, y).predict(X)
        est.set_params(max_iter=10)
        y_pred_10 = est.fit(X, y).predict(X)

    assert_less_equal(mean_squared_error(y, y_pred_10),
                      mean_squared_error(y, y_pred_5),
                      msg="More iterations do not improve fit.")
Project: polylearn | Author: scikit-learn-contrib
def test_random_starts():
    noisy_y = _poly_predict(X, P, lams, kernel="anova", degree=2)
    noisy_y += 5. * rng.randn(noisy_y.shape[0])
    X_train, X_test = X[:10], X[10:]
    y_train, y_test = noisy_y[:10], noisy_y[10:]

    scores = []
    # init_lambdas='ones' is important to reduce variance here
    reg = FactorizationMachineRegressor(degree=2, n_components=n_components,
                                        beta=5, fit_lower=None,
                                        fit_linear=False, max_iter=2000,
                                        init_lambdas='ones', tol=0.001)
    for k in range(10):
        reg.set_params(random_state=k)
        y_pred = reg.fit(X_train, y_train).predict(X_test)
        scores.append(mean_squared_error(y_test, y_pred))

    assert_less_equal(np.std(scores), 0.001)
Project: DeepST | Author: lucktroy
def rmse(Y_true, Y_pred):
    # https://www.kaggle.com/wiki/RootMeanSquaredError
    from sklearn.metrics import mean_squared_error
    print('shape:', Y_true.shape, Y_pred.shape)
    print("===RMSE===")
    # in
    RMSE = mean_squared_error(Y_true[:, 0].flatten(), Y_pred[:, 0].flatten())**0.5
    print('inflow: ', RMSE)
    # out
    if Y_true.shape[1] > 1:
        RMSE = mean_squared_error(Y_true[:, 1].flatten(), Y_pred[:, 1].flatten())**0.5
        print('outflow: ', RMSE)
    # new
    if Y_true.shape[1] > 2:
        RMSE = mean_squared_error(Y_true[:, 2].flatten(), Y_pred[:, 2].flatten())**0.5
        print('newflow: ', RMSE)
    # end
    if Y_true.shape[1] > 3:
        RMSE = mean_squared_error(Y_true[:, 3].flatten(), Y_pred[:, 3].flatten())**0.5
        print('endflow: ', RMSE)

    RMSE = mean_squared_error(Y_true.flatten(), Y_pred.flatten())**0.5
    print("total rmse: ", RMSE)
    print("===RMSE===")
    return RMSE
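All of the RMSE values above are derived as mean_squared_error(...) ** 0.5. Newer scikit-learn can return the root directly: mean_squared_error accepts squared=False from 0.22 on, and 1.4 added a dedicated root_mean_squared_error (squared= was deprecated in its favor). A sketch, assuming a version where squared= is still accepted:

import numpy as np
from sklearn.metrics import mean_squared_error

y_true = np.array([1.0, 2.0, 3.0])
y_pred = np.array([1.5, 2.0, 2.0])

rmse = mean_squared_error(y_true, y_pred, squared=False)  # RMSE in one call
assert np.isclose(rmse, mean_squared_error(y_true, y_pred) ** 0.5)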
Project: Photometric-Redshifts | Author: martiansideofthemoon
def k_vs_rms(START_K, END_K, STEP_K, training_data, labels, test_data, expected_labels, weights='distance'):
    num_points = int((END_K - START_K) / STEP_K) + 1
    points = np.zeros([num_points, 2])
    index = -1
    for K in range(START_K, END_K, STEP_K):
        print("k = " + str(K))
        index += 1
        output = knn_regression(K, training_data, labels, test_data, weights)
        v = np.column_stack((output, expected_labels))
        v = v[~np.isnan(v[:,0]),:]
        RMSE = mean_squared_error(v[:,0], v[:,1])**0.5
        points[index,0] = K
        points[index,1] = RMSE
    if points[-1,0] == 0 and points[-1,1] == 0:
        points = points[:-1,:]
    return points

# Test parameters
Project: EarlyWarning | Author: wjlei1990
def train(df_train, df_test):
    train_x, train_y = extract_feature_and_y(df_train)
    print("train x and y shape: {0} and {1}".format(
        train_x.shape, train_y.shape))
    test_x, test_y = extract_feature_and_y(df_test)
    print("test x and y shape: {0} and {1}".format(
        test_x.shape, test_y.shape))

    # print("train x nan:", np.isfinite(train_x).any())
    # print("train y nan:", np.isfinite(train_y).any())
    # print("test x nan:", np.isfinite(test_x).any())

    info = train_ridge_linear_model(train_x, train_y, test_x) 
    #info = train_lasso_model(train_x, train_y, test_x) 
    #info = train_EN_model(train_x, train_y, test_x) 

    _mse = mean_squared_error(test_y, info["y"])
    _std = np.std(test_y - info["y"])
    print("MSE on test data: %f" % _mse)
    print("std of error on test data: %f" % _std)

    plot_y(train_y, info["train_y"], test_y, info["y"])
Project: lstm_stock_prediction | Author: gregorymfoster
def train(X_train, y_train):
    model = Sequential()
    model.add(LSTM(
        lstm_neurons,
        batch_input_shape=(batch_size, X_train.shape[1], X_train.shape[2]),
        stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    for i in range(epochs):
        print('epoch', i + 1)  # each iteration of this loop is one training epoch
        model.fit(
            X_train,
            y_train,
            epochs=1,
            batch_size=batch_size,
            verbose=2,
            shuffle=False,
            validation_split=0.33)
        model.reset_states()
    return model
Project: The_Ultimate_Student_Hunt | Author: analyticsvidhya
def run_model(model, dtrain, predictor_var, target, scoring_method='mean_squared_error'):
    # KFold(n, k) and the 'mean_squared_error' scoring string are pre-0.18
    # scikit-learn APIs; current releases use KFold(n_splits=k) and 'neg_mean_squared_error'
    cv_method = KFold(len(dtrain), 5)
    cv_scores = cross_val_score(model, dtrain[predictor_var], dtrain[target], cv=cv_method, scoring=scoring_method)
    #print cv_scores, np.mean(cv_scores), np.sqrt((-1)*np.mean(cv_scores))

    dtrain_for_val = dtrain[dtrain['Year']<2000]
    dtest_for_val = dtrain[dtrain['Year']>1999]
    #cv_method = KFold(len(dtrain_for_val),5)
    #cv_scores_2 = cross_val_score(model,dtrain_for_val[predictor_var],dtrain_for_val[target],cv=cv_method,scoring=scoring_method)
    #print cv_scores_2, np.mean(cv_scores_2)

    dtrain_for_val_ini = dtrain_for_val[predictor_var]
    dtest_for_val_ini = dtest_for_val[predictor_var]
    model.fit(dtrain_for_val_ini,dtrain_for_val[target])
    pred_for_val = model.predict(dtest_for_val_ini)

    #print math.sqrt(mean_squared_error(dtest_for_val['Footfall'],pred_for_val))
Project: machine_deeplearning_workbench | Author: chandupydev
def arima(series, durations, order):
    X = series.values
    size = int(len(X) * 0.99)
    train, test = X[0:size], X[size:len(X)]
    history = [x for x in train]
    predictions = list()
    for t in range(len(test)):
        model = ARIMA(history, order=order)  # honor the order argument rather than hard-coding (5, 1, 0)
        model_fit = model.fit(disp=0)
        output = model_fit.forecast()
        yhat = output[0]
        predictions.append(yhat)
        obs = test[t]
        history.append(obs)
        print('predicted=%f, expected=%f' % (yhat, obs))
    error = mean_squared_error(test, predictions)
    print('Test MSE: %.3f' % error)
    return predictions 

# plot
Project: Waskom_PNAS_2017 | Author: WagnerLabPapers
def prediction_curve(dmat, vals, steps, radius):
    """Return MSE from predicting values from neighbors at radial steps."""
    # Set null distances (greater than some threshold) to 0.
    # Not in general a great idea, but fine here because we don't
    # do anything with identity edges, and sums will be faster
    # if we don't have to worry about nans
    dmat = np.nan_to_num(dmat)

    error_vals = []
    for step in steps:
        neighbors = (np.abs(dmat - step) < radius).astype(float)  # np.float was removed in NumPy 1.24
        neighbors /= neighbors.sum(axis=1, keepdims=True)
        predicted = neighbors.dot(vals)
        m = ~np.isnan(predicted)
        error_vals.append(mean_squared_error(vals[m], predicted[m]))
    return np.array(error_vals)
Project: pub | Author: drcannady
def _raw_rank(self, x, y, network):
        impt = np.zeros(x.shape[1])

        for i in range(x.shape[1]):
            hold = np.array(x[:, i])
            np.random.shuffle(x[:, i])

            # Handle both TensorFlow and SK-Learn models.
            if 'tensorflow' in str(type(network)).lower():
                pred = list(network.predict(x, as_iterable=True))
            else:
                pred = network.predict(x)

            mse = metrics.mean_squared_error(y, pred)  # plain MSE, not RMSE, despite the original variable name
            impt[i] = mse
            x[:, i] = hold

        return impt
Project: snape | Author: mbernico
def score_regression(y, y_hat, report=True):
    """
    Create a regression score report
    :param y: true target values
    :param y_hat: predicted values
    :param report: if True, print the report string
    :return: (mae, report_string) tuple
    """
    r2 = r2_score(y, y_hat)
    rmse = sqrt(mean_squared_error(y, y_hat))
    mae = mean_absolute_error(y, y_hat)

    report_string = "---Regression Score--- \n"
    report_string += "R2 = " + str(r2) + "\n"
    report_string += "RMSE = " + str(rmse) + "\n"
    report_string += "MAE = " + str(mae) + "\n"

    if report:
        print(report_string)

    return mae, report_string
Project: jamespy_py3 | Author: jskDr
def _cross_val_score_loo_r0(lm, X, y):
    """
    Leave-one-out cross-validation scored with sklearn's mean_squared_error.

    Returns
    -------
    The list of per-fold mean squared error values.
    """

    if len(y.shape) == 1:
        y = np.array([y]).T

    kf = cross_validation.LeaveOneOut(y.shape[0])  # pre-0.18 API; see the sketch after this function
    score_l = list()
    for tr, te in kf:
        lm.fit(X[tr, :], y[tr, :])
        yp = lm.predict(X[te, :])
        score_l.append(metrics.mean_squared_error(y[te, :], yp))

    return score_l
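The sklearn.cross_validation module used above was removed in scikit-learn 0.20. A sketch of the same leave-one-out loop against the current model_selection API, on stand-in data:

import numpy as np
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import LeaveOneOut

X = np.random.rand(20, 3)
y = np.random.rand(20, 1)

lm = LinearRegression()
score_l = []
for tr, te in LeaveOneOut().split(X):
    lm.fit(X[tr, :], y[tr, :])
    yp = lm.predict(X[te, :])
    score_l.append(metrics.mean_squared_error(y[te, :], yp))
print(np.mean(score_l))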
Project: jamespy_py3 | Author: jskDr
def gs_Ridge(xM, yV, alphas_log=(1, -1, 9), n_folds=5, n_jobs=-1, scoring='r2'):
    """
    Parameters
    -------------
    scoring: mean_absolute_error, mean_squared_error, median_absolute_error, r2
    """
    print('If scoring is an error metric rather than r2, the reported score is sign-reversed by the scorer convention!')
    print(xM.shape, yV.shape)

    clf = linear_model.Ridge()
    # params = {'alpha': np.logspace(1, -1, 9)}
    params = {'alpha': np.logspace(*alphas_log)}
    kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf_n = kf_n_c.split(xM)
    gs = model_selection.GridSearchCV(
        clf, params, scoring=scoring, cv=kf_n, n_jobs=n_jobs)

    gs.fit(xM, yV)

    return gs
Project: jamespy_py3 | Author: jskDr
def gs_Ridge(xM, yV, alphas_log=(1, -1, 9), n_folds=5, n_jobs=-1, scoring='r2'):
    """
    Parameters
    -------------
    scoring: mean_absolute_error, mean_squared_error, median_absolute_error, r2
    """
    print(xM.shape, yV.shape)

    clf = linear_model.Ridge()
    # params = {'alpha': np.logspace(1, -1, 9)}
    params = {'alpha': np.logspace(*alphas_log)}
    kf_n = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)  # pre-0.18 API
    gs = grid_search.GridSearchCV(clf, params, scoring=scoring, cv=kf_n, n_jobs=n_jobs)

    gs.fit( xM, yV)

    return gs
Project: analyzefit | Author: wsmorgan
def test_Validate():
    """Test that Validate function works correctly"""

    accuracy = an.validate(testing=True)

    val = mean_squared_error(y, slr.predict(X))

    assert np.allclose(accuracy,val)

    accuracy = an.validate(testing=True, X=X, y=y, metric=mean_squared_error)

    assert np.allclose(accuracy,val)

    accuracy = an.validate(testing=True, metric=[mean_squared_error, r2_score])
    val = [mean_squared_error(y, slr.predict(X)), r2_score(y, slr.predict(X))]

    assert np.allclose(accuracy,val)

    with pytest.raises(ValueError):
        an.validate(X=[1,2,3])
Project: stacking | Author: ikki407
def eval_pred(y_true, y_pred, eval_type):
    # ll and AUC are presumably the project's aliases for sklearn's
    # log_loss and roc_auc_score
    if eval_type == 'logloss':  # dispatch on the requested evaluation metric
        loss = ll(y_true, y_pred)
        print("logloss: ", loss)
        return loss

    elif eval_type == 'auc':
        loss = AUC(y_true, y_pred)
        print("AUC: ", loss)
        return loss

    elif eval_type == 'rmse':
        loss = np.sqrt(mean_squared_error(y_true, y_pred))
        print("rmse: ", loss)
        return loss




######### BaseModel Class #########
Project: WaNN | Author: TeoZosa
def K_FoldValidation(estimator, XMatrix, yVector, numFolds):
    numTrainingExamples = len(XMatrix)
    K = numFolds
    if K < 2:
        print("Error, K must be greater than or equal to 2")
        exit(-10)
    elif K > numTrainingExamples:
        print("Error, K must be less than or equal to the number of training examples")
        exit(-11)
    K_folds = model_selection.KFold(n_splits=K).split(XMatrix)  # KFold(n, K) was the pre-0.18 signature

    for k, (train_index, test_index) in enumerate(K_folds):
        X_train, X_test = XMatrix[train_index], XMatrix[test_index]
        y_train, y_test = yVector[train_index], yVector[test_index]
        # Fit
        estimator.fit(X_train, y_train, logdir='')

        # Predict and score
        score = metrics.mean_squared_error(estimator.predict(X_test), y_test)
        print('Iteration {0:f} MSE: {1:f}'.format(k+1, score))
Project: orange3-recommendation | Author: biolab
def test_input_data_continuous(self, learner, filename):
        # Load data
        data = Orange.data.Table(filename)

        # Train recommender
        recommender = learner(data)

        print(str(recommender) + ' trained')

        # Compute predictions
        y_pred = recommender(data)

        # Compute RMSE
        rmse = math.sqrt(mean_squared_error(data.Y, y_pred))
        print('-> RMSE (input data; continuous): %.3f' % rmse)

        # Check correctness
        self.assertGreaterEqual(rmse, 0)
Project: orange3-recommendation | Author: biolab
def test_input_data_discrete(self, learner, filename):
        # Load data
        data = Orange.data.Table(filename)

        # Train recommender
        recommender = learner(data)
        print(str(recommender) + ' trained')

        # Compute predictions
        y_pred = recommender(data)

        # Compute RMSE
        rmse = math.sqrt(mean_squared_error(data.Y, y_pred))
        print('-> RMSE (input data; discrete): %.3f' % rmse)

        # Check correctness
        self.assertGreaterEqual(rmse, 0)
Project: HousePricePredictionKaggle | Author: Nuwantha
def mean_squared_error_(ground_truth, predictions):
    # despite its name, this helper returns the *root* mean squared error
    return mean_squared_error(ground_truth, predictions) ** 0.5
Project: hyperband | Author: zygmuntz
def train_and_eval_sklearn_regressor( clf, data ):

    x_train = data['x_train']
    y_train = data['y_train']

    x_test = data['x_test']
    y_test = data['y_test'] 

    clf.fit( x_train, y_train ) 
    p = clf.predict( x_train )

    mse = MSE( y_train, p )
    rmse = sqrt( mse )
    mae = MAE( y_train, p )


    print("\n# training | RMSE: {:.4f}, MAE: {:.4f}".format(rmse, mae))

    #

    p = clf.predict( x_test )

    mse = MSE( y_test, p )
    rmse = sqrt( mse )
    mae = MAE( y_test, p )

    print("# testing  | RMSE: {:.4f}, MAE: {:.4f}".format(rmse, mae))

    return { 'loss': rmse, 'rmse': rmse, 'mae': mae }
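A hypothetical call, packing a toy train/test split into the data dict this function expects (MSE, MAE and sqrt inside the function above are presumably the script's aliases for sklearn's mean_squared_error / mean_absolute_error and math.sqrt):

from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=200, noise=10.0, random_state=0)
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

data = {'x_train': x_train, 'y_train': y_train,
        'x_test': x_test, 'y_test': y_test}
result = train_and_eval_sklearn_regressor(RandomForestRegressor(), data)
print(result)  # {'loss': rmse, 'rmse': rmse, 'mae': mae}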