Python sklearn.preprocessing 模块,PolynomialFeatures() 实例源码

我们从Python开源项目中,提取了以下26个代码示例,用于说明如何使用sklearn.preprocessing.PolynomialFeatures()

项目:MLBDailyProjections    作者:brendanahart    | 项目源码 | 文件源码
def mapFeatures(X, degree=4):
    '''
    MAPFEATURE Feature mapping function to polynomial features

    Expands the columns of X with ``PolynomialFeatures(degree)``: the result
    holds the bias column, the original features and their products up to
    the requested total degree (for two inputs X1, X2:
    1, X1, X2, X1^2, X1*X2, X2^2, ...).

    :param X: 2-D array-like, one row per sample and one column per feature
    :param degree: maximum total degree of the generated terms; defaults to
        4, the value that used to be hard-coded
    :return: XTransform, the expanded feature array
    '''
    poly = PolynomialFeatures(degree)
    XTransform = poly.fit_transform(X)

    return XTransform
项目:sparsereg    作者:Ohjeah    | 项目源码 | 文件源码
def fit(self, x, y=None):
    """Fit one feature-expansion + STRidge pipeline per target column.

    :param x: input samples.
    :param y: optional targets; when omitted, ``self.derivative.transform(x)``
        is used as the target instead.
    :return: self
    """
    # Targets are either supplied by the caller or derived from x.
    xdot = self.derivative.transform(x) if y is None else y

    # Symbolic features are only used when operators were configured.
    if self.operators is None:
        feature_transformer = PolynomialFeatures(degree=self.degree, include_bias=False)
    else:
        exponents = np.linspace(1, self.degree, self.degree)
        feature_transformer = SymbolicFeatures(exponents=exponents, operators=self.operators)

    regressor = STRidge(alpha=self.alpha, threshold=self.threshold, **self.kw)
    pipeline = Pipeline([("features", feature_transformer), ("model", regressor)])
    self.model = MultiOutputRegressor(pipeline, n_jobs=self.n_jobs)
    self.model.fit(x, xdot)

    # Expose the feature counts recorded by the first fitted feature step.
    fitted_features = self.model.estimators_[0].steps[0][1]
    self.n_input_features_ = fitted_features.n_input_features_
    self.n_output_features_ = fitted_features.n_output_features_
    return self
项目:microbiome-summer-school-2017    作者:aldro61    | 项目源码 | 文件源码
def fit_linear_regression(X, y, degree):
    """Fit a polynomial regression of the given degree and return the fitted pipeline.

    The pipeline expands X with ``PolynomialFeatures`` (no bias column) and
    then fits a ``LinearRegression`` on the expanded features.
    """
    steps = [
        ("polynomial_features", PolynomialFeatures(degree=degree, include_bias=False)),
        ("linear_regression", LinearRegression()),
    ]
    model = Pipeline(steps)
    return model.fit(X, y)
项目:orange3-educational    作者:biolab    | 项目源码 | 文件源码
def send_data(self):
    """Send the polynomially expanded data table to the output, or None without data.

    The selected x variable is expanded up to degree
    ``int(self.polynomialexpansion)``; rows whose x value is NaN are dropped,
    and the selected y variable is kept as the class variable.
    """
    if self.data is None:
        self.Outputs.data.send(None)
        return

    x_var = self.x_var_model[self.x_var_index]
    y_var = self.y_var_model[self.y_var_index]
    data_table = Table(Domain([x_var], class_vars=[y_var]), self.data)
    expander = skl_preprocessing.PolynomialFeatures(int(self.polynomialexpansion))

    # Keep only the rows without missing x values.
    keep = ~np.isnan(data_table.X).any(axis=1)
    expanded = expander.fit_transform(data_table.X[keep])
    x_attr = data_table.domain.attributes[0]
    x_label = x_attr.name

    # Append the matching targets as the last column.
    targets = data_table.Y[np.newaxis].T[keep]
    out_array = np.concatenate((expanded, targets), axis=1)

    # One output variable per expanded column: "1", x, x^2, ..., x^degree.
    columns = [ContinuousVariable("1")]
    if self.polynomialexpansion > 0:
        columns.append(x_attr)
    for power in range(2, int(self.polynomialexpansion) + 1):
        columns.append(ContinuousVariable("{}^{}".format(x_label, power)))
    out_domain = Domain(columns, class_vars=[y_var])

    self.Outputs.data.send(Table(out_domain, out_array))
项目:AlphaPy    作者:ScottFreeLLC    | 项目源码 | 文件源码
def get_polynomials(features, poly_degree):
    r"""Expand features with interaction terms (products of distinct features).

    Parameters
    ----------
    features : pandas.DataFrame
        Dataframe containing the features for generating interactions.
    poly_degree : int
        The maximum degree of the generated interaction terms.

    Returns
    -------
    poly_features : numpy array
        Output of ``PolynomialFeatures(interaction_only=True,
        include_bias=False)``: no bias column and no powers of a single
        feature.

    References
    ----------
    You can find more information on polynomial interactions here [POLY]_.

    .. [POLY] http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html

    """
    transformer = PolynomialFeatures(degree=poly_degree,
                                     interaction_only=True,
                                     include_bias=False)
    return transformer.fit_transform(features)


#
# Function get_text_features
#
项目:time_series_modeling    作者:rheineke    | 项目源码 | 文件源码
def scaled_pipelines():
    """Return the candidate pipelines: StandardScaler -> PCA -> model step(s).

    Each candidate is either a single estimator or a list of steps; every
    candidate is appended after the common scaling and PCA steps.
    """
    # Settings for the (currently disabled) RANSAC candidate.
    # 500 max trials takes 90s
    ransac_kwargs = dict(
        max_trials=1000,
        min_samples=5000,
        loss='absolute_loss',
        residual_threshold=2.0,
        random_state=_RANDOM_STATE,
    )
    # Regularisation grid for the (currently disabled) RidgeCV candidate.
    alphas = [.01, .1, 1, 10]

    candidates = [
        LinearRegression(),
        # [PolynomialFeatures(degree=2), LinearRegression()],
        # [PolynomialFeatures(degree=3), LinearRegression()],
        # RANSACRegressor(base_estimator=LinearRegression(), **ransac_kwargs),
        # RANSACRegressor with polynomial regression?
        # RidgeCV(alphas=alphas),
        # LassoCV(),  # Alphas set automatically by default
        # ElasticNetCV(l1_ratio=0.5),  # Same as default
        # [PolynomialFeatures(degree=2), ElasticNetCV(l1_ratio=0.5)],
        # SGDRegressor(),
    ]

    pipelines = []
    for candidate in candidates:
        # Fresh preprocessing steps per pipeline so no estimator is shared.
        tail = candidate if isinstance(candidate, list) else [candidate]
        pipelines.append(
            make_pipeline(StandardScaler(), PCA(**_PCA_KWARGS), *tail))
    return pipelines
项目:mlprojects-py    作者:srinathperera    | 项目源码 | 文件源码
def get_models4ensamble(conf):
    """Return the list of base models used for the ensemble.

    The XGBoost tree depth and minimum child weight depend on
    ``conf.command``; all other XGBoost parameters are shared.
    """
    # see http://scikit-learn.org/stable/modules/linear_model.html

    #0 was too big to run with depth set to 1, and 1 was overfitting a bit
    if conf.command == 1:
        max_depth, min_child_weight = 3, 5
    else:
        max_depth, min_child_weight = 10, 8

    xgb_params = {
        "objective": "reg:linear",
        "booster": "gbtree",
        "max_depth": max_depth,
        "eta": 0.1,
        "min_child_weight": min_child_weight,
        "subsample": 0.5,
        "nthread": 4,
        "colsample_bytree": 0.5,
        "num_parallel_tree": 1,
        'gamma': 0,
    }

    # Alternatives that were tried and are currently disabled: DLModel,
    # BayesianRidge, LassoLars, Lasso(alpha=0.1 / 0.2), Ridge(alpha=.5),
    # a degree-3 PolynomialFeatures + LinearRegression pipeline and
    # AdaBoostRegressor(loss='square').
    return [
        XGBoostModel(conf, xgb_params, use_cv=True),
        LRModel(conf, model=linear_model.Lasso(alpha = 0.3)),
        RFRModel(conf, RandomForestRegressor(oob_score=True, n_jobs=4)),
        ETRModel(conf, model=ExtraTreesRegressor(n_jobs=4)),
    ]
项目:simple-linear-regression    作者:williamd4112    | 项目源码 | 文件源码
def polynomial(self, X, deg=1):
    """Return X expanded with ``PolynomialFeatures(deg)`` (linear by default)."""
    expander = PolynomialFeatures(deg)
    return expander.fit_transform(X)
项目:SINDy    作者:loiseaujc    | 项目源码 | 文件源码
def Identified_Model(y, t, library, estimator) :

    '''
    Simulates the model from Sparse identification.

    Inputs
    ------

    y: 1-D state vector at which the derivatives are evaluated. Any number
       of state variables is accepted (previously exactly three).

    t: unused; presumably present so the function matches the signature
       expected by an ODE integrator -- confirm against the caller.

    library: library object used in the sparse identification
             (e.g. poly_lib = PolynomialFeatures(degree=3) )

    estimator: estimator object obtained from the sparse identification.
               Its ``coef_`` must hold the coefficients of all state
               variables stacked one after the other.

    Output
    ------

    dy : numpy array object containing the derivatives evaluated using the
         model identified from sparse regression.

    '''

    # Evaluate the library on the state treated as a single sample.
    state = np.asarray(y).reshape(1, -1)
    lib = library.fit_transform(state)

    # One copy of the library row per state variable, so row i of Theta only
    # meets the coefficients that belong to state variable i.
    Theta = block_diag(*([lib] * state.shape[1]))

    return Theta.dot(estimator.coef_)
项目:fuku-ml    作者:fukuball    | 项目源码 | 文件源码
def feature_transform(X, mode='polynomial', degree=1):
    """Expand X into polynomial features, optionally mapped through a Legendre polynomial.

    :param X: input feature matrix.
    :param mode: with 'legendre' the expanded matrix is additionally passed
        through ``legendre(degree)``; any other value returns the plain
        polynomial expansion.
    :param degree: degree of the expansion (and of the Legendre polynomial).
    :return: the transformed feature matrix.
    """
    process_X = PolynomialFeatures(degree).fit_transform(X)

    if mode != 'legendre':
        return process_X

    # NOTE(review): `legendre` is presumably scipy.special.legendre, whose
    # result is callable on arrays -- confirm against the file's imports.
    return legendre(degree)(process_X)
项目:DeepIV    作者:jhartford    | 项目源码 | 文件源码
def fit_twosls(x, z, t, y):
    '''
    Two stage least squares with polynomial basis function.

    Stage one regresses the treatment ``t`` on the features ``x`` and the
    instruments ``z``; stage two regresses ``y`` on ``x`` and the stage-one
    prediction of ``t``. Both stages grid-search the polynomial degree
    (1 to 3) and the ridge penalty with 5-fold cross-validation.

    :param x: 2-D array of features (may have zero columns).
    :param z: 2-D array of instruments (may have zero columns).
    :param t: treatment values; must be 2-D (one row per sample) so the
        stage-one prediction can be concatenated with ``x``.
    :param y: response values.
    :return: ``g_hat(x, z, t)``, a function predicting the response from
        features and treatment (``z`` is accepted but not used).
    '''
    params = dict(poly__degree=range(1,4),
                  ridge__alpha=np.logspace(-5, 5, 11))

    def _new_search():
        # Fresh polynomial-ridge pipeline wrapped in a grid search.
        pipe = Pipeline([('poly', PolynomialFeatures()),
                         ('ridge', Ridge())])
        return GridSearchCV(pipe, param_grid=params, cv=5)

    # Always combine features and instruments. The previous special case for
    # an empty ``z`` used ``z`` itself as the design matrix, which dropped
    # ``x`` and left a matrix without columns; concatenation copes with a
    # zero-column block on either side.
    X = np.concatenate([x, z], axis=1)

    stage_1 = _new_search()
    stage_1.fit(X,t)
    t_hat = stage_1.predict(X)
    print("First stage paramers: " + str(stage_1.best_params_ ))

    stage_2 = _new_search()
    X2 = np.concatenate([x,t_hat], axis=1)
    stage_2.fit(X2, y)
    print("Best in sample score: %f" % stage_2.score(X2, y))
    print("Second stage paramers: " + str(stage_2.best_params_  ))

    def g_hat(x,z,t):
        X_new = np.concatenate([x, t], axis=1)
        return stage_2.predict(X_new)
    return g_hat
项目:Yukiwian    作者:kiwiloveskiwis    | 项目源码 | 文件源码
def measure(y):
    """Fit a degree-10 polynomial trend to ``y`` and score it with ``comp``.

    The sample positions are 1..len(y) (the series length was previously
    hard-coded to 183). The trend is a least-squares fit on the polynomial
    features without a separate intercept, because the expansion already
    contains the bias column.

    :param y: sequence of observed values.
    :return: ``comp(fitted, y)``; the meaning of the score is defined by
        ``comp``, which lives elsewhere in the module.
    """
    n_points = len(y)
    x = np.linspace(1, n_points, n_points)
    X = x[:, np.newaxis]

    pred = Pipeline([('poly', PolynomialFeatures(10)),
                     ('linear', LinearRegression(fit_intercept=False))])
    pred.fit(X, y)
    y_ex = pred.predict(X)

    return comp(y_ex, y)
项目:Kaggle_HomeDepot    作者:ChenglongChen    | 项目源码 | 文件源码
def fit(self, X, y):
    """Fit ``self.n_estimators`` ridge models on random row/column subsets.

    For every estimator a feature subset and a sample subset are drawn with
    seeds derived from ``self.random_state``; when ``self.poly`` is set the
    subset is expanded to degree-2 polynomial features without the bias
    column. The fitted models and their feature indices are stored by
    position in ``self.ridge_list`` and ``self.feature_idx_list``.

    :return: self
    """
    n_samples, n_features = X.shape
    for k in range(self.n_estimators):
        model = Ridge(alpha=self.alpha, normalize=self.normalize, random_state=self.random_state)
        cols = self._random_feature_idx(n_features, self.random_state + k * 100)
        rows = self._random_sample_idx(n_samples, self.random_state + k * 10)
        design = X[rows][:, cols]
        if self.poly:
            # Column 0 of the expansion is the constant term; drop it.
            design = PolynomialFeatures(degree=2).fit_transform(design)[:, 1:]
        model.fit(design, y[rows])
        self.ridge_list[k] = model
        self.feature_idx_list[k] = cols
    return self
项目:Kaggle_HomeDepot    作者:ChenglongChen    | 项目源码 | 文件源码
def predict(self, X):
    """Return the mean prediction of all fitted ridge models.

    Every model sees only the feature columns it was trained on, expanded to
    degree-2 polynomial features (bias column dropped) when ``self.poly`` is
    set.

    :param X: 2-D array of samples.
    :return: 1-D array with one averaged prediction per row of X.
    """
    per_model = np.zeros((X.shape[0], self.n_estimators))
    for k in range(self.n_estimators):
        cols = self.feature_idx_list[k]
        design = X[:, cols]
        if self.poly:
            design = PolynomialFeatures(degree=2).fit_transform(design)[:, 1:]
        per_model[:, k] = self.ridge_list[k].predict(design)
    return per_model.mean(axis=1)
项目:mlbootcamp_5    作者:ivan-filonov    | 项目源码 | 文件源码
def gen_features(train, y, test):
    """Add derived and sanity-flag columns to the train and test frames.

    Both frames must provide the columns ``ap_hi``, ``ap_lo``, ``height``
    and ``weight``. Added columns:

    * ``ap_diff``: ``ap_hi - ap_lo``
    * ``BWI``: ``weight / (height / 100) ** 2``
    * ``bad_bwi``: 1 when BWI is above 60 or below 10
    * ``bad_height``: 1 when height is below 130
    * ``bad_weight``: 1 when ``weight + 120 < height``
    * ``bad_ap_hi``: 1 when ap_hi is below 80 or above 220
    * ``bad_ap_lo``: 1 when ap_lo is below 40 or above 200
    * ``has_bad_data``: True when any of the flags above is set

    The degree-3 polynomial expansion that used to be computed here was
    never used and has been removed. The flags are computed with boolean
    masks instead of ``DataFrame.ix``, which no longer exists in pandas.

    :param train: training DataFrame (left unmodified).
    :param y: training targets, returned unchanged.
    :param test: test DataFrame (left unmodified).
    :return: ``(train_with_features, y, test_with_features)``
    """
    ntrain = len(train)
    df_all = pd.concat([train, test])

    df_all['ap_diff'] = df_all.ap_hi - df_all.ap_lo

    h = df_all['height'] / 100
    df_all['BWI'] = df_all['weight'] / (h * h)

    # Flags are assigned as plain arrays so that duplicate index labels,
    # which concatenating train and test usually produces, cannot matter.
    df_all['bad_bwi'] = ((df_all.BWI > 60) | (df_all.BWI < 10)).values.astype(int)
    df_all['bad_height'] = (df_all.height < 130).values.astype(int)
    df_all['bad_weight'] = (df_all.weight + 120 < df_all.height).values.astype(int)
    df_all['bad_ap_hi'] = ((df_all.ap_hi < 80) | (df_all.ap_hi > 220)).values.astype(int)
    df_all['bad_ap_lo'] = ((df_all.ap_lo < 40) | (df_all.ap_lo > 200)).values.astype(int)

    flag_columns = ['bad_bwi', 'bad_height', 'bad_weight', 'bad_ap_hi', 'bad_ap_lo']
    df_all['has_bad_data'] = (df_all[flag_columns].sum(axis=1) > 0).values

    # Split back by position: the first ntrain rows came from `train`.
    return df_all.iloc[:ntrain].copy(), y, df_all.iloc[ntrain:].copy()
项目:aq_weather    作者:eliucidate    | 项目源码 | 文件源码
def multireg(self,Xtrain,ytrain, Xtest, ytest):
    """Fit an ordinary least-squares model and print train/CV/test diagnostics.

    "Accuracy" below is the share of predictions that equal the target once
    both are rounded to one decimal place.

    The print calls use a single pre-formatted string so the function parses
    on Python 3 and prints the same text as the former Python 2 print
    statements.

    NOTE(review): the return value of ``self.normalize`` is discarded, so it
    presumably scales its argument in place -- confirm. If it does, the
    "test score" line is computed before ``Xtest`` has been normalised.

    :param Xtrain: training feature matrix.
    :param ytrain: training targets.
    :param Xtest: test feature matrix.
    :param ytest: test targets.
    :return: the fitted model's ``coef_``.
    """
    def _match_rate(predicted, actual):
        # Share of predictions matching the target at one decimal place.
        # Index-based access is kept on purpose: `actual` may be a container
        # whose [] lookup is not positional.
        hits = 0
        for i in range(len(predicted)):
            if round(predicted[i], 1) == round(actual[i], 1):
                hits += 1
        return hits * 1.0 / len(actual)

    self.normalize(Xtrain)
    # A degree-2 PolynomialFeatures expansion of Xtrain/Xtest was tried at
    # this point and is currently disabled.

    # normal clf fit
    clf = linear_model.LinearRegression()
    clf.fit(Xtrain, ytrain)
    coefficients = clf.coef_
    print("coefficients: %s" % (coefficients,))
    print("intercept: %s" % (clf.intercept_,))

    print("train score %s" % (clf.score(Xtrain, ytrain),))
    print("test score %s" % (clf.score(Xtest, ytest),))

    # manual calculate train accuracy
    train_results = clf.predict(Xtrain)
    print("first x: %s" % (Xtrain[0],))
    print("first result: %s" % (train_results[0],))
    accuracy = _match_rate(train_results, ytrain)
    print("train accuracy:  %s %%" % (accuracy * 100,))

    # cross validation
    score = cross_validation.cross_val_score(clf, Xtrain, ytrain, scoring='mean_squared_error', cv = 5)
    print("cross validation score:  %s" % (score,))

    predict = cross_val_predict(clf, Xtrain, ytrain, cv = 5)
    accuracy = _match_rate(predict, ytrain)
    print("cross validation accuracy:  %s %%" % (accuracy * 100,))

    # manual calculate test accuracy
    self.normalize(Xtest)
    results = clf.predict(Xtest)
    accuracy = _match_rate(results, ytest)
    print("test accuracy:  %s %%" % (accuracy * 100,))

    return coefficients
项目:reinforcement-learning-market-microstructure    作者:jacobkahn    | 项目源码 | 文件源码
def __init__(self, T, L, backup):
    """Set up the feature pre-processor and the Q estimator(s) for a backup mode.

    :param T: stored as ``self.T``.
    :param L: stored as ``self.L``.
    :param backup: mapping whose ``'name'`` entry selects the mode:
        ``'sampling'`` creates ``self.Q``; ``'doubleQ'`` creates ``self.Q_1``
        and ``self.Q_2``; ``'replay buffer'`` creates ``self.Q`` and an empty
        ``self.buff``. Any other name only prints a message and leaves the
        estimators unset.
    """
    self.backup = backup
    self.T = T
    self.L = L
    self.pre_process = PolynomialFeatures(degree=2, include_bias=False)

    def _new_q():
        # Every mode uses the same SGD regressor configuration.
        return linear_model.SGDRegressor(loss='huber', penalty='l2', learning_rate='invscaling', eta0=0.1, power_t=0.25, warm_start=False)

    name = self.backup['name']
    if name == 'sampling':
        self.Q = _new_q()
    elif name == 'doubleQ':
        self.Q_1 = _new_q()
        self.Q_2 = _new_q()
    elif name == 'replay buffer':
        self.Q = _new_q()
        self.buff = []
    else:
        # Parenthesised form prints the same on Python 2 and parses on 3.
        print("Illegal Backup Type")
项目:Black-Swan    作者:12190143    | 项目源码 | 文件源码
def transform_pf(data, degree=2):
    """Return ``data`` expanded with polynomial features up to ``degree`` (quadratic by default)."""
    expander = PolynomialFeatures(degree=degree)
    return expander.fit_transform(data)


# Aggregate statistics: max min sum std mean median
项目:AliMusicTrendPredict    作者:strint    | 项目源码 | 文件源码
def fitJA(j, start_date_rank):
    """Plot polynomial ridge fits (degree 1 to 3) of one artist's daily play counts.

    The model is trained on day ranks ``start_date_rank`` to 182 and drawn
    over ranks ``start_date_rank`` to 243 together with the training points.

    The print calls are written in the parenthesised single-argument form,
    which prints the same on Python 2 and also parses on Python 3.

    :param j: position of the artist in the module-level
        ``artists_play_inday`` sequence.
    :param start_date_rank: first day rank used for training; also the
        number of leading entries of the artist's series that are skipped.
    """
    pltf.clf()
    p = artists_play_inday[j][start_date_rank:]
    print(p)

    # Dense per-day counts; days without an entry stay at 0. Every entry is
    # indexed as (count, day rank).
    apcount = [0] * (183 - start_date_rank)
    apdate = range(start_date_rank, 183)
    for entry in p:
        apcount[entry[1] - start_date_rank] = entry[0]

    print(apcount)

    d_train = np.asarray(apdate)
    c_train = np.asarray(apcount)

    # create matrix versions of these arrays
    D_train = d_train[:, np.newaxis]
    d_test_plot = np.asarray(range(start_date_rank, 244))
    D_test_plot = d_test_plot[:, np.newaxis]

    pltf.scatter(d_train, c_train, label="training points")

    for degree in [1,2,3]:
        model = make_pipeline(PolynomialFeatures(degree), Ridge())
        model.fit(D_train, c_train)
        c_test_plot = model.predict(D_test_plot)
        pltf.plot(d_test_plot, c_test_plot, label="degree %d" % degree)

    pltf.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=5, mode="expand", borderaxespad=0.)
    pltf.show()
项目:AliMusicTrendPredict    作者:strint    | 项目源码 | 文件源码
def pred(degree):
    """Forecast 61 days of play counts for the first 50 artists and write them to CSV.

    For every artist a polynomial ridge model of the given degree is fitted
    on day ranks 0 to 183 and evaluated on ranks 184 to 244. Forecasts are
    rounded up and clipped at zero. Each output row is
    ``artist_id, play_num, date`` and is also printed.

    The output file is managed by a ``with`` block so it is closed even when
    fitting or writing raises.

    :param degree: polynomial degree used for every artist.
    """
    predict_file_path = "./data/mars_tianchi_artist_plays_predict.csv"
    # NOTE(review): binary mode is what the Python 2 csv module expects; on
    # Python 3 csv.writer needs a text-mode file opened with newline=''.
    with open(predict_file_path, 'wb') as fp:
        fpwriter = csv.writer(fp, delimiter=',', quotechar='"', quoting=csv.QUOTE_NONE)
        for j in range(0, 50):
            # Dense per-day counts; every entry is indexed as
            # (count, day rank).
            apcount = [0] * 184
            apdate = range(0, 184)
            for entry in artists_play_inday[j]:
                apcount[entry[1]] = entry[0]

            x = np.asarray(apdate)
            X = x[:, np.newaxis]
            y = np.asarray(apcount)

            x_future = np.asarray(range(184, 245))
            X_future = x_future[:, np.newaxis]

            model = make_pipeline(PolynomialFeatures(degree), Ridge())
            model.fit(X, y)
            y_future = model.predict(X_future)

            artist_id = artists_rank_to_id[j]
            for idx in range(0, 61):
                date = rank_to_date[x_future[idx]]
                # Play counts cannot be negative.
                play_num = max(0, int(math.ceil(y_future[idx])))
                row = [artist_id, play_num, date]
                print(row)
                fpwriter.writerow(row)
项目:AliMusicTrendPredict    作者:strint    | 项目源码 | 文件源码
def pred(degree):
    """Forecast 61 days of play counts for the first 50 artists and write them to CSV.

    For every artist a polynomial ridge model of the given degree is fitted
    on day ranks 0 to 183 and evaluated on ranks 184 to 244. Forecasts are
    rounded up and clipped at zero. Each output row is
    ``artist_id, play_num, date`` and is also printed.

    The output file is managed by a ``with`` block so it is closed even when
    fitting or writing raises.

    :param degree: polynomial degree used for every artist.
    """
    predict_file_path = "./data/mars_tianchi_artist_plays_predict.csv"
    # NOTE(review): binary mode is what the Python 2 csv module expects; on
    # Python 3 csv.writer needs a text-mode file opened with newline=''.
    with open(predict_file_path, 'wb') as fp:
        fpwriter = csv.writer(fp, delimiter=',', quotechar='"', quoting=csv.QUOTE_NONE)
        for j in range(0, 50):
            # Dense per-day counts; every entry is indexed as
            # (count, day rank).
            apcount = [0] * 184
            apdate = range(0, 184)
            for entry in artists_play_inday[j]:
                apcount[entry[1]] = entry[0]

            x = np.asarray(apdate)
            X = x[:, np.newaxis]
            y = np.asarray(apcount)

            x_future = np.asarray(range(184, 245))
            X_future = x_future[:, np.newaxis]

            model = make_pipeline(PolynomialFeatures(degree), Ridge())
            model.fit(X, y)
            y_future = model.predict(X_future)

            artist_id = artists_rank_to_id[j]
            for idx in range(0, 61):
                date = rank_to_date[x_future[idx]]
                # Play counts cannot be negative.
                play_num = max(0, int(math.ceil(y_future[idx])))
                row = [artist_id, play_num, date]
                print(row)
                fpwriter.writerow(row)
项目:AliMusicTrendPredict    作者:strint    | 项目源码 | 文件源码
def predDegs(degree, start_date_rank_list):
    """Forecast 60 days of play counts per artist with per-artist degree and start day.

    Artist ``j`` is fitted with a polynomial ridge model of degree
    ``degree[j]`` on day ranks ``start_date_rank_list[j]`` to 182 and
    evaluated on ranks 184 to 243. Forecasts are rounded up and clipped at
    zero. Each output row is ``artist_id, play_num, date`` and is also
    printed.

    The output file is managed by a ``with`` block so it is closed even when
    fitting or writing raises.

    :param degree: sequence holding one polynomial degree per artist.
    :param start_date_rank_list: sequence holding the first training day
        rank per artist; the same number of leading entries of the artist's
        series is skipped.
    """
    predict_file_path = "./data/mars_tianchi_artist_plays_predict.csv"
    # NOTE(review): binary mode is what the Python 2 csv module expects; on
    # Python 3 csv.writer needs a text-mode file opened with newline=''.
    with open(predict_file_path, 'wb') as fp:
        fpwriter = csv.writer(fp, delimiter=',', quotechar='"', quoting=csv.QUOTE_NONE)
        for j in range(0, 50):
            start_date_rank = start_date_rank_list[j]
            p = artists_play_inday[j][start_date_rank:]

            # Dense per-day counts relative to the start day; every entry
            # is indexed as (count, day rank).
            apcount = [0] * (183 - start_date_rank)
            apdate = range(start_date_rank, 183)
            for entry in p:
                apcount[entry[1] - start_date_rank] = entry[0]

            d_train = np.asarray(apdate)
            c_train = np.asarray(apcount)

            # create matrix versions of these arrays
            D_train = d_train[:, np.newaxis]

            d_future = np.asarray(range(184, 244))
            D_future = d_future[:, np.newaxis]

            model = make_pipeline(PolynomialFeatures(degree[j]), Ridge())
            model.fit(D_train, c_train)
            c_future = model.predict(D_future)

            artist_id = artists_rank_to_id[j]
            for idx in range(0, 60):
                date = rank_to_date[d_future[idx]]
                # Play counts cannot be negative.
                play_num = max(0, int(math.ceil(c_future[idx])))
                row = [artist_id, play_num, date]
                print(row)
                fpwriter.writerow(row)
项目:strategy    作者:kanghua309    | 项目源码 | 文件源码
def BasicFactorRegress(inputs, window_length, mask, n_fwd_days, algo_mode=None, cross=True):
    """Build a factor that fits ``algo_mode`` on windowed factor data and predicts from the latest row.

    :param inputs: inputs handed to the factor; ``compute`` receives the
        first one as ``returns`` and the remaining ones as factor values
        (presumably the usual CustomFactor calling convention -- confirm).
    :param window_length: forwarded to the factor constructor.
    :param mask: forwarded to the factor constructor.
    :param n_fwd_days: number of days between a factor observation and the
        return it is paired with during training.
    :param algo_mode: estimator exposing ``fit(X, Y)`` and ``predict(X)``.
    :param cross: when truthy, the factors are expanded with pairwise
        interaction terms (``PolynomialFeatures(interaction_only=True)``)
        both for fitting and for prediction.
    :return: an instance of the locally defined factor class.
    """
    class BasicFactorRegress(CustomFactor):
        # params = {'trigger_date': None, }
        init = False
        # Fitted interaction expander; None while `cross` is off.
        featurizer = None

        def __shift_mask_data(self, X, Y, n_fwd_days=1):
            # Shift X to match factors at t to returns at t+n_fwd_days (we want to predict future returns after all)
            shifted_X = np.roll(X, n_fwd_days, axis=0)
            # Slice off rolled elements
            X = shifted_X[n_fwd_days:]
            Y = Y[n_fwd_days:]
            n_time, n_stocks, n_factors = X.shape
            # Flatten X
            X = X.reshape((n_time * n_stocks, n_factors))
            Y = Y.reshape((n_time * n_stocks))
            return X, Y

        def __get_last_values(self, input_data):
            # Last row of every input, arranged as (stocks, factors).
            return np.vstack([dataset[-1] for dataset in input_data]).T

        def compute(self, today, assets, out, returns, *inputs):
            if not self.init:
                self.clf = algo_mode
                X = np.dstack(inputs)  # (time, stocks, factors)

                Y = returns  # (time, stocks)
                # Pair factors at t with returns at t + n_fwd_days.
                X, Y = self.__shift_mask_data(X, Y, n_fwd_days)
                X = np.nan_to_num(X)
                Y = np.nan_to_num(Y)
                if cross:
                    self.featurizer = PolynomialFeatures(interaction_only=True)
                    X = self.featurizer.fit_transform(X)
                else:
                    self.featurizer = None

                self.clf.fit(X, Y)
                # self.init = True
            last_factor_values = self.__get_last_values(inputs)
            last_factor_values = np.nan_to_num(last_factor_values)
            # The model was fitted on expanded features, so the prediction
            # input needs the same expansion; feeding raw factors gives the
            # estimator a different number of columns than it was fitted on.
            if self.featurizer is not None:
                last_factor_values = self.featurizer.transform(last_factor_values)

            out[:] = self.clf.predict(last_factor_values)

    return BasicFactorRegress(inputs=inputs, window_length=window_length, mask=mask)
项目:strategy    作者:kanghua309    | 项目源码 | 文件源码
def BasicFactorRegress(inputs, window_length, mask, n_fwd_days, algo_mode=None, cross=True):
    """Build a factor that fits ``algo_mode`` on windowed factor data and predicts from the latest row.

    :param inputs: inputs handed to the factor; ``compute`` receives the
        first one as ``returns`` and the remaining ones as factor values
        (presumably the usual CustomFactor calling convention -- confirm).
    :param window_length: forwarded to the factor constructor.
    :param mask: forwarded to the factor constructor.
    :param n_fwd_days: number of days between a factor observation and the
        return it is paired with during training.
    :param algo_mode: estimator exposing ``fit(X, Y)`` and ``predict(X)``.
    :param cross: when truthy, the factors are expanded with pairwise
        interaction terms (``PolynomialFeatures(interaction_only=True)``)
        both for fitting and for prediction.
    :return: an instance of the locally defined factor class.
    """
    class BasicFactorRegress(CustomFactor):
        # params = {'trigger_date': None, }
        init = False
        # Fitted interaction expander; None while `cross` is off.
        featurizer = None

        def __shift_mask_data(self, X, Y, n_fwd_days=1):
            # Shift X to match factors at t to returns at t+n_fwd_days (we want to predict future returns after all)
            shifted_X = np.roll(X, n_fwd_days, axis=0)
            # Slice off rolled elements
            X = shifted_X[n_fwd_days:]
            Y = Y[n_fwd_days:]
            n_time, n_stocks, n_factors = X.shape
            # Flatten X
            X = X.reshape((n_time * n_stocks, n_factors))
            Y = Y.reshape((n_time * n_stocks))
            return X, Y

        def __get_last_values(self, input_data):
            # Last row of every input, arranged as (stocks, factors).
            return np.vstack([dataset[-1] for dataset in input_data]).T

        def compute(self, today, assets, out, returns, *inputs):
            if not self.init:
                self.clf = algo_mode
                X = np.dstack(inputs)  # (time, stocks, factors)
                Y = returns  # (time, stocks)
                # Pair factors at t with returns at t + n_fwd_days.
                X, Y = self.__shift_mask_data(X, Y, n_fwd_days)
                X = np.nan_to_num(X)
                Y = np.nan_to_num(Y)
                if cross:
                    self.featurizer = PolynomialFeatures(interaction_only=True)
                    X = self.featurizer.fit_transform(X)
                else:
                    self.featurizer = None

                self.clf.fit(X, Y)
                # self.init = True
            last_factor_values = self.__get_last_values(inputs)
            last_factor_values = np.nan_to_num(last_factor_values)
            # The model was fitted on expanded features, so the prediction
            # input needs the same expansion; feeding raw factors gives the
            # estimator a different number of columns than it was fitted on.
            if self.featurizer is not None:
                last_factor_values = self.featurizer.transform(last_factor_values)

            out[:] = self.clf.predict(last_factor_values)

    return BasicFactorRegress(inputs=inputs, window_length=window_length, mask=mask)
项目:aq_weather    作者:eliucidate    | 项目源码 | 文件源码
def ridge_multireg(self,Xtrain,ytrain, Xtest, ytest):
    """Fit a ridge regression (alpha = 10000) and print train/CV/test diagnostics.

    "Accuracy" below is the share of predictions that equal the target once
    both are rounded to one decimal place.

    The print calls use a single pre-formatted string so the function parses
    on Python 3 and prints the same text as the former Python 2 print
    statements.

    NOTE(review): the return value of ``self.normalize`` is discarded, so it
    presumably scales its argument in place -- confirm. If it does, the
    "test score" line is computed before ``Xtest`` has been normalised.

    :param Xtrain: training feature matrix.
    :param ytrain: training targets.
    :param Xtest: test feature matrix.
    :param ytest: test targets.
    :return: the fitted model's ``coef_``.
    """
    def _match_rate(predicted, actual):
        # Share of predictions matching the target at one decimal place.
        # Index-based access is kept on purpose: `actual` may be a container
        # whose [] lookup is not positional.
        hits = 0
        for i in range(len(predicted)):
            if round(predicted[i], 1) == round(actual[i], 1):
                hits += 1
        return hits * 1.0 / len(actual)

    self.normalize(Xtrain)
    # A degree-2 PolynomialFeatures expansion of Xtrain/Xtest was tried at
    # this point and is currently disabled.

    # normal clf try
    clf = linear_model.Ridge(alpha = 10000)
    clf.fit(Xtrain, ytrain)
    coefficients = clf.coef_
    print("train score %s" % (clf.score(Xtrain, ytrain),))
    print("test score %s" % (clf.score(Xtest, ytest),))

    # manual calculate train accuracy
    train_results = clf.predict(Xtrain)
    accuracy = _match_rate(train_results, ytrain)
    print("train accuracy:  %s %%" % (accuracy * 100,))

    # cross validation
    score = cross_validation.cross_val_score(clf, Xtrain, ytrain, scoring='mean_squared_error', cv = 5)
    print("cross validation score:  %s" % (score,))
    # An accuracy check based on cross_val_predict (rounded to integers) was
    # tried at this point and is currently disabled.

    # manual calculate test accuracy
    self.normalize(Xtest)
    results = clf.predict(Xtest)
    accuracy = _match_rate(results, ytest)
    print("test accuracy:  %s %%" % (accuracy * 100,))

    return coefficients
项目:AliMusicTrendPredict    作者:strint    | 项目源码 | 文件源码
def test(degree):
    """Back-test a polynomial ridge model per artist and print the weighted score.

    For each of the first 50 artists the model is fitted on day ranks 0 to
    121 and evaluated on ranks 122 to 183. Per artist:

    * error rate: root mean square of the relative errors of the rounded-up
      predictions (true counts of 0 are treated as 1 in the error term),
    * weight: square root of the summed true counts of the test window,
    * f: ``(1 - error rate) * weight``.

    The total ``F`` is the sum of all ``f`` values. Nothing is returned; the
    per-artist lists and ``F`` are printed.

    The print calls use a single pre-formatted string so the function parses
    on Python 3 and prints the same text as the former Python 2 print
    statements.

    :param degree: polynomial degree used for every artist.
    """
    error_rate_of_artist = []
    weight_of_artist = []
    f_of_artist = []
    F = 0.0
    for j in range(0, 50):
        # Dense per-day counts; every entry is indexed as (count, day rank).
        apcount = [0] * 184
        apdate = range(0, 184)
        for entry in artists_play_inday[j]:
            apcount[entry[1]] = entry[0]

        x = np.asarray(apdate[:122])
        x_test = np.asarray(apdate[122:])
        X = x[:, np.newaxis]
        y = np.asarray(apcount[:122])
        y_test_true = np.asarray(apcount[122:])

        X_test = x_test[:, np.newaxis]

        model = make_pipeline(PolynomialFeatures(degree), Ridge())
        model.fit(X, y)
        y_test_pred = model.predict(X_test)

        error_rate_pow2_sum = 0.0
        weight = 0.0
        for idx in range(0, len(x_test)):
            y_true = y_test_true[idx]
            if y_true == 0:
                y_true = 1 # deal with divide by zero

            predicted = int(math.ceil(y_test_pred[idx]))
            error_rate_pow2_sum += (float(predicted - y_true) / float(y_true)) ** 2
            # The weight uses the unmodified true count, zeros included.
            weight += y_test_true[idx]

        error_rate_j = math.sqrt(error_rate_pow2_sum / float(len(x_test)))
        error_rate_of_artist.append(error_rate_j)
        weight_j = math.sqrt(weight)
        weight_of_artist.append(weight_j)
        f_j = (1 - error_rate_j) * weight_j
        f_of_artist.append(f_j)
        F += f_j

    print('degree %s' % (degree,))
    print('error_rate_of_artist %s' % (error_rate_of_artist,))
    print('weight_of_artist %s' % (weight_of_artist,))
    print('f_of_artist %s' % (f_of_artist,))
    print('F %s' % (F,))