We have extracted the following 26 code examples from open-source Python projects to illustrate how to use sklearn.preprocessing.PolynomialFeatures().
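Before the project examples, here is a minimal standalone sketch of the basic API. The input array is illustrative only; note that get_feature_names_out requires scikit-learn >= 1.0.

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

X = np.array([[2., 3.],
              [4., 5.]])

# degree=2 expands [x1, x2] into [1, x1, x2, x1^2, x1*x2, x2^2]
poly = PolynomialFeatures(degree=2)
print(poly.fit_transform(X))
# [[ 1.  2.  3.  4.  6.  9.]
#  [ 1.  4.  5. 16. 20. 25.]]
print(poly.get_feature_names_out())  # requires scikit-learn >= 1.0
# ['1' 'x0' 'x1' 'x0^2' 'x0 x1' 'x1^2']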
def mapFeatures(X):
    '''
    MAPFEATURE Feature mapping function to polynomial features

    MAPFEATURE(X1, X2) maps the two input features to polynomial features
    used in the regularization exercise.

    Returns a new feature array with more features, comprising
    X1, X2, X1.^2, X2.^2, X1*X2, X1*X2.^2, etc..

    Inputs X1, X2 must be the same size

    :param X:
    :return: XTransform
    '''
    degree = 4
    poly = PolynomialFeatures(degree)
    XTransform = poly.fit_transform(X)
    return XTransform
def fit(self, x, y=None):
    if y is not None:
        xdot = y
    else:
        xdot = self.derivative.transform(x)

    if self.operators is not None:
        feature_transformer = SymbolicFeatures(
            exponents=np.linspace(1, self.degree, self.degree),
            operators=self.operators)
    else:
        feature_transformer = PolynomialFeatures(degree=self.degree,
                                                 include_bias=False)

    steps = [("features", feature_transformer),
             ("model", STRidge(alpha=self.alpha, threshold=self.threshold,
                               **self.kw))]
    self.model = MultiOutputRegressor(Pipeline(steps), n_jobs=self.n_jobs)
    self.model.fit(x, xdot)
    self.n_input_features_ = self.model.estimators_[0].steps[0][1].n_input_features_
    self.n_output_features_ = self.model.estimators_[0].steps[0][1].n_output_features_
    return self
def fit_linear_regression(X, y, degree):
    return Pipeline([
        ("polynomial_features", PolynomialFeatures(degree=degree,
                                                   include_bias=False)),
        ("linear_regression", LinearRegression()),
    ]).fit(X, y)
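A quick usage sketch for the pipeline above, on synthetic data; the data and degree are illustrative, not from the original project:

import numpy as np

rng = np.random.RandomState(42)
X = rng.uniform(-3, 3, size=(100, 1))                           # synthetic inputs
y = 0.5 * X[:, 0] ** 3 - X[:, 0] + rng.normal(scale=0.5, size=100)

model = fit_linear_regression(X, y, degree=3)
print(model.score(X, y))  # R^2 of the cubic fit on its training data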
def send_data(self):
    if self.data is not None:
        attributes = self.x_var_model[self.x_var_index]
        class_var = self.y_var_model[self.y_var_index]
        data_table = Table(
            Domain([attributes], class_vars=[class_var]), self.data)
        polyfeatures = skl_preprocessing.PolynomialFeatures(
            int(self.polynomialexpansion))
        valid_mask = ~np.isnan(data_table.X).any(axis=1)
        x = data_table.X[valid_mask]
        x = polyfeatures.fit_transform(x)
        x_label = data_table.domain.attributes[0].name
        out_array = np.concatenate(
            (x, data_table.Y[np.newaxis].T[valid_mask]), axis=1)
        out_domain = Domain(
            [ContinuousVariable("1")]
            + ([data_table.domain.attributes[0]]
               if self.polynomialexpansion > 0 else [])
            + [ContinuousVariable("{}^{}".format(x_label, i))
               for i in range(2, int(self.polynomialexpansion) + 1)],
            class_vars=[class_var])
        self.Outputs.data.send(Table(out_domain, out_array))
        return
    self.Outputs.data.send(None)
def get_polynomials(features, poly_degree):
    r"""Generate interactions that are products of distinct features.

    Parameters
    ----------
    features : pandas.DataFrame
        Dataframe containing the features for generating interactions.
    poly_degree : int
        The degree of the polynomial features.

    Returns
    -------
    poly_features : numpy array
        The interaction features only.

    References
    ----------
    You can find more information on polynomial interactions here [POLY]_.

    .. [POLY] http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html

    """
    polyf = PolynomialFeatures(interaction_only=True,
                               degree=poly_degree,
                               include_bias=False)
    poly_features = polyf.fit_transform(features)
    return poly_features


#
# Function get_text_features
#
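For reference, a minimal sketch of what the interaction_only=True expansion in get_polynomials produces; the input values here are made up:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

X = np.array([[2., 3., 5.]])

# interaction_only=True keeps only products of distinct features: with
# degree=2 and include_bias=False the columns are
# [x0, x1, x2, x0*x1, x0*x2, x1*x2] -- no squared terms.
polyf = PolynomialFeatures(interaction_only=True, degree=2, include_bias=False)
print(polyf.fit_transform(X))
# [[ 2.  3.  5.  6. 10. 15.]]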
def scaled_pipelines():
    # Model parameters
    # RANSAC parameters
    # 500 max trials takes 90s
    ransac_kwargs = {
        'max_trials': 1000,
        'min_samples': 5000,
        'loss': 'absolute_loss',
        'residual_threshold': 2.0,
        'random_state': _RANDOM_STATE,
    }
    # Ridge CV parameters
    alphas = [.01, .1, 1, 10]

    # Model instances
    model_steps = [
        LinearRegression(),
        # [PolynomialFeatures(degree=2), LinearRegression()],
        # [PolynomialFeatures(degree=3), LinearRegression()],
        # RANSACRegressor(base_estimator=LinearRegression(), **ransac_kwargs),
        # RANSACRegressor with polynomial regression?
        # RidgeCV(alphas=alphas),
        # LassoCV(),  # Alphas set automatically by default
        # ElasticNetCV(l1_ratio=0.5),  # Same as default
        # [PolynomialFeatures(degree=2), ElasticNetCV(l1_ratio=0.5)],
        # SGDRegressor(),
    ]
    # Pipelines
    pipelines = []
    for m in model_steps:
        # Steps common to every pipeline
        common_steps = [
            StandardScaler(),
            PCA(**_PCA_KWARGS),
        ]
        # renamed from `model_steps` to avoid shadowing the list being iterated
        per_model_steps = m if isinstance(m, list) else [m]
        steps = common_steps + per_model_steps
        pipelines.append(make_pipeline(*steps))
    return pipelines
def get_models4ensamble(conf):
    models = []
    #models = [RFRModel(conf), DLModel(conf), LRModel(conf)]
    #models = [LRModel(conf)]
    # see http://scikit-learn.org/stable/modules/linear_model.html

    #0 was too big to run with depth set to 1, and 1 was overfitting a bit
    if conf.command == 1:
        xgb_params = {"objective": "reg:linear", "booster": "gbtree",
                      "max_depth": 3, "eta": 0.1, "min_child_weight": 5,
                      "subsample": 0.5, "nthread": 4, "colsample_bytree": 0.5,
                      "num_parallel_tree": 1, 'gamma': 0}
    else:
        xgb_params = {"objective": "reg:linear", "booster": "gbtree",
                      "max_depth": 10, "eta": 0.1, "min_child_weight": 8,
                      "subsample": 0.5, "nthread": 4, "colsample_bytree": 0.5,
                      "num_parallel_tree": 1, 'gamma': 0}

    #xgb_params = {"objective": "reg:linear", "booster":"gbtree", "max_depth":10,
    #              "eta":0.1, "min_child_weight":8, "subsample":0.5, "nthread":4,
    #              "colsample_bytree":0.5, "num_parallel_tree":1, 'gamma':0}

    models = [
        #DLModel(conf),
        #LRModel(conf, model=linear_model.BayesianRidge()),
        #LRModel(conf, model=linear_model.LassoLars(alpha=.1)),
        #LRModel(conf, model=linear_model.Lasso(alpha=0.1)),
        #LRModel(conf, model=Pipeline([('poly', PolynomialFeatures(degree=3)),
        #    ('linear', LinearRegression(fit_intercept=False))])),
        #LRModel(conf, model=linear_model.Ridge(alpha=.5)),
        XGBoostModel(conf, xgb_params, use_cv=True),
        LRModel(conf, model=linear_model.Lasso(alpha=0.3)),
        RFRModel(conf, RandomForestRegressor(oob_score=True, n_jobs=4)),
        #LRModel(conf, model=linear_model.Lasso(alpha=0.2)),
        ETRModel(conf, model=ExtraTreesRegressor(n_jobs=4)),
        #AdaBoostRModel(conf, model=AdaBoostRegressor(loss='square'))
    ]
    return models
    #return [XGBoostModel(conf, xgb_params, use_cv=True)]
def polynomial(self, X, deg=1):
    return PolynomialFeatures(deg).fit_transform(X)
def Identified_Model(y, t, library, estimator):
    '''
    Simulates the model from Sparse identification.

    Inputs
    ------
    library: library object used in the sparse identification
             (e.g. poly_lib = PolynomialFeatures(degree=3))
    estimator: estimator object obtained from the sparse identification

    Output
    ------
    dy : numpy array object containing the derivatives evaluated using
         the model identified from sparse regression.
    '''
    lib = library.fit_transform(y.reshape(1, -1))
    Theta = block_diag(lib, lib, lib)
    dy = Theta.dot(estimator.coef_)
    return dy
def feature_transform(X, mode='polynomial', degree=1):
    poly = PolynomialFeatures(degree)
    process_X = poly.fit_transform(X)
    if mode == 'legendre':
        # scipy.special.legendre returns a poly1d; calling it evaluates the
        # Legendre polynomial elementwise on the expanded feature matrix
        lege = legendre(degree)
        process_X = lege(process_X)
    return process_X
def fit_twosls(x, z, t, y):
    '''
    Two stage least squares with polynomial basis function.
    '''
    params = dict(poly__degree=range(1, 4),
                  ridge__alpha=np.logspace(-5, 5, 11))
    pipe = Pipeline([('poly', PolynomialFeatures()), ('ridge', Ridge())])
    stage_1 = GridSearchCV(pipe, param_grid=params, cv=5)
    if z.shape[1] > 0:
        X = np.concatenate([x, z], axis=1)
    else:
        X = z
    stage_1.fit(X, t)
    t_hat = stage_1.predict(X)
    print("First stage parameters: " + str(stage_1.best_params_))

    pipe2 = Pipeline([('poly', PolynomialFeatures()), ('ridge', Ridge())])
    stage_2 = GridSearchCV(pipe2, param_grid=params, cv=5)
    X2 = np.concatenate([x, t_hat], axis=1)
    stage_2.fit(X2, y)
    print("Best in sample score: %f" % stage_2.score(X2, y))
    print("Second stage parameters: " + str(stage_2.best_params_))

    def g_hat(x, z, t):
        X_new = np.concatenate([x, t], axis=1)
        return stage_2.predict(X_new)

    return g_hat
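A hypothetical end-to-end call for fit_twosls; the synthetic instrument z, covariate x, treatment t, and outcome y below are assumptions for illustration only:

import numpy as np

rng = np.random.RandomState(0)
n = 500
z = rng.normal(size=(n, 1))              # instrument
x = rng.normal(size=(n, 1))              # exogenous covariate
t = 0.8 * z + rng.normal(size=(n, 1))    # endogenous treatment driven by z
y = 2.0 * t[:, 0] + rng.normal(size=n)   # outcome

g_hat = fit_twosls(x, z, t, y)
print(g_hat(x, z, t)[:5])  # predictions at the observed (x, t)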
def measure(y):
    x = np.linspace(1, 183, 183)
    pred = Pipeline([('poly', PolynomialFeatures(10)),
                     ('linear', LinearRegression(fit_intercept=False))])
    pred.fit(x[:, np.newaxis], y)
    y_ex = pred.predict(x[:, np.newaxis])
    t = comp(y_ex, y)
    return t
def fit(self, X, y):
    sdim, fdim = X.shape
    for i in range(self.n_estimators):
        ridge = Ridge(alpha=self.alpha, normalize=self.normalize,
                      random_state=self.random_state)
        fidx = self._random_feature_idx(fdim, self.random_state + i * 100)
        sidx = self._random_sample_idx(sdim, self.random_state + i * 10)
        X_tmp = X[sidx][:, fidx]
        if self.poly:
            # [:, 1:] drops the bias column added by PolynomialFeatures
            X_tmp = PolynomialFeatures(degree=2).fit_transform(X_tmp)[:, 1:]
        ridge.fit(X_tmp, y[sidx])
        self.ridge_list[i] = ridge
        self.feature_idx_list[i] = fidx
    return self
def predict(self, X):
    y_pred = np.zeros((X.shape[0], self.n_estimators))
    for i in range(self.n_estimators):
        fidx = self.feature_idx_list[i]
        ridge = self.ridge_list[i]
        X_tmp = X[:, fidx]
        if self.poly:
            X_tmp = PolynomialFeatures(degree=2).fit_transform(X_tmp)[:, 1:]
        y_pred[:, i] = ridge.predict(X_tmp)
    y_pred = np.mean(y_pred, axis=1)
    return y_pred
def gen_features(train, y, test):
    ntrain = len(train)
    df_all = pd.concat([train, test])
    poly = preprocessing.PolynomialFeatures(degree=3)
    dpoly = poly.fit_transform(df_all)  # note: computed but not used below
    df_all['ap_diff'] = df_all.ap_hi - df_all.ap_lo
    h = df_all['height'] / 100
    df_all['BWI'] = df_all['weight'] / (h * h)
    df_all['bad_bwi'] = (df_all.BWI > 60).values * 1 + (df_all.BWI < 10).values * 1
    df_all['bad_height'] = (df_all.height < 130).values * 1
    df_all['bad_weight'] = (df_all.weight + 120 < df_all.height).values * 1
    df_all['bad_ap_hi'] = 0
    # .ix was removed from pandas; .loc with a boolean mask is equivalent here
    df_all.loc[(df_all.ap_hi < 80).values + (df_all.ap_hi > 220).values,
               'bad_ap_hi'] = 1
    df_all['bad_ap_lo'] = 0
    df_all.loc[(df_all.ap_lo < 40).values + (df_all.ap_lo > 200).values,
               'bad_ap_lo'] = 1
    df_all['has_bad_data'] = (df_all.bad_bwi + df_all.bad_height
                              + df_all.bad_weight + df_all.bad_ap_hi
                              + df_all.bad_ap_lo) > 0
    return df_all[:ntrain].reindex(), y, df_all[ntrain:].reindex()
def multireg(self, Xtrain, ytrain, Xtest, ytest):
    self.normalize(Xtrain)
    '''
    # polynomial try
    poly = PolynomialFeatures(degree=2)
    Xtrain = poly.fit_transform(Xtrain)
    Xtest = poly.fit_transform(Xtest)
    '''
    # normal clf fit
    clf = linear_model.LinearRegression()
    clf.fit(Xtrain, ytrain)
    coeffients = clf.coef_
    print("coefficients:", coeffients)
    print("intercept:", clf.intercept_)
    print("train score", clf.score(Xtrain, ytrain))
    print("test score", clf.score(Xtest, ytest))

    # manually calculate train accuracy
    train_results = clf.predict(Xtrain)
    print("first x:", Xtrain[0])
    print("first result:", train_results[0])
    correct = 0
    for i in range(len(train_results)):
        if round(train_results[i], 1) == round(ytrain[i], 1):
            correct += 1
    accuracy = correct * 1.0 / len(ytrain)
    print("train accuracy: ", accuracy * 100, "%")

    # cross validation (in modern scikit-learn this module is
    # sklearn.model_selection and the scorer is 'neg_mean_squared_error')
    score = cross_validation.cross_val_score(clf, Xtrain, ytrain,
                                             scoring='mean_squared_error', cv=5)
    print("cross validation score: ", score)
    predict = cross_val_predict(clf, Xtrain, ytrain, cv=5)
    correct = 0
    for i in range(len(predict)):
        if round(predict[i], 1) == round(ytrain[i], 1):
            correct += 1
    accuracy = correct * 1.0 / len(ytrain)
    print("cross validation accuracy: ", accuracy * 100, "%")

    # manually calculate test accuracy
    self.normalize(Xtest)
    results = clf.predict(Xtest)
    correct = 0
    for i in range(len(results)):
        if round(results[i], 1) == round(ytest[i], 1):
            correct += 1
    accuracy = correct * 1.0 / len(ytest)
    print("test accuracy: ", accuracy * 100, "%")
    return coeffients
def __init__(self, T, L, backup):
    self.backup = backup
    self.T = T
    self.L = L
    self.pre_process = PolynomialFeatures(degree=2, include_bias=False)
    if self.backup['name'] == 'sampling':
        self.Q = linear_model.SGDRegressor(loss='huber', penalty='l2',
                                           learning_rate='invscaling',
                                           eta0=0.1, power_t=0.25,
                                           warm_start=False)
    elif self.backup['name'] == 'doubleQ':
        self.Q_1 = linear_model.SGDRegressor(loss='huber', penalty='l2',
                                             learning_rate='invscaling',
                                             eta0=0.1, power_t=0.25,
                                             warm_start=False)
        self.Q_2 = linear_model.SGDRegressor(loss='huber', penalty='l2',
                                             learning_rate='invscaling',
                                             eta0=0.1, power_t=0.25,
                                             warm_start=False)
    elif self.backup['name'] == 'replay buffer':
        self.Q = linear_model.SGDRegressor(loss='huber', penalty='l2',
                                           learning_rate='invscaling',
                                           eta0=0.1, power_t=0.25,
                                           warm_start=False)
        self.buff = []
    else:
        print("Illegal Backup Type")
def transform_pf(data, degree=2):
    PF = PolynomialFeatures(degree=degree)
    pf = PF.fit_transform(data)
    # print(pf.shape)
    return pf

# max min sum std mean median
def fitJA(j, start_date_rank):
    pltf.clf()
    p = artists_play_inday[j]
    p = p[start_date_rank:]
    print(p)
    apcount = [0] * (183 - start_date_rank)
    apdate = range(start_date_rank, 183)
    for i in p:
        apcount[i[1] - start_date_rank] = i[0]
    print(apcount)
    d_train = np.asarray(apdate)
    c_train = np.asarray(apcount)
    # create matrix versions of these arrays
    D_train = d_train[:, np.newaxis]
    d_test_plot = np.asarray(range(start_date_rank, 244))
    D_test_plot = d_test_plot[:, np.newaxis]
    pltf.scatter(d_train, c_train, label="training points")
    for degree in [1, 2, 3]:
        model = make_pipeline(PolynomialFeatures(degree), Ridge())
        model.fit(D_train, c_train)
        c_test_plot = model.predict(D_test_plot)
        pltf.plot(d_test_plot, c_test_plot, label="degree %d" % degree)
    pltf.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                ncol=5, mode="expand", borderaxespad=0.)
    pltf.show()
def pred(degree):
    predict_file_path = "./data/mars_tianchi_artist_plays_predict.csv"
    # 'w' with newline='' is the Python 3 idiom for csv output ('wb' is Python 2)
    fp = open(predict_file_path, 'w', newline='')
    fpwriter = csv.writer(fp, delimiter=',', quotechar='"',
                          quoting=csv.QUOTE_NONE)
    for j in range(0, 50):
        p = artists_play_inday[j]
        apcount = [0] * 184
        apdate = range(0, 184)
        for i in p:
            apcount[i[1]] = i[0]
        x = np.asarray(apdate)
        X = x[:, np.newaxis]
        y = np.asarray(apcount)
        x_future = np.asarray(range(184, 245))
        X_future = x_future[:, np.newaxis]
        model = make_pipeline(PolynomialFeatures(degree), Ridge())
        model.fit(X, y)
        y_future = model.predict(X_future)
        artist_id = artists_rank_to_id[j]
        for idx in range(0, 61):
            date = rank_to_date[x_future[idx]]
            play_num = int(math.ceil(y_future[idx]))
            if play_num < 0:
                play_num = 0
            row = [artist_id, play_num, date]
            print(row)
            fpwriter.writerow(row)
    fp.close()
def predDegs(degree, start_date_rank_list):
    predict_file_path = "./data/mars_tianchi_artist_plays_predict.csv"
    fp = open(predict_file_path, 'w', newline='')
    fpwriter = csv.writer(fp, delimiter=',', quotechar='"',
                          quoting=csv.QUOTE_NONE)
    for j in range(0, 50):
        start_date_rank = start_date_rank_list[j]
        p = artists_play_inday[j]
        p = p[start_date_rank:]
        apcount = [0] * (183 - start_date_rank)
        apdate = range(start_date_rank, 183)
        for i in p:
            apcount[i[1] - start_date_rank] = i[0]
        d_train = np.asarray(apdate)
        c_train = np.asarray(apcount)
        # create matrix versions of these arrays
        D_train = d_train[:, np.newaxis]
        d_future = np.asarray(range(184, 244))
        D_future = d_future[:, np.newaxis]
        model = make_pipeline(PolynomialFeatures(degree[j]), Ridge())
        model.fit(D_train, c_train)
        c_future = model.predict(D_future)
        artist_id = artists_rank_to_id[j]
        for idx in range(0, 60):
            date = rank_to_date[d_future[idx]]
            play_num = int(math.ceil(c_future[idx]))
            if play_num < 0:
                play_num = 0
            row = [artist_id, play_num, date]
            print(row)
            fpwriter.writerow(row)
    fp.close()
def BasicFactorRegress(inputs, window_length, mask, n_fwd_days, algo_mode=None,
                       cross=True):
    class BasicFactorRegress(CustomFactor):
        # params = {'trigger_date': None, }
        init = False

        def __shift_mask_data(self, X, Y, n_fwd_days=1):
            # Shift X to match factors at t to returns at t+n_fwd_days
            # (we want to predict future returns after all)
            shifted_X = np.roll(X, n_fwd_days, axis=0)
            # Slice off rolled elements
            X = shifted_X[n_fwd_days:]
            Y = Y[n_fwd_days:]
            n_time, n_stocks, n_factors = X.shape
            # Flatten X
            X = X.reshape((n_time * n_stocks, n_factors))
            Y = Y.reshape((n_time * n_stocks))
            return X, Y

        def __get_last_values(self, input_data):
            last_values = []
            for dataset in input_data:
                last_values.append(dataset[-1])
            return np.vstack(last_values).T

        def compute(self, today, assets, out, returns, *inputs):
            if not self.init:
                self.clf = algo_mode
                X = np.dstack(inputs)  # (time, stocks, factors)
                Y = returns  # (time, stocks)
                X, Y = self.__shift_mask_data(X, Y, n_fwd_days)
                X = np.nan_to_num(X)
                Y = np.nan_to_num(Y)
                if cross == True:
                    quadratic_featurizer = PolynomialFeatures(interaction_only=True)
                    X = quadratic_featurizer.fit_transform(X)
                self.clf.fit(X, Y)
                # self.init = True
            last_factor_values = self.__get_last_values(inputs)
            last_factor_values = np.nan_to_num(last_factor_values)
            if cross == True:
                # apply the same interaction expansion used at fit time;
                # otherwise the feature dimensions would not match
                last_factor_values = quadratic_featurizer.transform(last_factor_values)
            out[:] = self.clf.predict(last_factor_values)

    return BasicFactorRegress(inputs=inputs, window_length=window_length, mask=mask)
def ridge_multireg(self, Xtrain, ytrain, Xtest, ytest):
    self.normalize(Xtrain)
    '''
    # polynomial try
    poly = PolynomialFeatures(degree=2)
    Xtrain = poly.fit_transform(Xtrain)
    Xtest = poly.fit_transform(Xtest)
    '''
    # normal clf try
    clf = linear_model.Ridge(alpha=10000)
    clf.fit(Xtrain, ytrain)
    coeffients = clf.coef_
    print("train score", clf.score(Xtrain, ytrain))
    print("test score", clf.score(Xtest, ytest))

    # manually calculate train accuracy
    train_results = clf.predict(Xtrain)
    correct = 0
    for i in range(len(train_results)):
        if round(train_results[i], 1) == round(ytrain[i], 1):
            correct += 1
    accuracy = correct * 1.0 / len(ytrain)
    print("train accuracy: ", accuracy * 100, "%")

    # cross validation (sklearn.model_selection in modern scikit-learn)
    score = cross_validation.cross_val_score(clf, Xtrain, ytrain,
                                             scoring='mean_squared_error', cv=5)
    print("cross validation score: ", score)
    '''
    predict = cross_val_predict(clf, Xtrain, ytrain, cv=5)
    correct = 0
    for i in range(len(predict)):
        if round(predict[i]) == round(ytrain[i]):
            correct += 1
    accuracy = correct * 1.0 / len(ytrain)
    print("cross validation accuracy: ", accuracy * 100, "%")
    '''

    # manually calculate test accuracy
    self.normalize(Xtest)
    results = clf.predict(Xtest)
    correct = 0
    for i in range(len(results)):
        if round(results[i], 1) == round(ytest[i], 1):
            correct += 1
    accuracy = correct * 1.0 / len(ytest)
    print("test accuracy: ", accuracy * 100, "%")
    return coeffients
def test(degree):
    error_rate_of_artist = []
    weight_of_artist = []
    f_of_artist = []
    F = 0.0
    for j in range(0, 50):
        p = artists_play_inday[j]
        apcount = [0] * 184
        apdate = range(0, 184)
        for i in p:
            apcount[i[1]] = i[0]
        x = np.asarray(apdate[:122])
        x_test = np.asarray(apdate[122:])
        X = x[:, np.newaxis]
        y = np.asarray(apcount[:122])
        y_test_true = np.asarray(apcount[122:])
        X_test = x_test[:, np.newaxis]
        model = make_pipeline(PolynomialFeatures(degree), Ridge())
        model.fit(X, y)
        y_test_pred = model.predict(X_test)
        error_rate_pow2_sum = 0.0
        weight = 0.0
        for idx in range(0, len(x_test)):
            y_true = y_test_true[idx]
            if y_true == 0:
                y_true = 1  # deal with divide by zero
            error_rate_pow2_sum += (float(int(math.ceil(y_test_pred[idx])) - y_true)
                                    / float(y_true)) ** 2
            weight += y_test_true[idx]
        error_rate_j = math.sqrt(error_rate_pow2_sum / float(len(x_test)))
        error_rate_of_artist.append(error_rate_j)
        weight_j = math.sqrt(weight)
        weight_of_artist.append(weight_j)
        f_j = (1 - error_rate_j) * weight_j
        f_of_artist.append(f_j)
        F += f_j
    print('degree', degree)
    print('error_rate_of_artist', error_rate_of_artist)
    print('weight_of_artist', weight_of_artist)
    print('f_of_artist', f_of_artist)
    print('F', F)