The following 4 code examples, extracted from open-source Python projects, illustrate how to use sklearn.feature_selection.f_regression().
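As a point of reference before the project examples, here is a minimal sketch of calling f_regression directly; the synthetic data and variable names below are my own, not from any of the projects.

import numpy as np
from sklearn.feature_selection import f_regression

# Synthetic regression data: 100 samples, 3 features, only the first is informative.
rng = np.random.RandomState(0)
X = rng.normal(size=(100, 3))
y = 2.0 * X[:, 0] + rng.normal(scale=0.1, size=100)

# f_regression returns one F-statistic and one p-value per feature.
F, pvalues = f_regression(X, y)
print(F)         # the informative feature gets by far the largest F-value
print(pvalues)   # and the smallest p-value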
Example 1

import pandas as pd
from sklearn.feature_selection import SelectKBest, f_regression


def select_kbest_reg(data_frame, target, k=5):
    """
    Selecting K-Best features for regression.

    :param data_frame: A pandas DataFrame with the training data
    :param target: target variable name in DataFrame
    :param k: desired number of features from the data
    :returns feat_scores: scores for each feature in the data as a pandas DataFrame
    """
    # Fit the univariate F-test selector on all columns except the target.
    feat_selector = SelectKBest(f_regression, k=k)
    _ = feat_selector.fit(data_frame.drop(target, axis=1), data_frame[target])

    # Collect per-feature F scores, p-values and whether the feature was kept.
    feat_scores = pd.DataFrame()
    feat_scores["F Score"] = feat_selector.scores_
    feat_scores["P Value"] = feat_selector.pvalues_
    feat_scores["Support"] = feat_selector.get_support()
    feat_scores["Attribute"] = data_frame.drop(target, axis=1).columns

    return feat_scores
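A quick way to exercise select_kbest_reg on a toy DataFrame; the column names and data here are invented for illustration:

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
df = pd.DataFrame({
    "x1": rng.normal(size=50),
    "x2": rng.normal(size=50),
    "x3": rng.normal(size=50),
})
df["price"] = 3.0 * df["x1"] + rng.normal(scale=0.1, size=50)

# One row per feature with its F score, p-value and whether SelectKBest kept it.
scores = select_kbest_reg(df, target="price", k=2)
print(scores.sort_values("F Score", ascending=False))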
Example 2

def main():
    from sklearn import svm
    from sklearn.datasets import samples_generator
    from sklearn.feature_selection import SelectKBest
    from sklearn.feature_selection import f_regression
    from sklearn.preprocessing import MinMaxScaler

    X, y = samples_generator.make_classification(n_samples=1000,
                                                 n_informative=5,
                                                 n_redundant=4,
                                                 random_state=_random_state)

    anova_filter = SelectKBest(f_regression, k=5)
    scaler = MinMaxScaler()
    clf = svm.SVC(kernel='linear')

    steps = [scaler, anova_filter, clf]

    cached_run(steps, X, y)
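In this example, cached_run and _random_state are helpers defined elsewhere in that project, and f_regression serves as a cheap univariate (ANOVA-style) filter in front of the SVM. For a self-contained variant, the same three steps can be chained with the standard scikit-learn pipeline API; this is a sketch under that assumption, not the project's actual code:

from sklearn import svm
from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler

X, y = make_classification(n_samples=1000, n_informative=5,
                           n_redundant=4, random_state=0)

# Scale, keep the 5 features with the highest univariate F scores, fit a linear SVM.
pipe = make_pipeline(MinMaxScaler(),
                     SelectKBest(f_regression, k=5),
                     svm.SVC(kernel='linear'))
pipe.fit(X, y)
print(pipe.score(X, y))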
Example 3

from sklearn.feature_selection import f_regression


def ANOVA(X, y):
    '''Univariate linear regression tests.

    Quick linear model for sequentially testing the effect of many regressors,
    using scikit-learn's feature selection toolbox.

    Returns:
        F (array)       = F-values for regressors
        pvalues (array) = p-values for the F-scores
    '''
    (F, pvalues) = f_regression(X, y)
    return (F, pvalues)
Example 4

def get_params_for_est(estimator, name):
    '''Choose initialization parameters for an estimator for auto-testing'''
    is_classifier = ClassifierMixin in estimator.__mro__
    is_cluster = ClusterMixin in estimator.__mro__
    is_ensemble = BaseEnsemble in estimator.__mro__
    uses_counts = any(c in name for c in USES_COUNTS)
    as_1d = name in REQUIRES_1D
    args, params, _ = get_args_kwargs_defaults(estimator.__init__)
    est_keys = set(('estimator', 'base_estimator', 'estimators'))
    est_keys = (set(params) | set(args)) & est_keys
    if is_classifier:
        score_func = feat.f_classif
    else:
        score_func = feat.f_regression
    for key in est_keys:
        if name == 'SelectFromModel':
            params[key] = sklearn.linear_model.LassoCV()
        elif is_classifier:
            params[key] = sklearn.tree.DecisionTreeClassifier()
        else:
            params[key] = sklearn.tree.DecisionTreeRegressor()
        if key == 'estimators':
            params[key] = [(str(_), clone(params[key])) for _ in range(10)]
    kw = dict(is_classifier=is_classifier, is_cluster=is_cluster,
              is_ensemble=is_ensemble, uses_counts=uses_counts)
    if 'score_func' in params:
        params['score_func'] = score_func
    X, y = make_X_y(**kw)
    return X, y, params, kw
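Here f_regression appears only as the regression branch of the score_func choice: the test harness plugs feat.f_classif into score-function parameters when the estimator under test is a classifier and feat.f_regression otherwise (feat, USES_COUNTS, REQUIRES_1D, get_args_kwargs_defaults and make_X_y are that project's own helpers).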