我们从Python开源项目中，提取了以下8个代码示例，用于说明如何使用sklearn.preprocessing.FunctionTransformer()。
def __init__(self, *args, **kwargs):
    """Build the wrapped ``FunctionTransformer`` and keep it on ``self.ft``.

    Every positional and keyword argument is forwarded untouched to the
    ``FunctionTransformer`` constructor.
    """
    wrapped = FunctionTransformer(*args, **kwargs)
    self.ft = wrapped
def __init__(self, alpha=1.0, threshold=0.1, degree=3, operators=None,
             dt=1.0, n_jobs=1, derivative=None, feature_names=None,
             kw=None):
    """Store the constructor arguments on the instance.

    Parameters
    ----------
    alpha, threshold, degree, operators, n_jobs, feature_names
        Stored unchanged as attributes of the same name.
    dt
        Only used to configure the default ``derivative`` transformer
        (passed to ``_derivative`` as the ``dt`` keyword argument).
    derivative
        Transformer to use for differentiation. When falsy (``None`` by
        default) a ``FunctionTransformer`` wrapping ``_derivative`` is
        created instead.
    kw
        Extra keyword arguments, stored as ``self.kw``. ``None`` (the
        default) yields a fresh empty dict for this instance.
    """
    self.alpha = alpha
    self.threshold = threshold
    self.degree = degree
    self.operators = operators
    self.n_jobs = n_jobs
    self.derivative = derivative or FunctionTransformer(
        func=_derivative, kw_args={"dt": dt}
    )
    self.feature_names = feature_names
    # A literal ``{}`` default would be one dict shared by every instance
    # created without ``kw``; mutating it on one object would leak into all
    # the others.  Allocate a new dict per instance instead.
    self.kw = {} if kw is None else kw
def drop_first_component(X, y):
    """Fit a PCA + column-filter pipeline on a train split; transform the test split.

    The pipeline chains ``PCA()`` with a ``FunctionTransformer`` wrapping
    ``all_but_first_column`` (presumably discarding the first PCA
    component -- confirm against that helper).  The data is split with
    ``train_test_split`` using its defaults, the pipeline is fitted on the
    training part, and the transformed test part is returned.

    Returns
    -------
    tuple
        ``(transformed_X_test, y_test)``.
    """
    column_filter = FunctionTransformer(all_but_first_column)
    reducer = make_pipeline(PCA(), column_filter)

    X_train, X_test, y_train, y_test = train_test_split(X, y)
    reducer.fit(X_train, y_train)

    projected_test = reducer.transform(X_test)
    return projected_test, y_test
def test_np_log():
    """Check that wrapping ``np.log1p`` gives the same result as calling it directly."""
    data = np.arange(10).reshape(5, 2)

    # Guards the numpy log example: the transformer must simply delegate.
    transformed = FunctionTransformer(np.log1p).transform(data)
    expected = np.log1p(data)
    testing.assert_array_equal(transformed, expected)
def test_kw_arg():
    """Check that ``kw_args`` are forwarded to the wrapped function."""
    data = np.linspace(0, 1, num=10).reshape(5, 2)
    rounder = FunctionTransformer(np.around, kw_args={'decimals': 3})

    # The transformer output must match rounding to three decimals directly.
    observed = rounder.transform(data)
    expected = np.around(data, decimals=3)
    testing.assert_array_equal(observed, expected)
def test_kw_arg_update():
    """Check that mutating ``kw_args`` after construction affects ``transform``."""
    data = np.linspace(0, 1, num=10).reshape(5, 2)
    rounder = FunctionTransformer(np.around, kw_args={'decimals': 3})

    # Change the stored keyword argument in place before transforming.
    rounder.kw_args['decimals'] = 1

    # The updated value (one decimal) must be the one actually used.
    observed = rounder.transform(data)
    expected = np.around(data, decimals=1)
    testing.assert_array_equal(observed, expected)
def _construct_pipeline(self, model_name='LogisticRegression',
                        trained_pipeline=None, final_model=None,
                        feature_learning=False,
                        final_model_step_name='final_model'):
    """Assemble the ordered pipeline steps and return them as a ``Pipeline``.

    When ``trained_pipeline`` is given, existing steps are copied from its
    ``named_steps``; otherwise new step objects are created.  Steps are
    appended in this order:

    1. ``'user_func'`` -- only if ``self.user_input_func`` is set.
    2. ``'basic_transform'`` -- always.
    3. ``'scaler'`` -- only if ``self.perform_feature_scaling is True``.
    4. ``'dv'`` -- always.
    5. ``'feature_selection'`` -- only for a new pipeline with
       ``self.perform_feature_selection == True``.
    6. With a trained pipeline: every step whose name ends in ``'_model'``,
       then ``final_model`` under ``final_model_step_name`` if supplied.
       Without one: a new ``'final_model'`` step built from ``model_name``.

    Parameters
    ----------
    model_name
        Name handed to ``utils_models.get_model_from_name`` when no trained
        pipeline is supplied.
    trained_pipeline
        Existing pipeline to copy steps from, or ``None``.
    final_model
        Optional model appended last when ``trained_pipeline`` is given.
    feature_learning
        Forwarded to ``FinalModelATC`` for a newly built final model.
    final_model_step_name
        Step name used for ``final_model``.

    Returns
    -------
    Pipeline
        Pipeline built from the collected ``(name, step)`` pairs.
    """
    has_trained = trained_pipeline is not None
    stages = []

    def reuse(step_key):
        # Copy one step over from the trained pipeline under the same name.
        stages.append((step_key, trained_pipeline.named_steps[step_key]))

    # Optional user-supplied function, placed first.
    if self.user_input_func is not None:
        if has_trained:
            reuse('user_func')
        elif self.transformation_pipeline is None:
            print('Including the user_input_func in the pipeline! Please remember to return X, and not modify the length or order of X at all.')
            print('Your function will be called as the first step of the pipeline at both training and prediction times.')
            # NOTE(review): ``pass_y`` exists only in older scikit-learn
            # releases -- confirm against the pinned version.
            user_step = FunctionTransformer(
                func=self.user_input_func, pass_y=False, validate=False
            )
            stages.append(('user_func', user_step))

    # Basic cleaning is present in every pipeline.
    if has_trained:
        reuse('basic_transform')
    else:
        cleaner = utils_data_cleaning.BasicDataCleaning(
            column_descriptions=self.column_descriptions
        )
        stages.append(('basic_transform', cleaner))

    if self.perform_feature_scaling is True:
        if has_trained:
            reuse('scaler')
        else:
            scaler = utils_scaling.CustomSparseScaler(self.column_descriptions)
            stages.append(('scaler', scaler))

    # Vectorization is present in every pipeline.
    if has_trained:
        reuse('dv')
    else:
        vectorizer = DataFrameVectorizer.DataFrameVectorizer(
            sparse=True, sort=True, column_descriptions=self.column_descriptions
        )
        stages.append(('dv', vectorizer))

    # Feature selection is deliberately not copied from a trained pipeline:
    # per the original author's note it has already been folded into 'dv'
    # via dv.restrict.  The ``== True`` comparison is kept as written.
    if self.perform_feature_selection == True and not has_trained:
        selector = utils_feature_selection.FeatureSelectionTransformer(
            type_of_estimator=self.type_of_estimator,
            column_descriptions=self.column_descriptions,
            feature_selection_model='SelectFromModel',
        )
        stages.append(('feature_selection', selector))

    if has_trained:
        # Carry over every step whose name ends in '_model' (the author's
        # note cites 'feature_learning_model' and 'ensemble_model').
        for entry in trained_pipeline.steps:
            entry_name = entry[0]
            if entry_name[-6:] != '_model':
                continue
            reuse(entry_name)

        # Covers the case where a search was run on the final model alone
        # and it must now rejoin the pipeline.  If ``final_model`` is None
        # nothing is added here; per the original note, the final model is
        # then expected to have been picked up by the suffix match above.
        if final_model is not None:
            stages.append((final_model_step_name, final_model))
    else:
        estimator = utils_models.get_model_from_name(
            model_name, training_params=self.training_params
        )
        wrapped_estimator = utils_model_training.FinalModelATC(
            model=estimator,
            type_of_estimator=self.type_of_estimator,
            ml_for_analytics=self.ml_for_analytics,
            name=self.name,
            scoring_method=self._scorer,
            feature_learning=feature_learning,
        )
        stages.append(('final_model', wrapped_estimator))

    return Pipeline(stages)
def test_delegate_to_func():
    """Check which arguments ``FunctionTransformer`` hands to the wrapped function.

    The function from ``_make_func`` records the positional and keyword
    arguments it receives in the two containers passed to it.  By default
    only ``X`` should arrive; with ``pass_y=True`` both ``X`` and ``y``
    should.
    """
    positional_msg = 'Incorrect positional arguments passed to func: {args}'
    keyword_msg = 'Unexpected keyword arguments passed to func: {args}'
    unchanged_msg = 'transform should have returned X unchanged'

    # Recorders filled by the wrapped function on each call.
    seen_args = []
    seen_kwargs = {}
    data = np.arange(10).reshape(5, 2)

    # --- default behaviour: only X is forwarded -------------------------
    plain = FunctionTransformer(_make_func(seen_args, seen_kwargs))
    testing.assert_array_equal(plain.transform(data), data, unchanged_msg)

    assert_equal(seen_args, [data], positional_msg.format(args=seen_args))
    assert_equal(seen_kwargs, {}, keyword_msg.format(args=seen_kwargs))

    # Empty both recorders in place so the wrapped function keeps writing
    # to the same objects (slice deletion also works on Python 2 lists).
    del seen_args[:]
    seen_kwargs.clear()

    # --- pass_y=True: X and y are both forwarded ------------------------
    target = object()
    with_target = FunctionTransformer(
        _make_func(seen_args, seen_kwargs),
        pass_y=True,
    )
    testing.assert_array_equal(
        with_target.transform(data, target), data, unchanged_msg
    )

    assert_equal(
        seen_args, [data, target], positional_msg.format(args=seen_args)
    )
    assert_equal(seen_kwargs, {}, keyword_msg.format(args=seen_kwargs))