Python sklearn.preprocessing 模块,FunctionTransformer() 实例源码

我们从Python开源项目中,提取了以下8个代码示例,用于说明如何使用sklearn.preprocessing.FunctionTransformer()

项目:pandas-pipelines-custom-transformers    作者:jem1031    | 项目源码 | 文件源码
def __init__(self, *args, **kwargs):
        """Create the wrapped ``FunctionTransformer`` and keep it on ``self.ft``.

        All positional and keyword arguments are forwarded to
        ``FunctionTransformer`` unchanged.
        """
        wrapped_transformer = FunctionTransformer(*args, **kwargs)
        self.ft = wrapped_transformer
项目:sparsereg    作者:Ohjeah    | 项目源码 | 文件源码
def __init__(self, alpha=1.0, threshold=0.1, degree=3, operators=None, dt=1.0, n_jobs=1, derivative=None, feature_names=None, kw=None):
        """Store the configuration values on the instance.

        Parameters are copied to attributes of the same name, with two
        exceptions:

        * ``derivative``: when the given value is falsy (including the
          default ``None``), a ``FunctionTransformer`` wrapping
          ``_derivative`` with ``kw_args={"dt": dt}`` is used instead.
        * ``kw``: defaults to ``None`` and is replaced by a fresh empty
          dict per instance. A ``{}`` literal in the signature is created
          once and would be shared (and mutated) across every instance
          constructed without an explicit ``kw``. A dict passed explicitly
          is stored as-is (same object).
        """
        self.alpha = alpha
        self.threshold = threshold
        self.degree = degree
        self.operators = operators
        self.n_jobs = n_jobs
        # NOTE(review): ``dt`` is not stored on the instance; it only reaches
        # the default derivative transformer through its ``kw_args``.
        self.derivative = derivative or FunctionTransformer(func=_derivative, kw_args={"dt": dt})
        self.feature_names = feature_names
        self.kw = {} if kw is None else kw
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def drop_first_component(X, y):
    """
    Fit a PCA + column-selector pipeline on a training split and apply it
    to the held-out split.

    Returns the transformed held-out features together with the held-out
    targets.
    """
    stages = (PCA(), FunctionTransformer(all_but_first_column))
    pca_then_select = make_pipeline(*stages)

    X_fit, X_holdout, y_fit, y_holdout = train_test_split(X, y)
    pca_then_select.fit(X_fit, y_fit)

    transformed_holdout = pca_then_select.transform(X_holdout)
    return transformed_holdout, y_holdout
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_np_log():
    """Check that wrapping ``np.log1p`` gives the same result as calling it directly."""
    X = np.arange(10).reshape(5, 2)

    log_transformer = FunctionTransformer(np.log1p)
    transformed = log_transformer.transform(X)
    expected = np.log1p(X)

    testing.assert_array_equal(transformed, expected)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_kw_arg():
    """Check that ``kw_args`` are forwarded to the wrapped function."""
    X = np.linspace(0, 1, num=10).reshape(5, 2)

    rounder = FunctionTransformer(np.around, kw_args={'decimals': 3})

    # The transformer output must match rounding to three decimals directly.
    transformed = rounder.transform(X)
    expected = np.around(X, decimals=3)
    testing.assert_array_equal(transformed, expected)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_kw_arg_update():
    """Check that changing ``kw_args`` after construction affects ``transform``."""
    X = np.linspace(0, 1, num=10).reshape(5, 2)

    rounder = FunctionTransformer(np.around, kw_args={'decimals': 3})

    # Mutate the stored keyword arguments in place before transforming.
    rounder.kw_args.update(decimals=1)

    # The transformer must now round to one decimal, not three.
    transformed = rounder.transform(X)
    expected = np.around(X, decimals=1)
    testing.assert_array_equal(transformed, expected)
项目:auto_ml    作者:doordash    | 项目源码 | 文件源码
def _construct_pipeline(self, model_name='LogisticRegression', trained_pipeline=None, final_model=None, feature_learning=False, final_model_step_name='final_model'):
        """Assemble the ordered ``(name, step)`` list and wrap it in a ``Pipeline``.

        When ``trained_pipeline`` is given, steps are taken from its
        ``named_steps``; otherwise new steps are built from this object's
        settings. Step order: optional ``user_func``, ``basic_transform``,
        optional ``scaler``, ``dv``, optional ``feature_selection`` (fresh
        builds only), then the model step(s).

        With a trained pipeline, every step whose name ends in ``'_model'``
        is carried over, and ``final_model`` (if not ``None``) is appended
        under ``final_model_step_name``. Without one, a model is looked up
        from ``model_name`` and wrapped as the ``'final_model'`` step.
        """
        reuse_trained = trained_pipeline is not None

        def carried_over(step_name):
            # Pair a step name with the step of that name from the trained pipeline.
            return (step_name, trained_pipeline.named_steps[step_name])

        steps = []

        # Optional user-supplied function, always placed first.
        if self.user_input_func is not None:
            if reuse_trained:
                steps.append(carried_over('user_func'))
            elif self.transformation_pipeline is None:
                print('Including the user_input_func in the pipeline! Please remember to return X, and not modify the length or order of X at all.')
                print('Your function will be called as the first step of the pipeline at both training and prediction times.')
                user_step = FunctionTransformer(func=self.user_input_func, pass_y=False, validate=False)
                steps.append(('user_func', user_step))

        # Basic cleaning is included unconditionally.
        if reuse_trained:
            steps.append(carried_over('basic_transform'))
        else:
            cleaner = utils_data_cleaning.BasicDataCleaning(column_descriptions=self.column_descriptions)
            steps.append(('basic_transform', cleaner))

        if self.perform_feature_scaling is True:
            if reuse_trained:
                steps.append(carried_over('scaler'))
            else:
                steps.append(('scaler', utils_scaling.CustomSparseScaler(self.column_descriptions)))

        # Vectorization is included unconditionally.
        if reuse_trained:
            steps.append(carried_over('dv'))
        else:
            vectorizer = DataFrameVectorizer.DataFrameVectorizer(sparse=True, sort=True, column_descriptions=self.column_descriptions)
            steps.append(('dv', vectorizer))

        # Feature selection is only added on a fresh build. Per the original
        # note, a trained pipeline has already had this step combined with
        # 'dv' via dv.restrict, so it is deliberately left out there.
        if self.perform_feature_selection == True and not reuse_trained:
            selector = utils_feature_selection.FeatureSelectionTransformer(type_of_estimator=self.type_of_estimator, column_descriptions=self.column_descriptions, feature_selection_model='SelectFromModel')
            steps.append(('feature_selection', selector))

        if not reuse_trained:
            final_model = utils_models.get_model_from_name(model_name, training_params=self.training_params)
            wrapped_model = utils_model_training.FinalModelATC(model=final_model, type_of_estimator=self.type_of_estimator, ml_for_analytics=self.ml_for_analytics, name=self.name, scoring_method=self._scorer, feature_learning=feature_learning)
            steps.append(('final_model', wrapped_model))
            return Pipeline(steps)

        # Carry over every trained step whose name ends in '_model'. This
        # suffix match also picks up a step literally named 'final_model'.
        steps.extend(
            carried_over(entry[0])
            for entry in trained_pipeline.steps
            if entry[0][-6:] == '_model'
        )

        # A final model passed in explicitly (for example one tuned on its
        # own) is added back in after the carried-over model steps.
        if final_model is not None:
            steps.append((final_model_step_name, final_model))

        return Pipeline(steps)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_delegate_to_func():
    """Check which arguments FunctionTransformer hands to the wrapped function.

    Without ``pass_y`` the function should receive only ``X``; with
    ``pass_y=True`` it should receive ``X`` and ``y``. No keyword arguments
    are expected in either case.
    """
    # These stores record the positional and keyword arguments that the
    # wrapped function receives from the FunctionTransformer.
    seen_args = []
    seen_kwargs = {}
    X = np.arange(10).reshape((5, 2))

    def check_recorded_call(expected_args):
        # Compare what the wrapped function recorded with what was expected.
        assert_equal(
            seen_args,
            expected_args,
            'Incorrect positional arguments passed to func: {args}'.format(
                args=seen_args,
            ),
        )
        assert_equal(
            seen_kwargs,
            {},
            'Unexpected keyword arguments passed to func: {args}'.format(
                args=seen_kwargs,
            ),
        )

    x_only = FunctionTransformer(_make_func(seen_args, seen_kwargs))
    testing.assert_array_equal(
        x_only.transform(X),
        X,
        'transform should have returned X unchanged',
    )

    # The function should only have received X.
    check_recorded_call([X])

    # Empty both stores in place so the wrapped function keeps writing to
    # the same objects (slice deletion also works on Python 2 lists).
    del seen_args[:]
    seen_kwargs.clear()
    y = object()

    x_and_y = FunctionTransformer(
        _make_func(seen_args, seen_kwargs),
        pass_y=True,
    )
    testing.assert_array_equal(
        x_and_y.transform(X, y),
        X,
        'transform should have returned X unchanged',
    )

    # The function should have received X and y.
    check_recorded_call([X, y])