Python sklearn.pipeline module: make_pipeline() example source code

We collected the following 50 code examples from open-source Python projects to illustrate how to use sklearn.pipeline.make_pipeline().
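
Before the project examples, here is a minimal, self-contained sketch (not drawn from any of the projects below) of what make_pipeline does: it chains transformers and a final estimator into a Pipeline whose step names are generated automatically from the lowercased class names.

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X, y = load_iris(return_X_y=True)

# Scale the features, then fit a classifier. The auto-generated step names
# ("standardscaler", "logisticregression") are what grid-search parameters
# such as "logisticregression__C" refer to.
pipe = make_pipeline(StandardScaler(), LogisticRegression())
pipe.fit(X, y)
print(pipe.score(X, y))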

Project: time_series_modeling    Author: rheineke
def unscaled_pipelines():
    # Random forest parameters
    random_forest_kwargs = {
        'n_estimators': 10,
        'criterion': 'mse',
        'random_state': _RANDOM_STATE,
        'n_jobs': cpu_count(),
        'verbose': True,
    }
    # Gradient boosting parameters
    gradient_boost_kwargs = {
        'random_state': _RANDOM_STATE,
        'verbose': 1,
    }
    models = [
        DecisionTreeRegressor(max_depth=3, random_state=_RANDOM_STATE),
        # RandomForestRegressor(**random_forest_kwargs),
        # GradientBoostingRegressor(**gradient_boost_kwargs),
    ]
    pipelines = []
    for m in models:
        # Steps
        pipelines.append(make_pipeline(m))
    return pipelines
Project: decoding-brain-challenge-2016    Author: alexandrebarachant
def fit(self, X, y):
        """Fit TSclassifier.

        Parameters
        ----------
        X : ndarray, shape (n_trials, n_channels, n_channels)
            ndarray of SPD matrices.
        y : ndarray, shape (n_trials, 1)
            Labels corresponding to each trial.

        Returns
        -------
        self : TSclassifier instance
            The TSclassifier instance.
        """
        ts = TangentSpace(metric=self.metric, tsupdate=self.tsupdate)
        self._pipe = make_pipeline(ts, self.clf)
        self._pipe.fit(X, y)
        return self
Project: time_series_modeling    Author: rheineke
def sample_pipelines(pca_kernels=None, svr_kernels=None):
    """
    Pipelines that can't be fit in a reasonable amount of time on the whole
    dataset
    """
    # Model instances
    model_steps = []
    if pca_kernels is None:
        pca_kernels = ['poly', 'rbf', 'sigmoid', 'cosine']
    for pca_kernel in pca_kernels:
        model_steps.append([
            KernelPCA(n_components=2, kernel=pca_kernel),
            LinearRegression(),
        ])
    if svr_kernels is None:
        svr_kernels = ['poly', 'rbf', 'sigmoid']
    for svr_kernel in svr_kernels:
        model_steps.append(SVR(kernel=svr_kernel, verbose=True, cache_size=1000))

    # Pipelines
    pipelines = []
    for m in model_steps:
        # Steps
        common_steps = [
            StandardScaler(),
        ]
        model_steps = m if isinstance(m, list) else [m]
        steps = common_steps + model_steps
        pipelines.append(make_pipeline(*steps))
    return pipelines
Project: Optimus    Author: Yatoom
def build_pipeline(base_estimator, parameters):
        """
        Builds a pipeline where the base estimator is initialized with the given parameters. The `@preprocessor` parameter
        is a special parameter that determines which pre-processing steps to use.
        :param base_estimator: The base estimator of the pipeline
        :param parameters: The parameters for the base estimator, includes special parameters for the pipeline itself
        :return: The (pipeline with the) base estimator, initialized with given parameters
        """
        params = copy(parameters)
        preprocessors = Builder.extract_preprocessors(params)
        estimator = Builder.setup_estimator(base_estimator, params)

        if preprocessors is None:
            return estimator

        return make_pipeline(*preprocessors, estimator)
Project: document_classification    Author: scotthlee
def decompose(doc_vecs, n_features=100, normalize=False, flip=False):
    svd = TruncatedSVD(n_features)  
    if normalize:   
        if flip:
            lsa = make_pipeline(svd, Normalizer(copy=False))
            doc_mat = lsa.fit_transform(doc_vecs.transpose())
            doc_mat = doc_mat.transpose()
        else:
            lsa = make_pipeline(svd, Normalizer(copy=False))        
            doc_mat = lsa.fit_transform(doc_vecs)
        return doc_mat
    else:
        if flip:
            doc_mat = svd.fit_transform(doc_vecs.transpose())
            doc_mat = doc_mat.transpose()
        else:
            doc_mat = svd.fit_transform(doc_vecs)
        return doc_mat
Project: scikit-mdr    Author: EpistasisLab
def test_mdr_sklearn_pipeline():
    """Ensure that MDR can be used as a transformer in a scikit-learn pipeline"""
    features = np.array([[2,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    0],
                         [1,    1],
                         [1,    1]])

    classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
    clf = make_pipeline(MDR(), LogisticRegression())
    cv_scores = cross_val_score(clf, features, classes, cv=StratifiedKFold(n_splits=5, shuffle=True))
    assert np.mean(cv_scores) > 0.
Project: scikit-mdr    Author: EpistasisLab
def test_mdr_sklearn_pipeline_parallel():
    """Ensure that MDR can be used as a transformer in a parallelized scikit-learn pipeline"""
    features = np.array([[2,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    0],
                         [1,    1],
                         [1,    1]])

    classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
    clf = make_pipeline(MDR(), LogisticRegression())
    cv_scores = cross_val_score(clf, features, classes, cv=StratifiedKFold(n_splits=5, shuffle=True), n_jobs=-1)
    assert np.mean(cv_scores) > 0.
Project: decoding_challenge_cortana_2016_3rd    Author: kingjr
def fit(self, X, y):
        """Fit TSclassifier.

        Parameters
        ----------
        X : ndarray, shape (n_trials, n_channels, n_channels)
            ndarray of SPD matrices.
        y : ndarray, shape (n_trials, 1)
            Labels corresponding to each trial.

        Returns
        -------
        self : TSclassifier instance
            The TSclassifier instance.
        """
        ts = TangentSpace(metric=self.metric, tsupdate=self.tsupdate)
        self._pipe = make_pipeline(ts, self.clf)
        self._pipe.fit(X, y)
        return self
Project: bitcoin-forecast    Author: roksela
def __init__(self, model_type=DEFAULT_MODEL_TYPE):
        """
        Sets up the model and pipeline for learning and predicting.

        :param model_type: only 'SVR' model is supported for now
        """
        assert (model_type == 'SVR'), "Model '{}' is not supported. " \
                                      "We support only SVR for now.".format(model_type)
        self._model_type = model_type
        self._model_params = BTCForecast.DEFAULT_SVR_MODEL_PARAMS

        # set up SVR pipeline
        self._scaler = preprocessing.StandardScaler(copy=True, with_mean=True, with_std=True)
        self._model = SVR(kernel=self._model_params['kernel'],
                          epsilon=self._model_params['epsilon'],
                          C=self._model_params['c'],
                          gamma=self._model_params['gamma'])
        self._pipeline = make_pipeline(self._scaler, self._model)
        self.has_learned = False
Project: Parallel-SGD    Author: angadgill
def test_check_scoring_gridsearchcv():
    # test that check_scoring works on GridSearchCV and pipeline.
    # slightly redundant non-regression test.

    grid = GridSearchCV(LinearSVC(), param_grid={'C': [.1, 1]})
    scorer = check_scoring(grid, "f1")
    assert_true(isinstance(scorer, _PredictScorer))

    pipe = make_pipeline(LinearSVC())
    scorer = check_scoring(pipe, "f1")
    assert_true(isinstance(scorer, _PredictScorer))

    # check that cross_val_score definitely calls the scorer
    # and doesn't make any assumptions about the estimator apart from having
    # a fit method.
    scores = cross_val_score(EstimatorWithFit(), [[1], [2], [3]], [1, 0, 1],
                             scoring=DummyScorer())
    assert_array_equal(scores, 1)
Project: hh-page-classifier    Author: TeamHG-Memex
def LDAPageVctorizer(*,
                     n_topics: int,
                     min_df: int,
                     max_features: int,
                     max_iter: int,
                     ngram_range: Tuple[int, int],
                     vocabulary=None,
                     batch_size: int=4096,
                     verbose=1):
    vec = _vectorizer(min_df=min_df, max_features=max_features,
                      ngram_range=ngram_range, vocabulary=vocabulary)
    lda = LatentDirichletAllocation(
        learning_method='online',
        n_topics=n_topics,
        batch_size=batch_size,
        evaluate_every=2,
        verbose=verbose,
        max_iter=max_iter,
        n_jobs=1,
    )
    return make_pipeline(vec, lda)
Project: dask-ml    Author: dask
def test_in_pipeline():
    X, y = make_classification(n_samples=100, n_features=5, chunks=50)
    pipe = make_pipeline(DoNothingTransformer(), LogisticRegression())
    pipe.fit(X, y)
Project: dask-ml    Author: dask
def test_gridsearch():
    X, y = make_classification(n_samples=100, n_features=5, chunks=50)
    grid = {
        'logisticregression__C': [1000, 100, 10, 2]
    }
    pipe = make_pipeline(DoNothingTransformer(), LogisticRegression())
    search = dcv.GridSearchCV(pipe, grid, cv=3)
    search.fit(X, y)
Project: whereami    Author: kootenpv
def get_pipeline(clf=RandomForestClassifier(n_estimators=100, class_weight="balanced")):
    return make_pipeline(DictVectorizer(sparse=False), clf)
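
A hypothetical usage sketch (the access-point names, signal values, and labels below are invented for illustration and are not part of the whereami project): DictVectorizer(sparse=False) turns each feature dict into a dense row, encoding absent keys as 0.0, so the random forest can consume scans that mention different access points.

pipe = get_pipeline()

# Each sample is a dict mapping access point -> signal strength (made-up values)
X = [{"ap_home": -40, "ap_cafe": -80},
     {"ap_home": -85, "ap_cafe": -35},
     {"ap_home": -42},                    # absent keys are vectorized as 0.0
     {"ap_cafe": -38}]
y = ["home", "cafe", "home", "cafe"]

pipe.fit(X, y)
print(pipe.predict([{"ap_home": -45, "ap_cafe": -75}]))  # most likely ["home"]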
Project: nlp-lt    Author: minven
def truncated_svd(self):
        # https://github.com/chrisjmccormick/LSA_Classification/blob/master/inspect_LSA.py
        svd = TruncatedSVD(self.dimensions)   
        lsa = make_pipeline(svd, Normalizer(copy=False))
        X_reduced = lsa.fit_transform(self.bag_of_words_matrix)
        print(svd.components_[0])
        print(svd.explained_variance_ratio_) 
        print(svd.explained_variance_ratio_.sum())
Project: time_series_modeling    Author: rheineke
def scaled_pipelines():
    # Model parameters
    # RANSAC parameters
    # 500 max trials takes 90s
    ransac_kwargs = {
        'max_trials': 1000,
        'min_samples': 5000,
        'loss': 'absolute_loss',
        'residual_threshold': 2.0,
        'random_state': _RANDOM_STATE,
    }
    # Ridge CV parameters
    alphas = [.01, .1, 1, 10]
    # Model instances
    model_steps = [
        LinearRegression(),
        # [PolynomialFeatures(degree=2), LinearRegression()],
        # [PolynomialFeatures(degree=3), LinearRegression()],
        # RANSACRegressor(base_estimator=LinearRegression(), **ransac_kwargs),
        # RANSACRegressor with polynomial regression?
        # RidgeCV(alphas=alphas),
        # LassoCV(),  # Alphas set automatically by default
        # ElasticNetCV(l1_ratio=0.5),  # Same as default
        # [PolynomialFeatures(degree=2), ElasticNetCV(l1_ratio=0.5)],
        # SGDRegressor(),
    ]
    # Pipelines
    pipelines = []
    for m in model_steps:
        # Steps
        common_steps = [
            StandardScaler(),
            PCA(**_PCA_KWARGS)
        ]
        model_steps = m if isinstance(m, list) else [m]
        steps = common_steps + model_steps
        pipelines.append(make_pipeline(*steps))
    return pipelines
Project: sktransformers    Author: TomAugspurger
def fit():
    X, y = generate()
    dX = dd.from_pandas(X, npartitions=10)
    y = dd.from_pandas(y, npartitions=10)

    # Preprocessing-only pipeline; the SGD regressor is fitted incrementally below
    pre_pipe = make_pipeline(
        CategoricalEncoder(),
        DummyEncoder(),
        Imputer(),
    )
    clf = SGDRegressor()

    pipe = make_pipeline(
        SelectFromModel(clf),
        GradientBoostingRegressor(),
    )
    X_ = pre_pipe.fit_transform(dX)

    # Train the linear model one dask partition at a time via partial_fit
    for i in range(X_.npartitions):
        for j in range(5):
            print(i, j)
            X_sub = X_.get_partition(i).compute()
            y_sub = y.get_partition(i).compute()
            clf.partial_fit(X_sub, y_sub)

    # Use the incrementally fitted model to drive feature selection
    sfm = SelectFromModel(clf, prefit=True)
    return pipe, clf, sfm
Project: postlearn    Author: TomAugspurger
def regression_pipeline(regression_model):
    return make_pipeline(StandardScaler(), regression_model)
Project: false-friends    Author: pln-fing-udelar
def build_classifier(base_clf=svm.SVC()):
    # The imputer is for "use_taxonomy", and shouldn't affect anything if it's False.
    # TODO: should also try with other imputer strategies
    return pipeline.make_pipeline(preprocessing.Imputer(strategy='most_frequent'), preprocessing.StandardScaler(),
                                  base_clf)


# noinspection PyPep8Naming
Project: TextStageProcessor    Author: mhyhre
def make_ward_clustering(self, short_filenames, input_texts):

        output_dir = self.output_dir + 'WARD/'
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        if self.need_tf_idf:
            self.signals.PrintInfo.emit("Computing TF-IDF...")
            idf_filename = output_dir + 'tf_idf.csv'
            msg = self.calculate_and_write_tf_idf(idf_filename, input_texts)
            self.signals.PrintInfo.emit(msg)

        vectorizer = CountVectorizer()
        X = vectorizer.fit_transform(input_texts)

        svd = TruncatedSVD(2)
        normalizer = Normalizer(copy=False)
        lsa = make_pipeline(svd, normalizer)
        X = lsa.fit_transform(X)

        ward = AgglomerativeClustering(n_clusters=self.ward_clusters_count, linkage='ward')
        predict_result = ward.fit_predict(X)

        self.signals.PrintInfo.emit('\nBreakdown by clusters:\n')

        clasters_output = ''
        for claster_index in range(max(predict_result) + 1):
            clasters_output += ('Cluster ' + str(claster_index) + ':\n')
            for predict, document in zip(predict_result, short_filenames):
                if predict == claster_index:
                    clasters_output += ('  ' + str(document) + '\n')
            clasters_output += '\n'
        self.signals.PrintInfo.emit(clasters_output)
        self.signals.PrintInfo.emit('Saved to: ' + str(output_dir + 'clusters.txt'))
        writeStringToFile(clasters_output, output_dir + 'clusters.txt')

        self.draw_clusters_plot(X, predict_result, short_filenames)
Project: TextStageProcessor    Author: mhyhre
def make_spectral_clustering(self, short_filenames, input_texts):

        output_dir = self.output_dir + 'spectral/'
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        if self.need_tf_idf:
            self.signals.PrintInfo.emit("Computing TF-IDF...")
            idf_filename = output_dir + 'tf_idf.csv'
            msg = self.calculate_and_write_tf_idf(idf_filename, input_texts)
            self.signals.PrintInfo.emit(msg)

        vectorizer = CountVectorizer()
        X = vectorizer.fit_transform(input_texts)

        svd = TruncatedSVD(2)
        normalizer = Normalizer(copy=False)
        lsa = make_pipeline(svd, normalizer)
        X = lsa.fit_transform(X)

        spectral = SpectralClustering(n_clusters=self.spectral_clusters_count)
        predict_result = spectral.fit_predict(X)
        self.signals.PrintInfo.emit('\nBreakdown by clusters:\n')

        clasters_output = ''
        for claster_index in range(max(predict_result) + 1):
            clasters_output += ('Cluster ' + str(claster_index) + ':\n')
            for predict, document in zip(predict_result, short_filenames):
                if predict == claster_index:
                    clasters_output += ('  ' + str(document) + '\n')
            clasters_output += '\n'
        self.signals.PrintInfo.emit(clasters_output)
        self.signals.PrintInfo.emit('Saved to: ' + str(output_dir + 'clusters.txt'))
        writeStringToFile(clasters_output, output_dir + 'clusters.txt')

        self.draw_clusters_plot(X, predict_result, short_filenames)

    # aa = Affinity Propagation
Project: TextStageProcessor    Author: mhyhre
def make_aa_clustering(self, short_filenames, input_texts):

        output_dir = self.output_dir + 'affinity_propagation/'
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        if self.need_tf_idf:
            self.signals.PrintInfo.emit("Computing TF-IDF...")
            idf_filename = output_dir + 'tf_idf.csv'
            msg = self.calculate_and_write_tf_idf(idf_filename, input_texts)
            self.signals.PrintInfo.emit(msg)

        vectorizer = CountVectorizer()
        X = vectorizer.fit_transform(input_texts)

        svd = TruncatedSVD(2)
        normalizer = Normalizer(copy=False)
        lsa = make_pipeline(svd, normalizer)
        X = lsa.fit_transform(X)

        aa_clusterizator = AffinityPropagation(damping=self.aa_damping,
                                               max_iter=self.aa_max_iter,
                                               convergence_iter=self.aa_no_change_stop)

        predict_result = aa_clusterizator.fit_predict(X)
        self.signals.PrintInfo.emit('\nBreakdown by clusters:\n')

        clasters_output = ''
        for claster_index in range(max(predict_result) + 1):
            clasters_output += ('Cluster ' + str(claster_index) + ':\n')
            for predict, document in zip(predict_result, short_filenames):
                if predict == claster_index:
                    clasters_output += ('  ' + str(document) + '\n')
            clasters_output += '\n'
        self.signals.PrintInfo.emit(clasters_output)
        self.signals.PrintInfo.emit('Saved to: ' + str(output_dir + 'clusters.txt'))
        writeStringToFile(clasters_output, output_dir + 'clusters.txt')

        self.draw_clusters_plot(X, predict_result, short_filenames)
Project: TextStageProcessor    Author: mhyhre
def make_birch_clustering(self, short_filenames, input_texts):

        output_dir = self.output_dir + 'birch/'
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        if self.need_tf_idf:
            self.signals.PrintInfo.emit("Computing TF-IDF...")
            idf_filename = output_dir + 'tf_idf.csv'
            msg = self.calculate_and_write_tf_idf(idf_filename, input_texts)
            self.signals.PrintInfo.emit(msg)

        vectorizer = CountVectorizer()
        X = vectorizer.fit_transform(input_texts)

        svd = TruncatedSVD(2)
        normalizer = Normalizer(copy=False)
        lsa = make_pipeline(svd, normalizer)
        X = lsa.fit_transform(X)

        birch = Birch(threshold=self.birch_threshold,
                      branching_factor=self.birch_branching_factor,
                      n_clusters=self.birch_clusters_count)

        predict_result = birch.fit_predict(X)
        self.signals.PrintInfo.emit('\nBreakdown by clusters:\n')

        clasters_output = ''
        for claster_index in range(max(predict_result) + 1):
            clasters_output += ('Cluster ' + str(claster_index) + ':\n')
            for predict, document in zip(predict_result, short_filenames):
                if predict == claster_index:
                    clasters_output += ('  ' + str(document) + '\n')
            clasters_output += '\n'
        self.signals.PrintInfo.emit(clasters_output)
        self.signals.PrintInfo.emit('Saved to: ' + str(output_dir + 'clusters.txt'))
        writeStringToFile(clasters_output, output_dir + 'clusters.txt')

        self.draw_clusters_plot(X, predict_result, short_filenames)
Project: mlprojects-py    Author: srinathperera
def doPCA(X, output_columns_count):
    # Reduce dimensionality with truncated SVD (LSA) and L2-normalize the result
    svd = TruncatedSVD(output_columns_count)
    normalizer = Normalizer(copy=False)
    lsa = make_pipeline(svd, normalizer)

    X = lsa.fit_transform(X)
    return X
Project: PredictiveServer    Author: KeyboardNerd
def test_train():
        # define input streams
        names = ['v', 'p', 't', 'w', 'a']
        # define first transformation
        units = ["knot","in_Hg","celsius","force_pound","degree"]
        tounits = ["m/s", "pascal", "kelvin", "newton", "radian"]
        tuple_units = []
        for i, unit in enumerate(units):
            tuple_units.append((unit, tounits[i]))
        s1 = UnitTransformer(tuple_units)
        # second layer of transformation
        constants = {"s": 61.0, "R": 286.9}
        labels = ["2*w/(v**2*(p/R/t)*s)"]
        s2 = FormulaTransformer(labels, names, constants)
        # sink (any sink transformation could be used to predict);
        # it has no fit_transform rule and can only predict
        features = ["a"]
        s3 = make_pipeline(FormulaTransformer(features, names), LinearRegression())
        # train the pipeline on the sample data
        with (open("data/training.csv")) as f:
            df = pd.read_csv(f, names=names, header=0)
            # awkward transformation from DataFrame to numpy matrix;
            # sklearn-pandas could simplify this
            ndarray = df.as_matrix(names)
            rawX = s1.fit_transform(ndarray)
            y = s2.fit_transform(rawX)
            X = rawX
            s3.fit(X, y)
            y_ = s3.predict(X)
            print(X.shape, y_.shape)
            #plt.scatter(FormulaTransformer(features, names).fit_transform(X), y_)
            #plt.show()
        # wrap the process as StreamPipeline for learning machine
        sp = StreamPipeline(names, s3)
        sp.predict(v=1.0, p=2.0, t=3.0, w=4.0, a=5.0)
Project: stegasawus    Author: rokkuran
def _get_pipeline(self, name):
        return make_pipeline(self.pipeline, classifiers[name])
Project: DocumentClassification    Author: bahmanh
def featuresByLSA(features,ncomponents=100):
    svd = TruncatedSVD(n_components=ncomponents)
    normalizer =  Normalizer(copy=False)
    lsa = make_pipeline(svd, normalizer)
    dtm_lsa = lsa.fit_transform(features)
    return dtm_lsa
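
A small illustrative call, with a made-up three-document corpus (not from the DocumentClassification project): the helper projects a TF-IDF document-term matrix onto the first ncomponents latent-semantic dimensions and L2-normalizes each row.

from sklearn.feature_extraction.text import TfidfVectorizer

docs = ["the cat sat on the mat",
        "dogs chase cats",
        "latent semantic analysis of text"]
dtm = TfidfVectorizer().fit_transform(docs)   # sparse (3, n_terms) matrix

dtm_lsa = featuresByLSA(dtm, ncomponents=2)   # dense (3, 2) array
print(dtm_lsa.shape)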
Project: yellowbrick    Author: DistrictDataLabs
def fit_quadratic(X, y):
    """
    Uses OLS with polynomial features of degree 2.
    """
    model = make_pipeline(
        PolynomialFeatures(2), linear_model.LinearRegression()
    )
    model.fit(X, y)
    return model
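
An illustrative sanity check on synthetic data (the points below are generated here, not taken from yellowbrick): fitting the degree-2 pipeline to samples from an exact quadratic should recover it almost perfectly.

import numpy as np

X = np.linspace(-3, 3, 50).reshape(-1, 1)
y = 2 * X.ravel() ** 2 - X.ravel() + 1    # exact quadratic, no noise

model = fit_quadratic(X, y)
print(model.predict([[4.0]]))             # approximately 2*16 - 4 + 1 = 29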
Project: scikit-rebate    Author: EpistasisLab
def test_relieff_pipeline():
    """Ensure that ReliefF works in a sklearn pipeline when it is parallelized"""
    np.random.seed(49082)

    clf = make_pipeline(ReliefF(n_features_to_select=2, n_neighbors=100, n_jobs=-1),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features, labels, cv=3)) > 0.7
Project: scikit-rebate    Author: EpistasisLab
def test_relieff_pipeline_parallel():
    """Ensure that ReliefF works in a sklearn pipeline where cross_val_score is parallelized"""
    np.random.seed(49082)

    clf = make_pipeline(ReliefF(n_features_to_select=2, n_neighbors=100),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features, labels, cv=3, n_jobs=-1)) > 0.7
Project: scikit-rebate    Author: EpistasisLab
def test_relieffpercent_pipeline():
    """Ensure that ReliefF with % neighbors works in a sklearn pipeline when it is parallelized"""
    np.random.seed(49082)

    clf = make_pipeline(ReliefF(n_features_to_select=2, n_neighbors=0.1, n_jobs=-1),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features, labels, cv=3)) > 0.7
Project: scikit-rebate    Author: EpistasisLab
def test_relieffpercent_pipeline_parallel():
    """Ensure that ReliefF with % neighbors works in a sklearn pipeline where cross_val_score is parallelized"""
    np.random.seed(49082)

    clf = make_pipeline(ReliefF(n_features_to_select=2, n_neighbors=0.1),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features, labels, cv=3, n_jobs=-1)) > 0.7
Project: scikit-rebate    Author: EpistasisLab
def test_surf_pipeline():
    """Ensure that SURF works in a sklearn pipeline when it is parallelized"""
    np.random.seed(240932)

    clf = make_pipeline(SURF(n_features_to_select=2, n_jobs=-1),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features, labels, cv=3)) > 0.7
Project: scikit-rebate    Author: EpistasisLab
def test_surfstar_pipeline():
    """Ensure that SURF* works in a sklearn pipeline when it is parallelized"""
    np.random.seed(9238745)

    clf = make_pipeline(SURFstar(n_features_to_select=2, n_jobs=-1),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features, labels, cv=3)) > 0.7
Project: scikit-rebate    Author: EpistasisLab
def test_surfstar_pipeline_parallel():
    """Ensure that SURF* works in a sklearn pipeline where cross_val_score is parallelized"""
    np.random.seed(9238745)

    clf = make_pipeline(SURFstar(n_features_to_select=2),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features, labels, cv=3, n_jobs=-1)) > 0.7
Project: scikit-rebate    Author: EpistasisLab
def test_multisurf_pipeline():
    """Ensure that MultiSURF works in a sklearn pipeline when it is parallelized"""
    np.random.seed(320931)

    clf = make_pipeline(MultiSURF(n_features_to_select=2, n_jobs=-1),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features, labels, cv=3)) > 0.7
Project: scikit-rebate    Author: EpistasisLab
def test_multisurf_pipeline_parallel():
    """Ensure that MultiSURF works in a sklearn pipeline where cross_val_score is parallelized"""
    np.random.seed(320931)

    clf = make_pipeline(MultiSURF(n_features_to_select=2),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features, labels, cv=3, n_jobs=-1)) > 0.7
Project: scikit-rebate    Author: EpistasisLab
def test_multisurfstar_pipeline():
    """Ensure that MultiSURF* works in a sklearn pipeline when it is parallelized"""
    np.random.seed(320931)

    clf = make_pipeline(MultiSURFstar(n_features_to_select=2, n_jobs=-1),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features, labels, cv=3)) > 0.7
Project: scikit-rebate    Author: EpistasisLab
def test_relieff_pipeline_cont_endpoint():
    """Ensure that ReliefF works in a sklearn pipeline with continuous endpoint data"""
    np.random.seed(49082)

    clf = make_pipeline(ReliefF(n_features_to_select=2, n_neighbors=100, n_jobs=-1),
                        RandomForestRegressor(n_estimators=100, n_jobs=-1))

    assert abs(np.mean(cross_val_score(clf, features_cont_endpoint, labels_cont_endpoint, cv=3))) < 0.5
Project: scikit-rebate    Author: EpistasisLab
def test_relieffpercent_pipeline_cont_endpoint():
    """Ensure that ReliefF with % neighbors works in a sklearn pipeline with continuous endpoint data"""
    np.random.seed(49082)

    clf = make_pipeline(ReliefF(n_features_to_select=2, n_neighbors=0.1, n_jobs=-1),
                        RandomForestRegressor(n_estimators=100, n_jobs=-1))

    assert abs(np.mean(cross_val_score(clf, features_cont_endpoint, labels_cont_endpoint, cv=3))) < 0.5
Project: scikit-rebate    Author: EpistasisLab
def test_surf_pipeline_cont_endpoint():
    """Ensure that SURF works in a sklearn pipeline with continuous endpoint data"""
    np.random.seed(240932)

    clf = make_pipeline(SURF(n_features_to_select=2, n_jobs=-1),
                        RandomForestRegressor(n_estimators=100, n_jobs=-1))

    assert abs(np.mean(cross_val_score(clf, features_cont_endpoint, labels_cont_endpoint, cv=3))) < 0.5
Project: scikit-rebate    Author: EpistasisLab
def test_surfstar_pipeline_cont_endpoint():
    """Ensure that SURF* works in a sklearn pipeline with continuous endpoint data"""
    np.random.seed(9238745)

    clf = make_pipeline(SURFstar(n_features_to_select=2, n_jobs=-1),
                        RandomForestRegressor(n_estimators=100, n_jobs=-1))

    assert abs(np.mean(cross_val_score(clf, features_cont_endpoint, labels_cont_endpoint, cv=3))) < 0.5
Project: scikit-rebate    Author: EpistasisLab
def test_multisurf_pipeline_cont_endpoint():
    """Ensure that MultiSURF works in a sklearn pipeline with continuous endpoint data"""
    np.random.seed(320931)

    clf = make_pipeline(MultiSURF(n_features_to_select=2, n_jobs=-1),
                        RandomForestRegressor(n_estimators=100, n_jobs=-1))

    assert abs(np.mean(cross_val_score(clf, features_cont_endpoint, labels_cont_endpoint, cv=3))) < 0.5
Project: scikit-rebate    Author: EpistasisLab
def test_relieff_pipeline_mixed_attributes():
    """Ensure that ReliefF works in a sklearn pipeline with mixed attributes"""
    np.random.seed(49082)

    clf = make_pipeline(ReliefF(n_features_to_select=2, n_neighbors=100, n_jobs=-1),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features_mixed_attributes, labels_mixed_attributes, cv=3)) > 0.7
Project: scikit-rebate    Author: EpistasisLab
def test_relieffpercent_pipeline_mixed_attributes():
    """Ensure that ReliefF with % neighbors works in a sklearn pipeline with mixed attributes"""
    np.random.seed(49082)

    clf = make_pipeline(ReliefF(n_features_to_select=2, n_neighbors=0.1, n_jobs=-1),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features_mixed_attributes, labels_mixed_attributes, cv=3)) > 0.7
Project: scikit-rebate    Author: EpistasisLab
def test_surf_pipeline_mixed_attributes():
    """Ensure that SURF works in a sklearn pipeline with mixed attributes"""
    np.random.seed(240932)

    clf = make_pipeline(SURF(n_features_to_select=2, n_jobs=-1),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features_mixed_attributes, labels_mixed_attributes, cv=3)) > 0.7
Project: scikit-rebate    Author: EpistasisLab
def test_surfstar_pipeline_mixed_attributes():
    """Ensure that SURF* works in a sklearn pipeline with mixed attributes"""
    np.random.seed(9238745)

    clf = make_pipeline(SURFstar(n_features_to_select=2, n_jobs=-1),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features_mixed_attributes, labels_mixed_attributes, cv=3)) > 0.7
Project: scikit-rebate    Author: EpistasisLab
def test_multisurf_pipeline_mixed_attributes():
    """Ensure that MultiSURF works in a sklearn pipeline with mixed attributes"""
    np.random.seed(320931)

    clf = make_pipeline(MultiSURF(n_features_to_select=2, n_jobs=-1),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features_mixed_attributes, labels_mixed_attributes, cv=3)) > 0.7
Project: scikit-rebate    Author: EpistasisLab
def test_relieff_pipeline_missing_values():
    """Ensure that ReliefF works in a sklearn pipeline with missing values"""
    np.random.seed(49082)

    clf = make_pipeline(ReliefF(n_features_to_select=2, n_neighbors=100, n_jobs=-1),
                        Imputer(),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features_missing_values, labels_missing_values, cv=3)) > 0.7
Project: scikit-rebate    Author: EpistasisLab
def test_relieffpercent_pipeline_missing_values():
    """Ensure that ReliefF with % neighbors works in a sklearn pipeline with missing values"""
    np.random.seed(49082)

    clf = make_pipeline(ReliefF(n_features_to_select=2, n_neighbors=0.1, n_jobs=-1),
                        Imputer(),
                        RandomForestClassifier(n_estimators=100, n_jobs=-1))

    assert np.mean(cross_val_score(clf, features_missing_values, labels_missing_values, cv=3)) > 0.7