Python sklearn.base module: ClassifierMixin() example source code

The following 15 code examples, extracted from open-source Python projects, illustrate how to use sklearn.base.ClassifierMixin().
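
ClassifierMixin is the marker base class that flags a scikit-learn estimator as a classifier and contributes a default score method based on accuracy; testing isinstance(est, ClassifierMixin) is the standard way to tell classifiers apart from regressors and transformers, which is exactly what most of the snippets below do. As a quick orientation, here is a minimal sketch (ours, not taken from any of the projects below) of a custom estimator that opts into the mixin:

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin

class MajorityClassifier(BaseEstimator, ClassifierMixin):
    """Toy classifier that always predicts the most frequent training label."""

    def fit(self, X, y):
        classes, counts = np.unique(y, return_counts=True)
        self.classes_ = classes
        self.majority_ = classes[np.argmax(counts)]
        return self

    def predict(self, X):
        return np.full(len(X), self.majority_)

clf = MajorityClassifier().fit([[0], [1], [2]], [1, 1, 0])
print(isinstance(clf, ClassifierMixin))  # True -- the check used throughout below
print(clf.score([[0], [1]], [1, 0]))     # 0.5, ClassifierMixin's accuracy-based score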

Project: stacked_generalization | Author: fukatani
def _get_child_predict(self, clf, X, index=None):
        if self.stack_by_proba and hasattr(clf, 'predict_proba'):
            if self.save_stage0 and index is not None:
                proba = util.saving_predict_proba(clf, X, index)
            else:
                proba = clf.predict_proba(X)
            return proba[:, 1:]
        elif hasattr(clf, 'predict'):
            predict_result = clf.predict(X)
            if isinstance(clf, ClassifierMixin):
                lb = LabelBinarizer()
                return lb.fit_transform(predict_result)
            else:
                return predict_result.reshape((predict_result.size, 1))
        else:
            return clf.fit_transform(X)
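
In the ClassifierMixin branch above, LabelBinarizer one-hot encodes hard class predictions so they can be stacked as feature columns alongside probability outputs. A standalone illustration (not part of the stacked_generalization source):

from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer()
print(lb.fit_transform([0, 1, 2, 1]))
# [[1 0 0]
#  [0 1 0]
#  [0 0 1]
#  [0 1 0]]
# Note: with only two classes, LabelBinarizer emits a single 0/1 column.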
Project: scikit-garden | Author: scikit-garden
def test_tree_identical_labels():
    rng = np.random.RandomState(0)
    for est in estimators:
        X = rng.randn(100, 5)
        y = np.ones(100)
        c_est = clone(est)
        c_est.set_params(min_samples_split=2, max_depth=None)
        c_est.fit(X, y)
        assert_equal(c_est.tree_.n_node_samples, [100])
        if isinstance(c_est, ClassifierMixin):
            assert_equal(c_est.tree_.value, [[[100]]])
        else:
            assert_equal(c_est.tree_.value, [[[1.0]]])

        X = np.reshape(np.linspace(0.0, 1.0, 100), (-1, 1))
        y = np.array([0.0]*50 + [1.0]*50)
        c_est.fit(X, y)
        leaf_ids = c_est.tree_.children_left == -1
        assert_true(np.any(c_est.tree_.n_node_samples[leaf_ids] > 2))
Project: scikit-garden | Author: scikit-garden
def test_tree_identical_labels():
    rng = np.random.RandomState(0)
    for ensemble in ensembles:
        X = rng.randn(100, 5)
        y = np.ones(100)
        ensemble.fit(X, y)
        for est in ensemble.estimators_:
            assert_equal(est.tree_.n_node_samples, [100])

            if isinstance(est, ClassifierMixin):
                assert_equal(est.tree_.value, [[[100]]])
            else:
                assert_equal(est.tree_.value, [[[1.0]]])

        X = np.reshape(np.linspace(0.0, 1.0, 100), (-1, 1))
        y = np.array([0.0]*50 + [1.0]*50)
        ensemble.fit(X, y)
        for est in ensemble.estimators_:
            leaf_ids = est.tree_.children_left == -1
            assert_true(np.any(est.tree_.n_node_samples[leaf_ids] > 2))
Project: fake_news | Author: bmassman
def test_probabilities(model: ClassifierMixin, X: np.ndarray, y: pd.Series,
                       bins: int = 10, threshold: float = 0.5):
    """Print confusion matrix based on class probability."""
    probs = model.predict_proba(X)[:, 1]
    print('\tProbabilities')
    df = pd.DataFrame({'prob': probs, 'label': y})
    step = 1 / bins
    cut_labels = [round(step * f, 2) for f in range(bins)]
    by_prob = (df.groupby(pd.cut(df['prob'], bins, labels=cut_labels))
                 .agg(['sum', 'count'])['label'])
    print('\t\tprobs\t1\t0\tacc')
    for index, row in by_prob.iloc[::-1].iterrows():
        ones = row['sum']
        if math.isnan(ones):
            ones = 0
        else:
            ones = int(ones)
        count = row['count']
        zeros = int(count) - ones
        if count > 0:
            acc = zeros / count if index < threshold else ones / count
        else:
            acc = 0.0
        print(f'\t\t{index}\t{ones}\t{zeros}\t{acc:.3f}')
Project: stacked_generalization | Author: fukatani
def _get_blend_init(self, y_train, clf):
        if self.stack_by_proba and hasattr(clf, 'predict_proba'):
            width = self.n_classes_ - 1
        elif hasattr(clf, 'predict') and isinstance(clf, ClassifierMixin):
            width = self.n_classes_
        elif hasattr(clf, 'predict'):
            width = 1
        elif hasattr(clf, 'n_components'):
            width = clf.n_components
        else:
            raise Exception('Unimplemented for {0}'.format(type(clf)))
        return np.zeros((y_train.size, width))
Project: decoding-brain-challenge-2016 | Author: alexandrebarachant
def __init__(self, metric='riemann', tsupdate=False,
                 clf=LogisticRegression()):
        """Init."""
        self.metric = metric
        self.tsupdate = tsupdate
        self.clf = clf

        if not isinstance(clf, ClassifierMixin):
            raise TypeError('clf must be a ClassifierMixin')

        TangentSpace(metric=self.metric, tsupdate=self.tsupdate)
Project: scikit-garden | Author: scikit-garden
def predict(self, X, check_input=True, return_std=False):
        """Predict class or regression value for X.

        For a classification model, the predicted class for each sample in X is
        returned. For a regression model, the predicted value based on X is
        returned.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples. Internally, it will be converted to
            ``dtype=np.float32`` and if a sparse matrix is provided
            to a sparse ``csr_matrix``.

        check_input : boolean, (default=True)
            Allows bypassing several input checks.
            Don't use this parameter unless you know what you're doing.

        return_std : boolean, (default=False)
            Whether or not to return the standard deviation.

        Returns
        -------
        y : array of shape = [n_samples] or [n_samples, n_outputs]
            The predicted classes, or the predicted values.
        """
        check_is_fitted(self, 'tree_')
        X = self._validate_X_predict(X, check_input)

        # Classification
        if isinstance(self, ClassifierMixin):
            return self.classes_[self.predict_proba(X).argmax(axis=1)]

        # Regression
        else:
            mean_and_std = self.tree_.predict(
                X, return_std=return_std, is_regression=True)
            if return_std:
                return mean_and_std
            return mean_and_std[0]
Project: scikit-garden | Author: scikit-garden
def test_numerical_stability():
    X = np.array([
        [152.08097839, 140.40744019, 129.75102234, 159.90493774],
        [142.50700378, 135.81935120, 117.82884979, 162.75781250],
        [127.28772736, 140.40744019, 129.75102234, 159.90493774],
        [132.37025452, 143.71923828, 138.35694885, 157.84558105],
        [103.10237122, 143.71928406, 138.35696411, 157.84559631],
        [127.71276855, 143.71923828, 138.35694885, 157.84558105],
        [120.91514587, 140.40744019, 129.75102234, 159.90493774]])

    y = np.array(
        [1., 0.70209277, 0.53896582, 0., 0.90914464, 0.48026916, 0.49622521])

    with np.errstate(all="raise"):
        for est in estimators:
            new_est = clone(est)
            if isinstance(est, ClassifierMixin):
                y_curr = np.round(y)
            else:
                y_curr = y
            new_est.fit(X, y_curr)
            new_est.fit(X, -y_curr)
            new_est.fit(-X, y_curr)
            new_est.fit(-X, -y_curr)
            new_est.partial_fit(X, y_curr)
            new_est.partial_fit(-X, y_curr)
Project: fake_news | Author: bmassman
def variable_importance(estimator: ClassifierMixin) -> np.ndarray:
    """Return variable importances for estimator."""
    if hasattr(estimator, 'coef_'):
        return estimator.coef_[0]
    if hasattr(estimator, 'feature_importances_'):
        return estimator.feature_importances_
Project: SVM-CNN | Author: dlmacedo
def score(self, X, y):
        """Force use of accuracy score since we don't inherit
           from ClassifierMixin"""

        from sklearn.metrics import accuracy_score
        return accuracy_score(y, self.predict(X))
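
The override above mimics what estimators inheriting from ClassifierMixin get for free: the mixin's score method is exactly mean accuracy. A quick check against a stock classifier (illustration, not project code):

from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

X, y = [[0], [1], [2], [3]], [0, 0, 1, 1]
clf = DecisionTreeClassifier().fit(X, y)
assert clf.score(X, y) == accuracy_score(y, clf.predict(X))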
Project: decoding_challenge_cortana_2016_3rd | Author: kingjr
def __init__(self, metric='riemann', tsupdate=False,
                 clf=LogisticRegression()):
        """Init."""
        self.metric = metric
        self.tsupdate = tsupdate
        self.clf = clf

        if not isinstance(clf, ClassifierMixin):
            raise TypeError('clf must be a ClassifierMixin')

        TangentSpace(metric=self.metric, tsupdate=self.tsupdate)
Project: ibex | Author: atavory
def _generate_bases_test(est, pd_est):
    def test(self):
        self.assertTrue(isinstance(pd_est, FrameMixin), pd_est)
        self.assertFalse(isinstance(est, FrameMixin))
        self.assertTrue(isinstance(pd_est, base.BaseEstimator))
        try:
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.DensityMixin,
                base.MetaEstimatorMixin,
                base.RegressorMixin]
        except AttributeError:
            # base.DensityMixin is missing from older sklearn releases.
            if _sklearn_ver > 17:
                raise
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.MetaEstimatorMixin,
                base.RegressorMixin]
        for mixin in mixins:
            self.assertEqual(
                isinstance(pd_est, mixin),
                isinstance(est, mixin),
                mixin)

    return test
Project: elm | Author: ContinuumIO
def get_params_for_est(estimator, name):
    '''Choose initialization parameters for an estimator for auto-testing'''
    is_classifier = ClassifierMixin in estimator.__mro__
    is_cluster = ClusterMixin in estimator.__mro__
    is_ensemble = BaseEnsemble in estimator.__mro__
    uses_counts = any(c in name for c in USES_COUNTS)
    as_1d = name in REQUIRES_1D
    args, params, _ = get_args_kwargs_defaults(estimator.__init__)
    est_keys = set(('estimator', 'base_estimator', 'estimators'))
    est_keys = (set(params) | set(args)) & est_keys
    if is_classifier:
        score_func = feat.f_classif
    else:
        score_func = feat.f_regression
    for key in est_keys:
        if name == 'SelectFromModel':
            params[key] = sklearn.linear_model.LassoCV()
        elif is_classifier:
            params[key] = sklearn.tree.DecisionTreeClassifier()
        else:
            params[key] = sklearn.tree.DecisionTreeRegressor()
        if key == 'estimators':
            params[key] = [(str(_), clone(params[key])) for _ in range(10)]
    kw = dict(is_classifier=is_classifier, is_cluster=is_cluster,
              is_ensemble=is_ensemble, uses_counts=uses_counts)
    if 'score_func' in params:
        params['score_func'] = score_func
    X, y = make_X_y(**kw)
    return X, y, params, kw
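
Unlike most of the other snippets, this helper receives the estimator class itself rather than an instance, so classifier-ness is tested via membership in __mro__ instead of with isinstance. Both checks agree, as this small illustration (not from the elm source) shows:

from sklearn.base import ClassifierMixin
from sklearn.tree import DecisionTreeClassifier

print(ClassifierMixin in DecisionTreeClassifier.__mro__)      # True, class-level check
print(isinstance(DecisionTreeClassifier(), ClassifierMixin))  # True, instance-level check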
Project: fake_news | Author: bmassman
def train_model(data: ArticleDB,
                learner: Type[ClassifierMixin],
                param_grid: dict, *,
                test_articles: Optional[ArticleDB] = None,
                most_important_features: bool = False,
                examples: bool = False,
                ground_truth_as_test: bool = False,
                probabilities: bool = False) -> ClassifierMixin:
    """Trains classifier learner on data and reports test set accuracy."""
    if ground_truth_as_test and test_articles:
        raise ValueError('ground_truth_as_test must be False if '
                         'test_articles are supplied')
    if callable(learner):
        learner = learner()
    X, y = data.X, data.y
    if ground_truth_as_test or test_articles:
        X_train = X
        y_train = y
    if ground_truth_as_test:
        X_test = data.ground_truth_X
        y_test = data.ground_truth_y
        df_test = data.ground_truth
    elif test_articles:
        X_test = test_articles.X
        y_test = test_articles.y
        df_test = test_articles.df
    else:
        X_train, X_test, y_train, y_test, df_train, df_test = (
            train_test_split(X, y, data.df, test_size=0.2))
    model = GridSearchCV(learner, param_grid).fit(X_train, y_train)
    best_model = model.best_estimator_
    preds = best_model.predict(X_test)
    conf_mat = confusion_matrix(y_test, preds, labels=[1, 0])
    accuracy = np.mean(y_test == preds)
    learner_repr = repr(learner)[:repr(learner).find('(')]
    print(f'{learner_repr} with parameters {model.best_params_}:')
    print(f'\tval-accuracy: {model.best_score_}')
    print(f'\ttest-accuracy: {accuracy}')
    print(f'\tconfusion matrix: [{conf_mat[0]}')
    print(f'\t                   {conf_mat[1]}]')
    var_imp = variable_importance(model.best_estimator_)
    if most_important_features:
        print_top_vars(var_imp, 50, data.feature_names)
    if examples:
        article_examples(df_test, y_test, preds)
    if probabilities and hasattr(best_model, 'predict_proba'):
        test_probabilities(best_model, X_test, y_test)
    return best_model
Project: scikit-optimize | Author: scikit-optimize
def evaluate(self, point):
        """
        Fits the model using the given setting of hyperparameters and
        evaluates it on the validation data.

        Parameters
        ----------
        * `point`: dict
            A mapping of parameter names to the corresponding values

        Returns
        -------
        * `score`: float
            Score (more is better!) for some specific point
        """
        X_train, y_train, X_test, y_test = (
            self.X_train, self.y_train, self.X_test, self.y_test)

        # Apply any transformation to the model parameters, e.g. an exp transform.
        point_mapped = {}
        for param, val in point.items():
            point_mapped[param] = self.space[param][1](val)

        model_instance = self.model(**point_mapped)

        if 'random_state' in model_instance.get_params():
            model_instance.set_params(random_state=self.random_state)

        min_obj_val = -5.0

        # Infeasible parameters are expected to raise an exception, hence the
        # try/except below; such parameters yield the assumed smallest objective.
        try:
            model_instance.fit(X_train, y_train)
            if isinstance(model_instance, RegressorMixin): # r^2 metric
                y_predicted = model_instance.predict(X_test)
                score = r2_score(y_test, y_predicted)
            elif isinstance(model_instance, ClassifierMixin):  # log loss
                y_predicted = model_instance.predict_proba(X_test)
                # Negated so that, in this function, a higher score is better.
                score = -log_loss(y_test, y_predicted)
            # Guard against singularities, e.g. a zero probability breaking the
            # log loss.
            if math.isnan(score):
                score = min_obj_val
            score = max(score, min_obj_val)  # avoid -inf or NaN scores
        except BaseException:
            score = min_obj_val  # on error, return the assumed smallest objective value

        return score
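
The mixin check is what lets evaluate() pick a metric without being told the task type: RegressorMixin instances are scored with r^2 and ClassifierMixin instances with negated log loss. A compact sketch of the same dispatch (assumption: plain sklearn estimators, not the benchmark harness above):

from sklearn.base import ClassifierMixin, RegressorMixin
from sklearn.linear_model import LinearRegression, LogisticRegression

for model in (LinearRegression(), LogisticRegression()):
    if isinstance(model, RegressorMixin):
        metric = 'r2_score'
    elif isinstance(model, ClassifierMixin):
        metric = 'negated log_loss'
    else:
        metric = 'unsupported'
    print(type(model).__name__, '->', metric)
# LinearRegression -> r2_score
# LogisticRegression -> negated log_loss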
