The following 15 code examples, extracted from open-source Python projects, illustrate how to use sklearn.base.ClassifierMixin().
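Before the examples, a minimal illustrative sketch (written for this page, not taken from any of the projects) of what the mixin provides: inheriting from ClassifierMixin marks an estimator as a classifier, so isinstance checks like those below succeed, and it supplies a default score() method that computes accuracy. The MajorityClassifier name is invented here for illustration:

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin

class MajorityClassifier(BaseEstimator, ClassifierMixin):
    """Toy classifier that always predicts the most frequent training label."""

    def fit(self, X, y):
        values, counts = np.unique(y, return_counts=True)
        self.classes_ = values
        self.majority_ = values[np.argmax(counts)]
        return self

    def predict(self, X):
        return np.full(len(X), self.majority_)

clf = MajorityClassifier().fit([[0], [1], [2]], [1, 1, 0])
print(isinstance(clf, ClassifierMixin))  # True
print(clf.score([[0], [1]], [1, 0]))     # 0.5 -- accuracy, inherited from the mixin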
def _get_child_predict(self, clf, X, index=None):
    if self.stack_by_proba and hasattr(clf, 'predict_proba'):
        if self.save_stage0 and index is not None:
            proba = util.saving_predict_proba(clf, X, index)
        else:
            proba = clf.predict_proba(X)
        # Drop the first probability column; rows sum to 1, so it is redundant.
        return proba[:, 1:]
    elif hasattr(clf, 'predict'):
        predict_result = clf.predict(X)
        if isinstance(clf, ClassifierMixin):
            # One-hot encode predicted labels for the next stacking stage.
            lb = LabelBinarizer()
            return lb.fit_transform(predict_result)
        else:
            return predict_result.reshape((predict_result.size, 1))
    else:
        return clf.fit_transform(X)
def test_tree_identical_labels():
    rng = np.random.RandomState(0)
    for est in estimators:
        X = rng.randn(100, 5)
        y = np.ones(100)
        c_est = clone(est)
        c_est.set_params(min_samples_split=2, max_depth=None)
        c_est.fit(X, y)
        assert_equal(c_est.tree_.n_node_samples, [100])
        if isinstance(c_est, ClassifierMixin):
            assert_equal(c_est.tree_.value, [[[100]]])
        else:
            assert_equal(c_est.tree_.value, [[[1.0]]])
        X = np.reshape(np.linspace(0.0, 1.0, 100), (-1, 1))
        y = np.array([0.0] * 50 + [1.0] * 50)
        c_est.fit(X, y)
        leaf_ids = c_est.tree_.children_left == -1
        assert_true(np.any(c_est.tree_.n_node_samples[leaf_ids] > 2))
def test_tree_identical_labels():
    rng = np.random.RandomState(0)
    for ensemble in ensembles:
        X = rng.randn(100, 5)
        y = np.ones(100)
        ensemble.fit(X, y)
        for est in ensemble.estimators_:
            assert_equal(est.tree_.n_node_samples, [100])
            if isinstance(est, ClassifierMixin):
                assert_equal(est.tree_.value, [[[100]]])
            else:
                assert_equal(est.tree_.value, [[[1.0]]])
        X = np.reshape(np.linspace(0.0, 1.0, 100), (-1, 1))
        y = np.array([0.0] * 50 + [1.0] * 50)
        ensemble.fit(X, y)
        for est in ensemble.estimators_:
            leaf_ids = est.tree_.children_left == -1
            assert_true(np.any(est.tree_.n_node_samples[leaf_ids] > 2))
def test_probabilities(model: ClassifierMixin, X: np.ndarray, y: pd.Series,
                       bins: int = 10, threshold: float = 0.5):
    """Print confusion matrix based on class probability."""
    probs = [p[1] for p in model.predict_proba(X)]
    print('\tProbabilities')
    df = pd.DataFrame({'prob': probs, 'label': y})
    step = 1 / bins
    # One cut label per bin (the original hardcoded range(10)).
    cut_labels = [round(step * f, 1) for f in range(bins)]
    by_prob = (df.groupby(pd.cut(df['prob'], bins, labels=cut_labels))
               .agg(['sum', 'count'])['label'])
    print('\t\tprobs\t1\t0\tacc')
    for index, row in by_prob.iloc[::-1].iterrows():
        ones = row['sum']
        if math.isnan(ones):
            ones = 0
        else:
            ones = int(ones)
        count = row['count']
        zeros = int(count) - ones
        if count > 0:
            acc = zeros / count if index < threshold else ones / count
        else:
            acc = 0.0
        print(f'\t\t{index}\t{ones}\t{zeros}\t{acc:.3f}')
def _get_blend_init(self, y_train, clf):
    # Widths mirror the outputs of _get_child_predict above.
    if self.stack_by_proba and hasattr(clf, 'predict_proba'):
        width = self.n_classes_ - 1   # probabilities minus the dropped first column
    elif hasattr(clf, 'predict') and isinstance(clf, ClassifierMixin):
        width = self.n_classes_       # one-hot encoded labels
    elif hasattr(clf, 'predict'):
        width = 1                     # regression output
    elif hasattr(clf, 'n_components'):
        width = clf.n_components      # transformer output
    else:
        raise Exception('Unimplemented for {0}'.format(type(clf)))
    return np.zeros((y_train.size, width))
def __init__(self, metric='riemann', tsupdate=False,
             clf=LogisticRegression()):
    """Init."""
    self.metric = metric
    self.tsupdate = tsupdate
    self.clf = clf
    if not isinstance(clf, ClassifierMixin):
        raise TypeError('clf must be a ClassifierMixin')
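A quick demonstration of why this guard works: scikit-learn classifiers inherit ClassifierMixin, while transformers such as PCA do not, so the isinstance check rejects them at construction time:

from sklearn.base import ClassifierMixin
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression

assert isinstance(LogisticRegression(), ClassifierMixin)  # accepted by the guard
assert not isinstance(PCA(), ClassifierMixin)             # would raise the TypeError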
def predict(self, X, check_input=True, return_std=False):
    """Predict class or regression value for X.

    For a classification model, the predicted class for each sample in X
    is returned. For a regression model, the predicted value based on X
    is returned.

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_samples, n_features]
        The input samples. Internally, it will be converted to
        ``dtype=np.float32`` and if a sparse matrix is provided
        to a sparse ``csr_matrix``.

    check_input : boolean, (default=True)
        Allow to bypass several input checking.
        Don't use this parameter unless you know what you do.

    return_std : boolean, (default=False)
        Whether or not to return the standard deviation.

    Returns
    -------
    y : array of shape = [n_samples] or [n_samples, n_outputs]
        The predicted classes, or the predicted values. For regression
        with ``return_std=True``, a ``(mean, std)`` pair is returned.
    """
    check_is_fitted(self, 'tree_')
    X = self._validate_X_predict(X, check_input)

    # Classification
    if isinstance(self, ClassifierMixin):
        return self.classes_[self.predict_proba(X).argmax(axis=1)]
    # Regression
    else:
        mean_and_std = self.tree_.predict(
            X, return_std=return_std, is_regression=True)
        if return_std:
            return mean_and_std
        return mean_and_std[0]
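The snippet itself does not name its package, but this predict appears to come from scikit-garden's Mondrian trees; assuming that dependency, a hedged sketch of the regression branch with return_std=True might look like:

import numpy as np
from skgarden import MondrianTreeRegressor  # assumed import path

rng = np.random.RandomState(0)
X = rng.randn(50, 3)
y = 2.0 * X[:, 0] + rng.randn(50) * 0.1
reg = MondrianTreeRegressor(random_state=0).fit(X, y)
y_mean, y_std = reg.predict(X, return_std=True)  # per-sample predictive std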
def test_numerical_stability():
    X = np.array([
        [152.08097839, 140.40744019, 129.75102234, 159.90493774],
        [142.50700378, 135.81935120, 117.82884979, 162.75781250],
        [127.28772736, 140.40744019, 129.75102234, 159.90493774],
        [132.37025452, 143.71923828, 138.35694885, 157.84558105],
        [103.10237122, 143.71928406, 138.35696411, 157.84559631],
        [127.71276855, 143.71923828, 138.35694885, 157.84558105],
        [120.91514587, 140.40744019, 129.75102234, 159.90493774]])
    y = np.array(
        [1., 0.70209277, 0.53896582, 0., 0.90914464, 0.48026916, 0.49622521])

    with np.errstate(all="raise"):
        for est in estimators:
            new_est = clone(est)
            if isinstance(est, ClassifierMixin):
                y_curr = np.round(y)
            else:
                y_curr = y
            new_est.fit(X, y_curr)
            new_est.fit(X, -y_curr)
            new_est.fit(-X, y_curr)
            new_est.fit(-X, -y_curr)
            new_est.partial_fit(X, y_curr)
            new_est.partial_fit(-X, y_curr)
def variable_importance(estimator: ClassifierMixin) -> np.ndarray:
    """Return variable importances for a fitted estimator."""
    if hasattr(estimator, 'coef_'):
        return estimator.coef_[0]
    if hasattr(estimator, 'feature_importances_'):
        return estimator.feature_importances_
    # Implicitly returns None if the estimator exposes neither attribute.
def score(self, X, y):
    """Force use of accuracy score since we don't inherit
    from ClassifierMixin."""
    from sklearn.metrics import accuracy_score
    return accuracy_score(y, self.predict(X))
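For comparison, ClassifierMixin.score is defined as exactly this accuracy computation, so an estimator that does inherit the mixin gets the method for free. A quick check with a stock classifier:

from sklearn.base import ClassifierMixin
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(random_state=0)
clf = LogisticRegression(max_iter=1000).fit(X, y)
# The inherited score() and an explicit unbound call agree.
assert clf.score(X, y) == ClassifierMixin.score(clf, X, y)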
def _generate_bases_test(est, pd_est):
    def test(self):
        self.assertTrue(isinstance(pd_est, FrameMixin), pd_est)
        self.assertFalse(isinstance(est, FrameMixin))
        self.assertTrue(isinstance(pd_est, base.BaseEstimator))
        try:
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.DensityMixin,
                base.MetaEstimatorMixin,
                base.RegressorMixin]
        except AttributeError:
            # base.DensityMixin is missing in old scikit-learn versions.
            if _sklearn_ver > 17:
                raise
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.MetaEstimatorMixin,
                base.RegressorMixin]
        for mixin in mixins:
            self.assertEqual(
                isinstance(pd_est, mixin),
                isinstance(est, mixin),
                mixin)
    return test
def get_params_for_est(estimator, name):
    '''Choose initialization parameters for an estimator for auto-testing'''
    is_classifier = ClassifierMixin in estimator.__mro__
    is_cluster = ClusterMixin in estimator.__mro__
    is_ensemble = BaseEnsemble in estimator.__mro__
    uses_counts = any(c in name for c in USES_COUNTS)
    as_1d = name in REQUIRES_1D
    args, params, _ = get_args_kwargs_defaults(estimator.__init__)
    est_keys = set(('estimator', 'base_estimator', 'estimators'))
    est_keys = (set(params) | set(args)) & est_keys
    if is_classifier:
        score_func = feat.f_classif
    else:
        score_func = feat.f_regression
    for key in est_keys:
        if name == 'SelectFromModel':
            params[key] = sklearn.linear_model.LassoCV()
        elif is_classifier:
            params[key] = sklearn.tree.DecisionTreeClassifier()
        else:
            params[key] = sklearn.tree.DecisionTreeRegressor()
        if key == 'estimators':
            params[key] = [(str(_), clone(params[key])) for _ in range(10)]
    kw = dict(is_classifier=is_classifier, is_cluster=is_cluster,
              is_ensemble=is_ensemble, uses_counts=uses_counts)
    if 'score_func' in params:
        params['score_func'] = score_func
    X, y = make_X_y(**kw)
    return X, y, params, kw
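Note that `ClassifierMixin in estimator.__mro__` inspects the class object itself. For estimator instances, scikit-learn ships an equivalent built-in helper, sklearn.base.is_classifier; a minimal sketch:

from sklearn.base import is_classifier
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

assert is_classifier(DecisionTreeClassifier())
assert not is_classifier(DecisionTreeRegressor())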
def train_model(data: ArticleDB,
                learner: Type[ClassifierMixin],
                param_grid: dict, *,
                test_articles: Optional[ArticleDB] = None,
                most_important_features: bool = False,
                examples: bool = False,
                ground_truth_as_test: bool = False,
                probabilities: bool = False) -> ClassifierMixin:
    """Trains classifier learner on data and reports test set accuracy."""
    if ground_truth_as_test and test_articles:
        raise ValueError('ground_truth_as_test must be False if '
                         'test_articles are supplied')
    if callable(learner):
        learner = learner()
    X, y = data.X, data.y
    if ground_truth_as_test or test_articles:
        X_train = X
        y_train = y
        if ground_truth_as_test:
            X_test = data.ground_truth_X
            y_test = data.ground_truth_y
            df_test = data.ground_truth
        elif test_articles:
            X_test = test_articles.X
            y_test = test_articles.y
            df_test = test_articles.df
    else:
        X_train, X_test, y_train, y_test, df_train, df_test = (
            train_test_split(X, y, data.df, test_size=0.2))
    model = GridSearchCV(learner, param_grid).fit(X_train, y_train)
    best_model = model.best_estimator_
    preds = best_model.predict(X_test)
    conf_mat = confusion_matrix(y_test, preds, labels=[1, 0])
    accuracy = np.mean(y_test == preds)
    learner_repr = repr(learner)[:repr(learner).find('(')]
    print(f'{learner_repr} with parameters {model.best_params_}:')
    print(f'\tval-accuracy: {model.best_score_}')
    print(f'\ttest-accuracy: {accuracy}')
    print(f'\tconfusion matrix: [{conf_mat[0]}')
    print(f'\t {conf_mat[1]}]')
    var_imp = variable_importance(model.best_estimator_)
    if most_important_features:
        print_top_vars(var_imp, 50, data.feature_names)
    if examples:
        article_examples(df_test, y_test, preds)
    if probabilities and hasattr(best_model, 'predict_proba'):
        test_probabilities(best_model, X_test, y_test)
    return best_model
def evaluate(self, point):
    """
    Fits model using the particular setting of hyperparameters and
    evaluates the model on validation data.

    Parameters
    ----------
    * `point`: dict
        A mapping of parameter names to the corresponding values.

    Returns
    -------
    * `score`: float
        Score (more is better!) for some specific point.
    """
    X_train, y_train, X_test, y_test = (
        self.X_train, self.y_train, self.X_test, self.y_test)

    # Apply transformations to model parameters, for example an exp transform.
    point_mapped = {}
    for param, val in point.items():
        point_mapped[param] = self.space[param][1](val)

    model_instance = self.model(**point_mapped)

    if 'random_state' in model_instance.get_params():
        model_instance.set_params(random_state=self.random_state)

    min_obj_val = -5.0

    # Infeasible parameters are expected to raise an exception, hence the
    # try/except below; infeasible points yield the assumed smallest objective.
    try:
        model_instance.fit(X_train, y_train)
        if isinstance(model_instance, RegressorMixin):
            # r^2 metric
            y_predicted = model_instance.predict(X_test)
            score = r2_score(y_test, y_predicted)
        elif isinstance(model_instance, ClassifierMixin):
            # log loss, negated so that in this function higher is better
            y_predicted = model_instance.predict_proba(X_test)
            score = -log_loss(y_test, y_predicted)
        # Avoid any kind of singularity, e.g. a probability of zero
        # breaking the log_loss.
        if math.isnan(score):
            score = min_obj_val
        score = max(score, min_obj_val)  # necessary to avoid -inf or NaN
    except BaseException:
        # On error: return assumed smallest value of objective function.
        score = min_obj_val

    return score