我们从Python开源项目中,提取了以下30个代码示例,用于说明如何使用sklearn.utils.validation.check_random_state()。
def __init__(self, l1_ratios=(0.4, 0.8, 0.95), num_alphas=30, eps=1e-5, random_state=None, strategies=None, target_score=0.01, n_tail=5, decision="min", max_complexity=50, exponents=[1, 2], operators={}, n_jobs=1, rational=True, **kw): self.l1_ratios = l1_ratios self.num_alphas = num_alphas self.eps = eps self.random_state = check_random_state(random_state) self.strategies = strategies self.target_score = target_score self.n_tail = n_tail self.exponents = exponents self.operators = operators self.kw = kw self.decision = decision self.max_complexity = max_complexity self.n_jobs = n_jobs self.rational = rational
def create(cls, random_state=None): """ Each gene is picked with a uniform distribution from all allowed inputs or functions. :param random_state: an instance of np.random.RandomState, a seed integer or None :return: A random new class instance. """ random_state = check_random_state(random_state) n_in = len(cls.pset.terminals) operator_keys = list(range(n_in, max(cls.pset.mapping) + 1)) code = [] for i in range(cls.n_columns): column = [] for j in range(cls.n_rows): index = _code_index(n_in, cls.n_rows, i, j) in_ = cls._valid_inputs[index] gene = [random_state.choice(operator_keys)] + [random_state.choice(in_) for _ in range(cls.pset.max_arity)] column.append(gene) code.append(column) outputs = [random_state.choice(cls._valid_inputs[_out_index(cls.n_rows, cls.n_columns, n_in, o)]) for o in range(cls.n_out)] return cls(code, outputs)
def test_KNeighborsRegressor_multioutput_uniform_weight(): # Test k-neighbors in multi-output regression with uniform weight rng = check_random_state(0) n_features = 5 n_samples = 40 n_output = 4 X = rng.rand(n_samples, n_features) y = rng.rand(n_samples, n_output) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) for algorithm, weights in product(ALGORITHMS, [None, 'uniform']): knn = neighbors.KNeighborsRegressor(weights=weights, algorithm=algorithm) knn.fit(X_train, y_train) neigh_idx = knn.kneighbors(X_test, return_distance=False) y_pred_idx = np.array([np.mean(y_train[idx], axis=0) for idx in neigh_idx]) y_pred = knn.predict(X_test) assert_equal(y_pred.shape, y_test.shape) assert_equal(y_pred_idx.shape, y_test.shape) assert_array_almost_equal(y_pred, y_pred_idx)
def test_parallel_train(): rng = check_random_state(12321) n_samples, n_features = 80, 30 X_train = rng.randn(n_samples, n_features) y_train = rng.randint(0, 2, n_samples) clfs = [ RandomForestClassifier(n_estimators=20, n_jobs=n_jobs, random_state=12345).fit(X_train, y_train) for n_jobs in [1, 2, 3, 8, 16, 32] ] X_test = rng.randn(n_samples, n_features) probas = [clf.predict_proba(X_test) for clf in clfs] for proba1, proba2 in zip(probas, probas[1:]): assert_array_almost_equal(proba1, proba2)
def test_multioutput_regression_invariance_to_dimension_shuffling(): # test invariance to dimension shuffling random_state = check_random_state(0) y_true = random_state.uniform(0, 2, size=(20, 5)) y_pred = random_state.uniform(0, 2, size=(20, 5)) for name in MULTIOUTPUT_METRICS: metric = ALL_METRICS[name] error = metric(y_true, y_pred) for _ in range(3): perm = random_state.permutation(y_true.shape[1]) assert_almost_equal(metric(y_true[:, perm], y_pred[:, perm]), error, err_msg="%s is not dimension shuffling " "invariant" % name)
def check_zero_or_all_relevant_labels(lrap_score): random_state = check_random_state(0) for n_labels in range(2, 5): y_score = random_state.uniform(size=(1, n_labels)) y_score_ties = np.zeros_like(y_score) # No relevant labels y_true = np.zeros((1, n_labels)) assert_equal(lrap_score(y_true, y_score), 1.) assert_equal(lrap_score(y_true, y_score_ties), 1.) # Only relevant labels y_true = np.ones((1, n_labels)) assert_equal(lrap_score(y_true, y_score), 1.) assert_equal(lrap_score(y_true, y_score_ties), 1.) # Degenerate case: only one label assert_almost_equal(lrap_score([[1], [0], [1], [0]], [[0.5], [0.5], [0.5], [0.5]]), 1.)
def check_alternative_lrap_implementation(lrap_score, n_classes=5, n_samples=20, random_state=0): _, y_true = make_multilabel_classification(n_features=1, allow_unlabeled=False, random_state=random_state, n_classes=n_classes, n_samples=n_samples) # Score with ties y_score = sparse_random_matrix(n_components=y_true.shape[0], n_features=y_true.shape[1], random_state=random_state) if hasattr(y_score, "toarray"): y_score = y_score.toarray() score_lrap = label_ranking_average_precision_score(y_true, y_score) score_my_lrap = _my_lrap(y_true, y_score) assert_almost_equal(score_lrap, score_my_lrap) # Uniform score random_state = check_random_state(random_state) y_score = random_state.uniform(size=(n_samples, n_classes)) score_lrap = label_ranking_average_precision_score(y_true, y_score) score_my_lrap = _my_lrap(y_true, y_score) assert_almost_equal(score_lrap, score_my_lrap)
def _check_inputs(self, X, accept_sparse_negative=False): if isinstance(X, (pd.DataFrame, dd.DataFrame)): X = X.values if isinstance(X, np.ndarray): C = len(X) // min(multiprocessing.cpu_count(), 2) X = da.from_array(X, chunks=C) rng = check_random_state(self.random_state) # TODO: non-float dtypes? # TODO: sparse arrays? # TODO: mix of sparse, dense? sample = rng.uniform(size=(5, X.shape[1])).astype(X.dtype) super(QuantileTransformer, self)._check_inputs( sample, accept_sparse_negative=accept_sparse_negative) return X
def __init__(self, configuration, task, random_state=None): self.configuration = configuration self.task = task self._output_dtype = np.float32 if random_state is None: self.random_state = check_random_state(1) else: self.random_state = check_random_state(random_state)
def setUp(self): iris = datasets.load_iris() rng = check_random_state(0) perm = rng.permutation(iris.target.size) iris.data = iris.data[perm] iris.target = iris.target[perm] self.iris = iris
def setUp(self): iris = datasets.load_iris() rng = check_random_state(0) iris.data = iris.data iris.target = iris.target self.iris = iris for csv_file in glob.glob("*.csv"): os.remove(csv_file)
def __init__(self, operators=None, n_const=0, n_rows=1, n_columns=3, n_back=1, max_iter=1000, max_nfev=10000, lambda_=4, f_tol=0, seed=None, random_state=None, n_jobs=1, metric=mean_squared_error): """ :param operators: list of primitive excluding terminals :param n_const: number of symbolic constants :param n_rows: number of rows in the code block :param n_columns: number of columns in the code block :param n_back: number of rows to look back for connections :param metric: what to optimize for :param fun: `callable(individual)`, function to be optimized :param random_state: an instance of np.random.RandomState, a seed integer or None :param cls: The base class for individuals :type cls: (optional) instance of cartesian.cgp.Cartesian :param seed: (optional) can be passed instead of cls. :param lambda_: number of offspring per generation :param max_iter: maximum number of generations :param max_nfev: maximum number of function evaluations. Important, if fun is another optimizer :param f_tol: threshold for precision :param n_jobs: number of jobs for joblib embarrassingly easy parallel """ self.operators = DEFAULT_PRIMITIVES or operators self.constants = [Constant("c_{}".format(i)) for i in range(n_const)] self.n_rows = n_rows self.n_back = n_back self.n_columns = n_columns self.n_out = None self.pset = None self.res = None self.model = None # parameters for algorithm self.max_nfev = max_nfev self.max_iter = max_iter self.lambda_ = lambda_ self.f_tol = f_tol self.metric = metric self.random_state = check_random_state(random_state) self.n_jobs = n_jobs self.seed = seed
def point_mutation(individual, random_state=None): """ Randomly pick a gene in individual and mutate it. The mutation is either rewiring, i.e. changing the inputs, or changing the operator (head of gene) :param individual: instance of Base :type individual: instance of Cartesian :param random_state: an instance of np.random.RandomState, a seed integer or None :return: new instance of Base """ random_state = check_random_state(random_state) n_terminals = len(individual.pset.terminals) i = random_state.randint(n_terminals, len(individual) - 1) el, c, r, l = individual.mapping[i] gene = l[r] if isinstance(gene, list): new_gene = gene[:] j = random_state.randint(0, len(gene)) if j == 0: # function new_j = individual.pset.imapping[random_state.choice(individual.pset.operators)] else: # input new_j = random_state.choice(individual._valid_inputs[i]) new_gene[j] = new_j else: # output gene new_gene = random_state.randint(0, len(individual) - individual.n_out - 1) new_individual = copy.copy(individual) new_individual[i] = new_gene return new_individual
def test_algorithm_success(individual): cls = type(individual) fun = lambda x: 0 rng = check_random_state(0) res = oneplus(fun, random_state=rng, lambda_=4, max_iter=2, f_tol=-1, cls=cls) assert res.success == False res = oneplus(fun, random_state=rng, lambda_=4, max_iter=0, f_tol=0, cls=cls) assert res.success == True res = oneplus(fun, random_state=rng, lambda_=4, max_nfev=1, f_tol=-1, cls=cls) assert res.success == False
def test_only_constant_features(): random_state = check_random_state(0) X = np.zeros((10, 20)) y = random_state.randint(0, 2, (10, )) for name, TreeEstimator in ALL_TREES.items(): est = TreeEstimator(random_state=0) est.fit(X, y) assert_equal(est.tree_.max_depth, 0)
def test_RadiusNeighborsClassifier_multioutput(): # Test k-NN classifier on multioutput data rng = check_random_state(0) n_features = 2 n_samples = 40 n_output = 3 X = rng.rand(n_samples, n_features) y = rng.randint(0, 3, (n_samples, n_output)) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) weights = [None, 'uniform', 'distance', _weight_func] for algorithm, weights in product(ALGORITHMS, weights): # Stack single output prediction y_pred_so = [] for o in range(n_output): rnn = neighbors.RadiusNeighborsClassifier(weights=weights, algorithm=algorithm) rnn.fit(X_train, y_train[:, o]) y_pred_so.append(rnn.predict(X_test)) y_pred_so = np.vstack(y_pred_so).T assert_equal(y_pred_so.shape, y_test.shape) # Multioutput prediction rnn_mo = neighbors.RadiusNeighborsClassifier(weights=weights, algorithm=algorithm) rnn_mo.fit(X_train, y_train) y_pred_mo = rnn_mo.predict(X_test) assert_equal(y_pred_mo.shape, y_test.shape) assert_array_almost_equal(y_pred_mo, y_pred_so)
def test_RadiusNeighborsRegressor_multioutput_with_uniform_weight(): # Test radius neighbors in multi-output regression (uniform weight) rng = check_random_state(0) n_features = 5 n_samples = 40 n_output = 4 X = rng.rand(n_samples, n_features) y = rng.rand(n_samples, n_output) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) for algorithm, weights in product(ALGORITHMS, [None, 'uniform']): rnn = neighbors. RadiusNeighborsRegressor(weights=weights, algorithm=algorithm) rnn.fit(X_train, y_train) neigh_idx = rnn.radius_neighbors(X_test, return_distance=False) y_pred_idx = np.array([np.mean(y_train[idx], axis=0) for idx in neigh_idx]) y_pred_idx = np.array(y_pred_idx) y_pred = rnn.predict(X_test) assert_equal(y_pred_idx.shape, y_test.shape) assert_equal(y_pred.shape, y_test.shape) assert_array_almost_equal(y_pred, y_pred_idx)
def test_average_precision_score_score_non_binary_class(): # Test that average_precision_score function returns an error when trying # to compute average_precision_score for multiclass task. rng = check_random_state(404) y_pred = rng.rand(10) # y_true contains three different class values y_true = rng.randint(0, 3, size=10) assert_raise_message(ValueError, "multiclass format is not supported", average_precision_score, y_true, y_pred)
def test_symmetry(): # Test the symmetry of score and loss functions random_state = check_random_state(0) y_true = random_state.randint(0, 2, size=(20, )) y_pred = random_state.randint(0, 2, size=(20, )) # We shouldn't forget any metrics assert_equal(set(SYMMETRIC_METRICS).union( NOT_SYMMETRIC_METRICS, THRESHOLDED_METRICS, METRIC_UNDEFINED_BINARY_MULTICLASS), set(ALL_METRICS)) assert_equal( set(SYMMETRIC_METRICS).intersection(set(NOT_SYMMETRIC_METRICS)), set([])) # Symmetric metric for name in SYMMETRIC_METRICS: metric = ALL_METRICS[name] assert_almost_equal(metric(y_true, y_pred), metric(y_pred, y_true), err_msg="%s is not symmetric" % name) # Not symmetric metrics for name in NOT_SYMMETRIC_METRICS: metric = ALL_METRICS[name] assert_true(np.any(metric(y_true, y_pred) != metric(y_pred, y_true)), msg="%s seems to be symmetric" % name)
def test_sample_order_invariance_multilabel_and_multioutput(): random_state = check_random_state(0) # Generate some data y_true = random_state.randint(0, 2, size=(20, 25)) y_pred = random_state.randint(0, 2, size=(20, 25)) y_score = random_state.normal(size=y_true.shape) y_true_shuffle, y_pred_shuffle, y_score_shuffle = shuffle(y_true, y_pred, y_score, random_state=0) for name in MULTILABELS_METRICS: metric = ALL_METRICS[name] assert_almost_equal(metric(y_true, y_pred), metric(y_true_shuffle, y_pred_shuffle), err_msg="%s is not sample order invariant" % name) for name in THRESHOLDED_MULTILABEL_METRICS: metric = ALL_METRICS[name] assert_almost_equal(metric(y_true, y_score), metric(y_true_shuffle, y_score_shuffle), err_msg="%s is not sample order invariant" % name) for name in MULTIOUTPUT_METRICS: metric = ALL_METRICS[name] assert_almost_equal(metric(y_true, y_score), metric(y_true_shuffle, y_score_shuffle), err_msg="%s is not sample order invariant" % name) assert_almost_equal(metric(y_true, y_pred), metric(y_true_shuffle, y_pred_shuffle), err_msg="%s is not sample order invariant" % name)
def test_normalize_option_binary_classification(n_samples=20): # Test in the binary case random_state = check_random_state(0) y_true = random_state.randint(0, 2, size=(n_samples, )) y_pred = random_state.randint(0, 2, size=(n_samples, )) for name in METRICS_WITH_NORMALIZE_OPTION: metrics = ALL_METRICS[name] measure = metrics(y_true, y_pred, normalize=True) assert_greater(measure, 0, msg="We failed to test correctly the normalize option") assert_almost_equal(metrics(y_true, y_pred, normalize=False) / n_samples, measure)
def test_normalize_option_multiclasss_classification(): # Test in the multiclass case random_state = check_random_state(0) y_true = random_state.randint(0, 4, size=(20, )) y_pred = random_state.randint(0, 4, size=(20, )) n_samples = y_true.shape[0] for name in METRICS_WITH_NORMALIZE_OPTION: metrics = ALL_METRICS[name] measure = metrics(y_true, y_pred, normalize=True) assert_greater(measure, 0, msg="We failed to test correctly the normalize option") assert_almost_equal(metrics(y_true, y_pred, normalize=False) / n_samples, measure)
def test_averaging_multiclass(n_samples=50, n_classes=3): random_state = check_random_state(0) y_true = random_state.randint(0, n_classes, size=(n_samples, )) y_pred = random_state.randint(0, n_classes, size=(n_samples, )) y_score = random_state.uniform(size=(n_samples, n_classes)) lb = LabelBinarizer().fit(y_true) y_true_binarize = lb.transform(y_true) y_pred_binarize = lb.transform(y_pred) for name in METRICS_WITH_AVERAGING: yield (check_averaging, name, y_true, y_true_binarize, y_pred, y_pred_binarize, y_score)
def test_auc_score_non_binary_class(): # Test that roc_auc_score function returns an error when trying # to compute AUC for non-binary class values. rng = check_random_state(404) y_pred = rng.rand(10) # y_true contains only one class value y_true = np.zeros(10, dtype="int") assert_raise_message(ValueError, "ROC AUC score is not defined", roc_auc_score, y_true, y_pred) y_true = np.ones(10, dtype="int") assert_raise_message(ValueError, "ROC AUC score is not defined", roc_auc_score, y_true, y_pred) y_true = -np.ones(10, dtype="int") assert_raise_message(ValueError, "ROC AUC score is not defined", roc_auc_score, y_true, y_pred) # y_true contains three different class values y_true = rng.randint(0, 3, size=10) assert_raise_message(ValueError, "multiclass format is not supported", roc_auc_score, y_true, y_pred) clean_warning_registry() with warnings.catch_warnings(record=True): rng = check_random_state(404) y_pred = rng.rand(10) # y_true contains only one class value y_true = np.zeros(10, dtype="int") assert_raise_message(ValueError, "ROC AUC score is not defined", roc_auc_score, y_true, y_pred) y_true = np.ones(10, dtype="int") assert_raise_message(ValueError, "ROC AUC score is not defined", roc_auc_score, y_true, y_pred) y_true = -np.ones(10, dtype="int") assert_raise_message(ValueError, "ROC AUC score is not defined", roc_auc_score, y_true, y_pred) # y_true contains three different class values y_true = rng.randint(0, 3, size=10) assert_raise_message(ValueError, "multiclass format is not supported", roc_auc_score, y_true, y_pred)
def oneplus(fun, random_state=None, cls=None, lambda_=4, max_iter=100, max_nfev=None, f_tol=0, n_jobs=1, seed=None): """ 1 + lambda algorithm. In each generation, create lambda offspring and compare their fitness to the parent individual. The fittest individual carries over to the next generation. In case of a draw, the offspring is prefered. :param fun: `callable(individual)`, function to be optimized :param random_state: an instance of np.random.RandomState, a seed integer or None :param cls: The base class for individuals :type cls: (optional) instance of cartesian.cgp.Cartesian :param seed: (optional) can be passed instead of cls. :param lambda_: number of offspring per generation :param max_iter: maximum number of generations :param max_nfev: maximum number of function evaluations. Important, if fun is another optimizer :param f_tol: threshold for precision :param n_jobs: number of jobs for joblib embarrassingly easy parallel :return: scipy.optimize.OptimizeResult with non-standard attributes res.x = values for constants res.expr = expression res.fun = best value for the function """ max_iter = max_nfev if max_nfev else max_iter max_nfev = max_nfev or math.inf random_state = check_random_state(random_state) best = seed or cls.create(random_state=random_state) best_res = return_opt_result(fun, best) nfev = best_res.nfev res = OptimizeResult(expr=best, x=best_res.x, fun=best_res.fun, nit=0, nfev=nfev, success=False) if best_res.fun <= f_tol: res["success"] = True return res for i in range(1, max_iter): offspring = [point_mutation(best, random_state=random_state) for _ in range(lambda_)] # with Parallel(n_jobs=n_jobs) as parallel: # offspring_fitness = parallel(delayed(return_opt_result)(fun, o) for o in offspring) offspring_fitness = [return_opt_result(fun, o) for o in offspring] best, best_res = min(zip(offspring + [best], offspring_fitness + [best_res]), key=lambda x: x[1].fun) nfev += sum(of.nfev for of in offspring_fitness) res = OptimizeResult(expr=best, x=best_res.x, fun=best_res.fun, nit=i, nfev=nfev, success=False) if res.fun <= f_tol: res["success"] = True return res elif res.nfev >= max_nfev: return res return res
def test_KNeighborsClassifier_multioutput(): # Test k-NN classifier on multioutput data rng = check_random_state(0) n_features = 5 n_samples = 50 n_output = 3 X = rng.rand(n_samples, n_features) y = rng.randint(0, 3, (n_samples, n_output)) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) weights = [None, 'uniform', 'distance', _weight_func] for algorithm, weights in product(ALGORITHMS, weights): # Stack single output prediction y_pred_so = [] y_pred_proba_so = [] for o in range(n_output): knn = neighbors.KNeighborsClassifier(weights=weights, algorithm=algorithm) knn.fit(X_train, y_train[:, o]) y_pred_so.append(knn.predict(X_test)) y_pred_proba_so.append(knn.predict_proba(X_test)) y_pred_so = np.vstack(y_pred_so).T assert_equal(y_pred_so.shape, y_test.shape) assert_equal(len(y_pred_proba_so), n_output) # Multioutput prediction knn_mo = neighbors.KNeighborsClassifier(weights=weights, algorithm=algorithm) knn_mo.fit(X_train, y_train) y_pred_mo = knn_mo.predict(X_test) assert_equal(y_pred_mo.shape, y_test.shape) assert_array_almost_equal(y_pred_mo, y_pred_so) # Check proba y_pred_proba_mo = knn_mo.predict_proba(X_test) assert_equal(len(y_pred_proba_mo), n_output) for proba_mo, proba_so in zip(y_pred_proba_mo, y_pred_proba_so): assert_array_almost_equal(proba_mo, proba_so)
def test_distribution(): rng = check_random_state(12321) # Single variable with 4 values X = rng.randint(0, 4, size=(1000, 1)) y = rng.rand(1000) n_trees = 500 clf = ExtraTreesRegressor(n_estimators=n_trees, random_state=42).fit(X, y) uniques = defaultdict(int) for tree in clf.estimators_: tree = "".join(("%d,%d/" % (f, int(t)) if f >= 0 else "-") for f, t in zip(tree.tree_.feature, tree.tree_.threshold)) uniques[tree] += 1 uniques = sorted([(1. * count / n_trees, tree) for tree, count in uniques.items()]) # On a single variable problem where X_0 has 4 equiprobable values, there # are 5 ways to build a random tree. The more compact (0,1/0,0/--0,2/--) of # them has probability 1/3 while the 4 others have probability 1/6. assert_equal(len(uniques), 5) assert_greater(0.20, uniques[0][0]) # Rough approximation of 1/6. assert_greater(0.20, uniques[1][0]) assert_greater(0.20, uniques[2][0]) assert_greater(0.20, uniques[3][0]) assert_greater(uniques[4][0], 0.3) assert_equal(uniques[4][1], "0,1/0,0/--0,2/--") # Two variables, one with 2 values, one with 3 values X = np.empty((1000, 2)) X[:, 0] = np.random.randint(0, 2, 1000) X[:, 1] = np.random.randint(0, 3, 1000) y = rng.rand(1000) clf = ExtraTreesRegressor(n_estimators=100, max_features=1, random_state=1).fit(X, y) uniques = defaultdict(int) for tree in clf.estimators_: tree = "".join(("%d,%d/" % (f, int(t)) if f >= 0 else "-") for f, t in zip(tree.tree_.feature, tree.tree_.threshold)) uniques[tree] += 1 uniques = [(count, tree) for tree, count in uniques.items()] assert_equal(len(uniques), 8)
def make_prediction(dataset=None, binary=False): """Make some classification predictions on a toy dataset using a SVC If binary is True restrict to a binary classification problem instead of a multiclass classification problem """ if dataset is None: # import some data to play with dataset = datasets.load_iris() X = dataset.data y = dataset.target if binary: # restrict to a binary classification task X, y = X[y < 2], y[y < 2] n_samples, n_features = X.shape p = np.arange(n_samples) rng = check_random_state(37) rng.shuffle(p) X, y = X[p], y[p] half = int(n_samples / 2) # add noisy features to make the problem harder and avoid perfect results rng = np.random.RandomState(0) X = np.c_[X, rng.randn(n_samples, 200 * n_features)] # run classifier, get class probabilities and label predictions clf = svm.SVC(kernel='linear', probability=True, random_state=0) probas_pred = clf.fit(X[:half], y[:half]).predict_proba(X[half:]) if binary: # only interested in probabilities of the positive case # XXX: do we really want a special API for the binary case? probas_pred = probas_pred[:, 1] y_pred = clf.predict(X[half:]) y_true = y[half:] return y_true, y_pred, probas_pred ############################################################################### # Tests
def test_sample_weight_invariance(n_samples=50): random_state = check_random_state(0) # binary random_state = check_random_state(0) y_true = random_state.randint(0, 2, size=(n_samples, )) y_pred = random_state.randint(0, 2, size=(n_samples, )) y_score = random_state.random_sample(size=(n_samples,)) for name in ALL_METRICS: if (name in METRICS_WITHOUT_SAMPLE_WEIGHT or name in METRIC_UNDEFINED_BINARY): continue metric = ALL_METRICS[name] if name in THRESHOLDED_METRICS: yield check_sample_weight_invariance, name, metric, y_true, y_score else: yield check_sample_weight_invariance, name, metric, y_true, y_pred # multiclass random_state = check_random_state(0) y_true = random_state.randint(0, 5, size=(n_samples, )) y_pred = random_state.randint(0, 5, size=(n_samples, )) y_score = random_state.random_sample(size=(n_samples, 5)) for name in ALL_METRICS: if (name in METRICS_WITHOUT_SAMPLE_WEIGHT or name in METRIC_UNDEFINED_BINARY_MULTICLASS): continue metric = ALL_METRICS[name] if name in THRESHOLDED_METRICS: yield check_sample_weight_invariance, name, metric, y_true, y_score else: yield check_sample_weight_invariance, name, metric, y_true, y_pred # multilabel indicator _, ya = make_multilabel_classification(n_features=1, n_classes=20, random_state=0, n_samples=100, allow_unlabeled=False) _, yb = make_multilabel_classification(n_features=1, n_classes=20, random_state=1, n_samples=100, allow_unlabeled=False) y_true = np.vstack([ya, yb]) y_pred = np.vstack([ya, ya]) y_score = random_state.randint(1, 4, size=y_true.shape) for name in (MULTILABELS_METRICS + THRESHOLDED_MULTILABEL_METRICS + MULTIOUTPUT_METRICS): if name in METRICS_WITHOUT_SAMPLE_WEIGHT: continue metric = ALL_METRICS[name] if name in THRESHOLDED_METRICS: yield (check_sample_weight_invariance, name, metric, y_true, y_score) else: yield (check_sample_weight_invariance, name, metric, y_true, y_pred)