Python sklearn.utils.validation 模块,check_random_state() 实例源码

我们从Python开源项目中,提取了以下30个代码示例,用于说明如何使用sklearn.utils.validation.check_random_state()

项目:sparsereg    作者:Ohjeah    | 项目源码 | 文件源码
def __init__(self, l1_ratios=(0.4, 0.8, 0.95), num_alphas=30,
                 eps=1e-5, random_state=None, strategies=None, target_score=0.01,
                 n_tail=5, decision="min", max_complexity=50,
                 exponents=None, operators=None, n_jobs=1, rational=True, **kw):
        """Store the hyper-parameters on the instance.

        ``random_state`` is normalised through ``check_random_state``; every
        other argument is stored under an attribute of the same name, and any
        extra keyword arguments are kept in ``self.kw``.

        :param exponents: defaults to ``None``, which is replaced by a fresh
            ``[1, 2]`` list.
        :param operators: defaults to ``None``, which is replaced by a fresh
            empty dict.
        """
        # Build the containers per call: a mutable default argument would be
        # one shared object mutated by every instance that relies on it.
        if exponents is None:
            exponents = [1, 2]
        if operators is None:
            operators = {}

        self.l1_ratios = l1_ratios
        self.num_alphas = num_alphas
        self.eps = eps
        self.random_state = check_random_state(random_state)
        self.strategies = strategies
        self.target_score = target_score
        self.n_tail = n_tail
        self.exponents = exponents
        self.operators = operators
        self.kw = kw
        self.decision = decision
        self.max_complexity = max_complexity
        self.n_jobs = n_jobs
        self.rational = rational
项目:cartesian    作者:Ohjeah    | 项目源码 | 文件源码
def create(cls, random_state=None):
        """
        Build a random individual.

        Every gene consists of one operator key followed by ``cls.pset.max_arity``
        input choices, each drawn with ``random_state.choice``; the output genes
        are drawn the same way from their own sets of valid inputs.
        :param random_state: an instance of np.random.RandomState, a seed integer or None
        :return: A random new class instance.
        """
        rng = check_random_state(random_state)
        n_in = len(cls.pset.terminals)
        operator_keys = list(range(n_in, max(cls.pset.mapping) + 1))

        def random_gene(col, row):
            # Look up the admissible inputs first, then draw operator and inputs
            # in that order so the random stream is consumed deterministically.
            allowed = cls._valid_inputs[_code_index(n_in, cls.n_rows, col, row)]
            head = rng.choice(operator_keys)
            tail = [rng.choice(allowed) for _ in range(cls.pset.max_arity)]
            return [head] + tail

        code = [
            [random_gene(col, row) for row in range(cls.n_rows)]
            for col in range(cls.n_columns)
        ]

        outputs = []
        for o in range(cls.n_out):
            candidates = cls._valid_inputs[_out_index(cls.n_rows, cls.n_columns, n_in, o)]
            outputs.append(rng.choice(candidates))
        return cls(code, outputs)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_KNeighborsRegressor_multioutput_uniform_weight():
    """Check uniform-weight k-neighbors regression on multi-output targets.

    For every algorithm, and for ``weights`` in ``(None, 'uniform')``, the
    prediction must match the plain mean of the neighbours' training targets.
    """
    n_samples, n_features, n_output = 40, 5, 4
    rng = check_random_state(0)

    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples, n_output)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    for algorithm in ALGORITHMS:
        for weights in (None, 'uniform'):
            knn = neighbors.KNeighborsRegressor(weights=weights,
                                                algorithm=algorithm)
            knn.fit(X_train, y_train)

            # Reference prediction: average the targets of the returned neighbours.
            neigh_idx = knn.kneighbors(X_test, return_distance=False)
            expected = np.array([np.mean(y_train[idx], axis=0)
                                 for idx in neigh_idx])

            y_pred = knn.predict(X_test)

            assert_equal(y_pred.shape, y_test.shape)
            assert_equal(expected.shape, y_test.shape)
            assert_array_almost_equal(y_pred, expected)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_parallel_train():
    """Forests fitted with different ``n_jobs`` must give the same probabilities."""
    n_samples, n_features = 80, 30
    rng = check_random_state(12321)
    X_train = rng.randn(n_samples, n_features)
    y_train = rng.randint(0, 2, n_samples)

    clfs = []
    for n_jobs in (1, 2, 3, 8, 16, 32):
        forest = RandomForestClassifier(n_estimators=20, n_jobs=n_jobs,
                                        random_state=12345)
        clfs.append(forest.fit(X_train, y_train))

    X_test = rng.randn(n_samples, n_features)
    probas = [clf.predict_proba(X_test) for clf in clfs]

    # Compare each result with its successor.
    previous = probas[0]
    for current in probas[1:]:
        assert_array_almost_equal(previous, current)
        previous = current
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_multioutput_regression_invariance_to_dimension_shuffling():
    """Multi-output metrics must not change when output columns are permuted."""
    rng = check_random_state(0)
    shape = (20, 5)
    y_true = rng.uniform(0, 2, size=shape)
    y_pred = rng.uniform(0, 2, size=shape)

    for name in MULTIOUTPUT_METRICS:
        metric = ALL_METRICS[name]
        reference = metric(y_true, y_pred)
        message = "%s is not dimension shuffling invariant" % name

        # Three independent column permutations per metric.
        for _ in range(3):
            perm = rng.permutation(y_true.shape[1])
            shuffled = metric(y_true[:, perm], y_pred[:, perm])
            assert_almost_equal(shuffled, reference, err_msg=message)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def check_zero_or_all_relevant_labels(lrap_score):
    """Assert ``lrap_score`` equals 1 when no label or every label is relevant.

    Both random scores and all-tied (zero) scores are checked for 2 to 4
    labels, followed by the degenerate single-label case.
    """
    rng = check_random_state(0)

    for n_labels in range(2, 5):
        y_score = rng.uniform(size=(1, n_labels))
        y_score_ties = np.zeros_like(y_score)

        # First "no relevant labels", then "only relevant labels".
        for y_true in (np.zeros((1, n_labels)), np.ones((1, n_labels))):
            assert_equal(lrap_score(y_true, y_score), 1.)
            assert_equal(lrap_score(y_true, y_score_ties), 1.)

    # Degenerate case: only one label
    single_label_truth = [[1], [0], [1], [0]]
    single_label_score = [[0.5], [0.5], [0.5], [0.5]]
    assert_almost_equal(lrap_score(single_label_truth, single_label_score), 1.)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def check_alternative_lrap_implementation(lrap_score, n_classes=5,
                                          n_samples=20, random_state=0):
    """Compare ``lrap_score`` with the ``_my_lrap`` implementation.

    The comparison is made twice on the same multilabel ground truth: once
    with scores taken from a sparse random matrix (which contains ties) and
    once with uniformly drawn scores.

    :param lrap_score: callable ``(y_true, y_score) -> float`` under test
    :param n_classes: number of labels in the generated problem
    :param n_samples: number of samples in the generated problem
    :param random_state: seed (or RandomState) used for all generated data
    """
    _, y_true = make_multilabel_classification(n_features=1,
                                               allow_unlabeled=False,
                                               random_state=random_state,
                                               n_classes=n_classes,
                                               n_samples=n_samples)

    # Score with ties
    y_score = sparse_random_matrix(n_components=y_true.shape[0],
                                   n_features=y_true.shape[1],
                                   random_state=random_state)

    if hasattr(y_score, "toarray"):
        y_score = y_score.toarray()
    # Use the callable that was passed in; the previous code ignored the
    # argument and always called label_ranking_average_precision_score.
    score_lrap = lrap_score(y_true, y_score)
    score_my_lrap = _my_lrap(y_true, y_score)
    assert_almost_equal(score_lrap, score_my_lrap)

    # Uniform score
    random_state = check_random_state(random_state)
    y_score = random_state.uniform(size=(n_samples, n_classes))
    score_lrap = lrap_score(y_true, y_score)
    score_my_lrap = _my_lrap(y_true, y_score)
    assert_almost_equal(score_lrap, score_my_lrap)
项目:dask-ml    作者:dask    | 项目源码 | 文件源码
def _check_inputs(self, X, accept_sparse_negative=False):
        """Coerce ``X`` to a dask array and validate a small stand-in sample.

        DataFrames are reduced to ``.values`` and NumPy arrays are wrapped with
        ``da.from_array``. The parent class's ``_check_inputs`` is then run on a
        5-row uniform random sample that has the same number of columns and the
        same dtype as ``X``, not on ``X`` itself.

        :return: ``X``, possibly converted as described above
        """
        if isinstance(X, (pd.DataFrame, dd.DataFrame)):
            X = X.values
        if isinstance(X, np.ndarray):
            # NOTE(review): min(cpu_count, 2) caps the divisor at 2, and the
            # integer chunk size is 0 for a single-row array -- confirm intent.
            divisor = min(multiprocessing.cpu_count(), 2)
            X = da.from_array(X, chunks=len(X) // divisor)

        # TODO: non-float dtypes?
        # TODO: sparse arrays?
        # TODO: mix of sparse, dense?
        generator = check_random_state(self.random_state)
        probe = generator.uniform(size=(5, X.shape[1])).astype(X.dtype)
        super(QuantileTransformer, self)._check_inputs(
            probe, accept_sparse_negative=accept_sparse_negative)
        return X
项目:AutoML-Challenge    作者:postech-mlg-exbrain    | 项目源码 | 文件源码
def __init__(self, configuration, task, random_state=None):
        """Store configuration and task, and set up the random state.

        When ``random_state`` is ``None`` the fixed seed ``1`` is used instead,
        so the instance never falls back to the global NumPy generator.
        """
        self.configuration = configuration
        self.task = task
        self._output_dtype = np.float32

        seed = 1 if random_state is None else random_state
        self.random_state = check_random_state(seed)
项目:stacked_generalization    作者:fukatani    | 项目源码 | 文件源码
def setUp(self):
        """Load iris, shuffle samples with a seed-0 permutation, keep it on ``self.iris``."""
        dataset = datasets.load_iris()
        order = check_random_state(0).permutation(dataset.target.size)
        # Apply the same permutation to features and labels.
        dataset.data = dataset.data[order]
        dataset.target = dataset.target[order]
        self.iris = dataset
项目:stacked_generalization    作者:fukatani    | 项目源码 | 文件源码
def setUp(self):
        """Load the (unshuffled) iris data set and delete stale CSV files.

        Every ``*.csv`` file in the current working directory is removed.
        """
        # The former ``rng`` local was never used and the ``iris.data`` /
        # ``iris.target`` self-assignments were no-ops, so they are dropped.
        self.iris = datasets.load_iris()
        for csv_file in glob.glob("*.csv"):
            os.remove(csv_file)
项目:cartesian    作者:Ohjeah    | 项目源码 | 文件源码
def __init__(self, operators=None, n_const=0, n_rows=1, n_columns=3, n_back=1, max_iter=1000,
                 max_nfev=10000, lambda_=4, f_tol=0, seed=None, random_state=None, n_jobs=1, metric=mean_squared_error):
        """
        :param operators: list of primitive excluding terminals; ``None`` selects DEFAULT_PRIMITIVES
        :param n_const: number of symbolic constants
        :param n_rows: number of rows in the code block
        :param n_columns: number of columns in the code block
        :param n_back: number of rows to look back for connections
        :param max_iter: maximum number of generations
        :param max_nfev: maximum number of function evaluations
        :param lambda_: number of offspring per generation
        :param f_tol: threshold for precision
        :param seed: (optional) stored unchanged on ``self.seed``
        :param random_state: an instance of np.random.RandomState, a seed integer or None
        :param n_jobs: number of jobs for joblib embarrassingly easy parallel
        :param metric: what to optimize for
        """
        # A caller-supplied operator list takes precedence; the previous
        # ``DEFAULT_PRIMITIVES or operators`` ignored it whenever the defaults
        # were non-empty.
        self.operators = DEFAULT_PRIMITIVES if operators is None else operators
        self.constants = [Constant("c_{}".format(i)) for i in range(n_const)]
        self.n_rows = n_rows
        self.n_back = n_back
        self.n_columns = n_columns
        # Placeholders, all initialised to None here.
        self.n_out = None
        self.pset = None
        self.res = None
        self.model = None

        # parameters for algorithm
        self.max_nfev = max_nfev
        self.max_iter = max_iter
        self.lambda_ = lambda_
        self.f_tol = f_tol
        self.metric = metric
        self.random_state = check_random_state(random_state)
        self.n_jobs = n_jobs
        self.seed = seed
项目:cartesian    作者:Ohjeah    | 项目源码 | 文件源码
def point_mutation(individual, random_state=None):
    """
    Return a copy of ``individual`` with one randomly chosen gene mutated.

    A list-valued gene has one position replaced: position 0 (the operator)
    gets a new operator key, any other position gets a new valid input.
    A non-list gene (output gene) is replaced by a freshly drawn integer.
    :param individual: instance of Base
    :type individual: instance of Cartesian
    :param random_state: an instance of np.random.RandomState, a seed integer or None
    :return: new instance of Base
    """
    rng = check_random_state(random_state)
    pset = individual.pset
    first_gene = len(pset.terminals)
    # NOTE(review): RandomState.randint excludes its upper bound, so with the
    # "- 1" here (and in the output-gene branch) the largest index is never
    # drawn -- confirm this is intended.
    i = rng.randint(first_gene, len(individual) - 1)
    _, _, position, container = individual.mapping[i]
    gene = container[position]

    if not isinstance(gene, list):
        # output gene
        new_gene = rng.randint(0, len(individual) - individual.n_out - 1)
    else:
        new_gene = gene[:]
        j = rng.randint(0, len(gene))
        if j == 0:
            # head of the gene: swap the function
            replacement = pset.imapping[rng.choice(pset.operators)]
        else:
            # any other slot: rewire an input
            replacement = rng.choice(individual._valid_inputs[i])
        new_gene[j] = replacement

    mutant = copy.copy(individual)
    mutant[i] = new_gene
    return mutant
项目:cartesian    作者:Ohjeah    | 项目源码 | 文件源码
def test_algorithm_success(individual):
    """Check the ``success`` flag of ``oneplus`` for a constant-zero objective.

    With ``f_tol=-1`` the objective value 0 can never reach the tolerance, so
    the run must report failure; with ``f_tol=0`` it succeeds immediately.
    """
    cls = type(individual)

    def fun(x):
        return 0

    rng = check_random_state(0)
    res = oneplus(fun, random_state=rng, lambda_=4, max_iter=2, f_tol=-1, cls=cls)
    assert not res.success
    res = oneplus(fun, random_state=rng, lambda_=4, max_iter=0, f_tol=0, cls=cls)
    assert res.success
    res = oneplus(fun, random_state=rng, lambda_=4, max_nfev=1, f_tol=-1, cls=cls)
    assert not res.success
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_only_constant_features():
    """Trees fitted on an all-zero feature matrix must not split (depth 0)."""
    rng = check_random_state(0)
    X = np.zeros((10, 20))
    y = rng.randint(0, 2, (10, ))
    for TreeEstimator in ALL_TREES.values():
        est = TreeEstimator(random_state=0)
        est.fit(X, y)
        assert_equal(est.tree_.max_depth, 0)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_RadiusNeighborsClassifier_multioutput():
    """Radius-neighbors classification on multi-output targets.

    Fitting all outputs at once must give the same predictions as fitting one
    classifier per output column and stacking the results.
    """
    n_samples, n_features, n_output = 40, 2, 3
    rng = check_random_state(0)

    X = rng.rand(n_samples, n_features)
    y = rng.randint(0, 3, (n_samples, n_output))
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    weight_options = [None, 'uniform', 'distance', _weight_func]

    for algorithm, weights in product(ALGORITHMS, weight_options):
        # One classifier per output column, predictions stacked column-wise.
        per_output = []
        for o in range(n_output):
            rnn = neighbors.RadiusNeighborsClassifier(weights=weights,
                                                      algorithm=algorithm)
            rnn.fit(X_train, y_train[:, o])
            per_output.append(rnn.predict(X_test))

        y_pred_so = np.vstack(per_output).T
        assert_equal(y_pred_so.shape, y_test.shape)

        # A single classifier fitted on all outputs together.
        rnn_mo = neighbors.RadiusNeighborsClassifier(weights=weights,
                                                     algorithm=algorithm)
        rnn_mo.fit(X_train, y_train)
        y_pred_mo = rnn_mo.predict(X_test)

        assert_equal(y_pred_mo.shape, y_test.shape)
        assert_array_almost_equal(y_pred_mo, y_pred_so)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_RadiusNeighborsRegressor_multioutput_with_uniform_weight():
    """Radius-neighbors regression on multi-output targets (uniform weight).

    For every algorithm, and for ``weights`` in ``(None, 'uniform')``, the
    prediction must match the mean of the training targets of the neighbours
    returned by ``radius_neighbors``.
    """
    rng = check_random_state(0)
    n_features = 5
    n_samples = 40
    n_output = 4

    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples, n_output)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    for algorithm, weights in product(ALGORITHMS, [None, 'uniform']):
        rnn = neighbors.RadiusNeighborsRegressor(weights=weights,
                                                 algorithm=algorithm)
        rnn.fit(X_train, y_train)

        neigh_idx = rnn.radius_neighbors(X_test, return_distance=False)
        # Already an ndarray; the former second np.array() wrap was redundant.
        y_pred_idx = np.array([np.mean(y_train[idx], axis=0)
                               for idx in neigh_idx])

        y_pred = rnn.predict(X_test)

        assert_equal(y_pred_idx.shape, y_test.shape)
        assert_equal(y_pred.shape, y_test.shape)
        assert_array_almost_equal(y_pred, y_pred_idx)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_average_precision_score_score_non_binary_class():
    """``average_precision_score`` must raise ValueError on a multiclass target."""
    rng = check_random_state(404)
    y_pred = rng.rand(10)

    # Labels drawn from {0, 1, 2}: three different class values.
    y_true = rng.randint(0, 3, size=10)
    expected_message = "multiclass format is not supported"
    assert_raise_message(ValueError, expected_message,
                         average_precision_score, y_true, y_pred)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_symmetry():
    """Check which metrics are symmetric in ``(y_true, y_pred)``.

    Also verifies that every metric in ALL_METRICS belongs to one of the four
    category collections and that no metric is listed as both symmetric and
    not symmetric.
    """
    rng = check_random_state(0)
    y_true = rng.randint(0, 2, size=(20, ))
    y_pred = rng.randint(0, 2, size=(20, ))

    # We shouldn't forget any metrics
    categorised = set(SYMMETRIC_METRICS).union(
        NOT_SYMMETRIC_METRICS, THRESHOLDED_METRICS,
        METRIC_UNDEFINED_BINARY_MULTICLASS)
    assert_equal(categorised, set(ALL_METRICS))

    in_both = set(SYMMETRIC_METRICS).intersection(set(NOT_SYMMETRIC_METRICS))
    assert_equal(in_both, set())

    # Swapping the arguments must not change a symmetric metric.
    for name in SYMMETRIC_METRICS:
        metric = ALL_METRICS[name]
        forward = metric(y_true, y_pred)
        backward = metric(y_pred, y_true)
        assert_almost_equal(forward, backward,
                            err_msg="%s is not symmetric" % name)

    # ...and must change a metric declared as not symmetric.
    for name in NOT_SYMMETRIC_METRICS:
        metric = ALL_METRICS[name]
        differs = np.any(metric(y_true, y_pred) != metric(y_pred, y_true))
        assert_true(differs, msg="%s seems to be symmetric" % name)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_sample_order_invariance_multilabel_and_multioutput():
    """Multilabel and multi-output metrics must ignore the order of samples."""
    rng = check_random_state(0)

    # Generate some data
    y_true = rng.randint(0, 2, size=(20, 25))
    y_pred = rng.randint(0, 2, size=(20, 25))
    y_score = rng.normal(size=y_true.shape)

    # Shuffle the three arrays consistently.
    shuffled = shuffle(y_true, y_pred, y_score, random_state=0)
    y_true_shuffle, y_pred_shuffle, y_score_shuffle = shuffled

    def assert_order_invariant(name, estimate, estimate_shuffle):
        # The metric on the original rows must equal the metric on shuffled rows.
        metric = ALL_METRICS[name]
        assert_almost_equal(metric(y_true, estimate),
                            metric(y_true_shuffle, estimate_shuffle),
                            err_msg="%s is not sample order invariant"
                                    % name)

    for name in MULTILABELS_METRICS:
        assert_order_invariant(name, y_pred, y_pred_shuffle)

    for name in THRESHOLDED_MULTILABEL_METRICS:
        assert_order_invariant(name, y_score, y_score_shuffle)

    for name in MULTIOUTPUT_METRICS:
        assert_order_invariant(name, y_score, y_score_shuffle)
        assert_order_invariant(name, y_pred, y_pred_shuffle)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_normalize_option_binary_classification(n_samples=20):
    """Binary case: the normalized metric equals the raw metric over ``n_samples``."""
    rng = check_random_state(0)
    y_true = rng.randint(0, 2, size=(n_samples, ))
    y_pred = rng.randint(0, 2, size=(n_samples, ))

    for name in METRICS_WITH_NORMALIZE_OPTION:
        metric = ALL_METRICS[name]
        normalized = metric(y_true, y_pred, normalize=True)
        # A zero value would make the comparison below vacuous.
        assert_greater(normalized, 0,
                       msg="We failed to test correctly the normalize option")
        unnormalized = metric(y_true, y_pred, normalize=False)
        assert_almost_equal(unnormalized / n_samples, normalized)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_normalize_option_multiclasss_classification():
    """Multiclass case: the normalized metric equals the raw metric over the sample count."""
    rng = check_random_state(0)
    y_true = rng.randint(0, 4, size=(20, ))
    y_pred = rng.randint(0, 4, size=(20, ))
    n_samples = y_true.shape[0]

    for name in METRICS_WITH_NORMALIZE_OPTION:
        metric = ALL_METRICS[name]
        normalized = metric(y_true, y_pred, normalize=True)
        # A zero value would make the comparison below vacuous.
        assert_greater(normalized, 0,
                       msg="We failed to test correctly the normalize option")
        unnormalized = metric(y_true, y_pred, normalize=False)
        assert_almost_equal(unnormalized / n_samples, normalized)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_averaging_multiclass(n_samples=50, n_classes=3):
    """Yield one ``check_averaging`` case per metric in METRICS_WITH_AVERAGING.

    Each yielded tuple carries the multiclass labels, their binarized form
    (via ``LabelBinarizer`` fitted on ``y_true``) and random scores.
    """
    rng = check_random_state(0)
    y_true = rng.randint(0, n_classes, size=(n_samples, ))
    y_pred = rng.randint(0, n_classes, size=(n_samples, ))
    y_score = rng.uniform(size=(n_samples, n_classes))

    binarizer = LabelBinarizer().fit(y_true)
    y_true_binarize = binarizer.transform(y_true)
    y_pred_binarize = binarizer.transform(y_pred)

    shared_args = (y_true, y_true_binarize, y_pred, y_pred_binarize, y_score)
    for name in METRICS_WITH_AVERAGING:
        yield (check_averaging, name) + shared_args
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_auc_score_non_binary_class():
    """``roc_auc_score`` must raise ValueError for non-binary targets.

    The same set of checks runs twice: once directly, and once inside
    ``warnings.catch_warnings(record=True)`` after the warning registry has
    been cleared.
    """
    def run_checks():
        rng = check_random_state(404)
        y_pred = rng.rand(10)

        # y_true contains only one class value (0, 1 or -1)
        single_class_targets = (np.zeros(10, dtype="int"),
                                np.ones(10, dtype="int"),
                                -np.ones(10, dtype="int"))
        for y_true in single_class_targets:
            assert_raise_message(ValueError, "ROC AUC score is not defined",
                                 roc_auc_score, y_true, y_pred)

        # y_true contains three different class values
        y_true = rng.randint(0, 3, size=10)
        assert_raise_message(ValueError, "multiclass format is not supported",
                             roc_auc_score, y_true, y_pred)

    run_checks()

    clean_warning_registry()
    with warnings.catch_warnings(record=True):
        run_checks()
项目:cartesian    作者:Ohjeah    | 项目源码 | 文件源码
def oneplus(fun, random_state=None, cls=None, lambda_=4, max_iter=100,
            max_nfev=None, f_tol=0, n_jobs=1, seed=None):

    """
    1 + lambda algorithm.
    In each generation, create lambda offspring by point mutation and compare their fitness
    to the parent individual. The fittest individual carries over to the next generation.
    In case of a draw, the offspring is preferred (offspring are listed before the parent
    when the minimum is taken).

    :param fun: `callable(individual)`, function to be optimized
    :param random_state: an instance of np.random.RandomState, a seed integer or None
    :param cls: The base class for individuals
    :type cls: (optional) instance of cartesian.cgp.Cartesian
    :param seed: (optional) can be passed instead of cls.
    :param lambda_: number of offspring per generation
    :param max_iter: upper bound of the generation counter; replaced by max_nfev when that is given
    :param max_nfev: maximum number of function evaluations. Important, if fun is another optimizer
    :param f_tol: threshold for precision
    :param n_jobs: number of jobs; not used by the evaluation loop below, which runs serially

    :return: scipy.optimize.OptimizeResult with non-standard attributes
    res.x = values for constants
    res.expr = expression
    res.fun = best value for the function
    """
    if max_nfev:
        # An evaluation budget also replaces the generation limit.
        max_iter = max_nfev
    else:
        max_nfev = math.inf

    random_state = check_random_state(random_state)

    parent = seed or cls.create(random_state=random_state)
    parent_res = return_opt_result(fun, parent)
    nfev = parent_res.nfev

    def snapshot(generation):
        # Package the current parent and counters; success is decided by the caller.
        return OptimizeResult(expr=parent, x=parent_res.x, fun=parent_res.fun,
                              nit=generation, nfev=nfev, success=False)

    res = snapshot(0)
    if parent_res.fun <= f_tol:
        res["success"] = True
        return res

    for generation in range(1, max_iter):
        offspring = [point_mutation(parent, random_state=random_state) for _ in range(lambda_)]
        offspring_fitness = [return_opt_result(fun, child) for child in offspring]

        # min() keeps the first minimal entry, so offspring win ties with the parent.
        contenders = zip(offspring + [parent], offspring_fitness + [parent_res])
        parent, parent_res = min(contenders, key=lambda pair: pair[1].fun)
        nfev += sum(fitness.nfev for fitness in offspring_fitness)

        res = snapshot(generation)
        if res.fun <= f_tol:
            res["success"] = True
            return res
        if res.nfev >= max_nfev:
            return res

    return res
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_KNeighborsClassifier_multioutput():
    """k-NN classification on multi-output targets.

    A classifier fitted on all outputs at once must give the same labels and
    the same per-output probabilities as one classifier per output column.
    """
    n_samples, n_features, n_output = 50, 5, 3
    rng = check_random_state(0)

    X = rng.rand(n_samples, n_features)
    y = rng.randint(0, 3, (n_samples, n_output))
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    weight_options = [None, 'uniform', 'distance', _weight_func]

    for algorithm, weights in product(ALGORITHMS, weight_options):
        # One classifier per output column.
        per_output_pred = []
        per_output_proba = []
        for o in range(n_output):
            knn = neighbors.KNeighborsClassifier(weights=weights,
                                                 algorithm=algorithm)
            knn.fit(X_train, y_train[:, o])
            per_output_pred.append(knn.predict(X_test))
            per_output_proba.append(knn.predict_proba(X_test))

        y_pred_so = np.vstack(per_output_pred).T
        assert_equal(y_pred_so.shape, y_test.shape)
        assert_equal(len(per_output_proba), n_output)

        # A single classifier fitted on all outputs together.
        knn_mo = neighbors.KNeighborsClassifier(weights=weights,
                                                algorithm=algorithm)
        knn_mo.fit(X_train, y_train)
        y_pred_mo = knn_mo.predict(X_test)

        assert_equal(y_pred_mo.shape, y_test.shape)
        assert_array_almost_equal(y_pred_mo, y_pred_so)

        # Probabilities must agree output by output.
        y_pred_proba_mo = knn_mo.predict_proba(X_test)
        assert_equal(len(y_pred_proba_mo), n_output)

        for proba_mo, proba_so in zip(y_pred_proba_mo, per_output_proba):
            assert_array_almost_equal(proba_mo, proba_so)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_distribution():
    """Check the distribution of tree structures built by ExtraTreesRegressor.

    Each fitted tree is summarised as a string of ``feature,threshold/`` pairs
    (``-`` for nodes whose feature index is negative) and the frequency of
    each distinct string is checked.
    """
    rng = check_random_state(12321)

    def tree_signature(estimator):
        # Encode one tree as a string, node by node.
        return "".join(("%d,%d/" % (f, int(t)) if f >= 0 else "-")
                       for f, t in zip(estimator.tree_.feature,
                                       estimator.tree_.threshold))

    # Single variable with 4 values
    X = rng.randint(0, 4, size=(1000, 1))
    y = rng.rand(1000)
    n_trees = 500

    clf = ExtraTreesRegressor(n_estimators=n_trees, random_state=42).fit(X, y)

    uniques = defaultdict(int)
    for tree in clf.estimators_:
        uniques[tree_signature(tree)] += 1

    uniques = sorted([(1. * count / n_trees, tree)
                      for tree, count in uniques.items()])

    # On a single variable problem where X_0 has 4 equiprobable values, there
    # are 5 ways to build a random tree. The more compact (0,1/0,0/--0,2/--) of
    # them has probability 1/3 while the 4 others have probability 1/6.

    assert_equal(len(uniques), 5)
    assert_greater(0.20, uniques[0][0])  # Rough approximation of 1/6.
    assert_greater(0.20, uniques[1][0])
    assert_greater(0.20, uniques[2][0])
    assert_greater(0.20, uniques[3][0])
    assert_greater(uniques[4][0], 0.3)
    assert_equal(uniques[4][1], "0,1/0,0/--0,2/--")

    # Two variables, one with 2 values, one with 3 values.
    # Drawn from the seeded ``rng`` (not the global np.random) so that the
    # data, and therefore the test outcome, is reproducible.
    X = np.empty((1000, 2))
    X[:, 0] = rng.randint(0, 2, 1000)
    X[:, 1] = rng.randint(0, 3, 1000)
    y = rng.rand(1000)

    clf = ExtraTreesRegressor(n_estimators=100, max_features=1,
                              random_state=1).fit(X, y)

    uniques = defaultdict(int)
    for tree in clf.estimators_:
        uniques[tree_signature(tree)] += 1

    uniques = [(count, tree) for tree, count in uniques.items()]
    assert_equal(len(uniques), 8)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def make_prediction(dataset=None, binary=False):
    """Fit a linear SVC on half of a toy dataset and predict the other half.

    The samples are shuffled with a fixed seed, padded with Gaussian noise
    features, and split in two; the first half trains the classifier.

    If ``binary`` is True only the samples with target < 2 are kept, and the
    returned probabilities are those of the positive class only.

    Returns ``(y_true, y_pred, probas_pred)`` for the held-out half.
    """
    if dataset is None:
        # default toy data
        dataset = datasets.load_iris()

    X, y = dataset.data, dataset.target

    if binary:
        # keep classes 0 and 1 only
        keep = y < 2
        X, y = X[keep], y[keep]

    n_samples, n_features = X.shape

    # Shuffle the samples with a fixed seed.
    order = np.arange(n_samples)
    check_random_state(37).shuffle(order)
    X, y = X[order], y[order]
    half = n_samples // 2

    # add noisy features to make the problem harder and avoid perfect results
    noise_rng = np.random.RandomState(0)
    noise = noise_rng.randn(n_samples, 200 * n_features)
    X = np.c_[X, noise]

    # run classifier, get class probabilities and label predictions
    X_train, y_train = X[:half], y[:half]
    X_test = X[half:]
    clf = svm.SVC(kernel='linear', probability=True, random_state=0)
    probas_pred = clf.fit(X_train, y_train).predict_proba(X_test)

    if binary:
        # only interested in probabilities of the positive case
        # XXX: do we really want a special API for the binary case?
        probas_pred = probas_pred[:, 1]

    y_pred = clf.predict(X_test)
    return y[half:], y_pred, probas_pred


###############################################################################
# Tests
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_sample_weight_invariance(n_samples=50):
    """Yield ``check_sample_weight_invariance`` cases for all weighted metrics.

    Three families of targets are generated in turn: binary, multiclass and
    multilabel indicator. Thresholded metrics receive scores; all other
    metrics receive hard predictions.
    """
    # binary
    # (a redundant earlier check_random_state(0) assignment, overwritten
    # before use, has been removed)
    random_state = check_random_state(0)
    y_true = random_state.randint(0, 2, size=(n_samples, ))
    y_pred = random_state.randint(0, 2, size=(n_samples, ))
    y_score = random_state.random_sample(size=(n_samples,))
    for name in ALL_METRICS:
        if (name in METRICS_WITHOUT_SAMPLE_WEIGHT or
                name in METRIC_UNDEFINED_BINARY):
            continue
        metric = ALL_METRICS[name]
        if name in THRESHOLDED_METRICS:
            yield check_sample_weight_invariance, name, metric, y_true, y_score
        else:
            yield check_sample_weight_invariance, name, metric, y_true, y_pred

    # multiclass
    random_state = check_random_state(0)
    y_true = random_state.randint(0, 5, size=(n_samples, ))
    y_pred = random_state.randint(0, 5, size=(n_samples, ))
    y_score = random_state.random_sample(size=(n_samples, 5))
    for name in ALL_METRICS:
        if (name in METRICS_WITHOUT_SAMPLE_WEIGHT or
                name in METRIC_UNDEFINED_BINARY_MULTICLASS):
            continue
        metric = ALL_METRICS[name]
        if name in THRESHOLDED_METRICS:
            yield check_sample_weight_invariance, name, metric, y_true, y_score
        else:
            yield check_sample_weight_invariance, name, metric, y_true, y_pred

    # multilabel indicator
    _, ya = make_multilabel_classification(n_features=1, n_classes=20,
                                           random_state=0, n_samples=100,
                                           allow_unlabeled=False)
    _, yb = make_multilabel_classification(n_features=1, n_classes=20,
                                           random_state=1, n_samples=100,
                                           allow_unlabeled=False)
    y_true = np.vstack([ya, yb])
    y_pred = np.vstack([ya, ya])
    # Scores continue the generator left over from the multiclass section.
    y_score = random_state.randint(1, 4, size=y_true.shape)

    for name in (MULTILABELS_METRICS + THRESHOLDED_MULTILABEL_METRICS +
                 MULTIOUTPUT_METRICS):
        if name in METRICS_WITHOUT_SAMPLE_WEIGHT:
            continue

        metric = ALL_METRICS[name]
        if name in THRESHOLDED_METRICS:
            yield (check_sample_weight_invariance, name, metric, y_true,
                   y_score)
        else:
            yield (check_sample_weight_invariance, name, metric, y_true,
                   y_pred)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def make_prediction(dataset=None, binary=False):
    """Fit a linear SVC on half of a toy dataset and predict the other half.

    The samples are shuffled with a fixed seed, padded with Gaussian noise
    features, and split in two; the first half trains the classifier.

    If ``binary`` is True only the samples with target < 2 are kept, and the
    returned probabilities are those of the positive class only.

    Returns ``(y_true, y_pred, probas_pred)`` for the held-out half.
    """
    if dataset is None:
        # default toy data
        dataset = datasets.load_iris()

    X, y = dataset.data, dataset.target

    if binary:
        # keep classes 0 and 1 only
        keep = y < 2
        X, y = X[keep], y[keep]

    n_samples, n_features = X.shape

    # Shuffle the samples with a fixed seed.
    order = np.arange(n_samples)
    check_random_state(37).shuffle(order)
    X, y = X[order], y[order]
    half = n_samples // 2

    # add noisy features to make the problem harder and avoid perfect results
    noise_rng = np.random.RandomState(0)
    noise = noise_rng.randn(n_samples, 200 * n_features)
    X = np.c_[X, noise]

    # run classifier, get class probabilities and label predictions
    X_train, y_train = X[:half], y[:half]
    X_test = X[half:]
    clf = svm.SVC(kernel='linear', probability=True, random_state=0)
    probas_pred = clf.fit(X_train, y_train).predict_proba(X_test)

    if binary:
        # only interested in probabilities of the positive case
        # XXX: do we really want a special API for the binary case?
        probas_pred = probas_pred[:, 1]

    y_pred = clf.predict(X_test)
    return y[half:], y_pred, probas_pred


###############################################################################
# Tests