Python sklearn.utils 模块,check_X_y() 实例源码

我们从 Python 开源项目中提取了以下 21 个代码示例，用于说明如何使用 sklearn.utils.check_X_y()。

项目:palladio    作者:slipguru    | 项目源码 | 文件源码
def fit(self, X, y):
        """Validate the training data and run the fit for this process.

        ``X`` and ``y`` are validated with ``check_X_y`` (with
        ``force_all_finite=False`` and ``multi_output=self.multi_output``),
        the scorer is resolved into ``self.scorer_``, and the work is then
        dispatched on the module-level ``RANK``: rank 0 runs
        ``_fit_master`` and every other rank runs ``_fit_slave``.

        Returns
        -------
        self : returns an instance of self.
        """
        X, y = check_X_y(X, y, multi_output=self.multi_output,
                         force_all_finite=False)
        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

        if RANK != 0:
            self._fit_slave(X, y)
            return self

        # Only rank 0 makes sure the experiments folder exists (when set).
        if self.experiments_folder is not None:
            assert_path(self.experiments_folder)
        self._fit_master(X, y)
        return self
项目:pyshgp    作者:erp12    | 项目源码 | 文件源码
def fit(self, X, y):
        """Fit the PushGPClassifier.

        One ``'_float'`` output type is registered per distinct label found
        in ``y``, the spawner is built for the number of columns of ``X``,
        and the evolutionary run is started.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = (n_samples, n_features)
            Samples.

        y : {array-like, sparse matrix}, shape = (n_samples, 1)
            Target values.

        Returns
        -------
        Whatever ``self.evolve(X, y)`` returns.
        """
        X, y = check_X_y(X, y)
        class_count = np.unique(y).size
        self.output_types = ['_float'] * class_count
        self.make_spawner(X.shape[1])
        return self.evolve(X, y)
项目:operalib    作者:operalib    | 项目源码 | 文件源码
def score(self, X, y, sample_weight=None):
        """Return the pinball score of the fitted model on ``(X, y)``.

        The score is one minus the mean pinball loss of the predictions at
        the quantile levels ``self.probs``.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Data to score.
        y : {array-like}, shape = [n_samples]
            Target values.
        sample_weight : ignored
            Accepted in the signature but not used by this method.

        Returns
        -------
        l : {float}
            Average pinball score (the higher, the better).
        """
        check_is_fitted(self, ['model_', 'linop_'], all_or_any=all)
        X, y = check_X_y(X, y)
        predictions = self.predict(X)
        losses = Quantile.pinball_loss(y, predictions, self.probs)
        return 1 - losses.mean()
项目:MetaHeuristic    作者:gonzalesMK    | 项目源码 | 文件源码
def _set_dataset(self, X, y, normalize):
        """Prepare the data and reset the search state before a run.

        When ``normalize`` is truthy, ``X`` is standardised with a new
        ``StandardScaler`` kept in ``self._sc_X``. Targets go through
        ``self._validate_targets`` and both arrays are then validated with
        ``check_X_y`` (float64, C order, CSR accepted for sparse input).
        ``n_features_`` is recorded, ``mask_`` and ``fitnesses_`` are reset
        to empty lists, and ``self._evaluate`` is registered on the toolbox
        as ``"evaluate"`` with the validated data bound as keyword arguments.

        Returns
        -------
        X, y : the validated (and possibly standardised) data.
        """
        if normalize:
            self._sc_X = scaler = StandardScaler()
            X = scaler.fit_transform(X)
        self.normalize_ = normalize

        y = self._validate_targets(y)
        X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64,
                         order='C')

        self.n_features_ = X.shape[1]
        self.mask_, self.fitnesses_ = [], []

        self.toolbox.register("evaluate", self._evaluate, X=X, y=y)
        return X, y
项目:MetaHeuristic    作者:gonzalesMK    | 项目源码 | 文件源码
def _set_dataset(self, X, y, normalize):
        """Prepare the data and reset the search state before a run.

        When ``normalize`` is truthy, ``X`` is standardised with a new
        ``StandardScaler`` kept in ``self._sc_X``. Targets go through
        ``self._validate_targets`` and both arrays are then validated with
        ``check_X_y`` (float64, C order, CSR accepted for sparse input).
        ``n_features_`` is recorded, ``mask_`` and ``fitnesses_`` are reset
        to empty lists, and ``self._evaluate`` is registered on the toolbox
        as ``"evaluate"`` with the validated data bound as keyword arguments.

        Returns
        -------
        X, y : the validated (and possibly standardised) data.
        """
        if normalize:
            self._sc_X = scaler = StandardScaler()
            X = scaler.fit_transform(X)
        self.normalize_ = normalize

        y = self._validate_targets(y)
        X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64,
                         order='C')

        self.n_features_ = X.shape[1]
        self.mask_, self.fitnesses_ = [], []

        self.toolbox.register("evaluate", self._evaluate, X=X, y=y)
        return X, y
项目:palladio    作者:slipguru    | 项目源码 | 文件源码
def fit(self, X, y):
        """Validate the inputs and run the fit for this process.

        The data is validated with ``check_X_y`` (with
        ``force_all_finite=False`` and ``multi_output=self.multi_output``),
        the parameter grid is checked, and the cross-validation object and
        the scorer are resolved. The module-level ``comm_rank`` then selects
        the role: rank 0 runs ``_fit_master(X, y, cv)``, any other rank runs
        ``_fit_slave()`` without arguments.

        Returns
        -------
        self : returns an instance of self.
        """
        X, y = check_X_y(X, y, multi_output=self.multi_output,
                         force_all_finite=False)
        _check_param_grid(self.param_grid)

        # _check_cv is called with the targets only (X is not passed).
        cv = _check_cv(self.cv, y, classifier=is_classifier(self.estimator))
        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

        if comm_rank != 0:
            self._fit_slave()
        else:
            self._fit_master(X, y, cv)
        return self
项目:pyshgp    作者:erp12    | 项目源码 | 文件源码
def fit(self, X, y):
        """Fit the PushGPRegressor.

        The spawner is built for the number of columns of ``X`` and the
        evolutionary run is started on the validated data.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = (n_samples, n_features)
            Samples.

        y : {array-like, sparse matrix}, shape = (n_samples, 1)
            Target values.

        Returns
        -------
        Whatever ``self.evolve(X, y)`` returns.
        """
        X, y = check_X_y(X, y)
        self.make_spawner(X.shape[1])
        return self.evolve(X, y)
项目:pines    作者:dmitru    | 项目源码 | 文件源码
def fit(self, X, y, **kwargs):
        """Build a classification tree from ``(X, y)``.

        The inputs are validated and converted to float64, the number of
        features is remembered in ``self._n_features``, a tree builder
        configured for ``ProblemType.CLASSIFICATION`` is created from
        ``self._tree_builder_class`` and ``self._tree_builder_kwargs``, and
        the resulting tree is stored in ``self.tree_``.

        ``**kwargs`` is accepted but not used.

        Returns
        -------
        self : returns an instance of self.
        """
        X, y = check_X_y(X, y, dtype=np.float64)

        _, feature_count = X.shape
        self._n_features = feature_count

        builder_options = dict(self._tree_builder_kwargs,
                               problem=ProblemType.CLASSIFICATION) \
            if 'problem' not in self._tree_builder_kwargs else None
        if builder_options is None:
            # Same call as passing both explicitly: a duplicated 'problem'
            # keyword raises TypeError.
            self._tree_builder = self._tree_builder_class(
                problem=ProblemType.CLASSIFICATION,
                **self._tree_builder_kwargs
            )
        else:
            self._tree_builder = self._tree_builder_class(**builder_options)
        self.tree_ = self._tree_builder.build_tree(X, y)
        return self
项目:pines    作者:dmitru    | 项目源码 | 文件源码
def fit(self, X, y, **kwargs):
        """Build a regression tree from ``(X, y)``.

        The inputs are validated and converted to float64, the number of
        features is remembered in ``self._n_features``, a tree builder
        configured for ``ProblemType.REGRESSION`` is created from
        ``self._tree_builder_class`` and ``self._tree_builder_kwargs``, and
        the resulting tree is stored in ``self._tree``.

        ``**kwargs`` is accepted but not used.

        Returns
        -------
        self : returns an instance of self.
        """
        X, y = check_X_y(X, y, dtype=np.float64)
        _, feature_count = X.shape
        self._n_features = feature_count

        builder = self._tree_builder_class(
            problem=ProblemType.REGRESSION,
            **self._tree_builder_kwargs
        )
        self._tree_builder = builder
        self._tree = builder.build_tree(X, y)
        return self
项目:operalib    作者:operalib    | 项目源码 | 文件源码
def fit(self, X, y):
        """Fit ORFF ridge regression model.

        The kernel operator and the ORFF feature map of ``X`` are built,
        then the ridge risk is minimised with ``minimize`` starting from a
        zero vector; the solution is stored in ``self.coefs_``.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training data.

        y : {array-like}, shape = [n_samples] or [n_samples, n_targets]
            Target values.

        Returns
        -------
        self : returns an instance of self.
        """
        X, y = check_X_y(X, y, ['csr', 'csc', 'coo'],
                         y_numeric=True, multi_output=True)
        self._validate_params()

        # Output dimension: number of target columns, or 1 for a 1-D y.
        if y.ndim > 1:
            self.p = y.shape[1]
        else:
            self.p = 1

        options = self.solver_params or {}

        self.linop_ = self._get_kernel(X, y)
        self.phix_ = self.linop_.get_orff_map(X, self.D)

        risk = ORFFRidgeRisk(self.lbda, self.loss)
        start = zeros(self.phix_.shape[1], dtype=X.dtype)
        # jac=True: the objective returns both the value and its gradient.
        self.solver_res_ = minimize(
            risk.functional_grad_val, start,
            args=(y.ravel(), self.phix_, self.linop_),
            method=self.solver, jac=True, options=options)
        self.coefs_ = self.solver_res_.x
        return self
项目:operalib    作者:operalib    | 项目源码 | 文件源码
def fit(self, X, y):
        """Fit ONORMA model.

        Runs ``self.T_`` calls to ``partial_fit``, one sample at a time,
        cycling through the rows of ``X`` when ``T_`` exceeds the number of
        samples. ``T_`` is ``self.T`` when given, else the number of samples.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training data.

        y : {array-like}, shape = [n_samples] or [n_samples, n_targets]
            Target values.

        Returns
        -------
        self : returns an instance of self.
        """
        X, y = check_X_y(X, y, None, y_numeric=True, multi_output=True)
        self._validate_params()

        sample_count = X.shape[0]
        self.T_ = sample_count if self.T is None else self.T
        self.t_ = 0

        multi_target = y.ndim > 1
        # One coefficient per step and per target column.
        if multi_target:
            self.coefs_ = zeros(self.T_ * y.shape[1])
        else:
            self.coefs_ = zeros(self.T_)

        for step in range(self.T_):
            row = step % sample_count
            target = y[row, :] if multi_target else y[row]
            self.partial_fit(X[row, :], target)
        return self
项目:operalib    作者:operalib    | 项目源码 | 文件源码
def fit(self, X, y):
        """Fit joint quantile regression model.

        Builds the kernel operator and its dense Gram matrix on ``X``, sets
        ``reg_c_`` to the inverse of ``self.lbda``, and solves the quadratic
        program with ``_qp_nc`` when ``self.nc_const`` is truthy, else with
        ``_qp``.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training data.
        y : {array-like}, shape = [n_samples]
            Target values.

        Returns
        -------
        self : returns an instance of self.
        """
        X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], y_numeric=True)
        self._validate_params()

        self.linop_ = self._get_kernel_map(X)
        gram = self.linop_.Gram_dense(X)
        self.reg_c_ = 1. / self.lbda

        # Quantile levels as a column vector for the solver.
        levels = asarray(self.probs).reshape((-1, 1))
        solve = self._qp_nc if self.nc_const else self._qp
        solve(gram, y, levels)
        return self
项目:fri    作者:lpfann    | 项目源码 | 文件源码
def fit(self, X, y):
        """Fit the classifier on binary-labelled data.

        The labels are encoded to ``{-1, 1}`` before being passed to the
        parent class' ``fit``; the original labels are kept in
        ``self.classes_``.

        Parameters
        ----------
        X : array_like
            standardized data matrix
        y : array_like
            label vector

        Returns
        -------
        self : object
            The fitted estimator, following the scikit-learn convention so
            that calls such as ``est.fit(X, y).predict(X)`` work.

        Raises
        ------
        ValueError
            If ``y`` contains more than two classes.
        """

        # Check that X and y have correct shape
        X, y = check_X_y(X, y)
        # Store the classes seen during fit
        self.classes_ = unique_labels(y)

        if len(self.classes_) > 2:
            raise ValueError("Only binary class data supported")
        # LabelEncoder maps the labels to 0..n-1; the class encoded as 0 is
        # relabelled -1 for the decision surface.
        y = preprocessing.LabelEncoder().fit_transform(y)
        y[y == 0] = -1

        super().fit(X, y)
        return self
项目:fri    作者:lpfann    | 项目源码 | 文件源码
def fit(self, X, y):
        """Fit model to data and provide feature relevance intervals.

        Parameters
        ----------
        X : array_like
            standardized data matrix
        y : array_like
            response vector

        Returns
        -------
        self : object
            The fitted estimator, following the scikit-learn convention so
            that calls such as ``est.fit(X, y).predict(X)`` work.
        """

        # Check that X and y have correct shape
        X, y = check_X_y(X, y)

        super().fit(X, y)
        return self
项目:xam    作者:MaxHalford    | 项目源码 | 文件源码
def fit(self, X, y, **fit_params):
        """Find the cut points of every column of X with respect to y.

        ``mdlp_cut`` is called once per column of the validated ``X`` with a
        fresh empty list as its third argument; the results are stored, in
        column order, in ``self.cut_points_``. ``**fit_params`` is accepted
        but not used.

        Returns
        -------
        self : returns an instance of self.
        """
        X, y = check_X_y(X, y, y_numeric=True)

        cut_points = []
        for column in X.T:
            cut_points.append(mdlp_cut(column, y, []))
        self.cut_points_ = cut_points
        return self
项目:boruta_py    作者:scikit-learn-contrib    | 项目源码 | 文件源码
def _check_params(self, X, y):
        """Validate X, y and the hyperparameters before fitting.

        Nothing is returned: the validated arrays are discarded and the
        method only raises when something is wrong.

        Raises
        ------
        ValueError
            If ``self.perc`` is not in ``(0, 100]`` or ``self.alpha`` is not
            in ``(0, 1]`` (or if ``check_X_y`` rejects the data).
        """
        # X must be a 2-D array and y a column of matching length.
        X, y = check_X_y(X, y)

        perc_out_of_range = self.perc <= 0 or self.perc > 100
        if perc_out_of_range:
            raise ValueError('The percentile should be between 0 and 100.')

        alpha_out_of_range = self.alpha <= 0 or self.alpha > 1
        if alpha_out_of_range:
            raise ValueError('Alpha should be between 0 and 1.')
项目:mifs    作者:danielhomola    | 项目源码 | 文件源码
def _check_params(self, X, y):
        """Validate the data and hyperparameters, scaling continuous data.

        When ``self.categorical`` is False, both ``X`` and ``y`` are
        standardised with ``StandardScaler``; ``y`` is returned 1-D.
        Advisory messages about suspicious data are printed, not raised.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Input data.
        y : array-like, shape = [n_samples]
            Target values.

        Returns
        -------
        X, y : the validated (and possibly standardised) arrays.

        Raises
        ------
        ValueError
            If ``categorical`` is not a bool, ``method`` is unknown, ``k`` is
            not a positive int, or (categorical case) ``k`` exceeds the size
            of a class.
        """
        X, y = check_X_y(X, y)

        # Validate the flag before any branch relies on its truthiness.
        if not isinstance(self.categorical, bool):
            raise ValueError('Categorical must be Boolean.')

        if not self.categorical:
            X = StandardScaler().fit_transform(X)
            # StandardScaler works on 2-D input: scale y as a single column
            # and flatten it back so callers still get a 1-D target.
            y = StandardScaler().fit_transform(y.reshape(-1, 1)).ravel()

        # sanity checks
        methods = ['JMI', 'JMIM', 'MRMR']
        if self.method not in methods:
            raise ValueError('Please choose one of the following methods:\n' +
                             '\n'.join(methods))

        if not isinstance(self.k, int):
            raise ValueError("k must be an integer.")
        if self.k < 1:
            raise ValueError('k must be larger than 0.')
        if self.categorical and np.any(self.k > np.bincount(y)):
            raise ValueError('k must be smaller than your smallest class.')

        # Single-argument print(...) calls behave the same on Python 2 and 3.
        if self.categorical and np.unique(y).shape[0] > 5:
            print('Are you sure y is categorical? It has more than 5 levels.')
        if not self.categorical and self._isinteger(y):
            print('Are you sure y is continuous? It seems to be discrete.')
        if self._isinteger(X):
            print('The values of X seem to be discrete. MI_FS will treat them '
                  'as continuous.')
        return X, y
项目:SPHERE-HyperStream    作者:IRC-SPHERE    | 项目源码 | 文件源码
def fit(self, X, y, tol=None):
        """Fit the model according to the given training data and parameters.

        Estimates, from the label sequence and the per-class rows of ``X``:
        ``startprob_`` (smoothed class frequencies), ``transmat_``
        (row-normalised counts of consecutive label pairs) and, per class,
        ``miss_probs_`` (fraction of NaN entries per feature), ``means_``
        and diagonal ``covars_`` computed with NaN-aware statistics. The
        rows built from ``self.pseudo_rssi_list`` are appended to every
        class before those statistics are computed.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y : array, shape = [n_samples]
            Target values (integers)

        tol : ignored
            Accepted in the signature but not used by this method.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        ValueError
            If ``y`` is not a binary/multiclass target or has fewer than two
            classes.
        """

        # NOTE(review): check_X_y is not applied to the inputs; the NaN-aware
        # statistics below suggest X is expected to contain NaN - confirm.
        target_type = type_of_target(y)
        if target_type not in ['binary', 'multiclass']:
            raise ValueError("Unknown label type: %r" % target_type)
        # Replace the labels by their index into classes_.
        self.classes_, y = np.unique(y, return_inverse=True)
        n_samples, n_features = X.shape
        n_classes = len(self.classes_)
        if n_classes < 2:
            raise ValueError('y has less than 2 classes')
        # Add-one smoothing of the class frequencies.
        self.startprob_ = (bincount(y) + 1.0) / (len(y) + n_classes)

        # Count transitions between consecutive labels. range (not xrange)
        # keeps this runnable on both Python 2 and Python 3.
        transmat = np.zeros((n_classes, n_classes))
        for i in range(len(y) - 1):
            transmat[y[i], y[i + 1]] += 1
        # Row-normalise. A class that never has a successor (it only occurs
        # as the last label) has a zero row sum and yields a NaN row.
        transmat = (transmat.transpose() / np.sum(transmat, 1)).transpose()
        self.transmat_ = transmat

        # One column per feature, each holding the pseudo values.
        pseudo_rows = np.tile(self.pseudo_rssi_list,
                              (n_features, 1)).transpose()
        means = []
        covars = []
        miss_probs = []
        for cl in range(n_classes):
            X_cl = np.concatenate((X[y == cl, :], pseudo_rows), 0)
            miss_probs.append(np.mean(np.isnan(X_cl), 0))
            means.append(np.nanmean(X_cl, 0))
            covars.append(np.diag(np.nanvar(X_cl, 0, ddof=1)))
        self.miss_probs_ = np.asarray(miss_probs)
        self.means_ = np.asarray(means)
        self.covars_ = np.asarray(covars)
        return self
项目:scikit-garden    作者:scikit-garden    | 项目源码 | 文件源码
def fit(self, X, y, sample_weight=None, check_input=True,
            X_idx_sorted=None):
        """
        Build a decision tree from the training set (X, y).

        After the parent class has fitted the tree, the training targets are
        kept in ``y_train_`` and the leaf each training sample falls into is
        kept in ``y_train_leaves_``.

        Parameters
        ----------
        X : array-like or sparse matrix, shape = [n_samples, n_features]
            The training input samples. Internally, it will be converted to
            ``dtype=np.float32`` and if a sparse matrix is provided
            to a sparse ``csc_matrix``.

        y : array-like, shape = [n_samples] or [n_samples, n_outputs]
            The target values (class labels) as integers or strings.

        sample_weight : array-like, shape = [n_samples] or None
            Sample weights. If None, then samples are equally weighted. Splits
            that would create child nodes with net zero or negative weight are
            ignored while searching for a split in each node. Splits are also
            ignored if they would result in any single class carrying a
            negative weight in either child node.

        check_input : boolean, (default=True)
            Allow to bypass several input checking.
            Don't use this parameter unless you know what you do.

        X_idx_sorted : array-like, shape = [n_samples, n_features], optional
            The indexes of the sorted training input samples. If many tree
            are grown on the same dataset, this allows the ordering to be
            cached between trees. If None, the data will be sorted here.
            Don't use this parameter unless you know what to do.

        Returns
        -------
        self : object
            Returns self.
        """
        # A forest hands over y as a single-column 2-D array; flatten it so
        # that validation does not emit data-conversion warnings.
        y = np.asarray(y)
        is_single_column = y.ndim == 2 and y.shape[1] == 1
        if is_single_column:
            y = y.ravel()

        # The tree's apply method needs X as np.float32.
        X, y = check_X_y(
            X, y, multi_output=False, dtype=np.float32, accept_sparse="csc")
        super(BaseTreeQuantileRegressor, self).fit(
            X, y, sample_weight=sample_weight, check_input=check_input,
            X_idx_sorted=X_idx_sorted)
        self.y_train_ = y

        # Leaf index of every training sample.
        self.y_train_leaves_ = self.tree_.apply(X)
        return self
项目:scikit-garden    作者:scikit-garden    | 项目源码 | 文件源码
def fit(self, X, y):
        """
        Build a forest from the training set (X, y).

        After the parent class has fitted the forest, three attributes are
        filled: ``y_train_`` (the validated targets), ``y_train_leaves_``
        (per estimator, the leaf of each sample that estimator drew, and -1
        for samples it did not draw) and ``y_weights_`` (per estimator, each
        sample's draw count divided by the total draw count of its leaf).

        Parameters
        ----------
        X : array-like or sparse matrix, shape = [n_samples, n_features]
            The training input samples. Internally, it will be converted to
            ``dtype=np.float32`` and if a sparse matrix is provided
            to a sparse ``csc_matrix``.

        y : array-like, shape = [n_samples] or [n_samples, n_outputs]
            The target values (class labels) as integers or strings.

        Returns
        -------
        self : object
            Returns self.
        """
        # The trees' apply method needs X as np.float32.
        X, y = check_X_y(
            X, y, multi_output=False, dtype=np.float32, accept_sparse="csc")
        super(BaseForestQuantileRegressor, self).fit(X, y)

        self.y_train_ = y
        self.y_train_leaves_ = -np.ones(
            (self.n_estimators, len(y)), dtype=np.int32)
        self.y_weights_ = np.zeros_like(
            (self.y_train_leaves_), dtype=np.float32)

        for est_idx, tree in enumerate(self.estimators_):
            # Indices of the samples this estimator was trained on.
            if self.bootstrap:
                drawn = generate_sample_indices(tree.random_state, len(y))
            else:
                drawn = np.arange(len(y))

            # How many times each sample was drawn by this estimator.
            draw_counts = np.bincount(drawn, minlength=len(y))
            leaves = tree.y_train_leaves_
            for leaf_id in np.unique(leaves):
                in_leaf = leaves == leaf_id
                self.y_weights_[est_idx, in_leaf] = (
                    draw_counts[in_leaf] / np.sum(draw_counts[in_leaf]))

            self.y_train_leaves_[est_idx, drawn] = leaves[drawn]
        return self
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_check_array_min_samples_and_features_messages():
    """Check the messages raised when too few samples/features are given."""
    # A nested empty list is treated as 2D by default:
    expected = "0 feature(s) (shape=(1, 0)) while a minimum of 1 is required."
    assert_raise_message(ValueError, expected, check_array, [[]])

    # With ensure_2d=False an empty list is a 1D collection, so the minimum
    # number of samples is what fails:
    expected = "0 sample(s) (shape=(0,)) while a minimum of 1 is required."
    assert_raise_message(ValueError, expected, check_array, [],
                         ensure_2d=False)

    # A scalar is an invalid edge case for the default minimum-sample check
    expected = ("Singleton array array(42) cannot be considered a valid "
                "collection.")
    assert_raise_message(TypeError, expected, check_array, 42,
                         ensure_2d=False)

    # It does work once the input is forced to look like a 2D array with
    # one sample and one feature:
    checked = assert_warns(DeprecationWarning, check_array, [42],
                           ensure_2d=True)
    assert_array_equal(np.array([[42]]), checked)

    # A model that needs at least 2 samples to be well defined
    data = np.ones((1, 10))
    target = np.ones(1)
    expected = "1 sample(s) (shape=(1, 10)) while a minimum of 2 is required."
    assert_raise_message(ValueError, expected, check_X_y, data, target,
                         ensure_min_samples=2)

    # 2D data raises the same message even when 2D is not mandatory
    assert_raise_message(ValueError, expected, check_X_y, data, target,
                         ensure_min_samples=2, ensure_2d=False)

    # A model that needs at least 3 features (e.g. SelectKBest with k=3)
    data = np.ones((10, 2))
    target = np.ones(2)
    expected = "2 feature(s) (shape=(10, 2)) while a minimum of 3 is required."
    assert_raise_message(ValueError, expected, check_X_y, data, target,
                         ensure_min_features=3)

    # For 2D data only the feature check applies, even with allow_nd enabled:
    assert_raise_message(ValueError, expected, check_X_y, data, target,
                         ensure_min_features=3, allow_nd=True)

    # A pipeline stage that has trimmed all the features of a 2D dataset.
    data = np.empty(0).reshape(10, 0)
    target = np.ones(10)
    expected = "0 feature(s) (shape=(10, 0)) while a minimum of 1 is required."
    assert_raise_message(ValueError, expected, check_X_y, data, target)

    # By default nd-data is not checked for a minimum number of features:
    data = np.ones((10, 0, 28, 28))
    target = np.ones(10)
    data_checked, target_checked = check_X_y(data, target, allow_nd=True)
    assert_array_equal(data, data_checked)
    assert_array_equal(target, target_checked)