The following 21 code examples, extracted from open-source Python projects, illustrate how to use sklearn.utils.check_X_y().
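As background before the project code, here is a minimal sketch of what check_X_y itself does: it validates X and y together, enforcing a 2-D X, a 1-D y (unless multi_output=True), and matching sample counts, and returns both as converted arrays. The toy data below is made up purely for illustration:

import numpy as np
from sklearn.utils import check_X_y

X = [[0, 1], [2, 3], [4, 5]]   # toy data: 3 samples, 2 features
y = [0, 1, 0]

# Lists are converted to ndarrays; the dtype, shape, and consistent-length
# checks all happen in this single call.
X_checked, y_checked = check_X_y(X, y, dtype=np.float64)
print(X_checked.dtype)   # float64
print(y_checked.shape)   # (3,)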
def fit(self, X, y): """Fit the model to the training data.""" X, y = check_X_y(X, y, force_all_finite=False, multi_output=self.multi_output) self.scorer_ = check_scoring(self.estimator, scoring=self.scoring) if RANK == 0: if self.experiments_folder is not None: assert_path(self.experiments_folder) self._fit_master(X, y) else: self._fit_slave(X, y) return self
def fit(self, X, y): """Fits the PushGPClassifier. Parameters ---------- X : {array-like, sparse matrix}, shape = (n_samples, n_features) Samples. y : {array-like, sparse matrix}, shape = (n_samples, 1) Target values. """ X, y = check_X_y(X, y) n_feats = X.shape[1] n_classes = len(np.unique(y)) self.output_types = ['_float'] * n_classes self.make_spawner(n_feats) return self.evolve(X, y)
def score(self, X, y, sample_weight=None):
    """Compute the pinball score for the given dataset.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training data.

    y : {array-like}, shape = [n_samples]
        Target values.

    Returns
    -------
    l : {float}
        Average pinball score (the higher, the better).
    """
    check_is_fitted(self, ['model_', 'linop_'], all_or_any=all)
    X, y = check_X_y(X, y)
    return 1 - Quantile.pinball_loss(y, self.predict(X), self.probs).mean()

def _set_dataset(self, X, y, normalize):
    if normalize:
        self._sc_X = StandardScaler()
        X = self._sc_X.fit_transform(X)
    self.normalize_ = normalize
    y = self._validate_targets(y)
    X, y = check_X_y(X, y, dtype=np.float64, order='C', accept_sparse='csr')
    self.n_features_ = X.shape[1]
    self.mask_ = []
    self.fitnesses_ = []
    self.toolbox.register("evaluate", self._evaluate, X=X, y=y)
    return X, y

def fit(self, X, y): """Fit the model to the training data.""" X, y = check_X_y(X, y, force_all_finite=False, multi_output=self.multi_output) _check_param_grid(self.param_grid) # cv = _check_cv(self.cv, X, y, classifier=is_classifier(self.estimator)) cv = _check_cv(self.cv, y, classifier=is_classifier(self.estimator)) self.scorer_ = check_scoring(self.estimator, scoring=self.scoring) if comm_rank == 0: self._fit_master(X, y, cv) else: self._fit_slave() return self
def fit(self, X, y): """Fits the PushGPRegressor. Parameters ---------- X : {array-like, sparse matrix}, shape = (n_samples, n_features) Samples. y : {array-like, sparse matrix}, shape = (n_samples, 1) Target values. """ X, y = check_X_y(X, y) n_feats = X.shape[1] self.make_spawner(n_feats) return self.evolve(X, y)
def fit(self, X, y, **kwargs):
    X, y = check_X_y(X, y, dtype=np.float64)
    data_size, n_features = X.shape
    self._n_features = n_features
    self._tree_builder = self._tree_builder_class(
        problem=ProblemType.CLASSIFICATION,
        **self._tree_builder_kwargs
    )
    self.tree_ = self._tree_builder.build_tree(X, y)
    return self

def fit(self, X, y, **kwargs):
    X, y = check_X_y(X, y, dtype=np.float64)
    data_size, n_features = X.shape
    self._n_features = n_features
    self._tree_builder = self._tree_builder_class(
        problem=ProblemType.REGRESSION,
        **self._tree_builder_kwargs
    )
    self._tree = self._tree_builder.build_tree(X, y)
    return self

def fit(self, X, y): """Fit ORFF ridge regression model. Parameters ---------- X : {array-like, sparse matrix}, shape = [n_samples, n_features] Training data. y : {array-like}, shape = [n_samples] or [n_samples, n_targets] Target values. Returns ------- self : returns an instance of self. """ X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], y_numeric=True, multi_output=True) self._validate_params() self.p = y.shape[1] if y.ndim > 1 else 1 solver_params = self.solver_params or {} self.linop_ = self._get_kernel(X, y) self.phix_ = self.linop_.get_orff_map(X, self.D) risk = ORFFRidgeRisk(self.lbda, self.loss) self.solver_res_ = minimize(risk.functional_grad_val, zeros(self.phix_.shape[1], dtype=X.dtype), args=(y.ravel(), self.phix_, self.linop_), method=self.solver, jac=True, options=solver_params) self.coefs_ = self.solver_res_.x return self
def fit(self, X, y): """Fit ONORMA model. Parameters ---------- X : {array-like, sparse matrix}, shape = [n_samples, n_features] Training data. y : {array-like}, shape = [n_samples] or [n_samples, n_targets] Target values. Returns ------- self : returns an instance of self. """ X, y = check_X_y(X, y, None, y_numeric=True, multi_output=True) self._validate_params() self.T_ = X.shape[0] if self.T is None else self.T self.t_ = 0 if y.ndim > 1: self.coefs_ = zeros(self.T_ * y.shape[1]) for i in range(self.T_): idx = i % X.shape[0] self.partial_fit(X[idx, :], y[idx, :]) else: self.coefs_ = zeros(self.T_) for i in range(self.T_): idx = i % X.shape[0] self.partial_fit(X[idx, :], y[idx]) return self
def fit(self, X, y): """Fit joint quantile regression model. Parameters ---------- inputs : {array-like, sparse matrix}, shape = [n_samples, n_features] Training data. targets : {array-like}, shape = [n_samples] Target values. Returns ------- self : returns an instance of self. """ X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], y_numeric=True) self._validate_params() self.linop_ = self._get_kernel_map(X) gram = self.linop_.Gram_dense(X) self.reg_c_ = 1. / self.lbda # Solve the optimization problem probs = asarray(self.probs).reshape((-1, 1)) if self.nc_const: self._qp_nc(gram, y, probs) else: self._qp(gram, y, probs) return self
def fit(self, X, y): """A reference implementation of a fitting function for a classifier. Parameters ---------- X : array_like standardized data matrix y : array_like label vector Raises ------ ValueError Only binary classification. """ # Check that X and y have correct shape X, y = check_X_y(X, y) # Store the classes seen during fit self.classes_ = unique_labels(y) if len(self.classes_) > 2: raise ValueError("Only binary class data supported") # Negative class is set to -1 for decision surface y = preprocessing.LabelEncoder().fit_transform(y) y[y == 0] = -1 super().fit(X, y)
def fit(self, X, y): """ Fit model to data and provide feature relevance intervals Parameters ---------- X : array_like standardized data matrix y : array_like response vector """ # Check that X and y have correct shape X, y = check_X_y(X, y) super().fit(X, y)
def fit(self, X, y, **fit_params):
    """Determine which are the best cut points for each column in X
    based on y."""
    X, y = check_X_y(X, y, y_numeric=True)
    self.cut_points_ = [mdlp_cut(x, y, []) for x in X.T]
    return self

def _check_params(self, X, y): """ Check hyperparameters as well as X and y before proceeding with fit. """ # check X and y are consistent len, X is Array and y is column X, y = check_X_y(X, y) if self.perc <= 0 or self.perc > 100: raise ValueError('The percentile should be between 0 and 100.') if self.alpha <= 0 or self.alpha > 1: raise ValueError('Alpha should be between 0 and 1.')
def _check_params(self, X, y):
    # checking input data and scaling it if y is continuous
    X, y = check_X_y(X, y)

    if not self.categorical:
        ss = StandardScaler()
        X = ss.fit_transform(X)
        # StandardScaler expects a 2-D array, so reshape y before scaling
        y = ss.fit_transform(y.reshape(-1, 1)).ravel()

    # sanity checks
    methods = ['JMI', 'JMIM', 'MRMR']
    if self.method not in methods:
        raise ValueError('Please choose one of the following methods:\n' +
                         '\n'.join(methods))
    if not isinstance(self.k, int):
        raise ValueError("k must be an integer.")
    if self.k < 1:
        raise ValueError('k must be larger than 0.')
    if self.categorical and np.any(self.k > np.bincount(y)):
        raise ValueError('k must be smaller than your smallest class.')
    if not isinstance(self.categorical, bool):
        raise ValueError('Categorical must be Boolean.')
    if self.categorical and np.unique(y).shape[0] > 5:
        print('Are you sure y is categorical? It has more than 5 levels.')
    if not self.categorical and self._isinteger(y):
        print('Are you sure y is continuous? It seems to be discrete.')
    if self._isinteger(X):
        print('The values of X seem to be discrete. MI_FS will treat them '
              'as continuous.')
    return X, y

def fit(self, X, y, tol=None):
    """Fit the model according to the given training data and parameters.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array, shape = [n_samples]
        Target values (integers)
    """
    # X, y = check_X_y(X, y)
    if type_of_target(y) not in ['binary', 'multiclass']:
        raise ValueError("Unknown label type: %r" % type_of_target(y))
    self.classes_, y = np.unique(y, return_inverse=True)
    n_samples, n_features = X.shape
    n_classes = len(self.classes_)
    if n_classes < 2:
        raise ValueError('y has less than 2 classes')

    self.startprob_ = (np.bincount(y) + 1.0) / (len(y) + n_classes)

    transmat = np.zeros((n_classes, n_classes))
    for i in range(len(y) - 1):
        transmat[y[i], y[i + 1]] += 1
    transmat = (transmat.transpose() / np.sum(transmat, 1)).transpose()
    self.transmat_ = transmat

    pseudo_rows = np.tile(self.pseudo_rssi_list, (X.shape[1], 1)).transpose()
    means = []
    covars = []
    miss_probs = []
    for cl in range(n_classes):
        X_cl = np.concatenate((X[y == cl, :], pseudo_rows), 0)
        miss_probs_cl = np.mean(np.isnan(X_cl), 0)
        mean_cl = np.nanmean(X_cl, 0)
        covar_cl = np.diag(np.nanvar(X_cl, 0, ddof=1))
        miss_probs.append(miss_probs_cl)
        means.append(mean_cl)
        covars.append(covar_cl)
    self.miss_probs_ = np.asarray(miss_probs)
    self.means_ = np.asarray(means)
    self.covars_ = np.asarray(covars)
    return self

def fit(self, X, y, sample_weight=None, check_input=True,
        X_idx_sorted=None):
    """Build a decision tree classifier from the training set (X, y).

    Parameters
    ----------
    X : array-like or sparse matrix, shape = [n_samples, n_features]
        The training input samples. Internally, it will be converted to
        ``dtype=np.float32`` and if a sparse matrix is provided to a
        sparse ``csc_matrix``.

    y : array-like, shape = [n_samples] or [n_samples, n_outputs]
        The target values (class labels) as integers or strings.

    sample_weight : array-like, shape = [n_samples] or None
        Sample weights. If None, then samples are equally weighted. Splits
        that would create child nodes with net zero or negative weight are
        ignored while searching for a split in each node. Splits are also
        ignored if they would result in any single class carrying a
        negative weight in either child node.

    check_input : boolean, (default=True)
        Allow to bypass several input checking steps. Don't use this
        parameter unless you know what you're doing.

    X_idx_sorted : array-like, shape = [n_samples, n_features], optional
        The indexes of the sorted training input samples. If many trees
        are grown on the same dataset, this allows the ordering to be
        cached between trees. If None, the data will be sorted here.
        Don't use this parameter unless you know what you're doing.

    Returns
    -------
    self : object
        Returns self.
    """
    # y passed from a forest is 2-D. This is to silence the
    # annoying data-conversion warnings.
    y = np.asarray(y)
    if np.ndim(y) == 2 and y.shape[1] == 1:
        y = np.ravel(y)

    # apply method requires X to be of dtype np.float32
    X, y = check_X_y(
        X, y, accept_sparse="csc", dtype=np.float32, multi_output=False)
    super(BaseTreeQuantileRegressor, self).fit(
        X, y, sample_weight=sample_weight, check_input=check_input,
        X_idx_sorted=X_idx_sorted)
    self.y_train_ = y

    # Stores the leaf nodes that the samples lie in.
    self.y_train_leaves_ = self.tree_.apply(X)
    return self

def fit(self, X, y): """ Build a forest from the training set (X, y). Parameters ---------- X : array-like or sparse matrix, shape = [n_samples, n_features] The training input samples. Internally, it will be converted to ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csc_matrix``. y : array-like, shape = [n_samples] or [n_samples, n_outputs] The target values (class labels) as integers or strings. sample_weight : array-like, shape = [n_samples] or None Sample weights. If None, then samples are equally weighted. Splits that would create child nodes with net zero or negative weight are ignored while searching for a split in each node. Splits are also ignored if they would result in any single class carrying a negative weight in either child node. check_input : boolean, (default=True) Allow to bypass several input checking. Don't use this parameter unless you know what you do. X_idx_sorted : array-like, shape = [n_samples, n_features], optional The indexes of the sorted training input samples. If many tree are grown on the same dataset, this allows the ordering to be cached between trees. If None, the data will be sorted here. Don't use this parameter unless you know what to do. Returns ------- self : object Returns self. """ # apply method requires X to be of dtype np.float32 X, y = check_X_y( X, y, accept_sparse="csc", dtype=np.float32, multi_output=False) super(BaseForestQuantileRegressor, self).fit(X, y) self.y_train_ = y self.y_train_leaves_ = -np.ones((self.n_estimators, len(y)), dtype=np.int32) self.y_weights_ = np.zeros_like((self.y_train_leaves_), dtype=np.float32) for i, est in enumerate(self.estimators_): if self.bootstrap: bootstrap_indices = generate_sample_indices( est.random_state, len(y)) else: bootstrap_indices = np.arange(len(y)) est_weights = np.bincount(bootstrap_indices, minlength=len(y)) y_train_leaves = est.y_train_leaves_ for curr_leaf in np.unique(y_train_leaves): y_ind = y_train_leaves == curr_leaf self.y_weights_[i, y_ind] = ( est_weights[y_ind] / np.sum(est_weights[y_ind])) self.y_train_leaves_[i, bootstrap_indices] = y_train_leaves[bootstrap_indices] return self
def test_check_array_min_samples_and_features_messages():
    # empty list is considered 2D by default:
    msg = "0 feature(s) (shape=(1, 0)) while a minimum of 1 is required."
    assert_raise_message(ValueError, msg, check_array, [[]])

    # If considered a 1D collection when ensure_2d=False, then the minimum
    # number of samples will break:
    msg = "0 sample(s) (shape=(0,)) while a minimum of 1 is required."
    assert_raise_message(ValueError, msg, check_array, [], ensure_2d=False)

    # Invalid edge case when checking the default minimum sample of a scalar
    msg = "Singleton array array(42) cannot be considered a valid collection."
    assert_raise_message(TypeError, msg, check_array, 42, ensure_2d=False)

    # But this works if the input data is forced to look like a 2D array with
    # one sample and one feature:
    X_checked = assert_warns(DeprecationWarning, check_array, [42],
                             ensure_2d=True)
    assert_array_equal(np.array([[42]]), X_checked)

    # Simulate a model that would need at least 2 samples to be well defined
    X = np.ones((1, 10))
    y = np.ones(1)
    msg = "1 sample(s) (shape=(1, 10)) while a minimum of 2 is required."
    assert_raise_message(ValueError, msg, check_X_y, X, y,
                         ensure_min_samples=2)

    # The same message is raised if the data has 2 dimensions even if this
    # is not mandatory
    assert_raise_message(ValueError, msg, check_X_y, X, y,
                         ensure_min_samples=2, ensure_2d=False)

    # Simulate a model that would require at least 3 features
    # (e.g. SelectKBest with k=3)
    X = np.ones((10, 2))
    y = np.ones(2)
    msg = "2 feature(s) (shape=(10, 2)) while a minimum of 3 is required."
    assert_raise_message(ValueError, msg, check_X_y, X, y,
                         ensure_min_features=3)

    # Only the feature check is enabled whenever the number of dimensions
    # is 2 even if allow_nd is enabled:
    assert_raise_message(ValueError, msg, check_X_y, X, y,
                         ensure_min_features=3, allow_nd=True)

    # Simulate a case where a pipeline stage has trimmed all the features
    # of a 2D dataset.
    X = np.empty(0).reshape(10, 0)
    y = np.ones(10)
    msg = "0 feature(s) (shape=(10, 0)) while a minimum of 1 is required."
    assert_raise_message(ValueError, msg, check_X_y, X, y)

    # nd-data is not checked for any minimum number of features by default:
    X = np.ones((10, 0, 28, 28))
    y = np.ones(10)
    X_checked, y_checked = check_X_y(X, y, allow_nd=True)
    assert_array_equal(X, X_checked)
    assert_array_equal(y, y_checked)