The following 22 code examples, extracted from open source Python projects, illustrate how to use sklearn.utils.validation.check_X_y().
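All of the examples follow the same pattern: a scikit-learn-style estimator calls check_X_y at the top of fit to validate the training data before touching it. As a minimal, self-contained sketch of that pattern (the ThresholdClassifier below is illustrative only and is not taken from any of the projects quoted here):

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y

class ThresholdClassifier(BaseEstimator, ClassifierMixin):
    """Toy estimator: predicts class 1 when the first feature exceeds its fitted mean."""

    def fit(self, X, y):
        # check_X_y validates X and y together: X must be 2-D and free of
        # NaN/inf, y must be 1-D (unless multi_output=True), and both must
        # agree on n_samples. It returns the validated arrays.
        X, y = check_X_y(X, y)
        self.threshold_ = X[:, 0].mean()
        return self

    def predict(self, X):
        return (np.asarray(X)[:, 0] > self.threshold_).astype(int)

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])
print(ThresholdClassifier().fit(X, y).predict(X))  # [0 0 1 1]
# ThresholdClassifier().fit(X, y[:2]) would raise a ValueError about
# inconsistent numbers of samples, which is exactly what check_X_y catches.

The keyword arguments that recur in the examples below (accept_sparse, dtype, multi_output, y_numeric, allow_nd, estimator) tighten or relax these checks; for instance, y_numeric=True additionally requires a numeric y, as a regressor would.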
def fit(self, X, y):
    """A reference implementation of a fitting function

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_samples, n_features]
        The training input samples.
    y : array-like, shape = [n_samples] or [n_samples, n_outputs]
        The target values (class labels in classification, real numbers in
        regression).

    Returns
    -------
    self : object
        Returns self.
    """
    X, y = check_X_y(X, y)
    # Return the estimator
    return self

def fit(self, X, y):
    """A reference implementation of a fitting function for a classifier.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        The training input samples.
    y : array-like, shape = [n_samples]
        The target values. An array of int.

    Returns
    -------
    self : object
        Returns self.
    """
    # Check that X and y have correct shape
    X, y = check_X_y(X, y)
    # Store the classes seen during fit
    self.classes_ = unique_labels(y)

    self.X_ = X
    self.y_ = y
    # Return the classifier
    return self

def _check_X_y(self, X, y):
    # helpful error message for scikit-learn < 0.17
    is_2d = hasattr(y, 'shape') and len(y.shape) > 1 and y.shape[1] >= 2
    if is_2d or type_of_target(y) != 'binary':
        raise TypeError("Only binary targets supported. For training "
                        "multiclass or multilabel models, you may use the "
                        "OneVsRestClassifier or OneVsOneClassifier "
                        "meta-estimators in scikit-learn.")
    X, Y = check_X_y(X, y, dtype=np.double, accept_sparse='csc',
                     multi_output=False)
    self.label_binarizer_ = LabelBinarizer(pos_label=1, neg_label=-1)
    y = self.label_binarizer_.fit_transform(Y).ravel().astype(np.double)
    return X, y

def fit(self, x_, y, sample_weight=None):
    X, y = check_X_y(x_, y, accept_sparse=[], y_numeric=True,
                     multi_output=False)
    x, y, X_offset, y_offset, X_scale = self._preprocess_data(
        x_, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
        copy=self.copy_X, sample_weight=None)

    if sample_weight is not None:
        x, y = _rescale_data(x, y, sample_weight)

    self.iters = 0
    self.ind_ = np.ones(x.shape[1], dtype=bool)  # initial guess
    if self.threshold > 0:
        self._reduce(x, y)
    else:
        self.coef_ = self._regress(x[:, self.ind_], y, self.alpha)
    if self.unbias and self.alpha >= 0:
        self._unbias(x, y)

    self._set_intercept(X_offset, y_offset, X_scale)
    return self

def fit(self, x_, y, sample_weight=None):
    n_samples, n_features = x_.shape
    X, y = check_X_y(x_, y, accept_sparse=[], y_numeric=True,
                     multi_output=False)
    x, y, X_offset, y_offset, X_scale = self._preprocess_data(
        x_, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
        copy=self.copy_X, sample_weight=None)

    if sample_weight is not None:
        # Sample weight can be implemented via a simple rescaling.
        x, y = _rescale_data(x, y, sample_weight)

    coefs, intercept = fit_with_noise(x, y, self.sigma, self.alpha, self.n)
    self.intercept_ = intercept
    self.coef_ = coefs
    self._set_intercept(X_offset, y_offset, X_scale)
    return self

def fit(self, X, y=None, **fit_params):
    # scikit-learn checks
    X, y = check_X_y(X, y)

    n_terms = min(self.n_terms, X.shape[1])

    # Get a list of unique labels from y
    labels = np.unique(y)

    # Determine the n top terms per class
    self.top_terms_per_class_ = {
        c: set(np.argpartition(np.sum(X[y == c], axis=0), -n_terms)[-n_terms:])
        for c in labels
    }
    # Return the classifier
    return self

def fit(self, X, y):
    #import traceback
    from fabric.api import local
    X, y = check_X_y(X, y, allow_nd=True, multi_output=True,
                     y_numeric=True, estimator="GridSearch")
    print("njobs = {}".format(self.njobs))
    if self.njobs > 1:
        assert False
        # iterable = [(i, pg, self.estimator_cls, self.kf, X, y,
        #              self.score_fns, len(self.parameter_grid))
        #             for i, pg in enumerate(self.parameter_grid)]
        # try:
        #     p = multiprocessing.Pool(self.njobs)
        #     res = p.map(mp_grid_search, iterable)
        #     print(res)
        # except:
        #     traceback.print_exc()
    else:
        self.grid_scores = []
        estimator = self.estimator_cls()
        num_tasks = len(self.parameter_grid)
        for i, params in enumerate(self.parameter_grid):
            print("Starting task {}/{}...".format(i + 1, num_tasks))
            with stopwatch("Done. Elapsed time"):
                self.grid_scores.append(
                    mp_grid_search((i, params, estimator, self.kf, X, y,
                                    self.score_fns, len(self.parameter_grid))))
            if self.checkpoint_path is not None:
                local("rm -f {}*.p".format(self.checkpoint_path))
                savepath = self.checkpoint_path + "_{}.p".format(i)
                # Pickle files must be opened in binary mode.
                with open(savepath, 'wb') as f:
                    pickle.dump(self.grid_scores, f)

def check_X_y(self, X, y):
    from sklearn.utils.validation import check_X_y

    if X.shape[0] > GPR.MAX_TRAIN_SIZE:
        raise Exception("X_train size cannot exceed {} ({})"
                        .format(GPR.MAX_TRAIN_SIZE, X.shape[0]))
    return check_X_y(X, y, multi_output=True,
                     allow_nd=True, y_numeric=True,
                     estimator="GPR")

def fit(self, X_train, y_train, ridge=1.0):
    self._reset()
    X_train, y_train = self.check_X_y(X_train, y_train)
    self.X_train = np.float32(X_train)
    self.y_train = np.float32(y_train)
    sample_size = self.X_train.shape[0]

    if np.isscalar(ridge):
        ridge = np.ones(sample_size) * ridge
    assert ridge.ndim == 1

    X_dists = np.zeros((sample_size, sample_size), dtype=np.float32)
    with tf.Session(graph=self.graph,
                    config=tf.ConfigProto(
                        intra_op_parallelism_threads=self.NUM_THREADS)) as sess:
        dist_op = self.ops['dist_op']
        v1, v2 = self.vars['v1_h'], self.vars['v2_h']
        for i in range(sample_size):
            X_dists[i] = sess.run(dist_op, feed_dict={v1: self.X_train[i],
                                                      v2: self.X_train})

        K_ridge_op = self.ops['K_ridge_op']
        X_dists_ph = self.vars['X_dists_h']
        ridge_ph = self.vars['ridge_h']
        self.K = sess.run(K_ridge_op, feed_dict={X_dists_ph: X_dists,
                                                 ridge_ph: ridge})

        K_ph = self.vars['K_h']
        K_inv_op = self.ops['K_inv_op']
        self.K_inv = sess.run(K_inv_op, feed_dict={K_ph: self.K})

        xy_op = self.ops['xy_op']
        K_inv_ph = self.vars['K_inv_h']
        yt_ph = self.vars['yt_h']
        self.xy_ = sess.run(xy_op, feed_dict={K_inv_ph: self.K_inv,
                                              yt_ph: self.y_train})
    return self

def fit(self, X, y):
    """
    Fit on X.

    :param X: {array-like, sparse matrix}, shape (n_samples, n_features).
        Input data, where `n_samples` is the number of samples and
        `n_features` is the number of features.
    :param y: array-like, shape (n_samples,). Target class labels.
    :return: Returns self
    """
    # Numpy
    X = np.array(X)
    y = np.array(y)

    # Check that X and y have correct shape
    X, y = check_X_y(X, y)

    # Store the classes seen during fit
    self.classes_ = unique_labels(y)

    # Store so that we know what we fitted on
    self.X_ = X
    self.y_ = y

    # Get dimensions
    input_dim = X.shape[1]
    output_dim = len(self.classes_)

    # Create a model if needed
    if (input_dim, output_dim) != self.io:
        self.model = self._build(input_dim, output_dim)

    self.model.fit(X, y, batch_size=self.batch_size, epochs=self.epochs,
                   verbose=self.verbose)

    # Return the classifier
    return self

def _check_X_y(self, X, y):
    X, y = check_X_y(X, y, accept_sparse='csc', multi_output=False,
                     dtype=np.double, y_numeric=True)
    y = y.astype(np.double).ravel()
    return X, y

def fit(self, X, y):
    """Builds a forest of trees from the training set (X, y).

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_samples, n_features]
        The training input samples. Internally, its dtype will be converted
        to ``dtype=np.float32``. If a sparse matrix is provided, it will be
        converted into a sparse ``csc_matrix``.

    y : array-like, shape = [n_samples] or [n_samples, n_outputs]
        The target values (class labels in classification, real numbers in
        regression).

    Returns
    -------
    self : object
        Returns self.
    """
    X, y = check_X_y(X, y, dtype=np.float32, multi_output=False)
    return super(MondrianForestRegressor, self).fit(X, y)

def fit(self, X, y):
    """Builds a forest of trees from the training set (X, y).

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_samples, n_features]
        The training input samples. Internally, its dtype will be converted
        to ``dtype=np.float32``. If a sparse matrix is provided, it will be
        converted into a sparse ``csc_matrix``.

    y : array-like, shape = [n_samples] or [n_samples, n_outputs]
        The target values (class labels in classification, real numbers in
        regression).

    Returns
    -------
    self : object
        Returns self.
    """
    X, y = check_X_y(X, y, dtype=np.float32, multi_output=False)
    return super(MondrianForestClassifier, self).fit(X, y)

def fit(self, X, y):
    X, y = check_X_y(X, y)
    self.classes_ = unique_labels(y)
    self.X_ = DynamicBayesianClassifier._first_col(X)
    self.y_ = y
    self.size_ = self.X_.size
    for i in range(self.X_.size):
        if y[i] not in self.dbayesmode_major_.keys():
            self.dbayesmode_major_[y[i]] = scalgoutil.DBayesMode(y[i])
        self.dbayesmode_major_[y[i]].update(self.X_[i])
        self.update_priors()
    return self

def fit(self, x, y, **kwargs):
    # x, y = check_X_y(x, y, multi_output=False)
    super().fit(self._transform(x, y), y, **kwargs)
    self._arrange_coef()
    return self

def fit(self, x, y=None):
    x, y = check_X_y(x, y)
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, random_state=self.random_state)
    self.front = run_ffx(x_train, x_test, y_train, y_test,
                         self.exponents, self.operators,
                         num_alphas=self.num_alphas,
                         l1_ratios=self.l1_ratios,
                         target_score=self.target_score,
                         n_tail=self.n_tail,
                         random_state=self.random_state,
                         strategies=self.strategies,
                         n_jobs=self.n_jobs,
                         max_complexity=self.max_complexity,
                         rational=self.rational,
                         eps=self.eps,
                         **self.kw)
    self.make_model(x_test, y_test)
    return self

def fit(self, X, y):
    X, y = check_X_y(X, y)
    return self

def fit(self, X, y):
    X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
    if sp.issparse(X):
        raise ValueError("Nonsensical Error")
    return self

def fit(self, X, y):
    X, y = check_X_y(X, y)
    self.coef_ = np.ones(X.shape[1])
    return self

def fit(self, X, y):
    """
    Train the Logistic model, X and y are numpy arrays.
    """
    X, y = check_X_y(X, y)  # , accept_sparse=['csr', 'csc'])  # not sure how to handle sparse
    self.classes_, y = np.unique(y, return_inverse=True)

    if self.fit_intercept:
        X = np.insert(X, 0, 1, axis=1)
    w0 = np.zeros(X.shape[1])

    if self.bounds is None:
        self.bounds_ = [(None, None) for v in w0]
    elif isinstance(self.bounds, tuple) and len(self.bounds) == 2:
        self.bounds_ = [self.bounds for v in w0]
    elif self.fit_intercept and len(self.bounds) == len(w0) - 1:
        self.bounds_ = np.concatenate(([(None, None)], self.bounds))
    else:
        self.bounds_ = self.bounds
    if len(self.bounds_) != len(w0):
        raise ValueError("Bounds must be the same length as the coef")

    if isinstance(self.l2, Number):
        self.l2_ = [self.l2 for v in w0]
    elif self.fit_intercept and len(self.l2) == len(w0) - 1:
        self.l2_ = np.insert(self.l2, 0, 0)
    else:
        self.l2_ = self.l2
    if len(self.l2_) != len(w0):
        raise ValueError("L2 penalty must be the same length as the coef, "
                         "be sure the intercept is accounted for.")
    # the intercept should never be regularized.
    if self.fit_intercept:
        self.l2_[0] = 0.0

    w = minimize(_ll, w0, args=(X, y, self.l2_),
                 jac=_ll_grad,
                 method=self.method, bounds=self.bounds_,
                 options={'maxiter': self.max_iter,
                          # 'disp': True
                          })['x']

    if self.fit_intercept:
        self.intercept_ = w[0:1]
        self.coef_ = w[1:]
    else:
        self.intercept_ = np.array([])
        self.coef_ = w
    return self

def fit(self, X, y=None, **fit_params):
    # Check that X and y have correct shape
    X, y = check_X_y(X, y)

    # meta_features_ have as many rows as there are in X and as many
    # columns as there are models. However, if use_proba is True then
    # ((n_classes - 1) * n_models) columns have to be stored
    if self.use_proba:
        self.n_probas_ = len(np.unique(y)) - 1
        self.meta_features_ = np.empty((len(X),
                                        len(self.models) * self.n_probas_))
    else:
        self.meta_features_ = np.empty((len(X), len(self.models)))

    # Generate CV folds
    folds = self.cv.split(X, y)

    for train_index, test_index in folds:
        for i, (name, model) in enumerate(self.models.items()):
            # Extract fit params for the model
            model_fit_params = fit_params.get(name, {})
            # Train the model on the training set
            model.fit(X[train_index], y[train_index], **model_fit_params)
            # If use_proba is True then the probabilities of each class for
            # each model have to be predicted and then stored into
            # meta_features
            if self.use_proba:
                probabilities = model.predict_proba(X[test_index])
                for j, k in enumerate(range(self.n_probas_ * i,
                                            self.n_probas_ * (i + 1))):
                    self.meta_features_[test_index, k] = probabilities[:, j]
            else:
                self.meta_features_[test_index, i] = model.predict(X[test_index])

    # Combine the predictions with the original features
    if self.use_base_features:
        self.meta_features_ = np.hstack((self.meta_features_, X))

    self.meta_model.fit(self.meta_features_, y)

    # Each model has to be fit on all the data for further predictions
    for model in self.models.values():
        model.fit(X, y)

    return self

def fit(self, X, y):
    """Fit Gaussian process classification model

    Parameters
    ----------
    X : array-like, shape = (n_samples, n_features)
        Training data

    y : array-like, shape = (n_samples,)
        Target values

    Returns
    -------
    self : returns an instance of self.
    """
    X, y = check_X_y(X, y, multi_output=False)

    self.base_estimator_ = _BinaryGaussianProcessClassifierLaplace(
        self.kernel, self.optimizer, self.n_restarts_optimizer,
        self.max_iter_predict, self.warm_start, self.copy_X_train,
        self.random_state)

    self.classes_ = np.unique(y)
    self.n_classes_ = self.classes_.size
    if self.n_classes_ == 1:
        raise ValueError("GaussianProcessClassifier requires 2 or more "
                         "distinct classes. Only class %s present."
                         % self.classes_[0])
    if self.n_classes_ > 2:
        if self.multi_class == "one_vs_rest":
            self.base_estimator_ = \
                OneVsRestClassifier(self.base_estimator_, n_jobs=self.n_jobs)
        elif self.multi_class == "one_vs_one":
            self.base_estimator_ = \
                OneVsOneClassifier(self.base_estimator_, n_jobs=self.n_jobs)
        else:
            raise ValueError("Unknown multi-class mode %s" % self.multi_class)

    self.base_estimator_.fit(X, y)

    if self.n_classes_ > 2:
        self.log_marginal_likelihood_value_ = np.mean(
            [estimator.log_marginal_likelihood()
             for estimator in self.base_estimator_.estimators_])
    else:
        self.log_marginal_likelihood_value_ = \
            self.base_estimator_.log_marginal_likelihood()

    return self