The following 8 code examples, extracted from open-source Python projects, illustrate how to use sklearn.utils.column_or_1d().
def transform(self, y):
    """Transform labels to normalized encoding.

    Labels not seen before are appended to ``self.classes_`` rather
    than raising an error.

    Parameters
    ----------
    y : array-like of shape [n_samples]
        Target values.

    Returns
    -------
    y : array-like of shape [n_samples]
        Encoded labels.
    """
    y = column_or_1d(y, warn=True)
    classes = np.unique(y)
    if len(np.intersect1d(classes, self.classes_)) < len(classes):
        # Extend the known classes with the previously unseen labels.
        diff = np.setdiff1d(classes, self.classes_)
        self.classes_ = np.hstack((self.classes_, diff))
        # NOTE(review): hstack can leave classes_ unsorted, which would
        # break searchsorted below -- confirm classes_ stays sorted.
    # BUG FIX: the original returned only the first element ([0]),
    # while the docstring promises an array of shape [n_samples].
    return np.searchsorted(self.classes_, y)
def fit(self, X, y=None):
    """Fit label encoder.

    Parameters
    ----------
    X : array-like of shape (n_samples,)
        Values to encode; flattened with ``ravel`` before fitting.
        (BUG FIX, docs: the original docstring documented ``y`` even
        though the encoder is fitted on ``X``.)

    y : ignored
        Present for scikit-learn API compatibility.

    Returns
    -------
    self : returns an instance of self.
    """
    X = column_or_1d(X.ravel(), warn=True)
    _check_numpy_unicode_bug(X)
    self.classes_ = np.unique(X)
    # For float data, drop non-finite (NaN/inf) classes.  The ``.size``
    # guard fixes an IndexError on empty input, where classes_[0]
    # would raise.
    if self.classes_.size and isinstance(self.classes_[0], np.float64):
        self.classes_ = self.classes_[np.isfinite(self.classes_)]
    return self
def transform(self, y):
    """Transform labels to normalized encoding.

    Parameters
    ----------
    y : array-like of shape [n_samples]
        Target values.

    Returns
    -------
    y : array of shape [n_samples, 1]
        Encoded labels as a column vector (the original docstring said
        [n_samples], but the code reshapes to a column).

    Raises
    ------
    ValueError
        If ``y`` contains labels not seen during ``fit``.
    """
    check_is_fitted(self, 'classes_')
    y = column_or_1d(y.ravel(), warn=True)
    classes = np.unique(y)
    # For float data, ignore non-finite values when checking for
    # unseen labels.
    if isinstance(classes[0], np.float64):
        classes = classes[np.isfinite(classes)]
    _check_numpy_unicode_bug(classes)
    if len(np.intersect1d(classes, self.classes_)) < len(classes):
        diff = np.setdiff1d(classes, self.classes_)
        # BUG FIX: removed a stray debug print(self.classes_) that
        # leaked internal state to stdout before raising.
        raise ValueError("y contains new labels: %s" % str(diff))
    return np.searchsorted(self.classes_, y).reshape(-1, 1)
def predict_sigmoid(a, b, T):
    """Predict new data with the sigmoid ``1 / (1 + exp(a * T + b))``.

    BUG FIX (docs): the original summary claimed "linear interpolation",
    but the code computes a sigmoid (Platt-style) transform.

    Parameters
    ----------
    a : float
        Slope of the sigmoid's linear argument.

    b : float
        Intercept of the sigmoid's linear argument.

    T : array-like, shape (n_samples,)
        Data to predict from.

    Returns
    -------
    T_ : array, shape (n_samples,)
        The predicted data.
    """
    from sklearn.utils import column_or_1d
    T = column_or_1d(T)
    return 1. / (1. + np.exp(a * T + b))
def test_column_or_1d():
    """column_or_1d flattens 1-d / column inputs and rejects true 2-d ones."""
    cases = [
        ("binary", ["spam", "egg", "spam"]),
        ("binary", [0, 1, 0, 1]),
        ("continuous", np.arange(10) / 20.),
        ("multiclass", [1, 2, 3]),
        ("multiclass", [0, 1, 2, 2, 0]),
        ("multiclass", [[1], [2], [3]]),
        ("multilabel-indicator", [[0, 1, 0], [0, 0, 1]]),
        ("multiclass-multioutput", [[1, 2, 3]]),
        ("multiclass-multioutput", [[1, 1], [2, 2], [3, 1]]),
        ("multiclass-multioutput", [[5, 1], [4, 2], [3, 1]]),
        ("multiclass-multioutput", [[1, 2, 3]]),
        ("continuous-multioutput", np.arange(30).reshape((-1, 3))),
    ]
    flattenable = ("binary", "multiclass", "continuous")
    for kind, values in cases:
        if kind in flattenable:
            # Valid inputs come back as the raveled 1-d array.
            assert_array_equal(column_or_1d(values), np.ravel(values))
        else:
            # Genuinely multi-column inputs must raise.
            assert_raises(ValueError, column_or_1d, values)
def transform(self, y):
    """Perform encoding if already fit.

    Parameters
    ----------
    y : array_like, shape=(n_samples,)
        The array to encode

    Returns
    -------
    e : array_like, shape=(n_samples,)
        The encoded array

    Raises
    ------
    ValueError
        If the number of distinct levels in ``y`` reaches the sentinel
        value reserved for unseen levels.
    """
    check_is_fitted(self, 'classes_')
    y = column_or_1d(y, warn=True)
    classes = np.unique(y)
    _check_numpy_unicode_bug(classes)

    # Check not too many:
    unseen = _get_unseen()
    if len(classes) >= unseen:
        raise ValueError('Too many factor levels in feature. Max is %i' % unseen)

    # PERF: hoist membership testing into a set built once --
    # ``x in self.classes_`` is an O(n) scan of the numpy array for
    # every element of y.
    known = set(self.classes_)
    e = np.array([
        np.searchsorted(self.classes_, x) if x in known else unseen
        for x in y
    ])

    return e
def __init__(self, filename='./corpus/train.csv'):
    """Load the training corpus, split it, and restore any saved model.

    Parameters
    ----------
    filename : str
        Path to the CSV corpus; must contain a 'sentiment' column.
    """
    if os.path.exists(filename):
        data = pd.read_csv(filename)
        # BUG FIX: the shuffled frame was stored in self.data but the
        # *unshuffled* ``data`` was split -- split the shuffled rows.
        self.data = shuffle(data)
        X_data = pd.DataFrame(self.data.drop('sentiment', axis=1))
        Y_data = column_or_1d(self.data['sentiment'], warn=True)
        self.X_train, self.X_val, \
            self.y_train, self.y_val = train_test_split(X_data,
                                                        Y_data,
                                                        test_size=0.3,
                                                        random_state=1)
        self.model = None
        self.load_model()
        self.preprocessor = Preprocessor.Preprocessor()
    else:
        print('No Source!')
        # BUG FIX: self.preprocessor was never assigned on this path,
        # so the process_data() call raised AttributeError.
        self.preprocessor = Preprocessor.Preprocessor()
        self.preprocessor.process_data()
def fit(self, X, y, sample_weight=None, check_input=True):
    """Fit Ridge regression model after searching for the best mu and tau.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training data

    y : array-like, shape = [n_samples] or [n_samples, n_targets]
        Target values

    sample_weight : float or array-like of shape [n_samples]
        Sample weight

    check_input : bool
        Forwarded to the underlying estimator's ``fit``.

    Returns
    -------
    self : Returns self.
    """
    # Binarize targets to {-1, +1}; multilabel targets are rejected.
    self._label_binarizer = LabelBinarizer(pos_label=1, neg_label=-1)
    y = self._label_binarizer.fit_transform(y)
    if self._label_binarizer.y_type_.startswith('multilabel'):
        raise ValueError(
            "%s doesn't support multi-label classification" % (
                self.__class__.__name__))
    else:
        y = column_or_1d(y, warn=False)
    # Cross-validated search over the (tau, lamda) regularization grid.
    param_grid = {'tau': self.taus, 'lamda': self.lamdas}
    fit_params = {'sample_weight': sample_weight,
                  'check_input': check_input}
    estimator = L1L2TwoStepClassifier(
        mu=self.mu, fit_intercept=self.fit_intercept,
        use_gpu=self.use_gpu, threshold=self.threshold,
        normalize=self.normalize, precompute=self.precompute,
        max_iter=self.max_iter, copy_X=self.copy_X, tol=self.tol,
        warm_start=self.warm_start, positive=self.positive,
        random_state=self.random_state, selection=self.selection)
    # NOTE(review): ``fit_params`` and ``iid`` as GridSearchCV
    # constructor arguments were removed in newer scikit-learn
    # releases -- this presumably targets an older pinned version;
    # confirm against the project's requirements.
    gs = GridSearchCV(
        estimator=estimator, param_grid=param_grid, fit_params=fit_params,
        cv=self.cv, scoring=self.scoring, n_jobs=self.n_jobs,
        iid=self.iid, refit=self.refit, verbose=self.verbose,
        pre_dispatch=self.pre_dispatch, error_score=self.error_score,
        return_train_score=self.return_train_score)
    gs.fit(X, y)
    # Expose the winning estimator's hyper-parameters and coefficients
    # on self, mirroring the fitted-attribute convention.
    estimator = gs.best_estimator_
    self.tau_ = estimator.tau
    self.lamda_ = estimator.lamda
    self.coef_ = estimator.coef_
    self.intercept_ = estimator.intercept_
    self.best_estimator_ = estimator
    # XXX DEBUG
    # Reshape coef_ to one row per class for multiclass problems.
    # NOTE(review): self.classes_ is read but not set in this method --
    # presumably provided by a property or base class; verify.
    if self.classes_.shape[0] > 2:
        ndim = self.classes_.shape[0]
    else:
        ndim = 1
    self.coef_ = self.coef_.reshape(ndim, -1)
    return self