We extracted the following 50 code examples from open-source Python projects to illustrate how to use sklearn.utils.check_array().
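Before the project code, here is a minimal standalone sketch of calling check_array directly. The input arrays and parameter choices are illustrative assumptions, not taken from any of the projects below:

import numpy as np
from scipy import sparse
from sklearn.utils import check_array

# A plain nested list is validated and converted to a 2-D float ndarray.
X = check_array([[1, 2], [3, 4]], dtype=np.float64)

# With accept_sparse, a CSR matrix passes validation without being densified.
X_csr = check_array(sparse.csr_matrix(X), accept_sparse='csr')

# ensure_2d=False lets a 1-D array through instead of raising a ValueError.
x = check_array([1.0, 2.0, 3.0], ensure_2d=False)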
def transform(self, X):
    feature_range = self.feature_range
    X = check_array(X, copy=self.copy, ensure_2d=False, dtype=FLOAT_DTYPES)
    if X.ndim == 1:
        warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
    if np.any(X > feature_range[1]) or np.any(X < feature_range[0]):
        warnings.warn(
            "You got data that are out of the range: {}"
            .format(feature_range)
        )
    X[X > feature_range[1]] = feature_range[1]
    X[X < feature_range[0]] = feature_range[0]
    return X
def _validate_X_predict(
        self, X: np.ndarray, check_input: bool) -> np.ndarray:
    if check_input:
        X = check_array(X, dtype=DTYPE, accept_sparse="csr")
        if issparse(X) and (X.indices.dtype != np.intc
                            or X.indptr.dtype != np.intc):
            raise ValueError(
                "No support for np.int64 index based sparse matrices")

    n_features = X.shape[1]
    if self.n_features_ != n_features:
        raise ValueError(
            "Number of features of the model must match the input."
            " Model n_features is %s and input n_features is %s "
            % (self.n_features_, n_features))

    return X
def load_data():
    # Load dataset
    print("Loading dataset...")
    data = fetch_covtype(download_if_missing=True, shuffle=True,
                         random_state=RANDOM_STATE)
    X = check_array(data['data'], dtype=np.float32, order='C')
    y = (data['target'] != 1).astype(np.int)

    # Create train-test split (as [Joachims, 2006])
    print("Creating train-test split...")
    n_train = 522911
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_test = X[n_train:]
    y_test = y[n_train:]

    # Standardize first 10 features (the numerical ones)
    mean = X_train.mean(axis=0)
    std = X_train.std(axis=0)
    mean[10:] = 0.0
    std[10:] = 1.0
    X_train = (X_train - mean) / std
    X_test = (X_test - mean) / std
    return X_train, X_test, y_train, y_test
def predict(self, X):
    """Predict data using the ``centroids_`` of subclusters.

    Avoid computation of the row norms of X.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Input data.

    Returns
    -------
    labels : ndarray, shape(n_samples)
        Labelled data.
    """
    X = check_array(X, accept_sparse='csr')
    self._check_fit(X)
    reduced_distance = safe_sparse_dot(X, self.subcluster_centers_.T)
    reduced_distance *= -2
    reduced_distance += self._subcluster_norms
    return self.subcluster_labels_[np.argmin(reduced_distance, axis=1)]
def test_dict_completion_missing():
    # Generate some toy data.
    rng = np.random.RandomState(0)
    U = rng.rand(100, 4)
    V = rng.rand(4, 20)
    X = np.dot(U, V)
    X = sp.csr_matrix(X)
    X_tr, X_te = train_test_split(X, train_size=0.95)
    X_tr = sp.csr_matrix(X_tr)
    X_te = sp.csr_matrix(X_te)

    mf = RecsysDictFact(n_components=4, n_epochs=1, alpha=1, random_state=0,
                        detrend=True, verbose=0)
    mf.fit(X_tr)
    X_pred = mf.predict(X_te)
    rmse = sqrt(np.sum((X_te.data - X_pred.data) ** 2) / X_te.data.shape[0])
    X_te_centered = check_array(X_te, accept_sparse='csr', copy=True)
    compute_biases(X_te_centered, inplace=True)
    rmse_c = sqrt(np.sum((X_te.data - X_te_centered.data) ** 2)
                  / X_te.data.shape[0])
    assert (rmse < rmse_c)
def partial_fit(self, X, sample_indices=None):
    """Update the factorization using rows from X.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Input data.

    sample_indices :
        Indices for each row of X. If None, consider that row i index is i
        (useful when providing the whole data to the function).

    Returns
    -------
    self
    """
    X = check_array(X, dtype=[np.float32, np.float64], order='C')
    n_samples, n_features = X.shape
    batches = gen_batches(n_samples, self.batch_size)
    for batch in batches:
        this_X = X[batch]
        these_sample_indices = get_sub_slice(sample_indices, batch)
        self._single_batch_fit(this_X, these_sample_indices)
    return self
def transform(self, X, y=None):
    """Apply dimensionality reduction to X. X is masked.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        New data, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    X_new : array-like, shape (n_samples, n_components)
    """
    from sklearn.utils import check_array
    from sklearn.utils.validation import check_is_fitted
    check_is_fitted(self, ['mask_'], all_or_any=all)
    X = check_array(X)
    return X[:, self.mask_]
def transform(self, X, y=None):
    """Apply dimensionality reduction to X. X is masked.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        New data, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    X_new : array-like, shape (n_samples, n_components)
    """
    from sklearn.utils import check_array
    from sklearn.utils.validation import check_is_fitted
    check_is_fitted(self, ['mask_'], all_or_any=all)
    if hasattr(X, 'columns'):
        X = X.values
    X = check_array(X[:, self.mask_])
    return X
def load_data(dtype=np.float32, order='F'):
    """Load the data, then cache and memmap the train/test split."""
    ######################################################################
    # Load dataset
    safe_print("Loading dataset...")
    data = fetch_mldata('MNIST original')
    X = check_array(data['data'], dtype=dtype, order=order)
    y = data["target"]

    # Normalize features
    X = X / 255

    # Create train-test split (as [Joachims, 2006])
    safe_print("Creating train-test split...")
    n_train = 60000
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_test = X[n_train:]
    y_test = y[n_train:]

    return X_train, X_test, y_train, y_test
def _validate_X_predict(self, X, check_input):
    """Validate X whenever one tries to predict, apply, predict_proba"""
    if self.tree_ is None:
        raise NotFittedError("Estimator not fitted, "
                             "call `fit` before exploiting the model.")

    if check_input:
        X = check_array(X, dtype='f')

    n_features = X.shape[1]
    if self._n_features != n_features:
        raise ValueError("Number of features of the model must "
                         " match the input. Model n_features is %s and "
                         " input n_features is %s "
                         % (self._n_features, n_features))

    return X
def _validate_X_predict(self, X, check_input):
    """Validate X whenever one tries to predict, apply, predict_proba"""
    if self._tree is None:
        raise NotFittedError("Estimator not fitted, "
                             "call `fit` before exploiting the model.")

    if check_input:
        X = check_array(X, dtype='f')

    n_features = X.shape[1]
    if self._n_features != n_features:
        raise ValueError("Number of features of the model must "
                         " match the input. Model n_features is %s and "
                         " input n_features is %s "
                         % (self._n_features, n_features))

    return X
def anomaly_score(self, X=None):
    """Compute anomaly scores for test samples.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features), default None
        Test samples.

    Returns
    -------
    y_score : array-like of shape (n_samples,)
        Anomaly scores for test samples.
    """
    check_is_fitted(self, ['_glasso'])

    if X is None:
        return self.y_score_
    else:
        X = check_array(X)
        return self._glasso.mahalanobis(X)
def anomaly_score(self, X=None):
    """Compute anomaly scores for test samples.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features), default None
        Test samples.

    Returns
    -------
    y_score : array-like of shape (n_samples,)
        Anomaly scores for test samples.
    """
    check_is_fitted(self, '_gmm')

    if X is None:
        return self.y_score_
    else:
        X = check_array(X)
        return -self._gmm.score_samples(X)
def anomaly_score(self, X=None):
    """Compute anomaly scores for test samples.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features), default None
        Test samples.

    Returns
    -------
    y_score : array-like of shape (n_samples,)
        Anomaly scores for test samples.
    """
    check_is_fitted(self, '_normalizer')

    if X is None:
        return self.y_score_
    else:
        X = check_array(X)

        if not self.assume_normalized:
            X = self._normalizer.transform(X)

        return 1.0 - X @ self.mean_direction_
def anomaly_score(self, X=None):
    """Compute anomaly scores for test samples.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features), default None
        Test samples.

    Returns
    -------
    y_score : array-like of shape (n_samples,)
        Anomaly scores for test samples.
    """
    check_is_fitted(self, '_kde')

    if X is None:
        return self.y_score_
    else:
        X = check_array(X)
        return -self._kde.score_samples(X)
def mean_absolute_percentage_error(y_true, y_pred):
    """Use of this metric is not recommended; for illustration only.

    See other regression metrics on sklearn docs:
    http://scikit-learn.org/stable/modules/classes.html#regression-metrics

    Use like any other metric
    >>> y_true = [3, -0.5, 2, 7]; y_pred = [2.5, -0.3, 2, 8]
    >>> mean_absolute_percentage_error(y_true, y_pred)
    Out[]: 24.791666666666668
    """
    # y_true, y_pred = check_array(y_true), check_array(y_pred)

    # Note: does not handle mix 1d representation
    # if _is_1d(y_true):
    #     y_true, y_pred = _check_1d_array(y_true, y_pred)

    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
def predict(self, X):
    """Predict using the ORFF ridge model.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Samples.

    Returns
    -------
    C : {array}, shape = [n_samples] or [n_samples, n_targets]
        Returns predicted values.
    """
    check_is_fitted(self, ['coefs_', 'linop_'], all_or_any=all)
    X = check_array(X)
    return self._decision_function(X)
def predict(self, X):
    """Predict using ONORMA model.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Samples.

    Returns
    -------
    C : {array}, shape = [n_samples] or [n_samples, n_targets]
        Returns predicted values.
    """
    check_is_fitted(self, ['coefs_', 't_', 'p_', 'X_seen_', 'y_seen_'],
                    all_or_any=all)
    X = check_array(X)
    linop = self.ov_kernel_(self.X_seen_)
    pred = linop(X) * self.coefs_[:self.t_ * self.p_]
    return pred.reshape(X.shape[0], -1) if linop.p > 1 else pred
def predict(self, X):
    """Predict using the OVK ridge model.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Samples.

    Returns
    -------
    C : {array}, shape = [n_samples] or [n_samples, n_targets]
        Returns predicted values.
    """
    check_is_fitted(self, ['dual_coefs_', 'linop_'], all_or_any=all)
    X = check_array(X, force_all_finite=True, accept_sparse=False,
                    ensure_2d=True)
    return self._decision_function(X)
def predict(self, X):
    """Predict conditional quantiles.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Samples.

    Returns
    -------
    y : {array}, shape = [n_samples, n_quantiles]
        Returns predicted values for each prescribed quantile level.
    """
    check_is_fitted(self, ['model_', 'linop_'], all_or_any=all)
    X = check_array(X)
    return self._decision_function(X)
def _check_X_y(self, X, y=None):
    """
    :param X:
    :param y (~numpy.ndarray):
    :return:
    """
    if not is_dataset(X) and not isinstance(X, list):
        if isinstance(X, numpy.ndarray):
            X = check_array(X, dtype=self._data_x_dtype)
        else:
            print('[WARNING] skip check type for dataset X with type {}'
                  .format(type(X)))
    if y is not None:
        y = check_array(y, dtype=self._data_y_dtype, ensure_2d=False)
        if y.ndim == 1:
            y = y[:, None]
    return X, y
def predict(self, X):
    """Predict the class labels for the provided data.

    Parameters
    ----------
    X : scipy.sparse matrix, shape (n_test_samples, vocab_size)
        Test samples.

    Returns
    -------
    y : array of shape [n_samples]
        Class labels for each data sample.
    """
    X = check_array(X, accept_sparse='csr', copy=True)
    X = normalize(X, norm='l1', copy=False)
    dist = self._pairwise_wmd(sp.sparse.csr_matrix(X))
    return super(WordMoversKNN, self).predict(dist)
def predict(self, X):
    check_is_fitted(self, "cluster_centers_")

    # Check that the array is good and attempt to convert it to
    # Numpy array if possible
    X = check_array(X)

    # Apply distance metric wrt. cluster centers (medoids)
    D = self.distance_func(X, Y=self.cluster_centers_)

    # Assign data points to clusters based on
    # which cluster assignment yields
    # the smallest distance
    labels = np.argmin(D, axis=1)

    return labels
def fit_transform(self, X, y=None, sample_weight=None):
    X = check_array(X, accept_sparse=['csc'], ensure_2d=False)

    if sp.issparse(X):
        # Pre-sort indices to avoid that each individual tree of the
        # ensemble sorts the indices.
        X.sort_indices()

    X_, y_ = generate_discriminative_dataset(X)

    super(RandomForestEmbedding, self).fit(X_, y_,
                                           sample_weight=sample_weight)

    self.one_hot_encoder_ = OneHotEncoder(sparse=True)
    if self.sparse_output:
        return self.one_hot_encoder_.fit_transform(self.apply(X))
    return self.apply(X)
def transform(self, X, y=None):
    # scikit-learn checks
    X = check_array(X)

    if X.shape[1] != len(self.maximums_):
        raise ValueError("X has different shape than during fitting. "
                         "Expected %d, got %d."
                         % (len(self.maximums_), X.shape[1]))

    return np.vstack((
        np.array([
            np.cos(2 * np.pi * x / (maximum + 1))
            for x, maximum in zip(X.T, self.maximums_)
        ]),
        np.array([
            np.sin(2 * np.pi * x / (maximum + 1))
            for x, maximum in zip(X.T, self.maximums_)
        ])
    )).T
def transform(self, X, y=None):
    """Binarize X based on the fitted cut points."""
    # scikit-learn checks
    X = check_array(X)

    if self.cut_points is None:
        raise NotFittedError('Estimator not fitted, call `fit` before '
                             'exploiting the model.')

    if X.shape[1] != len(self.cut_points):
        raise ValueError("Provided array's dimensions do not match with the "
                         "ones from the array `fit` was called on.")

    binned = np.array([
        np.digitize(x, self.cut_points[i])
        if len(self.cut_points[i]) > 0
        else np.zeros(x.shape)
        for i, x in enumerate(X.T)
    ]).T

    return binned
def load_data(dtype=np.float32, order='F'):
    """Load the data, then cache and memmap the train/test split."""
    ######################################################################
    ## Load dataset
    print("Loading dataset...")
    data = fetch_mldata('MNIST original')
    X = check_array(data['data'], dtype=dtype, order=order)
    y = data["target"]

    # Normalize features
    X = X / 255

    ## Create train-test split (as [Joachims, 2006])
    print("Creating train-test split...")
    n_train = 60000
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_test = X[n_train:]
    y_test = y[n_train:]

    return X_train, X_test, y_train, y_test
def test_check_input_false():
    X, y, _, _ = build_dataset(n_samples=20, n_features=10)
    X = check_array(X, order='F', dtype='float64')
    y = check_array(X, order='F', dtype='float64')
    clf = ElasticNet(selection='cyclic', tol=1e-8)
    # Check that no error is raised if data is provided in the right format
    clf.fit(X, y, check_input=False)
    X = check_array(X, order='F', dtype='float32')
    clf.fit(X, y, check_input=True)
    # Check that an error is raised if data is provided in the wrong dtype,
    # because of check bypassing
    assert_raises(ValueError, clf.fit, X, y, check_input=False)

    # With no input checking, providing X in C order should result in false
    # computation
    X = check_array(X, order='C', dtype='float64')
    assert_raises(ValueError, clf.fit, X, y, check_input=False)
def transform(self, X, y=None):
    """Apply dimensionality reduction on X.

    X is projected on the principal components previous extracted from a
    training set.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        New data, where n_samples in the number of samples
        and n_features is the number of features.

    Returns
    -------
    X_transformed : array-like, shape (n_samples, n_components)
    """
    check_is_fitted(self, 'center_')

    X = check_array(X)
    if self.center_ is not None:
        X = X - self.center_
    X_transformed = fast_dot(X, self.components_.T)
    return X_transformed
def _check_array(self, X):
    X = check_array(X)

    # Check that the number of clusters is less than or equal to
    # the number of samples
    if self.n_clusters > X.shape[0]:
        raise ValueError("The number of medoids " +
                         "({}) ".format(self.n_clusters) +
                         "must be larger than the number " +
                         "of samples ({})".format(X.shape[0]))

    return X
def get_coef(self, X):
    qr, qraux = self.qr, self.qraux
    n, p = qr.shape

    # sanity check
    assert isinstance(qr, np.ndarray), \
        'internal error: QR should be a np.ndarray but got %s' % type(qr)
    assert isinstance(qraux, np.ndarray), \
        'internal error: qraux should be a np.ndarray but got %s' % type(qraux)

    # validate input array
    X = check_array(X, dtype='numeric', copy=True, order='F')
    nx, ny = X.shape
    if nx != n:
        raise ValueError('qr and X must have same number of rows')

    # check on size
    _validate_matrix_size(n, p)

    # get the rank of the decomposition
    k = self.rank

    # get ix vector
    # if p > n:
    #     ix = np.ones(n + (p - n)) * np.nan
    #     ix[:n] = np.arange(n)  # i.e., array([0,1,2,nan,nan,nan])
    # else:
    #     ix = np.arange(n)

    # set up the structures to alter
    coef, info = (np.zeros((k, ny), dtype=np.double, order='F'),
                  np.zeros(1, dtype=np.int, order='F'))

    # call the fortran module IN PLACE
    _safecall(dqrsl.dqrcf, qr, n, k, qraux, X, ny, coef, 0)

    # post-processing
    # if k < p:
    #     cf = np.ones((p, ny)) * np.nan
    #     cf[self.pivot[np.arange(k)], :] = coef
    return coef if not k < p else coef[self.pivot[np.arange(k)], :]
def partial_fit(self, X, y=None):
    """Fit the model to the data X which should contain a partial
    segment of the data.

    X : array-like, shape (n_samples, n_features)
        Training data.

    Returns
    -------
    self : BernoulliRBM
        The fitted model.
    """
    X = check_array(X, accept_sparse='csr', dtype=np.float)
    if not hasattr(self, 'components_'):
        self.components_ = np.asarray(
            self.rng_.normal(
                0,
                0.01,
                (self.n_components, X.shape[1])
            ),
            order='fortran')
    if not hasattr(self, 'intercept_hidden_'):
        self.intercept_hidden_ = np.zeros(self.n_components, )
    if not hasattr(self, 'intercept_visible_'):
        self.intercept_visible_ = np.zeros(X.shape[1], )
    if not hasattr(self, 'h_samples_'):
        self.h_samples_ = np.zeros((self.batch_size, self.n_components))

    self._fit(X)
def predict(self, X, check_input=True):
    """Predict class or regression value for X.

    For a classification model, the predicted class for each sample in X is
    returned. For a regression model, the predicted value based on X is
    returned.

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        The input samples.

    Returns
    -------
    y : array of shape = [n_samples] or [n_samples, n_outputs]
        The predicted classes, or the predict values.
    """
    if getattr(X, "dtype", None) != DTYPE or X.ndim != 2:
        X = check_array(X, dtype=DTYPE)

    n_samples, n_features = X.shape

    if self.tree_ is None:
        raise Exception("Tree not initialized. Perform a fit first")

    if self.n_features_ != n_features:
        raise ValueError("Number of features of the model must "
                         " match the input. Model n_features is %s and "
                         " input n_features is %s "
                         % (self.n_features_, n_features))

    if self.tree_.get('direction') > 0:
        return ((X[:, self.tree_.get('best_dim')] >
                 self.tree_.get('threshold')) * 2) - 1
    else:
        return ((X[:, self.tree_.get('best_dim')] <=
                 self.tree_.get('threshold')) * 2) - 1
def predict(self, X, check_input=True):
    """Predict class or regression value for X.

    For a classification model, the predicted class for each sample in X is
    returned. For a regression model, the predicted value based on X is
    returned.

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        The input samples.

    Returns
    -------
    y : array of shape = [n_samples] or [n_samples, n_outputs]
        The predicted classes, or the predict values.
    """
    X = check_array(X, dtype=DTYPE, accept_sparse="csr")
    if issparse(X) and (X.indices.dtype != np.intc or
                        X.indptr.dtype != np.intc):
        raise ValueError("No support for np.int64 index based "
                         "sparse matrices")

    n_samples, n_features = X.shape

    if self.tree_ is None:
        raise Exception("Tree not initialized. Perform a fit first")

    if self.n_features_ != n_features:
        raise ValueError("Number of features of the model must "
                         " match the input. Model n_features is %s and "
                         " input n_features is %s "
                         % (self.n_features_, n_features))

    return (self.tree_.get('coefficient') *
            (X[:, self.tree_.get('best_dim')] >
             self.tree_.get('threshold')) +
            self.tree_.get('constant'))
def fit(self, X, y=None):
    """Do nothing and return the estimator unchanged.

    This method is just there to implement the usual API and hence
    work in pipelines.
    """
    X = check_array(X, accept_sparse='csr')
    return self
def transform(self, X, y=None, copy=None):
    """Scale each non zero row of X to unit norm.

    Parameters
    ----------
    X : array or scipy.sparse matrix with shape [n_samples, n_features]
        The data to normalize, row by row. scipy.sparse matrices should be
        in CSR format to avoid an un-necessary copy.
    """
    copy = copy if copy is not None else self.copy
    X = check_array(X, accept_sparse='csr')
    return normalize(X, norm=self.norm, axis=1, copy=copy)
def transform_lsi(self, X):
    """LSI transform, normalized by the inverse of the eigen values."""
    X = check_array(X, accept_sparse='csr')
    return safe_sparse_dot(X, self.components_.T).dot(
        np.diag(1. / self.singular_values_[:self.n_components]))
def fit(self, X, y=None, **fit_params):
    """Fits the inverse covariance model according to the given training
    data and parameters.

    Parameters
    -----------
    X : 2D ndarray, shape (n_features, n_features)
        Input data.

    Returns
    -------
    self
    """
    X = check_array(X, ensure_min_features=2, estimator=self)
    X = as_float_array(X, copy=False, force_all_finite=False)
    self.init_coefs(X)

    if self.method == 'quic':
        (self.precision_, self.covariance_, self.opt_, self.cputime_,
         self.iters_, self.duality_gap_) = quic(
            self.sample_covariance_,
            self.lam * self.lam_scale_,
            mode=self.mode,
            tol=self.tol,
            max_iter=self.max_iter,
            Theta0=self.Theta0,
            Sigma0=self.Sigma0,
            path=self.path_,
            msg=self.verbose)
    else:
        raise NotImplementedError(
            "Only method='quic' has been implemented.")

    self.is_fitted = True
    return self
def predict(self, X):
    """Predict values of X from internal dictionary and intercepts.

    Parameters
    ----------
    X : csr-matrix (n_samples, n_features)
        Matrix holding the loci of prediction.

    Returns
    -------
    X_pred : csr-matrix (n_samples, n_features)
        Matrix with the same sparsity structure as X, with predicted values.
    """
    if not sp.issparse(X):
        X = sp.csr_matrix(X)
    X = check_array(X, accept_sparse='csr')
    out = np.zeros_like(X.data)
    _predict(out, X.indices, X.indptr, self.code_, self.components_)
    if self.detrend:
        for i in range(X.shape[0]):
            out[X.indptr[i]:X.indptr[i + 1]] += self.row_mean_[i]
        out += self.col_mean_.take(X.indices, mode='clip')
    if self.crop is not None:
        out[out > self.crop[1]] = self.crop[1]
        out[out < self.crop[0]] = self.crop[0]
    return sp.csr_matrix((out, X.indices, X.indptr), shape=X.shape)
def score(self, X):
    """Score prediction based on root mean squared error."""
    if not sp.issparse(X):
        X = sp.csr_matrix(X)
    X = check_array(X, accept_sparse='csr')
    X_pred = self.predict(X)
    return rmse(X, X_pred)
def rmse(X_true, X_pred):
    """Root mean squared error for two sparse matrices."""
    X_true = check_array(X_true, accept_sparse='csr')
    X_pred = check_array(X_pred, accept_sparse='csr')
    mse = np.mean((X_true.data - X_pred.data) ** 2)
    return np.sqrt(mse)
def fit(self, X):
    """Compute the factorisation X ~ code_ x components_, solving for

        D, code_ = argmin_{r2 || D^j ||_1 + (1 - r2) || D^j ||_2^2 < 1}
            1 / 2 || X - D A ||_2 + (1 - r) || A ||_2 / 2 + r || A ||_1

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)

    Returns
    -------
    self
    """
    X = check_array(X, order='C', dtype=[np.float32, np.float64])
    if self.dict_init is None:
        dict_init = X
    else:
        dict_init = check_array(self.dict_init, dtype=X.dtype.type)
    self.prepare(n_samples=X.shape[0], X=dict_init)
    # Main loop
    for _ in range(self.n_epochs):
        self.partial_fit(X)
        permutation = self.shuffle()
        X = X[permutation]
    return self
def score_samples(self, X, lengths=None):
    """Compute the log probability under the model and compute posteriors.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature matrix of individual samples.

    lengths : array-like of integers, shape (n_sequences, ), optional
        Lengths of the individual sequences in ``X``. The sum of
        these should be ``n_samples``.

    Returns
    -------
    logprob : float
        Log likelihood of ``X``.

    posteriors : array, shape (n_samples, n_components)
        State-membership probabilities for each sample in ``X``.

    See Also
    --------
    score : Compute the log probability under the model.
    decode : Find most likely state sequence corresponding to ``X``.
    """
    check_is_fitted(self, "startprob_")
    self._check()

    X = check_array(X)
    n_samples = X.shape[0]
    logprob = 0
    posteriors = np.zeros((n_samples, self.n_components))
    for i, j in iter_from_X_lengths(X, lengths):
        framelogprob = self._compute_log_likelihood(X[i:j])
        logprobij, fwdlattice = self._do_forward_pass(framelogprob)
        logprob += logprobij

        bwdlattice = self._do_backward_pass(framelogprob)
        posteriors[i:j] = self._compute_posteriors(fwdlattice, bwdlattice)
    return logprob, posteriors
def score(self, X, lengths=None):
    """Compute the log probability under the model.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature matrix of individual samples.

    lengths : array-like of integers, shape (n_sequences, ), optional
        Lengths of the individual sequences in ``X``. The sum of
        these should be ``n_samples``.

    Returns
    -------
    logprob : float
        Log likelihood of ``X``.

    See Also
    --------
    score_samples : Compute the log probability under the model and
        posteriors.
    decode : Find most likely state sequence corresponding to ``X``.
    """
    check_is_fitted(self, "startprob_")
    self._check()

    X = check_array(X)
    # XXX we can unroll forward pass for speed and memory efficiency.
    logprob = 0
    for i, j in iter_from_X_lengths(X, lengths):
        framelogprob = self._compute_log_likelihood(X[i:j])
        logprobij, _fwdlattice = self._do_forward_pass(framelogprob)
        logprob += logprobij
    return logprob
def fit(self, X, **kwargs):
    """Apply affinity propagation clustering.

    Create affinity matrix from negative euclidean distances if required.

    Parameters
    ----------
    X : array-like or sparse matrix,
        shape (n_samples, n_features) or (n_samples, n_samples)
        Data matrix or, if affinity is ``precomputed``, matrix of
        similarities / affinities.
    """
    if not issparse(X):
        return super(AffinityPropagation, self).fit(X, **kwargs)

    # Since X is sparse, this converts it in a coo_matrix if required
    X = check_array(X, accept_sparse='coo')
    if self.affinity == "precomputed":
        self.affinity_matrix_ = X
    elif self.affinity == "euclidean":
        self.affinity_matrix_ = coo_matrix(
            -euclidean_distances(X, squared=True))
    else:
        raise ValueError("Affinity must be 'precomputed' or "
                         "'euclidean'. Got %s instead"
                         % str(self.affinity))

    self.cluster_centers_indices_, self.labels_, self.n_iter_ = \
        sparse_ap(
            self.affinity_matrix_, self.preference, max_iter=self.max_iter,
            convergence_iter=self.convergence_iter, damping=self.damping,
            copy=self.copy, verbose=self.verbose, return_n_iter=True,
            convergence_percentage=self.convergence_percentage)

    if self.affinity != "precomputed":
        self.cluster_centers_ = X.data[self.cluster_centers_indices_].copy()

    return self
def predict(self, X, quantile=None, check_input=False):
    """Predict regression value for X.

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_samples, n_features]
        The input samples. Internally, it will be converted to
        ``dtype=np.float32`` and if a sparse matrix is provided
        to a sparse ``csr_matrix``.

    quantile : int, optional
        Value ranging from 0 to 100. By default, the mean is returned.

    check_input : boolean, (default=True)
        Allow to bypass several input checking.
        Don't use this parameter unless you know what you do.

    Returns
    -------
    y : array of shape = [n_samples]
        If quantile is set to None, then return E(Y | X). Else return
        y such that F(Y=y | x) = quantile.
    """
    # apply method requires X to be of dtype np.float32
    X = check_array(X, dtype=np.float32, accept_sparse="csc")
    if quantile is None:
        return super(BaseTreeQuantileRegressor, self).predict(
            X, check_input=check_input)

    quantiles = np.zeros(X.shape[0])
    X_leaves = self.apply(X)
    unique_leaves = np.unique(X_leaves)
    for leaf in unique_leaves:
        quantiles[X_leaves == leaf] = weighted_percentile(
            self.y_train_[self.y_train_leaves_ == leaf], quantile)
    return quantiles
def predict(self, X, quantile=None):
    """Predict regression value for X.

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_samples, n_features]
        The input samples. Internally, it will be converted to
        ``dtype=np.float32`` and if a sparse matrix is provided
        to a sparse ``csr_matrix``.

    quantile : int, optional
        Value ranging from 0 to 100. By default, the mean is returned.

    check_input : boolean, (default=True)
        Allow to bypass several input checking.
        Don't use this parameter unless you know what you do.

    Returns
    -------
    y : array of shape = [n_samples]
        If quantile is set to None, then return E(Y | X). Else return
        y such that F(Y=y | x) = quantile.
    """
    # apply method requires X to be of dtype np.float32
    X = check_array(X, dtype=np.float32, accept_sparse="csc")
    if quantile is None:
        return super(BaseForestQuantileRegressor, self).predict(X)

    sorter = np.argsort(self.y_train_)
    X_leaves = self.apply(X)
    weights = np.zeros((X.shape[0], len(self.y_train_)))
    quantiles = np.zeros((X.shape[0]))
    for i, x_leaf in enumerate(X_leaves):
        mask = self.y_train_leaves_ != np.expand_dims(x_leaf, 1)
        x_weights = ma.masked_array(self.y_weights_, mask)
        weights = x_weights.sum(axis=0)
        quantiles[i] = weighted_percentile(
            self.y_train_, quantile, weights, sorter)
    return quantiles
def _decision_scores(self, X):
    """Predict using the ELM model.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        The input data.

    Returns
    -------
    y_pred : array-like, shape (n_samples,) or (n_samples, n_outputs)
        The predicted values.
    """
    X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])

    if self.batch_size is None:
        hidden_activations = self._compute_hidden_activations(X)
        y_pred = safe_sparse_dot(hidden_activations, self.coef_output_)
    else:
        n_samples = X.shape[0]
        batches = gen_batches(n_samples, self.batch_size)

        y_pred = np.zeros((n_samples, self.n_outputs_))
        for batch in batches:
            h_batch = self._compute_hidden_activations(X[batch])
            y_pred[batch] = safe_sparse_dot(h_batch, self.coef_output_)

    return y_pred
def fit(self, X, y=None):
    """Fit the model with X, using minibatches of size batch_size.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples and
        n_features is the number of features.

    y : Passthrough for ``Pipeline`` compatibility.

    Returns
    -------
    self : object
        Returns the instance itself.
    """
    if isinstance(X, Data):
        X = X[:]
    X = check_array(X, copy=self.copy, dtype=[np.float64, np.float32])
    n_samples, n_features = X.shape

    if self.batch_size is None:
        batch_size = 12 * n_features
    else:
        batch_size = self.batch_size

    for batch in gen_batches(n_samples, batch_size):
        x = X[batch]
        self.partial_fit(x, check_input=False)
    return self