We have extracted the following 50 code examples from open-source Python projects to illustrate how to use scipy.sparse.issparse().
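Before the project examples, here is a minimal, self-contained sketch (not drawn from any of the projects below) showing what scipy.sparse.issparse() reports for dense and sparse inputs:

import numpy as np
from scipy import sparse

dense = np.eye(3)                      # plain numpy ndarray
csr = sparse.csr_matrix(dense)         # compressed sparse row matrix

print(sparse.issparse(dense))          # False: ndarrays are not sparse
print(sparse.issparse(csr))            # True: any scipy.sparse matrix
print(sparse.issparse(csr.todense()))  # False: .todense() returns a numpy matrix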
def scipy_sparse_to_cvx_sparse(x):
    '''
    This function takes as input a SciPy sparse matrix and
    converts it into a CVX sparse one.

    Inputs:
    ------
    x : SciPy sparse matrix.

    Outputs:
    -------
    y : CVX sparse matrix.
    '''

    # --> Check that the input matrix is indeed a scipy sparse matrix.
    if not sparse.issparse(x):
        raise ValueError('Input matrix is not a SciPy sparse matrix.')

    # --> Convert x to COOdinate format.
    coo = x.tocoo()

    # --> Create the corresponding cvx sparse matrix.
    y = spmatrix(coo.data, coo.row.tolist(), coo.col.tolist())

    return y
def _sanitize_value(x):
    """
    Performs cleaning steps on the data so various type comparisons can
    be performed correctly.
    """
    if isinstance(x, (float, str, unicode, int, long)):
        return x
    elif _HAS_SKLEARN and _sp.issparse(x):
        return x.todense()
    elif isinstance(x, _np.ndarray):
        return x
    elif isinstance(x, tuple):
        return tuple(_sanitize_value(v) for v in x)
    elif isinstance(x, list):
        return [_sanitize_value(v) for v in x]
    elif isinstance(x, dict):
        return dict(
            (_sanitize_value(k), _sanitize_value(v)) for k, v in x.items())
    else:
        assert False, str(x)
def _return_float_dtype(X, Y):
    """
    1. If dtype of X and Y is float32, then dtype float32 is returned.
    2. Else dtype float is returned.
    """
    if not issparse(X) and not isinstance(X, np.ndarray):
        X = np.asarray(X)

    if Y is None:
        Y_dtype = X.dtype
    elif not issparse(Y) and not isinstance(Y, np.ndarray):
        Y = np.asarray(Y)
        Y_dtype = Y.dtype
    else:
        Y_dtype = Y.dtype

    if X.dtype == Y_dtype == np.float32:
        dtype = np.float32
    else:
        dtype = np.float

    return X, Y, dtype
def __call__(self, X, y, categorical=None, metafeatures=None, helpers=None):
    if categorical is None:
        categorical = [False for i in range(X.shape[1])]
    start_time = time()
    try:
        if issparse(X) and hasattr(self, "_calculate_sparse"):
            value = self._calculate_sparse(X, y, categorical, metafeatures,
                                           helpers)
        else:
            value = self._calculate(X, y, categorical, metafeatures, helpers)
        comment = ""
    except MemoryError as e:
        value = None
        comment = "Memory Error"

    end_time = time()
    return MetaFeatureValue(self.__class__.__name__, self.type_, 0, 0, value,
                            end_time - start_time, comment=comment)
def transform(self, X):
    """Scaling features of X according to feature_range.

    Parameters
    ----------
    X : array-like with shape [n_samples, n_features]
        Input data that will be transformed.
    """
    check_is_fitted(self, 'scale_')

    X = check_array(X, accept_sparse="csc", copy=self.copy, dtype=np.float32)

    if sparse.issparse(X):
        for i in range(X.shape[1]):
            X.data[X.indptr[i]:X.indptr[i + 1]] *= self.scale_[i]
            X.data[X.indptr[i]:X.indptr[i + 1]] += self.min_[i]
    else:
        X *= self.scale_
        X += self.min_
    return X
def __str__(self):
    val = 'DataManager : ' + self.name + '\ninfo:\n'
    for item in self.info:
        val += '\t' + item + ' = ' + str(self.info[item]) + '\n'
    val += 'data:\n'
    for subset in self.data:
        val += '\t%s = %s %s %s\n' % (subset, type(self.data[subset]),
                                      str(self.data[subset].shape),
                                      str(self.data[subset].dtype))
        if issparse(self.data[subset]):
            val += '\tdensity: %f\n' % \
                   (float(len(self.data[subset].data)) /
                    self.data[subset].shape[0] /
                    self.data[subset].shape[1])
    val += 'feat_type:\t' + str(self.feat_type) + '\n'
    return val
def nvecs(X, n, rank, do_flipsign=True, dtype=np.float):
    """
    Eigendecomposition of mode-n unfolding of a tensor
    """
    Xn = X.unfold(n)
    if issparse_mat(Xn):
        Xn = csr_matrix(Xn, dtype=dtype)
        Y = Xn.dot(Xn.T)
        _, U = eigsh(Y, rank, which='LM')
    else:
        Y = Xn.dot(Xn.T)
        N = Y.shape[0]
        _, U = eigh(Y, eigvals=(N - rank, N - 1))
        #_, U = eigsh(Y, rank, which='LM')

    # reverse order of eigenvectors such that eigenvalues are decreasing
    U = array(U[:, ::-1])
    # flip sign
    if do_flipsign:
        U = flipsign(U)
    return U
def prec_ets(n_trees, X_train, y_train, X_test, y_test, random_state=None):
    """
    ExtraTrees
    """
    from sklearn.ensemble import ExtraTreesClassifier
    if not issparse(X_train):
        X_train = X_train.reshape((X_train.shape[0], -1))
    if not issparse(X_test):
        X_test = X_test.reshape((X_test.shape[0], -1))
    LOGGER.info('start predict: n_trees={},X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        n_trees, X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    clf = ExtraTreesClassifier(n_estimators=n_trees, max_depth=None, n_jobs=-1,
                               verbose=1, random_state=random_state)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    prec = float(np.sum(y_pred == y_test)) / len(y_test)
    LOGGER.info('prec_ets{}={:.6f}%'.format(n_trees, prec * 100.0))
    return clf, y_pred
def prec_rf(n_trees, X_train, y_train, X_test, y_test):
    """
    RandomForest
    """
    from sklearn.ensemble import RandomForestClassifier
    if not issparse(X_train):
        X_train = X_train.reshape((X_train.shape[0], -1))
    if not issparse(X_test):
        X_test = X_test.reshape((X_test.shape[0], -1))
    LOGGER.info('start predict: n_trees={},X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        n_trees, X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    clf = RandomForestClassifier(n_estimators=n_trees, max_depth=None, n_jobs=-1,
                                 verbose=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    prec = float(np.sum(y_pred == y_test)) / len(y_test)
    LOGGER.info('prec_rf{}={:.6f}%'.format(n_trees, prec * 100.0))
    return clf, y_pred
def prec_log(X_train, y_train, X_test, y_test):
    from sklearn.linear_model import LogisticRegression
    if not issparse(X_train):
        X_train = X_train.reshape((X_train.shape[0], -1))
    if not issparse(X_test):
        X_test = X_test.reshape((X_test.shape[0], -1))
    LOGGER.info('start predict: X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    clf = LogisticRegression(solver='sag', n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    prec = float(np.sum(y_pred == y_test)) / len(y_test)
    LOGGER.info('prec_log={:.6f}%'.format(prec * 100.0))
    return clf, y_pred
def _sanitize_value(x):
    """
    Performs cleaning steps on the data so various type comparisons can
    be performed correctly.
    """
    if isinstance(x, _six.string_types + _six.integer_types + (float,)):
        return x
    elif _HAS_SKLEARN and _sp.issparse(x):
        return x.todense()
    elif isinstance(x, _np.ndarray):
        return x
    elif isinstance(x, tuple):
        return tuple(_sanitize_value(v) for v in x)
    elif isinstance(x, list):
        return [_sanitize_value(v) for v in x]
    elif isinstance(x, dict):
        return dict(
            (_sanitize_value(k), _sanitize_value(v)) for k, v in x.items())
    else:
        assert False, str(x)
def test_safe_power_sparse():
    # TODO maybe move to a util module or something
    # scikit-learn has safe_sqr but not general power
    X_quad = X ** 4
    # assert X stays sparse
    X_sp = sp.csr_matrix(X)
    for sp_format in ('csr', 'csc', 'coo'):  # not working with lil for now
        X_sp = X_sp.asformat(sp_format)
        X_sp_quad = safe_power(X_sp, degree=4)
        assert_true(sp.issparse(X_sp_quad),
                    msg="safe_power breaks {} sparsity".format(sp_format))
        assert_array_almost_equal(X_quad,
                                  X_sp_quad.A,
                                  err_msg="safe_power differs for {} and "
                                          "dense".format(sp_format))
def filter_rows(self, ids_of_interest):
    # return (cooccurrence[1].todense()[:width])
    xdim = self.matrix.shape[1]
    dense = np.empty([0, xdim])
    # dense=np.empty([0,width])
    for i in ids_of_interest:
        if i < 0:
            continue
        if sparse.issparse(self.matrix):
            row = self.matrix[i].todense()
        else:
            row = self.matrix[i]
        row = np.asarray(row)
        row = np.reshape(row, (xdim))
        # dense=np.vstack([dense,row[:width]])
        dense = np.vstack([dense, row])
    return dense
def batch_generator(X, y=None, batch_size=128, shuffle=False):
    index = np.arange(X.shape[0])
    while True:
        if shuffle:
            np.random.shuffle(index)
        batch_start = 0
        while batch_start < X.shape[0]:
            batch_index = index[batch_start:batch_start + batch_size]
            batch_start += batch_size

            X_batch = X[batch_index, :]
            if sp.issparse(X_batch):
                X_batch = X_batch.toarray()

            if y is None:
                yield X_batch
            else:
                yield X_batch, y[batch_index]
def _validate_X_predict(self, X, check_input):
    """Validate X whenever one tries to predict, apply, predict_proba"""
    if self.tree_ is None:
        raise NotFittedError("Estimator not fitted, "
                             "call `fit` before exploiting the model.")

    if check_input:
        X = check_array(X, dtype=DTYPE, accept_sparse="csr")
        if issparse(X) and (X.indices.dtype != np.intc or
                            X.indptr.dtype != np.intc):
            raise ValueError("No support for np.int64 index based "
                             "sparse matrices")

    n_features = X.shape[1]
    if self.n_features_ != n_features:
        raise ValueError("Number of features of the model must "
                         "match the input. Model n_features is %s and "
                         "input n_features is %s "
                         % (self.n_features_, n_features))

    return X
def indexable(*iterables):
    """Make arrays indexable for cross-validation.

    Checks consistent length, passes through None, and ensures that everything
    can be indexed by converting sparse matrices to csr and converting
    non-iterable objects to arrays.

    Parameters
    ----------
    *iterables : lists, dataframes, arrays, sparse matrices
        List of objects to ensure sliceability.
    """
    result = []
    for X in iterables:
        if sp.issparse(X):
            result.append(X.tocsr())
        elif hasattr(X, "__getitem__") or hasattr(X, "iloc"):
            result.append(X)
        elif X is None:
            result.append(X)
        else:
            result.append(np.array(X))
    check_consistent_length(*result)
    return result
def update(self):
    """Update output array and shift to input if stacked.

    If stacking is in force, the output array will replace the input
    array and be used as input for subsequent jobs. Sparse matrices are
    force-converted to ``csr`` format.
    """
    if self.predict_out is None:
        return

    if (issparse(self.predict_out) and not
            self.predict_out.__class__.__name__.startswith('csr')):
        # Enforce csr on sparse matrices
        self.predict_out = self.predict_out.tocsr()

    if self.stack:
        self.predict_in = self.predict_out
        self.rebase()
def _propagate_features(self, task):
    """Propagate features from input array to output array."""
    p_out, p_in = self.job.predict_out, self.job.predict_in

    # Check for loss of obs between layers (i.e. with blendindex)
    n_in, n_out = p_in.shape[0], p_out.shape[0]
    r = int(n_in - n_out)

    if not issparse(p_in):
        # Simple item setting
        p_out[:, :task.n_feature_prop] = p_in[r:, task.propagate_features]
    else:
        # Need to populate propagated features using scipy sparse hstack
        self.job.predict_out = hstack(
            [p_in[r:, task.propagate_features],
             p_out[:, task.n_feature_prop:]]
        ).tolil()
def ensure_ndarray_or_sparse(A, shape=None, uniform=None, ndim=None, size=None,
                             dtype=None, kind=None):
    r"""
    Ensures A is an ndarray or a scipy sparse matrix and does an assert_array
    with the given parameters

    Returns
    -------
    A : ndarray
        If A is already an ndarray, it is just returned. Otherwise this is an
        independent copy as an ndarray

    """
    if not isinstance(A, np.ndarray) and not scisp.issparse(A):
        try:
            A = np.array(A)
        except:
            raise AssertionError('Given argument cannot be converted to an ndarray:\n' + str(A))
    assert_array(A, shape=shape, uniform=uniform, ndim=ndim, size=size,
                 dtype=dtype, kind=kind)
    return A
def _concatenate_dense_jac(jac_list):
    # Read sequentially all jacobians.
    # Convert all values to numpy arrays.
    jac_ineq_list = []
    jac_eq_list = []
    for jac_tuple in jac_list:
        J_ineq, J_eq = jac_tuple
        if spc.issparse(J_ineq):
            jac_ineq_list += [J_ineq.toarray()]
        else:
            jac_ineq_list += [np.atleast_2d(J_ineq)]
        if spc.issparse(J_eq):
            jac_eq_list += [J_eq.toarray()]
        else:
            jac_eq_list += [np.atleast_2d(J_eq)]
    # Concatenate all
    J_ineq = np.vstack(jac_ineq_list)
    J_eq = np.vstack(jac_eq_list)
    # Return
    return J_ineq, J_eq
def _compute_jacobian(self, J_eq, J_ineq, s):
    if self.n_ineq == 0:
        return J_eq
    else:
        if spc.issparse(J_eq) or spc.issparse(J_ineq):
            # It is expected that J_eq and J_ineq
            # are already `csr_matrix` because of
            # the way ``BoxConstraint``, ``NonlinearConstraint``
            # and ``LinearConstraint`` are defined.
            J_eq = spc.csr_matrix(J_eq)
            J_ineq = spc.csr_matrix(J_ineq)
            return self._assemble_sparse_jacobian(J_eq, J_ineq, s)
        else:
            S = np.diag(s)
            zeros = np.zeros((self.n_eq, self.n_ineq))
            # Convert to matrix
            if spc.issparse(J_ineq):
                J_ineq = J_ineq.toarray()
            if spc.issparse(J_eq):
                J_eq = J_eq.toarray()
            # Concatenate matrices
            return np.asarray(np.bmat([[J_eq, zeros],
                                       [J_ineq, S]]))
def _make_feed_dict(self, X, inverse=False, training=False):
    # Make the dictionary mapping tensor placeholders to input data.

    # Convert sparse inputs to dense.
    if sp.issparse(X):
        X = X.todense().A

    if inverse:
        feed_dict = {self._encoded_values: X}
    else:
        feed_dict = {self._input_values: X}

    # If not training, turn off dropout (i.e., set keep_prob = 1.0).
    feed_dict[self._keep_prob] = self.keep_prob if training else 1.0

    feed_dict[self._sigmoid_msk] \
        = self._sigmoid_msk_values[0:X.shape[0], :]
    feed_dict[self._default_msk] \
        = self._default_msk_values[0:X.shape[0], :]
    feed_dict[self._softmax_msks] \
        = self._softmax_msks_values[:, 0:X.shape[0], :]

    return feed_dict
def mydot(A, B):
    r"""Dot-product that can handle dense and sparse arrays

    Parameters
    ----------
    A : numpy ndarray or scipy sparse matrix
        The first factor
    B : numpy ndarray or scipy sparse matrix
        The second factor

    Returns
    -------
    C : numpy ndarray or scipy sparse matrix
        The dot-product of A and B

    """
    if issparse(A):
        return A.dot(B)
    elif issparse(B):
        return (B.T.dot(A.T)).T
    else:
        return np.dot(A, B)
def convert_solution(z, Cs):
    if issparse(Cs):
        Cs = Cs.toarray()
    M = Cs.shape[0]
    x = z[0:M]
    y = z[M:]

    w = np.exp(y)
    pi = w / w.sum()

    X = pi[:, np.newaxis] * x[np.newaxis, :]
    Y = X + np.transpose(X)
    denom = Y
    enum = Cs * np.transpose(pi)
    P = enum / denom
    ind = np.diag_indices(Cs.shape[0])
    P[ind] = 0.0
    rowsums = P.sum(axis=1)
    P[ind] = 1.0 - rowsums
    return pi, P

###############################################################################
# Objective, Gradient, and Hessian
###############################################################################
def get_knn_edges_sparse(dmat, k):
    edge_dict = {}
    if not issparse(dmat):
        return get_knn_edges(dmat, k)
    else:
        for i in range(dmat.shape[0]):
            l = 1
            saved_values = {}
            while l < k:
                row = dmat.getrow(i)
                data_index = row.data.argmin()
                j = row.indices[data_index]
                saved_values[j] = dmat[i, j]
                if i != j:
                    ii, jj = tuple(sorted([i, j]))
                    edge_dict[(ii, jj)] = dmat[i, j]
                dmat[i, j] = inf
                l = l + 1
            # Rewrite saved values:
            for j in saved_values:
                dmat[i, j] = saved_values[j]
    return edge_dict.keys()
def log1p(data, copy=False):
    """Logarithmize the data matrix.

    Computes `X = log(X + 1)`, where `log` denotes the natural logarithm.

    Parameters
    ----------
    data : array-like or AnnData
        The data matrix.
    copy : bool (default: False)
        If an AnnData is passed, determines whether a copy is returned.

    Returns
    -------
    Returns or updates data, depending on `copy`.
    """
    if isinstance(data, AnnData):
        adata = data.copy() if copy else data
        adata.X = log1p(data.X)
        return adata if copy else None
    X = data  # proceed with data matrix
    if not issparse(X):
        return np.log1p(X)
    else:
        return X.log1p()
def _get_mean_var(X):
    # - using sklearn.StandardScaler throws an error related to
    #   int to long trafo for very large matrices
    # - using X.multiply is slower
    if True:
        mean = X.mean(axis=0)
        if issparse(X):
            mean_sq = X.multiply(X).mean(axis=0)
            mean = mean.A1
            mean_sq = mean_sq.A1
        else:
            mean_sq = np.multiply(X, X).mean(axis=0)
        # enforce R convention (unbiased estimator) for variance
        var = (mean_sq - mean**2) * (X.shape[0] / (X.shape[0] - 1))
    else:
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler(with_mean=False).partial_fit(X)
        mean = scaler.mean_
        # enforce R convention (unbiased estimator)
        var = scaler.var_ * (X.shape[0] / (X.shape[0] - 1))
    return mean, var
def _scale(X, zero_center=True):
    # - using sklearn.StandardScaler throws an error related to
    #   int to long trafo for very large matrices
    # - using X.multiply is slower
    #   the result differs very slightly, why?
    if True:
        mean, var = _get_mean_var(X)
        scale = np.sqrt(var)
        if issparse(X):
            if zero_center:
                raise ValueError('Cannot zero-center sparse matrix.')
            sparsefuncs.inplace_column_scale(X, 1 / scale)
        else:
            X -= mean
            X /= scale
    else:
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler(with_mean=zero_center, copy=False).partial_fit(X)
        # use R convention (unbiased estimator)
        scaler.scale_ *= np.sqrt(X.shape[0] / (X.shape[0] - 1))
        scaler.transform(X)
def fit_transform(self, X, y=None, sample_weight=None):
    X = check_array(X, accept_sparse=['csc'], ensure_2d=False)

    if sp.issparse(X):
        # Pre-sort indices to avoid that each individual tree of the
        # ensemble sorts the indices.
        X.sort_indices()

    X_, y_ = generate_discriminative_dataset(X)

    super(RandomForestEmbedding, self).fit(X_, y_,
                                           sample_weight=sample_weight)

    self.one_hot_encoder_ = OneHotEncoder(sparse=True)
    if self.sparse_output:
        return self.one_hot_encoder_.fit_transform(self.apply(X))
    return self.apply(X)
def normalize_attr_array(a: Any) -> np.ndarray:
    """
    Take all kinds of array-like inputs and normalize to a one-dimensional np.ndarray
    """
    if type(a) is np.ndarray:
        return a
    elif type(a) is np.matrix:
        if a.shape[0] == 1:
            return np.array(a)[0, :]
        elif a.shape[1] == 1:
            return np.array(a)[:, 0]
        else:
            raise ValueError("Attribute values must be 1-dimensional.")
    elif type(a) is list or type(a) is tuple:
        return np.array(a)
    elif sparse.issparse(a):
        return normalize_attr_array(a.todense())
    else:
        raise ValueError("Argument must be a list, tuple, numpy matrix, numpy ndarray or sparse matrix.")
def _check_fit_data(self, X):
    """Verify that the number of samples given is larger than k"""
    X = check_array(X, accept_sparse='csr', dtype=[np.float64, np.float32])
    n_samples, n_features = X.shape
    if X.shape[0] < self.n_clusters:
        raise ValueError("n_samples=%d should be >= n_clusters=%d" % (
            X.shape[0], self.n_clusters))

    for ee in range(n_samples):
        if sp.issparse(X):
            n = sp.linalg.norm(X[ee, :])
        else:
            n = np.linalg.norm(X[ee, :])
        if np.abs(n - 1.) > 1e-4:
            raise ValueError("Data l2-norm must be 1, found {}".format(n))

    return X
def _check_test_data(self, X):
    X = check_array(X, accept_sparse='csr', dtype=FLOAT_DTYPES,
                    warn_on_dtype=True)
    n_samples, n_features = X.shape
    expected_n_features = self.cluster_centers_.shape[1]
    if not n_features == expected_n_features:
        raise ValueError("Incorrect number of features. "
                         "Got %d features, expected %d" % (
                             n_features, expected_n_features))

    for ee in range(n_samples):
        if sp.issparse(X):
            n = sp.linalg.norm(X[ee, :])
        else:
            n = np.linalg.norm(X[ee, :])
        if np.abs(n - 1.) > 1e-4:
            raise ValueError("Data l2-norm must be 1, found {}".format(n))

    return X
def keras_fit(self, X, Y, *, nn_model=None, validation_data=None, resume=None, **fit_args):
    if nn_model is None:
        nn_model = getattr(self, self.NN_MODEL_ATTRIBUTE)
    if not self._pre_fit_setup(nn_model, resume=resume, **fit_args):
        return

    if sps.issparse(X):
        X = X.toarray()
    if sps.issparse(Y):
        Y = Y.toarray()

    if validation_data is not None:
        X_validation, Y_validation = validation_data
        validation_data = (X_validation.toarray() if sps.issparse(X_validation) else X_validation,
                           Y_validation.toarray() if sps.issparse(Y_validation) else Y_validation)
    #end if

    logger.info('{} instances used for training and {} instances used for validation.'.format(
        Y.shape[0],
        validation_data[1].shape[0] if validation_data else int(self.validation_size * Y.shape[0])))

    return nn_model.fit(X, Y,
                        validation_data=validation_data,
                        validation_split=0.0 if validation_data is not None else self.validation_size,
                        epochs=self.epochs,
                        batch_size=self.batch_size,
                        verbose=self.verbose,
                        callbacks=self.build_callbacks(),
                        initial_epoch=self.initial_epoch,
                        **fit_args)
#end def
def transform(self, X):
    """Transform X separately by each transformer, concatenate results.

    Parameters
    ----------
    X : array-like or sparse matrix, shape (n_samples, n_features)
        Input data to be transformed.

    Returns
    -------
    X_t : array-like or sparse matrix, shape (n_samples, sum_n_components)
        hstack of results of transformers. sum_n_components is the
        sum of n_components (output dimension) over transformers.
    """
    Xs = Parallel(n_jobs=self.n_jobs)(
        delayed(_transform_one)(trans, name, X, self.transformer_weights)
        for name, trans in self.transformer_list)
    if any(sparse.issparse(f) for f in Xs):
        Xs = sparse.hstack(Xs).tocsr()
    else:
        Xs = np.hstack(Xs)
    return Xs
def _compute_distances(self, query, candidates):
    """Computes the cosine distance.

    Distance is from the query to points in the candidates array.
    Returns argsort of distances in the candidates
    array and sorted distances.
    """
    if candidates.shape == (0,):
        # needed since _fit_X[np.array([])] doesn't work if _fit_X sparse
        return np.empty(0, dtype=np.int), np.empty(0, dtype=float)

    if sparse.issparse(self._fit_X):
        candidate_X = self._fit_X[candidates]
    else:
        candidate_X = self._fit_X.take(candidates, axis=0, mode='clip')

    distances = pairwise_distances(query, candidate_X,
                                   metric='cosine')[0]
    distance_positions = np.argsort(distances)
    distances = distances.take(distance_positions, mode='clip', axis=0)
    return distance_positions, distances
def test_stratified_strategy_sparse_target():
    X = [[0]] * 5  # ignored
    y = sp.csc_matrix(np.array([[4, 1],
                                [0, 0],
                                [1, 1],
                                [1, 4],
                                [1, 1]]))

    clf = DummyClassifier(strategy="stratified", random_state=0)
    clf.fit(X, y)

    X = [[0]] * 500
    y_pred = clf.predict(X)
    assert_true(sp.issparse(y_pred))
    y_pred = y_pred.toarray()

    for k in range(y.shape[1]):
        p = np.bincount(y_pred[:, k]) / float(len(X))
        assert_almost_equal(p[1], 3. / 5, decimal=1)
        assert_almost_equal(p[0], 1. / 5, decimal=1)
        assert_almost_equal(p[4], 1. / 5, decimal=1)
def test_most_frequent_and_prior_strategy_sparse_target():
    X = [[0]] * 5  # ignored
    y = sp.csc_matrix(np.array([[1, 0],
                                [1, 3],
                                [4, 0],
                                [0, 1],
                                [1, 0]]))

    n_samples = len(X)
    y_expected = np.hstack([np.ones((n_samples, 1)), np.zeros((n_samples, 1))])
    for strategy in ("most_frequent", "prior"):
        clf = DummyClassifier(strategy=strategy, random_state=0)
        clf.fit(X, y)

        y_pred = clf.predict(X)
        assert_true(sp.issparse(y_pred))
        assert_array_equal(y_pred.toarray(), y_expected)
def test_SparseRandomProjection_output_representation():
    for SparseRandomProjection in all_SparseRandomProjection:
        # when using sparse input, the projected data can be forced to be a
        # dense numpy array
        rp = SparseRandomProjection(n_components=10, dense_output=True,
                                    random_state=0)
        rp.fit(data)
        assert isinstance(rp.transform(data), np.ndarray)

        sparse_data = sp.csr_matrix(data)
        assert isinstance(rp.transform(sparse_data), np.ndarray)

        # the output can be left to a sparse matrix instead
        rp = SparseRandomProjection(n_components=10, dense_output=False,
                                    random_state=0)
        rp = rp.fit(data)
        # output for dense input will stay dense:
        assert isinstance(rp.transform(data), np.ndarray)

        # output for sparse output will be sparse:
        assert sp.issparse(rp.transform(sparse_data))
def _randomized_logistic(X, y, weights, mask, C=1., verbose=False,
                         fit_intercept=True, tol=1e-3):
    X = X[safe_mask(X, mask)]
    y = y[mask]
    if issparse(X):
        size = len(weights)
        weight_dia = sparse.dia_matrix((1 - weights, 0), (size, size))
        X = X * weight_dia
    else:
        X *= (1 - weights)

    C = np.atleast_1d(np.asarray(C, dtype=np.float64))
    scores = np.zeros((X.shape[1], len(C)), dtype=np.bool)

    for this_C, this_scores in zip(C, scores.T):
        # XXX : would be great to do it with a warm_start ...
        clf = LogisticRegression(C=this_C, tol=tol, penalty='l1', dual=False,
                                 fit_intercept=fit_intercept)
        clf.fit(X, y)
        this_scores[:] = np.any(
            np.abs(clf.coef_) > 10 * np.finfo(np.float).eps, axis=0)
    return scores
def test_sparsify():
    # Test sparsify and densify members.
    n_samples, n_features = iris.data.shape
    target = iris.target_names[iris.target]
    clf = LogisticRegression(random_state=0).fit(iris.data, target)

    pred_d_d = clf.decision_function(iris.data)

    clf.sparsify()
    assert_true(sp.issparse(clf.coef_))
    pred_s_d = clf.decision_function(iris.data)

    sp_data = sp.coo_matrix(iris.data)
    pred_s_s = clf.decision_function(sp_data)

    clf.densify()
    pred_d_s = clf.decision_function(sp_data)

    assert_array_almost_equal(pred_d_d, pred_s_d)
    assert_array_almost_equal(pred_d_d, pred_s_s)
    assert_array_almost_equal(pred_d_d, pred_d_s)
def densify(self):
    """Convert coefficient matrix to dense array format.

    Converts the ``coef_`` member (back) to a numpy.ndarray. This is the
    default format of ``coef_`` and is required for fitting, so calling
    this method is only required on models that have previously been
    sparsified; otherwise, it is a no-op.

    Returns
    -------
    self : estimator
    """
    msg = "Estimator, %(name)s, must be fitted before densifying."
    check_is_fitted(self, "coef_", msg=msg)
    if sp.issparse(self.coef_):
        self.coef_ = self.coef_.toarray()
    return self
def paired_manhattan_distances(X, Y):
    """Compute the L1 distances between the vectors in X and Y.

    Read more in the :ref:`User Guide <metrics>`.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)

    Y : array-like, shape (n_samples, n_features)

    Returns
    -------
    distances : ndarray (n_samples, )
    """
    X, Y = check_paired_arrays(X, Y)
    diff = X - Y
    if issparse(diff):
        diff.data = np.abs(diff.data)
        return np.squeeze(np.array(diff.sum(axis=1)))
    else:
        return np.abs(diff).sum(axis=-1)
def test_check_sparse_arrays():
    # Ensures that checks return valid sparse matrices.
    rng = np.random.RandomState(0)
    XA = rng.random_sample((5, 4))
    XA_sparse = csr_matrix(XA)
    XB = rng.random_sample((5, 4))
    XB_sparse = csr_matrix(XB)
    XA_checked, XB_checked = check_pairwise_arrays(XA_sparse, XB_sparse)
    # compare their difference because testing csr matrices for
    # equality with '==' does not work as expected.
    assert_true(issparse(XA_checked))
    assert_equal(abs(XA_sparse - XA_checked).sum(), 0)
    assert_true(issparse(XB_checked))
    assert_equal(abs(XB_sparse - XB_checked).sum(), 0)

    XA_checked, XA_2_checked = check_pairwise_arrays(XA_sparse, XA_sparse)
    assert_true(issparse(XA_checked))
    assert_equal(abs(XA_sparse - XA_checked).sum(), 0)
    assert_true(issparse(XA_2_checked))
    assert_equal(abs(XA_2_checked - XA_checked).sum(), 0)