我们从Python开源项目中,提取了以下28个代码示例,用于说明如何使用scipy.sparse.isspmatrix_csr()。
def _maximum_csr_safe(A, B): ''' Safe version of `numpy.maximum` for CSR matrices ''' # fall back on numpy's default if both matrices are dense if not sp.isspmatrix(A) and not sp.isspmatrix(B): return np.maximum(A, B) # if one of the two inputs is sparse and the other is dense, convert the # latter to sparse if not sp.isspmatrix_csr(A): A = sp.csr_matrix(A) if not sp.isspmatrix_csr(B): B = sp.csr_matrix(B) return c_maximum_csr(A, B)
def _sparse_matrix_data(X):
    """Prepare the sparse matrix for conversion to TensorFlow.

    The matrix is converted to CSR first when it is in any other sparse
    format, then handed to `_csr_data`.

    Parameters
    ----------
    X : sparse matrix

    Returns
    -------
    indices : numpy array with shape (X.nnz, 2)
        describing the indices with values in X.
    values : numpy array with shape (X.nnz)
        describing the values at each index
    """
    csr = X if sp.isspmatrix_csr(X) else X.tocsr()
    return _csr_data(csr)
def test_20news_vectorized():
    """Check format, shape and dtype of the vectorized 20 newsgroups data.

    The test is currently disabled: it raises ``SkipTest`` before touching
    the dataset, so the checks below are never executed.
    """
    # This test is slow.
    raise SkipTest("Test too slow.")

    # (subset name, expected number of samples)
    expected = (
        ("train", 11314),
        ("test", 7532),
        ("all", 11314 + 7532),
    )
    for subset, n_samples in expected:
        bunch = datasets.fetch_20newsgroups_vectorized(subset=subset)
        assert_true(sp.isspmatrix_csr(bunch.data))
        assert_equal(bunch.data.shape, (n_samples, 107428))
        assert_equal(bunch.target.shape[0], n_samples)
        assert_equal(bunch.data.dtype, np.float64)
def _document_frequency(X): """Count the number of non-zero values for each feature in sparse X.""" if sp.isspmatrix_csr(X): return np.bincount(X.indices, minlength=X.shape[1]) else: return np.diff(sp.csc_matrix(X, copy=False).indptr)
def inverse_transform(self, X, copy=None):
    """Scale back the data to the original representation

    Parameters
    ----------
    X : array-like with shape [n_samples, n_features]
        The data used to scale along the features axis.
    copy : bool or None, optional
        Whether to work on a copy of X. When None, ``self.copy`` is used.

    Returns
    -------
    X : the rescaled data (a CSR matrix for sparse input, otherwise an
        ndarray).

    Raises
    ------
    ValueError
        If X is sparse and ``self.with_mean`` is set.
    """
    check_is_fitted(self, 'std_')
    if copy is None:
        copy = self.copy

    if not sparse.issparse(X):
        # dense path: undo scaling first, then centering
        X = np.asarray(X)
        if copy:
            X = X.copy()
        if self.with_std:
            X *= self.std_
        if self.with_mean:
            X += self.mean_
        return X

    if self.with_mean:
        raise ValueError(
            "Cannot uncenter sparse matrices: pass `with_mean=False` "
            "instead See docstring for motivation and alternatives.")
    if sparse.isspmatrix_csr(X):
        if copy:
            X = X.copy()
    else:
        # the format conversion replaces X, so no explicit copy is made
        X = X.tocsr()
    if self.std_ is not None:
        inplace_column_scale(X, self.std_)
    return X
def _document_frequency(X): """Count the number of non-zero values for each feature in sparse X.""" if sp.isspmatrix_csr(X): # return np.sum(X,axis=0) return bincount(X.indices, minlength=X.shape[1]) else: return np.diff(sp.csc_matrix(X, copy=False).indptr)
def _document_frequency(X): """Count the number of non-zero values for each feature in sparse X. (copied from scikit-learn) """ if sp.isspmatrix_csr(X): return np.bincount(X.indices, minlength=X.shape[1]) else: return np.diff(sp.csc_matrix(X, copy=False).indptr)
def fit(self, X, y=None):
    """Learn the document length and document frequency vector
    (if necessary).

    Parameters
    ----------
    X : sparse matrix, [n_samples, n_features]
        a matrix of term/token counts
    y : ignored
        Not used by this method.

    Returns
    -------
    self
    """
    X = check_array(X, ['csr'], copy=self.copy)
    scheme_t, scheme_d, scheme_n = _validate_smart_notation(self.weighting)

    self.dl_ = _document_length(X)

    # 'd' is included so that fit() computes the document frequencies for
    # exactly the same document schemes as fit_transform() does.
    if scheme_d in 'stpd' or self.compute_df:
        self.df_ = _document_frequency(X)
    else:
        self.df_ = None

    # number of stored (CSR) or non-zero (other input) entries per row
    if sp.isspmatrix_csr(X):
        self.du_ = np.diff(X.indptr)
    else:
        self.du_ = X.shape[-1] - (X == 0).sum(axis=1)
    self._n_features = X.shape[1]

    df_n_samples = len(self.dl_) if self.df_ is not None else None

    if scheme_n.endswith('p') and self.norm_pivot is None:
        # Need to compute the pivot if it's not provided
        _, self.norm_pivot = _smart_tfidf(X, self.weighting, self.df_,
                                          df_n_samples,
                                          norm_alpha=self.norm_alpha,
                                          norm_pivot=self.norm_pivot,
                                          return_pivot=True)
    return self
def fit_transform(self, X, y=None):
    """Apply document term weighting and normalization on text features

    Also stores the fitted statistics (``dl_``, ``df_``, ``du_``,
    ``_n_features``) and the pivot returned by `_smart_tfidf` on the
    instance.

    Parameters
    ----------
    X : sparse matrix, [n_samples, n_features]
        a matrix of term/token counts
    y : ignored
        Not used by this method.

    Returns
    -------
    X : the weighted matrix returned by `_smart_tfidf`
    """
    X = check_array(X, ['csr'], copy=self.copy)
    scheme_t, scheme_d, scheme_n = _validate_smart_notation(self.weighting)

    self.dl_ = _document_length(X)

    if scheme_d in 'stpd' or self.compute_df:
        self.df_ = _document_frequency(X)
    else:
        self.df_ = None

    # number of stored (CSR) or non-zero (other input) entries per row
    if sp.isspmatrix_csr(X):
        self.du_ = np.diff(X.indptr)
    else:
        self.du_ = X.shape[-1] - (X == 0).sum(axis=1)
    self._n_features = X.shape[1]

    # the sample count is only passed on when document frequencies exist
    df_n_samples = len(self.dl_) if self.df_ is not None else None

    X, self.norm_pivot = _smart_tfidf(X, self.weighting, self.df_,
                                      df_n_samples,
                                      norm_alpha=self.norm_alpha,
                                      norm_pivot=self.norm_pivot,
                                      return_pivot=True)
    return X
def _allclose_csr(A, B, **kwrds): ''' CSR matrices-safe equivalent of allclose. Additional keyword are passed to allclose. See `numpy.allclose`. Will call the numpy version if passed dense matrices. ''' # fall back on numpy's allclose if both matrices are dense if not sp.isspmatrix(A) and not sp.isspmatrix(B): return np.allclose(A, B) if not sp.isspmatrix_csr(A): A = sp.csr_matrix(A) if not sp.isspmatrix_csr(B): B = sp.csr_matrix(B) # check indices indices_all = np.all(A.indices == B.indices) if not indices_all: return False # check indices pointers indptr_all = np.all(A.indptr == B.indptr) if not indptr_all: return False # check data return np.allclose(A.data, B.data, **kwrds) # try importing the fast C implementations first, otherwise use the Python # versions provided in this module as a fallback
def _check_A(self, A): if A.shape[0] != A.shape[1]: raise ValueError('Matrix A needs to be square, but has shape: {}'.format(A.shape)) if sp.isspmatrix_csr(A): self._solve_transposed = False self.set_iparm(12,0) elif sp.isspmatrix_csc(A): self._solve_transposed = True self.set_iparm(12,1) else: msg = 'PyPardiso requires matrix A to be in CSR or CSC format, but matrix A is: {}'.format(type(A)) raise TypeError(msg) # scipy allows unsorted csr-indices, which lead to completely wrong pardiso results if not A.has_sorted_indices: A.sort_indices() # scipy allows csr matrices with empty rows. a square matrix with an empty row is singular. calling # pardiso with a matrix A that contains empty rows leads to a segfault, same applies for csc with # empty columns if not np.diff(A.indptr).all(): row_col = 'column' if self._solve_transposed else 'row' raise ValueError('Matrix A is singular, because it contains empty {}(s)'.format(row_col)) if A.dtype != np.float64: raise TypeError('PyPardiso currently only supports float64, but matrix A has dtype: {}'.format(A.dtype))
def test_factorized_csc_matrix():
    """`factorized` on a CSC copy of A must solve identically to `factorized(A)`."""
    ps.remove_stored_factorization()
    ps.free_memory()
    A, b = create_test_A_b_rand()

    solve_from_original = factorized(A)
    solve_from_csc = factorized(A.tocsc())

    # the matrix bound into the CSC-based solver is expected to be CSR
    bound_matrix = solve_from_csc.args[0]
    assert sp.isspmatrix_csr(bound_matrix)

    np.testing.assert_array_equal(solve_from_original(b), solve_from_csc(b))
def test_spsolve_csc_matrix():
    """`spsolve` on a CSC copy of A must give the same solution as on A itself."""
    ps.remove_stored_factorization()
    ps.free_memory()
    A, b = create_test_A_b_rand()

    solution_csc = spsolve(A.tocsc(), b)
    # after solving with CSC input the stored factorized matrix must be CSR
    assert sp.isspmatrix_csr(ps.factorized_A)

    solution_original = spsolve(A, b)
    np.testing.assert_array_equal(solution_original, solution_csc)
def getTransitionMatrix(self, probabilities=True):
    """
    Return the transition matrix, building or converting self.P as needed.

    If self.P is already set it is reused: a sparse matrix is converted to
    CSR when it is in another format, and a square 2d numpy array is wrapped
    in a csr_matrix. If self.P is None it is generated with the direct
    method when self.direct is True, otherwise with the indirect method
    starting from self.initialState. The result is stored back on self.P.

    Since most solution methods use a probability matrix, this is the
    default setting. With probabilities=False a rate matrix is returned
    instead.
    """
    if self.P is None:
        if self.direct == True:
            self.P = self.directInitialMatrix()
        else:
            self.P = self.indirectInitialMatrix(self.initialState)
    elif not isspmatrix(self.P):
        is_square_2d_array = (isinstance(self.P, np.ndarray)
                              and self.P.ndim == 2
                              and self.P.shape[0] == self.P.shape[1])
        assert is_square_2d_array, 'P needs to be a 2d numpy array with an equal number of columns and rows'
        self.P = csr_matrix(self.P)
    elif not isspmatrix_csr(self.P):
        self.P = self.P.tocsr()

    if probabilities:
        convert = self.convertToProbabilityMatrix
    else:
        convert = self.convertToRateMatrix
    return convert(self.P)
def get_inv_matvec(M, symmetric=False, tol=0):
    """Return the `matvec` of the inverse-operator wrapper suited to M.

    Dense input uses `LuInv`, sparse-matrix input uses `SpLuInv`, and
    anything else uses `IterInv` with the given tolerance. A CSR matrix
    flagged as symmetric is transposed before being passed to `SpLuInv`.

    Parameters
    ----------
    M : dense array, sparse matrix or other operator
    symmetric : bool, optional
        Only consulted for CSR input.
    tol : float, optional
        Only forwarded to `IterInv`.
    """
    if isdense(M):
        return LuInv(M).matvec
    if not isspmatrix(M):
        return IterInv(M, tol=tol).matvec

    if isspmatrix_csr(M) and symmetric:
        M = M.T
    return SpLuInv(M).matvec
def get_OPinv_matvec(A, M, sigma, symmetric=False, tol=0):
    """Return the `matvec` of the wrapper built for the shifted operator.

    With ``sigma == 0`` the call is delegated to `get_inv_matvec(A)`.
    Otherwise the operator ``A - sigma * M`` (``M is None`` standing for
    the identity) is formed explicitly when A and M are dense or sparse
    matrices and wrapped in `LuInv` (dense) or `SpLuInv` (sparse, as CSC);
    any other operand type goes through `IterOpInv`.

    Parameters
    ----------
    A : dense array, sparse matrix or other operator
    M : same kinds as A, or None for the identity
    sigma : scalar shift
    symmetric : bool, optional
        When set, a CSR operator is transposed before the CSC conversion.
    tol : float, optional
        Forwarded to the iterative wrappers only.
    """
    if sigma == 0:
        return get_inv_matvec(A, symmetric=symmetric, tol=tol)

    if M is None:
        # M is the identity matrix
        if isdense(A):
            keep_dtype = (np.issubdtype(A.dtype, np.complexfloating)
                          or np.imag(sigma) == 0)
            # work on a copy; promote to complex when sigma requires it
            A = np.copy(A) if keep_dtype else A + 0j
            # a stride of n_cols + 1 over the flat view walks the diagonal
            A.flat[::A.shape[1] + 1] -= sigma
            return LuInv(A).matvec
        if isspmatrix(A):
            A = A - sigma * identity(A.shape[0])
            if symmetric and isspmatrix_csr(A):
                A = A.T
            return SpLuInv(A.tocsc()).matvec
        return IterOpInv(_aslinearoperator_with_dtype(A),
                         M, sigma, tol=tol).matvec

    a_is_matrix = isdense(A) or isspmatrix(A)
    if not a_is_matrix or not (isdense(M) or isspmatrix(M)):
        return IterOpInv(_aslinearoperator_with_dtype(A),
                         _aslinearoperator_with_dtype(M),
                         sigma, tol=tol).matvec
    if isdense(A) or isdense(M):
        return LuInv(A - sigma * M).matvec

    OP = A - sigma * M
    if symmetric and isspmatrix_csr(OP):
        OP = OP.T
    return SpLuInv(OP.tocsc()).matvec
def _document_frequency(X): """Count the number of non-zero values for each feature in sparse X.""" if sp.isspmatrix_csr(X): return bincount(X.indices, minlength=X.shape[1]) else: return np.diff(sp.csc_matrix(X, copy=False).indptr)
def test_add_dummy_feature_csr():
    """`add_dummy_feature` keeps CSR format and prepends a column of ones."""
    original = sparse.csr_matrix([[1, 0], [0, 1], [0, 1]])
    X = add_dummy_feature(original)

    assert_true(sparse.isspmatrix_csr(X), X)

    expected_dense = [[1, 1, 0], [1, 0, 1], [1, 0, 1]]
    assert_array_equal(X.toarray(), expected_dense)
def transpose(self):
    """Transpose whole object.

    Data matrix is transposed, observations and variables are interchanged.
    The data is read from the backing file when the object is backed,
    otherwise from memory. A CSR data matrix is converted back to CSR after
    transposition.

    Returns
    -------
    A new AnnData built from the transposed data.
    """
    X = self.file._file['X'] if self.isbacked else self._X
    X_t = X.T.tocsr() if sparse.isspmatrix_csr(X) else X.T
    return AnnData(X_t, self._var, self._obs, self._uns,
                   self._varm.flipped(), self._obsm.flipped(),
                   filename=self.filename)
def remove_bridge_reads(a_mat):
    """
    Remove some edges from the similarity graph.

    When the neighbors N(i) of a node i are no longer connected to each
    other once i itself is removed from the graph, the edges between i and
    j are cut for every j that is not in the largest connected group among
    N(i).

    Parameters
    ----------
    a_mat : scipy.sparse matrix (similarity matrix)

    Returns
    ----------
    a_clr : scipy.sparse csr matrix (pre-processed similarity matrix)
    """
    Ikill = []
    Jkill = []
    if not isspmatrix_csr(a_mat):
        a_mat = a_mat.tocsr()

    for i in range(a_mat.shape[0]):
        (_, J, _) = find(a_mat[i, :])
        if len(J) == 0:
            continue
        Jl = list(set(J))
        # sub-graph induced by the neighbors of i (i itself is left out
        # unless it is its own neighbor)
        a_r = a_mat[Jl, :].tocsc()
        a_r = a_r[:, Jl]
        Jl = np.array(Jl)
        (n_c, lbl) = connected_components(a_r, directed=False,
                                          return_labels=True)
        if n_c > 1:
            # size of every component; argmax keeps the first of the largest
            sizeccs = np.bincount(lbl, minlength=n_c)
            ccmax = np.argmax(sizeccs)
            away_idx = np.where(lbl != ccmax)[0]
            away_nbrs = list(Jl[away_idx])
            Ikill.extend([i] * len(away_nbrs))
            Jkill.extend(away_nbrs)

    # explicit integer dtype so that empty index lists do not become float
    Ikill = np.array(Ikill, dtype=int)
    Jkill = np.array(Jkill, dtype=int)
    Vkill = np.ones(Ikill.size)
    kill_mat = coo_matrix((Vkill, (Ikill, Jkill)),
                          shape=a_mat.shape, dtype=int).tocsr()
    kill_mat = sym_max(kill_mat)
    # keep the original weights on the edges selected for removal, then
    # subtract them from the similarity matrix
    kill_mat = kill_mat.multiply(a_mat)
    a_clr = a_mat - kill_mat
    if not isspmatrix_csr(a_clr):
        a_clr = a_clr.tocsr()

    return a_clr


###############################################################################
###### Spectral ordering related functions (gets coarse-grained layout) #######
###############################################################################
def reorder_mat(A, thr_list, min_cc_len, VERB):
    """Order the connected components of A with the spectral algorithm.

    Each connected component larger than `min_cc_len` is reordered with the
    permutation that sorts the vector returned by `get_fiedler`. If the
    bandwidth of the reordered sub-matrix is 80 or more, the component is
    queued again and retried in the next pass on a thresholded sub-matrix
    (entries above ``thr_list[n_loop]``, cleaned by `remove_bridge_reads`).

    Parameters
    ----------
    A : scipy.sparse matrix (similarity matrix)
    thr_list : sequence of thresholds, one per pass
    min_cc_len : components with at most this many nodes are dropped
    VERB : verbosity level; messages are printed when ``VERB >= 2``

    Returns
    -------
    ccs_ord : list of index arrays, one ordered component each
    """
    if not isspmatrix_csr(A):
        A = A.tocsr()

    # Initialization.
    ccs_ord = []
    # Create list of unordered connected components
    todo_ccs = [np.arange(A.shape[0])]
    todo_next = []
    n_loop = 0

    while len(todo_ccs) > 0:
        # NOTE(review): raises IndexError if components are still pending
        # after every threshold in thr_list has been used - confirm callers
        # pass enough thresholds.
        thr_sub = thr_list[n_loop]  # starts at 0.4 for n_loop=0

        # Reorder each of them
        for cc in todo_ccs:
            # The first pass uses A as is, in order not to make the
            # preprocessing twice. We could also remove the preprocessing
            # from the pipeline and do it here.
            if n_loop > 0:
                A_sub = A[cc, :][:, cc]
                A_sub = remove_bridge_reads(A_sub.multiply(A_sub > thr_sub))
            else:
                A_sub = A

            # Compute connected components
            (n_cc, labels) = connected_components(A_sub, directed=False,
                                                  return_labels=True)

            # Reorder each cc with spectral and keep the ordering if it
            # looks OK
            for i_cc in range(n_cc):
                cc_sub = np.argwhere(labels == i_cc)[:, 0]
                if len(cc_sub) <= min_cc_len:
                    continue
                msg = (" Running spectral algorithm in connected"
                       "component of size %d..." % (len(cc_sub)))
                oprint(msg, cond=(VERB >= 2))
                (_, fidvec) = get_fiedler(A_sub[cc_sub, :][:, cc_sub])
                permu = np.argsort(fidvec)
                (ii, jj, _) = find(A_sub[cc_sub[permu], :][:, cc_sub[permu]])
                # bandwidth: largest distance of a non-zero from the diagonal
                bw = max(abs(ii - jj))
                if bw >= 80:
                    oprint("Bandwidth larger than 80 in reordered matrix.",
                           cond=(VERB >= 2))
                    todo_next.append(cc[cc_sub])
                else:
                    ccs_ord.append(cc[cc_sub[permu]])

        todo_ccs = todo_next
        todo_next = []
        n_loop += 1

    return ccs_ord
def reorder_mat_par(A, thr_list, opts):
    """Parallel variant of the component reordering, using a process pool.

    The connected components of A larger than ``opts['MIN_CC_LEN']`` are
    dispatched to `reord_submat` through a `multiprocessing.Pool`. Each
    worker returns the components it managed to order and those that must be
    retried; the latter are processed again in the next pass with the next
    threshold of `thr_list`.

    Parameters
    ----------
    A : scipy.sparse matrix (similarity matrix)
    thr_list : sequence of thresholds, one per pass
    opts : dict
        Must provide 'N_PROC' and 'MIN_CC_LEN'; it is also forwarded to
        `reord_submat`.

    Returns
    -------
    ccs_ord : list of ordered components collected from the workers
    """
    # A quarter of the configured processes, but never fewer than one:
    # Pool() rejects a process count of 0.
    N_PROC = max(1, int(opts['N_PROC']) // 4)
    min_cc_len = opts['MIN_CC_LEN']

    if not isspmatrix_csr(A):
        A = A.tocsr()
    # Bound after the conversion so that the workers receive the CSR matrix.
    partial_reorder = partial(reord_submat, A=A, opts=opts)

    # Initialization.
    ccs_ord = []
    todo_next = []
    n_loop = 0

    # Create list of unordered connected components
    todo_ccs = []
    (ncs, lbls) = connected_components(A, directed=False, return_labels=True)
    for nc in range(ncs):
        cc_sub = np.argwhere(lbls == nc)[:, 0]
        if len(cc_sub) <= min_cc_len:
            continue
        todo_ccs.append(cc_sub)

    while len(todo_ccs) > 0:
        # NOTE(review): raises IndexError if components are still pending
        # after every threshold in thr_list has been used - confirm callers
        # pass enough thresholds.
        thr_sub = thr_list[n_loop]  # starts at 0.4 for n_loop=0
        args_list = zip(repeat(thr_sub), todo_ccs)

        pool = Pool(processes=N_PROC)
        try:
            results = pool.map(partial_reorder, args_list)
        finally:
            # release the worker processes even if a task raised
            pool.close()
            pool.join()

        for (sub_ccs_ord, sub_todo_next) in results:
            ccs_ord += sub_ccs_ord
            todo_next += sub_todo_next

        todo_ccs = todo_next
        todo_next = []
        n_loop += 1

    return ccs_ord
def maxmin(A, a=None, b=None, sparse=False):
    """
    Compute the max-min product of A with itself:

        [ AP ]_ij = max_k min ( [ A ]_ik, [ A ]_kj )

    Parameters
    ----------
    A : array_like
        A 2D square ndarray, matrix or sparse (CSR) matrix (see
        `scipy.sparse`). The sparse implementation will be used
        automatically for sparse matrices.
    a,b : integer
        optional; compute only the max-min product between A[a:b,:] and A.T
    sparse : bool
        if True, transforms A to CSR matrix format and use the sparse
        implementation.

    Return
    ------
    A' : array_like
        The max-min product of A with itself. A CSR sparse matrix will be
        returned if the sparse implementation is used, otherwise a numpy
        matrix.

    Raises
    ------
    ValueError
        If A is not 2D and square, if a or b lies outside [0, N], or if
        a > b.
    """
    if A.ndim != 2:
        raise ValueError('expecting 2D array or matrix')
    n_rows, n_cols = A.shape
    if n_rows != n_cols:
        raise ValueError('argument must be a square array')

    # bounds are only validated when they are actually supplied
    if a is not None and (a < 0 or a > n_rows):
        raise ValueError('a cannot be smaller nor larger than axis dim')
    if b is not None and (b < 0 or b > n_rows):
        raise ValueError('b cannot be smaller nor larger than axis dim')
    if a is not None and b is not None and a > b:
        raise ValueError('a must be less or equal b')

    if not (sp.isspmatrix(A) or sparse):
        return np.matrix(maxmin_naive(A, a, b))

    if not sp.isspmatrix_csr(A):
        A = sp.csr_matrix(A)
    return maxmin_sparse(A, a, b)


# Global variables used by _maxmin_worker (see below)