我们从Python开源项目中,提取了以下26个代码示例,用于说明如何使用scipy.sparse.isspmatrix()。
def write_data(self, result_dict): for key, result in six.iteritems(result_dict): if ss.isspmatrix(result): if np.isnan(result.data).any(): raise ValueError("data {} have nan".format(key)) elif np.isnan(result).any(): raise ValueError("data {} have nan".format(key)) with SimpleTimer("Writing generated data {} to hdf5 file" .format(key), end_in_new_line=False): if key in self.h5f: # self.h5f[key][...] = result raise NotImplementedError("Overwriting not supported.") else: if (isinstance(result, ss.csc_matrix) or isinstance(result, ss.csr_matrix)): # sparse matrix h5sparse.Group(self.h5f).create_dataset(key, data=result) else: self.h5f.create_dataset(key, data=result) self.h5f.flush()
def _maximum_csr_safe(A, B): ''' Safe version of `numpy.maximum` for CSR matrices ''' # fall back on numpy's default if both matrices are dense if not sp.isspmatrix(A) and not sp.isspmatrix(B): return np.maximum(A, B) # if one of the two inputs is sparse and the other is dense, convert the # latter to sparse if not sp.isspmatrix_csr(A): A = sp.csr_matrix(A) if not sp.isspmatrix_csr(B): B = sp.csr_matrix(B) return c_maximum_csr(A, B)
def _check_b(self, A, b): if sp.isspmatrix(b): warnings.warn('PyPardiso requires the right-hand side b to be a dense array for maximum efficiency', SparseEfficiencyWarning) b = b.todense() # pardiso expects fortran (column-major) order if b is a matrix if b.ndim == 2: b = np.asfortranarray(b) if b.shape[0] != A.shape[0]: raise ValueError("Dimension mismatch: Matrix A {} and array b {}".format(A.shape, b.shape)) if b.dtype != np.float64: if b.dtype in [np.float16, np.float32, np.int16, np.int32, np.int64]: warnings.warn("Array b's data type was converted from {} to float64".format(str(b.dtype)), PyPardisoWarning) b = b.astype(np.float64) else: raise TypeError('Dtype {} for array b is not supported'.format(str(b.dtype))) return b
def _graph_is_connected(graph): """ Return whether the graph is connected (True) or Not (False) Parameters ---------- graph : array-like or sparse matrix, shape: (n_samples, n_samples) adjacency matrix of the graph, non-zero weight means an edge between the nodes Returns ------- is_connected : bool True means the graph is fully connected and False means not """ if sparse.isspmatrix(graph): # sparse graph, find all the connected components n_connected_components, _ = connected_components(graph) return n_connected_components == 1 else: # dense graph, find all connected components start from node 0 return _graph_connected_component(graph, 0).sum() == graph.shape[0]
def _validate_for_predict(self, X): check_is_fitted(self, 'support_') X = check_array(X, accept_sparse='csr', dtype=np.float64, order="C") if self._sparse and not sp.isspmatrix(X): X = sp.csr_matrix(X) if self._sparse: X.sort_indices() if sp.issparse(X) and not self._sparse and not callable(self.kernel): raise ValueError( "cannot use sparse input in %r trained on dense data" % type(self).__name__) n_samples, n_features = X.shape if self.kernel == "precomputed": if X.shape[1] != self.shape_fit_[0]: raise ValueError("X.shape[1] = %d should be equal to %d, " "the number of samples at training time" % (X.shape[1], self.shape_fit_[0])) elif n_features != self.shape_fit_[1]: raise ValueError("X.shape[1] = %d should be equal to %d, " "the number of features at training time" % (n_features, self.shape_fit_[1])) return X
def dot(X, Y): if sparse.isspmatrix(X) and sparse.isspmatrix(Y): return X * Y elif sparse.isspmatrix(X) or sparse.isspmatrix(Y): return sparse.csr_matrix(X) * sparse.csr_matrix(Y) return np.asmatrix(X) * np.asmatrix(Y)
def bundle(self, key, path, new_key): """Copy the data to another HDF5 file with new key.""" data = self.get(key) with h5py.File(path) as h5f: if ss.isspmatrix(data) or isinstance(data, h5sparse.Dataset): h5f = h5sparse.Group(h5f) h5f.create_dataset(new_key, data=data)
def epnoclosure(A, sources, targets): """ Compute the baseline on the original graph (i.e. no closure is performed). See `epclosurebatch` for parameters and return value format. """ _check_inputs(targets, sources) N = A[sources, :] if sp.isspmatrix(N): N = N.todense() return _make_return(N, targets, sources)
def _allclose_csr(A, B, **kwrds): ''' CSR matrices-safe equivalent of allclose. Additional keyword are passed to allclose. See `numpy.allclose`. Will call the numpy version if passed dense matrices. ''' # fall back on numpy's allclose if both matrices are dense if not sp.isspmatrix(A) and not sp.isspmatrix(B): return np.allclose(A, B) if not sp.isspmatrix_csr(A): A = sp.csr_matrix(A) if not sp.isspmatrix_csr(B): B = sp.csr_matrix(B) # check indices indices_all = np.all(A.indices == B.indices) if not indices_all: return False # check indices pointers indptr_all = np.all(A.indptr == B.indptr) if not indptr_all: return False # check data return np.allclose(A.data, B.data, **kwrds) # try importing the fast C implementations first, otherwise use the Python # versions provided in this module as a fallback
def getTransitionMatrix(self,probabilities=True): """ If self.P has been given already, we will reuse it and convert it to a sparse csr matrix if needed. Otherwise, we will generate it using the direct or indirect method. Since most solution methods use a probability matrix, this is the default setting. By setting probabilities=False we can also return a rate matrix. """ if self.P is not None: if isspmatrix(self.P): if not isspmatrix_csr(self.P): self.P = self.P.tocsr() else: assert isinstance(self.P, np.ndarray) and self.P.ndim==2 and self.P.shape[0]==self.P.shape[1],'P needs to be a 2d numpy array with an equal number of columns and rows' self.P = csr_matrix(self.P) elif self.direct == True: self.P = self.directInitialMatrix() else: self.P = self.indirectInitialMatrix(self.initialState) if probabilities: P = self.convertToProbabilityMatrix(self.P) else: P = self.convertToRateMatrix(self.P) return P
def _build_graph(self): """Matrix representing a fully connected graph between each sample This basic implementation creates a non-stochastic affinity matrix, so class distributions will exceed 1 (normalization may be desired). """ if self.kernel == 'knn': self.nn_fit = None affinity_matrix = self._get_kernel(self.X_) normalizer = affinity_matrix.sum(axis=0) if sparse.isspmatrix(affinity_matrix): affinity_matrix.data /= np.diag(np.array(normalizer)) else: affinity_matrix /= normalizer[:, np.newaxis] return affinity_matrix
def _build_graph(self): """Graph matrix for Label Spreading computes the graph laplacian""" # compute affinity matrix (or gram matrix) if self.kernel == 'knn': self.nn_fit = None n_samples = self.X_.shape[0] affinity_matrix = self._get_kernel(self.X_) laplacian = graph_laplacian(affinity_matrix, normed=True) laplacian = -laplacian if sparse.isspmatrix(laplacian): diag_mask = (laplacian.row == laplacian.col) laplacian.data[diag_mask] = 0.0 else: laplacian.flat[::n_samples + 1] = 0.0 # set diag to 0.0 return laplacian
def test_sparse_coef(): # Check that the sparse_coef property works clf = ElasticNet() clf.coef_ = [1, 2, 3] assert_true(sp.isspmatrix(clf.sparse_coef_)) assert_equal(clf.sparse_coef_.toarray().tolist()[0], clf.coef_)
def sparse_center_data(X, y, fit_intercept, normalize=False): """ Compute information needed to center data to have mean zero along axis 0. Be aware that X will not be centered since it would break the sparsity, but will be normalized if asked so. """ if fit_intercept: # we might require not to change the csr matrix sometimes # store a copy if normalize is True. # Change dtype to float64 since mean_variance_axis accepts # it that way. if sp.isspmatrix(X) and X.getformat() == 'csr': X = sp.csr_matrix(X, copy=normalize, dtype=np.float64) else: X = sp.csc_matrix(X, copy=normalize, dtype=np.float64) X_offset, X_var = mean_variance_axis(X, axis=0) if normalize: # transform variance to std in-place X_var *= X.shape[0] X_std = np.sqrt(X_var, X_var) del X_var X_std[X_std == 0] = 1 inplace_column_scale(X, 1. / X_std) else: X_std = np.ones(X.shape[1]) y_offset = y.mean(axis=0) y = y - y_offset else: X_offset = np.zeros(X.shape[1]) X_std = np.ones(X.shape[1]) y_offset = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype) return X, y, X_offset, y_offset, X_std
def get_inv_matvec(M, symmetric=False, tol=0): if isdense(M): return LuInv(M).matvec elif isspmatrix(M): if isspmatrix_csr(M) and symmetric: M = M.T return SpLuInv(M).matvec else: return IterInv(M, tol=tol).matvec
def get_OPinv_matvec(A, M, sigma, symmetric=False, tol=0): if sigma == 0: return get_inv_matvec(A, symmetric=symmetric, tol=tol) if M is None: # M is the identity matrix if isdense(A): if (np.issubdtype(A.dtype, np.complexfloating) or np.imag(sigma) == 0): A = np.copy(A) else: A = A + 0j A.flat[::A.shape[1] + 1] -= sigma return LuInv(A).matvec elif isspmatrix(A): A = A - sigma * identity(A.shape[0]) if symmetric and isspmatrix_csr(A): A = A.T return SpLuInv(A.tocsc()).matvec else: return IterOpInv(_aslinearoperator_with_dtype(A), M, sigma, tol=tol).matvec else: if ((not isdense(A) and not isspmatrix(A)) or (not isdense(M) and not isspmatrix(M))): return IterOpInv(_aslinearoperator_with_dtype(A), _aslinearoperator_with_dtype(M), sigma, tol=tol).matvec elif isdense(A) or isdense(M): return LuInv(A - sigma * M).matvec else: OP = A - sigma * M if symmetric and isspmatrix_csr(OP): OP = OP.T return SpLuInv(OP.tocsc()).matvec
def check_svm_model_equal(dense_svm, sparse_svm, X_train, y_train, X_test): dense_svm.fit(X_train.toarray(), y_train) if sparse.isspmatrix(X_test): X_test_dense = X_test.toarray() else: X_test_dense = X_test sparse_svm.fit(X_train, y_train) assert_true(sparse.issparse(sparse_svm.support_vectors_)) assert_true(sparse.issparse(sparse_svm.dual_coef_)) assert_array_almost_equal(dense_svm.support_vectors_, sparse_svm.support_vectors_.toarray()) assert_array_almost_equal(dense_svm.dual_coef_, sparse_svm.dual_coef_.toarray()) if dense_svm.kernel == "linear": assert_true(sparse.issparse(sparse_svm.coef_)) assert_array_almost_equal(dense_svm.coef_, sparse_svm.coef_.toarray()) assert_array_almost_equal(dense_svm.support_, sparse_svm.support_) assert_array_almost_equal(dense_svm.predict(X_test_dense), sparse_svm.predict(X_test)) assert_array_almost_equal(dense_svm.decision_function(X_test_dense), sparse_svm.decision_function(X_test)) assert_array_almost_equal(dense_svm.decision_function(X_test_dense), sparse_svm.decision_function(X_test_dense)) if isinstance(dense_svm, svm.OneClassSVM): msg = "cannot use sparse input in 'OneClassSVM' trained on dense data" else: assert_array_almost_equal(dense_svm.predict_proba(X_test_dense), sparse_svm.predict_proba(X_test), 4) msg = "cannot use sparse input in 'SVC' trained on dense data" if sparse.isspmatrix(X_test): assert_raise_message(ValueError, msg, dense_svm.predict, X_test)
def reduce(self, Y): if sp.isspmatrix(Y): Y = Y.toarray() sketch1, sketch2 = self.__sketch(Y) return np.real(np.fft.ifft(np.fft.fft(sketch1, axis=0) * np.fft.fft(sketch2, axis=0), axis=0))
def aslinearoperator(A): """Return A as a LinearOperator. 'A' may be any of the following types: - ndarray - matrix - sparse matrix (e.g. csr_matrix, lil_matrix, etc.) - LinearOperator - An object with .shape and .matvec attributes See the LinearOperator documentation for additional information. Examples -------- >>> from scipy.sparse.linalg import aslinearoperator >>> M = np.array([[1,2,3],[4,5,6]], dtype=np.int32) >>> aslinearoperator(M) <2x3 MatrixLinearOperator with dtype=int32> """ if isinstance(A, LinearOperator): return A elif isinstance(A, np.ndarray) or isinstance(A, np.matrix): if A.ndim > 2: raise ValueError('array must have ndim <= 2') A = np.atleast_2d(np.asarray(A)) return MatrixLinearOperator(A) elif isspmatrix(A): return MatrixLinearOperator(A) else: if hasattr(A, 'shape') and hasattr(A, 'matvec'): rmatvec = None matmat = None # why not add this too? rmatmat = None dtype = None if hasattr(A, 'rmatvec'): rmatvec = A.rmatvec if hasattr(A, 'matmat'): matmat = A.matmat if hasattr(A, 'rmatmat'): matmat = A.rmatmat if hasattr(A, 'dtype'): dtype = A.dtype return LinearOperator(shape=A.shape, matvec=A.matvec, rmatvec=rmatvec, matmat=matmat, rmatmat=rmatmat, dtype=dtype) else: raise TypeError('type not understood')
def maxmin(A, a=None, b=None, sparse=False): ''' Compute the max-min product of A with itself: [ AP ]_ij = max_k min ( [ A ]_ik, [ A ]_kj ) Parameters ---------- A : array_like A 2D square ndarray, matrix or sparse (CSR) matrix (see `scipy.sparse`). The sparse implementation will be used automatically for sparse matrices. a,b : integer optional; compute only the max-min product between A[a:b,:] and A.T sparse : bool if True, transforms A to CSR matrix format and use the sparse implementation. Return ------ A' : array_like The max-min product of A with itself. A CSR sparse matrix will be returned if the sparse implementation is used, otherwise a numpy matrix. ''' if A.ndim != 2: raise ValueError('expecting 2D array or matrix') N, M = A.shape if N != M: raise ValueError('argument must be a square array') if a is not None: if (a < 0) or (a > N): raise ValueError('a cannot be smaller nor larger than axis dim') if b is not None: if (b < 0) or (b > N): raise ValueError('b cannot be smaller nor larger than axis dim') if (a is not None) and (b is not None): if a > b: raise ValueError('a must be less or equal b') if sp.isspmatrix(A) or sparse: if not sp.isspmatrix_csr(A): A = sp.csr_matrix(A) return maxmin_sparse(A, a, b) else: return np.matrix(maxmin_naive(A, a, b)) # Global variables used by _maxmin_worker (see below)
def object_diff(a, b, pre=''): """Compute all differences between two python variables Parameters ---------- a : object Currently supported: dict, list, tuple, ndarray, int, str, bytes, float. b : object Must be same type as x1. pre : str String to prepend to each line. Returns ------- diffs : str A string representation of the differences. """ out = '' if type(a) != type(b): out += pre + ' type mismatch (%s, %s)\n' % (type(a), type(b)) elif isinstance(a, dict): k1s = _sort_keys(a) k2s = _sort_keys(b) m1 = set(k2s) - set(k1s) if len(m1): out += pre + ' x1 missing keys %s\n' % (m1) for key in k1s: if key not in k2s: out += pre + ' x2 missing key %s\n' % key else: out += object_diff(a[key], b[key], pre + 'd1[%s]' % repr(key)) elif isinstance(a, (list, tuple)): if len(a) != len(b): out += pre + ' length mismatch (%s, %s)\n' % (len(a), len(b)) else: for xx1, xx2 in zip(a, b): out += object_diff(xx1, xx2, pre='') elif isinstance(a, (string_types, int, float, bytes)): if a != b: out += pre + ' value mismatch (%s, %s)\n' % (a, b) elif a is None: pass # b must be None due to our type checking elif isinstance(a, np.ndarray): if not np.array_equal(a, b): out += pre + ' array mismatch\n' elif sparse is not None and sparse.isspmatrix(a): # sparsity and sparse type of b vs a already checked above by type() if b.shape != a.shape: out += pre + (' sparse matrix a and b shape mismatch' '(%s vs %s)' % (a.shape, b.shape)) else: c = a - b c.eliminate_zeros() if c.nnz > 0: out += pre + (' sparse matrix a and b differ on %s ' 'elements' % c.nnz) else: raise RuntimeError(pre + ': unsupported type %s (%s)' % (type(a), a)) return out
def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy): """Aux function used at beginning of fit in linear models""" n_samples, n_features = X.shape if sparse.isspmatrix(X): precompute = False X, y, X_offset, y_offset, X_scale = _preprocess_data( X, y, fit_intercept=fit_intercept, normalize=normalize, return_mean=True) else: # copy was done in fit if necessary X, y, X_offset, y_offset, X_scale = _preprocess_data( X, y, fit_intercept=fit_intercept, normalize=normalize, copy=copy) if hasattr(precompute, '__array__') and ( fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or normalize and not np.allclose(X_scale, np.ones(n_features))): warnings.warn("Gram matrix was provided but X was centered" " to fit intercept, " "or X was normalized : recomputing Gram matrix.", UserWarning) # recompute Gram precompute = 'auto' Xy = None # precompute if n_samples > n_features if isinstance(precompute, six.string_types) and precompute == 'auto': precompute = (n_samples > n_features) if precompute is True: # make sure that the 'precompute' array is contiguous. precompute = np.empty(shape=(n_features, n_features), dtype=X.dtype, order='C') np.dot(X.T, X, out=precompute) if not hasattr(precompute, '__array__'): Xy = None # cannot use Xy if precompute is not Gram if hasattr(precompute, '__array__') and Xy is None: common_dtype = np.find_common_type([X.dtype, y.dtype], []) if y.ndim == 1: # Xy is 1d, make sure it is contiguous. Xy = np.empty(shape=n_features, dtype=common_dtype, order='C') np.dot(X.T, y, out=Xy) else: # Make sure that Xy is always F contiguous even if X or y are not # contiguous: the goal is to make it fast to extract the data for a # specific target. n_targets = y.shape[1] Xy = np.empty(shape=(n_features, n_targets), dtype=common_dtype, order='F') np.dot(y.T, X, out=Xy.T) return X, y, X_offset, y_offset, X_scale, precompute, Xy
def _set_diag(laplacian, value, norm_laplacian): """Set the diagonal of the laplacian matrix and convert it to a sparse format well suited for eigenvalue decomposition Parameters ---------- laplacian : array or sparse matrix The graph laplacian value : float The value of the diagonal norm_laplacian : bool Whether the value of the diagonal should be changed or not Returns ------- laplacian : array or sparse matrix An array of matrix in a form that is well suited to fast eigenvalue decomposition, depending on the band width of the matrix. """ n_nodes = laplacian.shape[0] # We need all entries in the diagonal to values if not sparse.isspmatrix(laplacian): if norm_laplacian: laplacian.flat[::n_nodes + 1] = value else: laplacian = laplacian.tocoo() if norm_laplacian: diag_idx = (laplacian.row == laplacian.col) laplacian.data[diag_idx] = value # If the matrix has a small number of diagonals (as in the # case of structured matrices coming from images), the # dia format might be best suited for matvec products: n_diags = np.unique(laplacian.row - laplacian.col).size if n_diags <= 7: # 3 or less outer diagonals on each side laplacian = laplacian.todia() else: # csr has the fastest matvec and is thus best suited to # arpack laplacian = laplacian.tocsr() return laplacian
def validate_graph(csgraph, directed, dtype=DTYPE, csr_output=True, dense_output=True, copy_if_dense=False, copy_if_sparse=False, null_value_in=0, null_value_out=np.inf, infinity_null=True, nan_null=True): """Routine for validation and conversion of csgraph inputs""" if not (csr_output or dense_output): raise ValueError("Internal: dense or csr output must be true") # if undirected and csc storage, then transposing in-place # is quicker than later converting to csr. if (not directed) and isspmatrix_csc(csgraph): csgraph = csgraph.T if isspmatrix(csgraph): if csr_output: csgraph = csr_matrix(csgraph, dtype=DTYPE, copy=copy_if_sparse) else: csgraph = csgraph_to_dense(csgraph, null_value=null_value_out) elif np.ma.is_masked(csgraph): if dense_output: mask = csgraph.mask csgraph = np.array(csgraph.data, dtype=DTYPE, copy=copy_if_dense) csgraph[mask] = null_value_out else: csgraph = csgraph_from_masked(csgraph) else: if dense_output: csgraph = csgraph_masked_from_dense(csgraph, copy=copy_if_dense, null_value=null_value_in, nan_null=nan_null, infinity_null=infinity_null) mask = csgraph.mask csgraph = np.asarray(csgraph.data, dtype=DTYPE) csgraph[mask] = null_value_out else: csgraph = csgraph_from_dense(csgraph, null_value=null_value_in, infinity_null=infinity_null, nan_null=nan_null) if csgraph.ndim != 2: raise ValueError("compressed-sparse graph must be two dimensional") if csgraph.shape[0] != csgraph.shape[1]: raise ValueError("compressed-sparse graph must be shape (N, N)") return csgraph
def graph_laplacian(csgraph, normed=False, return_diag=False): """ Return the Laplacian matrix of a directed graph. For non-symmetric graphs the out-degree is used in the computation. Parameters ---------- csgraph : array_like or sparse matrix, 2 dimensions compressed-sparse graph, with shape (N, N). normed : bool, optional If True, then compute normalized Laplacian. return_diag : bool, optional If True, then return diagonal as well as laplacian. Returns ------- lap : ndarray The N x N laplacian matrix of graph. diag : ndarray The length-N diagonal of the laplacian matrix. diag is returned only if return_diag is True. Notes ----- The Laplacian matrix of a graph is sometimes referred to as the "Kirchoff matrix" or the "admittance matrix", and is useful in many parts of spectral graph theory. In particular, the eigen-decomposition of the laplacian matrix can give insight into many properties of the graph. For non-symmetric directed graphs, the laplacian is computed using the out-degree of each node. """ if csgraph.ndim != 2 or csgraph.shape[0] != csgraph.shape[1]: raise ValueError('csgraph must be a square matrix or array') if normed and (np.issubdtype(csgraph.dtype, np.int) or np.issubdtype(csgraph.dtype, np.uint)): csgraph = check_array(csgraph, dtype=np.float64, accept_sparse=True) if sparse.isspmatrix(csgraph): return _laplacian_sparse(csgraph, normed=normed, return_diag=return_diag) else: return _laplacian_dense(csgraph, normed=normed, return_diag=return_diag)
def _fix_connectivity(X, connectivity, n_components=None, affinity="euclidean"): """ Fixes the connectivity matrix - copies it - makes it symmetric - converts it to LIL if necessary - completes it if necessary """ n_samples = X.shape[0] if (connectivity.shape[0] != n_samples or connectivity.shape[1] != n_samples): raise ValueError('Wrong shape for connectivity matrix: %s ' 'when X is %s' % (connectivity.shape, X.shape)) # Make the connectivity matrix symmetric: connectivity = connectivity + connectivity.T # Convert connectivity matrix to LIL if not sparse.isspmatrix_lil(connectivity): if not sparse.isspmatrix(connectivity): connectivity = sparse.lil_matrix(connectivity) else: connectivity = connectivity.tolil() # Compute the number of nodes n_components, labels = connected_components(connectivity) if n_components > 1: warnings.warn("the number of connected components of the " "connectivity matrix is %d > 1. Completing it to avoid " "stopping the tree early." % n_components, stacklevel=2) # XXX: Can we do without completing the matrix? for i in xrange(n_components): idx_i = np.where(labels == i)[0] Xi = X[idx_i] for j in xrange(i): idx_j = np.where(labels == j)[0] Xj = X[idx_j] D = pairwise_distances(Xi, Xj, metric=affinity) ii, jj = np.where(D == np.min(D)) ii = ii[0] jj = jj[0] connectivity[idx_i[ii], idx_j[jj]] = True connectivity[idx_j[jj], idx_i[ii]] = True return connectivity, n_components ############################################################################### # Hierarchical tree building functions