Python scipy.sparse 模块,isspmatrix_csr() 实例源码


项目:knowledge_linker    作者:glciampaglia    | 项目源码 | 文件源码
def _maximum_csr_safe(A, B):
    Safe version of `numpy.maximum` for CSR matrices
    # fall back on numpy's default if both matrices are dense
    if not sp.isspmatrix(A) and not sp.isspmatrix(B):
        return np.maximum(A, B)

    # if one of the two inputs is sparse and the other is dense, convert the
    # latter to sparse
    if not sp.isspmatrix_csr(A):
        A = sp.csr_matrix(A)
    if not sp.isspmatrix_csr(B):
        B = sp.csr_matrix(B)

    return c_maximum_csr(A, B)
项目:muffnn    作者:civisanalytics    | 项目源码 | 文件源码
def _sparse_matrix_data(X):
    """Prepare the sparse matrix for conversion to TensorFlow.

    Parameters
    ----------
    X : sparse matrix

    Returns
    -------
    indices : numpy array with shape (X.nnz, 2)
              describing the indices with values in X.
    values : numpy array with shape (X.nnz)
             describing the values at each index
    """
    # `_csr_data` is only ever handed a CSR matrix; other sparse formats are
    # converted first.
    if sp.isspmatrix_csr(X):
        return _csr_data(X)
    else:
        return _csr_data(X.tocsr())
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_20news_vectorized():
    """Check shape and dtype of the vectorized 20 newsgroups subsets.

    The test is currently disabled: it raises ``SkipTest`` immediately, so
    the checks below are kept only for reference.
    """
    # This test is slow.
    raise SkipTest("Test too slow.")

    # NOTE(review): the first argument of every assert_equal call was missing
    # in the source; `bunch.data.shape`, `bunch.target.shape[0]` and
    # `bunch.data.dtype` were reconstructed from the expected values --
    # confirm against the upstream test.
    bunch = datasets.fetch_20newsgroups_vectorized(subset="train")
    assert_equal(bunch.data.shape, (11314, 107428))
    assert_equal(bunch.target.shape[0], 11314)
    assert_equal(bunch.data.dtype, np.float64)

    bunch = datasets.fetch_20newsgroups_vectorized(subset="test")
    assert_equal(bunch.data.shape, (7532, 107428))
    assert_equal(bunch.target.shape[0], 7532)
    assert_equal(bunch.data.dtype, np.float64)

    bunch = datasets.fetch_20newsgroups_vectorized(subset="all")
    assert_equal(bunch.data.shape, (11314 + 7532, 107428))
    assert_equal(bunch.target.shape[0], 11314 + 7532)
    assert_equal(bunch.data.dtype, np.float64)
项目:deepcut    作者:rkcosmos    | 项目源码 | 文件源码
def _document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X."""
    if sp.isspmatrix_csr(X):
        return np.bincount(X.indices, minlength=X.shape[1])
        return np.diff(sp.csc_matrix(X, copy=False).indptr)
项目:AutoML-Challenge    作者:postech-mlg-exbrain    | 项目源码 | 文件源码
def inverse_transform(self, X, copy=None):
        """Scale back the data to the original representation

        Parameters
        ----------
        X : array-like with shape [n_samples, n_features]
            The data used to scale along the features axis.
        copy : bool or None, optional
            Whether to work on a copy of X. When None, ``self.copy`` is used.

        Returns
        -------
        X : the rescaled data (a CSR matrix for sparse input, an ndarray
            otherwise).

        Raises
        ------
        ValueError
            If X is sparse and ``self.with_mean`` is set.
        """
        check_is_fitted(self, 'std_')

        copy = copy if copy is not None else self.copy
        if sparse.issparse(X):
            if self.with_mean:
                raise ValueError(
                    "Cannot uncenter sparse matrices: pass `with_mean=False` "
                    "instead See docstring for motivation and alternatives.")
            if not sparse.isspmatrix_csr(X):
                # the format conversion already yields a new matrix, so a
                # second copy is not needed
                X = X.tocsr()
                copy = False
            if copy:
                X = X.copy()
            if self.std_ is not None:
                inplace_column_scale(X, self.std_)
        else:
            X = np.asarray(X)
            if copy:
                X = X.copy()
            if self.with_std:
                X *= self.std_
            if self.with_mean:
                X += self.mean_
        return X
项目:2016CCF_BDCI_Sougou    作者:coderSkyChen    | 项目源码 | 文件源码
def _document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X."""

    if sp.isspmatrix_csr(X):
        # return np.sum(X,axis=0)
        return bincount(X.indices, minlength=X.shape[1])


        return np.diff(sp.csc_matrix(X, copy=False).indptr)
项目:2016CCF-sougou    作者:prozhuchen    | 项目源码 | 文件源码
def _document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X."""

    if sp.isspmatrix_csr(X):
        # return np.sum(X,axis=0)
        return bincount(X.indices, minlength=X.shape[1])


        return np.diff(sp.csc_matrix(X, copy=False).indptr)
项目:vec4ir    作者:lgalke    | 项目源码 | 文件源码
def _document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X."""
    if sp.isspmatrix_csr(X):
        return np.bincount(X.indices, minlength=X.shape[1])
        return np.diff(sp.csc_matrix(X, copy=False).indptr)
项目:FreeDiscovery    作者:FreeDiscovery    | 项目源码 | 文件源码
def _document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X.
    (copied from scikit-learn)
    if sp.isspmatrix_csr(X):
        return np.bincount(X.indices, minlength=X.shape[1])
        return np.diff(sp.csc_matrix(X, copy=False).indptr)
项目:FreeDiscovery    作者:FreeDiscovery    | 项目源码 | 文件源码
def fit(self, X, y=None):
        """Learn the document length and document frequency vector
        (if necessary).

        Parameters
        ----------
        X : sparse matrix, [n_samples, n_features]
            a matrix of term/token counts
        y : unused

        Returns
        -------
        self
        """
        X = check_array(X, ['csr'], copy=self.copy)
        scheme_t, scheme_d, scheme_n = _validate_smart_notation(self.weighting)
        self.dl_ = _document_length(X)
        if scheme_d in 'stp' or self.compute_df:
            self.df_ = _document_frequency(X)
        else:
            self.df_ = None
        if sp.isspmatrix_csr(X):
            # number of stored entries in each row
            self.du_ = np.diff(X.indptr)
        else:
            # number of non-zero entries in each row
            self.du_ = X.shape[-1] - (X == 0).sum(axis=1)
        self._n_features = X.shape[1]

        if self.df_ is not None:
            df_n_samples = len(self.dl_)
        else:
            df_n_samples = None

        if scheme_n.endswith('p') and self.norm_pivot is None:
            # Need to compute the pivot if it's not provided
            # NOTE(review): the tail of this call was missing in the source.
            # `df_n_samples` is passed because it is computed just above and
            # used nowhere else; any further keyword arguments (the result is
            # unpacked as a 2-tuple) must be restored from `_smart_tfidf`'s
            # signature.
            _, self.norm_pivot = _smart_tfidf(X, self.weighting, self.df_,
                                              df_n_samples)

        return self
项目:FreeDiscovery    作者:FreeDiscovery    | 项目源码 | 文件源码
def fit_transform(self, X, y=None):
        """Apply document term weighting and normalization on text features

        Parameters
        ----------
        X : sparse matrix, [n_samples, n_features]
            a matrix of term/token counts
        y : unused

        Returns
        -------
        X : the first element of the pair returned by ``_smart_tfidf``
        """
        X = check_array(X, ['csr'], copy=self.copy)

        scheme_t, scheme_d, scheme_n = _validate_smart_notation(self.weighting)
        self.dl_ = _document_length(X)
        # NOTE(review): `fit` tests `scheme_d in 'stp'` (without 'd') --
        # confirm which set of schemes is intended.
        if scheme_d in 'stpd' or self.compute_df:
            self.df_ = _document_frequency(X)
        else:
            self.df_ = None
        if sp.isspmatrix_csr(X):
            # number of stored entries in each row
            self.du_ = np.diff(X.indptr)
        else:
            # number of non-zero entries in each row
            self.du_ = X.shape[-1] - (X == 0).sum(axis=1)
        self._n_features = X.shape[1]

        if self.df_ is not None:
            df_n_samples = len(self.dl_)
        else:
            df_n_samples = None

        # NOTE(review): the tail of this call was missing in the source.
        # `df_n_samples` is passed because it is computed just above and used
        # nowhere else; any further keyword arguments (the result is unpacked
        # as a 2-tuple) must be restored from `_smart_tfidf`'s signature.
        X, self.norm_pivot = _smart_tfidf(X, self.weighting, self.df_,
                                          df_n_samples)
        return X
项目:2016_CCFsougou    作者:dhdsjy    | 项目源码 | 文件源码
def _document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X."""

    if sp.isspmatrix_csr(X):
        # return np.sum(X,axis=0)
        return bincount(X.indices, minlength=X.shape[1])


        return np.diff(sp.csc_matrix(X, copy=False).indptr)
项目:knowledge_linker    作者:glciampaglia    | 项目源码 | 文件源码
def _allclose_csr(A, B, **kwrds):
    CSR matrices-safe equivalent of allclose. Additional keyword are passed to
    allclose. See `numpy.allclose`. Will call the numpy version if passed dense
    # fall back on numpy's allclose if both matrices are dense
    if not sp.isspmatrix(A) and not sp.isspmatrix(B):
        return np.allclose(A, B)

    if not sp.isspmatrix_csr(A):
        A = sp.csr_matrix(A)

    if not sp.isspmatrix_csr(B):
        B = sp.csr_matrix(B)

    # check indices
    indices_all = np.all(A.indices == B.indices)
    if not indices_all:
        return False

    # check indices pointers
    indptr_all = np.all(A.indptr == B.indptr)
    if not indptr_all:
        return False

    # check data
    return np.allclose(,, **kwrds)

# try importing the fast C implementations first, otherwise use the Python
# versions provided in this module as a fallback
项目:2016_CCFsougou2    作者:dhdsjy    | 项目源码 | 文件源码
def _document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X."""

    if sp.isspmatrix_csr(X):
        # return np.sum(X,axis=0)
        return bincount(X.indices, minlength=X.shape[1])


        return np.diff(sp.csc_matrix(X, copy=False).indptr)
项目:PyPardisoProject    作者:haasad    | 项目源码 | 文件源码
def _check_A(self, A):
        if A.shape[0] != A.shape[1]:
            raise ValueError('Matrix A needs to be square, but has shape: {}'.format(A.shape))

        if sp.isspmatrix_csr(A):
            self._solve_transposed = False
        elif sp.isspmatrix_csc(A):
            self._solve_transposed = True
            msg = 'PyPardiso requires matrix A to be in CSR or CSC format, but matrix A is: {}'.format(type(A))
            raise TypeError(msg)

        # scipy allows unsorted csr-indices, which lead to completely wrong pardiso results
        if not A.has_sorted_indices:

        # scipy allows csr matrices with empty rows. a square matrix with an empty row is singular. calling 
        # pardiso with a matrix A that contains empty rows leads to a segfault, same applies for csc with 
        # empty columns
        if not np.diff(A.indptr).all():
            row_col = 'column' if self._solve_transposed else 'row'
            raise ValueError('Matrix A is singular, because it contains empty {}(s)'.format(row_col))

        if A.dtype != np.float64:
            raise TypeError('PyPardiso currently only supports float64, but matrix A has dtype: {}'.format(A.dtype))
项目:PyPardisoProject    作者:haasad    | 项目源码 | 文件源码
def test_factorized_csc_matrix():
    """`factorized` must give the same solution for A and for A.tocsc()."""
    A, b = create_test_A_b_rand()
    Afact_csr = factorized(A)
    Afact_csc = factorized(A.tocsc())
    # the matrix bound to the CSC-based solver is expected to be CSR
    assert sp.isspmatrix_csr(Afact_csc.args[0])
    x1 = Afact_csr(b)
    x2 = Afact_csc(b)
    # both solutions were computed but never compared in the source
    np.testing.assert_array_equal(x1, x2)
项目:PyPardisoProject    作者:haasad    | 项目源码 | 文件源码
def test_spsolve_csc_matrix():
    """`spsolve` must return identical solutions for A and for A.tocsc()."""
    A, b = create_test_A_b_rand()

    # solve with the CSC version first ...
    solution_from_csc = spsolve(A.tocsc(), b)
    # ... and, before solving again, check that the matrix kept by the
    # solver object is in CSR format
    assert sp.isspmatrix_csr(ps.factorized_A)

    solution_from_original = spsolve(A, b)
    np.testing.assert_array_equal(solution_from_original, solution_from_csc)
项目:discreteMarkovChain    作者:gvanderheide    | 项目源码 | 文件源码
def getTransitionMatrix(self, probabilities=True):
        """
        If self.P has been given already, we will reuse it and convert it to a sparse csr matrix if needed.
        Otherwise, we will generate it using the direct or indirect method.
        Since most solution methods use a probability matrix, this is the default setting.
        By setting probabilities=False we can also return a rate matrix.

        As a side effect, self.P is replaced by its CSR version (or by the
        freshly generated matrix).
        """
        if self.P is not None:
            if isspmatrix(self.P):
                if not isspmatrix_csr(self.P):
                    self.P = self.P.tocsr()
            else:
                assert isinstance(self.P, np.ndarray) and self.P.ndim==2 and self.P.shape[0]==self.P.shape[1],'P needs to be a 2d numpy array with an equal number of columns and rows'
                self.P = csr_matrix(self.P)

        # NOTE(review): the left operand of `== True` was missing in the
        # source; `self.direct` is assumed from the direct/indirect wording of
        # the docstring -- confirm the attribute name against the class.
        elif self.direct == True:  # noqa: E712 - comparison kept as written
            self.P = self.directInitialMatrix()

        else:
            self.P = self.indirectInitialMatrix(self.initialState)

        if probabilities:
            P = self.convertToProbabilityMatrix(self.P)
        else:
            P = self.convertToRateMatrix(self.P)

        return P
项目:Quadflor    作者:quadflor    | 项目源码 | 文件源码
def _document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X."""
    if sp.isspmatrix_csr(X):
        return np.bincount(X.indices, minlength=X.shape[1])
        return np.diff(sp.csc_matrix(X, copy=False).indptr)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def get_inv_matvec(M, symmetric=False, tol=0):
    """Return the `matvec` callable of an inverse operator chosen by M's kind.

    Dense M uses ``LuInv``, sparse M uses ``SpLuInv``, and anything else
    falls back to ``IterInv`` with the given ``tol``.
    """
    if isdense(M):
        return LuInv(M).matvec
    elif isspmatrix(M):
        if isspmatrix_csr(M) and symmetric:
            # for a symmetric matrix the transpose holds the same values;
            # transposing a CSR matrix yields CSC storage
            M = M.T
        return SpLuInv(M).matvec
    else:
        return IterInv(M, tol=tol).matvec
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def get_OPinv_matvec(A, M, sigma, symmetric=False, tol=0):
    """Return the `matvec` callable for the inverse of ``A - sigma * M``.

    ``M is None`` stands for the identity matrix. With ``sigma == 0`` the
    call is delegated to ``get_inv_matvec(A, ...)``. Otherwise dense
    operands use ``LuInv``, sparse operands use ``SpLuInv`` and anything
    else uses the iterative ``IterOpInv`` with the given ``tol``.
    """
    if sigma == 0:
        return get_inv_matvec(A, symmetric=symmetric, tol=tol)

    if M is None:
        # M is the identity matrix
        if isdense(A):
            if (np.issubdtype(A.dtype, np.complexfloating) or
                    np.imag(sigma) == 0):
                # copy so the in-place diagonal update below does not touch
                # the caller's array
                A = np.copy(A)
            else:
                # real A with complex sigma: promote to complex (new array)
                A = A + 0j
            # subtract sigma from the main diagonal
            A.flat[::A.shape[1] + 1] -= sigma
            return LuInv(A).matvec
        elif isspmatrix(A):
            A = A - sigma * identity(A.shape[0])
            if symmetric and isspmatrix_csr(A):
                A = A.T
            return SpLuInv(A.tocsc()).matvec
        else:
            return IterOpInv(_aslinearoperator_with_dtype(A),
                             M, sigma, tol=tol).matvec
    else:
        if ((not isdense(A) and not isspmatrix(A)) or
                (not isdense(M) and not isspmatrix(M))):
            # NOTE(review): the M operand was missing from this call in the
            # source; wrapping it the same way as A is an assumption --
            # confirm against upstream.
            return IterOpInv(_aslinearoperator_with_dtype(A),
                             _aslinearoperator_with_dtype(M),
                             sigma, tol=tol).matvec
        elif isdense(A) or isdense(M):
            return LuInv(A - sigma * M).matvec
        else:
            OP = A - sigma * M
            if symmetric and isspmatrix_csr(OP):
                OP = OP.T
            return SpLuInv(OP.tocsc()).matvec
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def _document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X."""
    if sp.isspmatrix_csr(X):
        return bincount(X.indices, minlength=X.shape[1])
        return np.diff(sp.csc_matrix(X, copy=False).indptr)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_add_dummy_feature_csr():
    """CSR input to add_dummy_feature stays CSR and gains a leading column of ones."""
    expected_dense = [[1, 1, 0], [1, 0, 1], [1, 0, 1]]
    augmented = add_dummy_feature(
        sparse.csr_matrix([[1, 0], [0, 1], [0, 1]]))
    # the sparse format must be preserved
    assert_true(sparse.isspmatrix_csr(augmented), augmented)
    assert_array_equal(augmented.toarray(), expected_dense)
项目:anndata    作者:theislab    | 项目源码 | 文件源码
def transpose(self):
        """Transpose whole object.

        Data matrix is transposed, observations and variables are interchanged.

        Returns a new ``AnnData`` built from the transposed data matrix with
        ``_var``/``_obs`` and ``_varm``/``_obsm`` swapped.
        """
        if not self.isbacked:
            X = self._X
        else:
            X = self.file._file['X']
        # NOTE(review): both AnnData(...) calls were cut off after
        # `self._obsm.flipped(),` in the source; they are closed here as-is.
        # Any further arguments that were lost must be restored from upstream.
        if sparse.isspmatrix_csr(X):
            # the transpose of a CSR matrix is stored as CSC; convert back so
            # the result is CSR again
            return AnnData(X.T.tocsr(), self._var, self._obs, self._uns,
                           self._varm.flipped(), self._obsm.flipped())
        return AnnData(X.T, self._var, self._obs, self._uns,
                       self._varm.flipped(), self._obsm.flipped())
项目:spectrassembler    作者:antrec    | 项目源码 | 文件源码
def remove_bridge_reads(a_mat):
    """ Remove some edges from the similarity graph.

    When the set of neighbors N(i) of a node i are not connected if that node i is removed from the graph,
    the edges between i and j are cut for all j that is not in the largest connected group among N(i).

    Parameters
    ----------
    a_mat : scipy.sparse matrix (similarity matrix)

    Returns
    -------
    a_clr : scipy.sparse matrix (similarity matrix pre-processed), in CSR format
    """
    Ikill = []
    Jkill = []
    if not isspmatrix_csr(a_mat):
        a_mat = a_mat.tocsr()
    # `range` instead of the Python-2-only `xrange`
    for i in range(a_mat.shape[0]):
        (_, J, _) = find(a_mat[i, :])
        if len(J) == 0:
            # NOTE(review): the body of this `if` was missing in the source;
            # a node without neighbors has nothing to cut, so it is skipped.
            continue
        Jl = list(set(J))
        # sub-graph induced by the neighbors of i (i itself excluded unless
        # it is its own neighbor)
        a_r = a_mat[Jl, :].tocsc()
        a_r = a_r[:, Jl]
        Jl = np.array(Jl)
        (n_c, lbl) = connected_components(a_r, directed=False, return_labels=True)
        if n_c > 1:
            # size of every connected component among the neighbors
            sizeccs = np.bincount(lbl, minlength=n_c)
            ccmax = np.argmax(sizeccs)
            away_idx = np.where(lbl != ccmax)[0]
            away_nbrs = list(Jl[away_idx])
            Ikill.extend([i] * len(away_nbrs))
            # NOTE(review): this line was missing in the source; without it
            # Jkill stays empty and no longer matches Ikill in length.
            Jkill.extend(away_nbrs)

    # explicit integer dtype so that empty lists still give valid index arrays
    Ikill = np.array(Ikill, dtype=int)
    Jkill = np.array(Jkill, dtype=int)
    Vkill = np.ones(Ikill.size)
    kill_mat = coo_matrix((Vkill, (Ikill, Jkill)), shape=a_mat.shape, dtype=int).tocsr()
    # presumably symmetrizes the mask so an edge is cut in both directions --
    # verify against `sym_max`
    kill_mat = sym_max(kill_mat)
    # keep the original weights on the edges to remove, then subtract them
    kill_mat = kill_mat.multiply(a_mat)
    a_clr = a_mat - kill_mat
    if not isspmatrix_csr(a_clr):
        a_clr = a_clr.tocsr()

    return a_clr

###### Spectral ordering related functions (gets coarse-grained layout) #######
项目:spectrassembler    作者:antrec    | 项目源码 | 文件源码
def reorder_mat(A, thr_list, min_cc_len, VERB):
    """Reorder the connected components of A with the spectral algorithm.

    Parameters
    ----------
    A : scipy.sparse matrix (converted to CSR if needed)
    thr_list : sequence of thresholds; entry ``n_loop`` is used on pass
        number ``n_loop``
    min_cc_len : components with at most this many nodes are skipped
    VERB : verbosity level; messages are printed when ``VERB >= 2``

    Returns
    -------
    ccs_ord : list of index arrays, one per accepted component ordering
    """
    if not isspmatrix_csr(A):
        A = A.tocsr()
    # Initialization.
    ccs_ord = []
    # Create list of unordered connected components
    todo_ccs = [np.arange(A.shape[0])]
    todo_next = []
    n_loop = 0

    while len(todo_ccs) > 0:
        thr_sub = thr_list[n_loop]  # starts at 0.4 for n_loop=0
        # Reorder each of them
        for cc in todo_ccs:
            # On the first pass A is used as given, in order not to make the
            # preprocessing twice. We could also remove the preprocessing
            # from the pipeline and do it here.
            if n_loop > 0:
                A_sub = A[cc, :][:, cc]
                A_sub = remove_bridge_reads(A_sub.multiply(A_sub > thr_sub))
            else:
                A_sub = A

            # Compute connected components
            (n_cc, labels) = connected_components(A_sub, directed=False, return_labels=True)

            # Reorder each cc with spectral and keep the ordering if it looks OK
            for i_cc in range(n_cc):
                cc_sub = np.argwhere(labels == i_cc)[:, 0]
                if len(cc_sub) <= min_cc_len:
                    # NOTE(review): body missing in the source; small
                    # components are assumed to be skipped.
                    continue
                msg = " Running spectral algorithm in connected"\
                      "component of size %d..." % (len(cc_sub))
                oprint(msg, cond=(VERB >= 2))
                (_, fidvec) = get_fiedler(A_sub[cc_sub, :][:, cc_sub])
                permu = np.argsort(fidvec)
                (ii, jj, _) = find(A_sub[cc_sub[permu], :][:, cc_sub[permu]])
                # bandwidth of the reordered sub-matrix
                bw = max(abs(ii - jj))
                # NOTE(review): the statements filling `todo_next` and
                # `ccs_ord` were missing in the source (both lists were never
                # appended to). Reconstructed: a component whose bandwidth is
                # too large is queued for the next threshold, otherwise its
                # ordering is kept; indices are mapped back through `cc`
                # because the next pass slices A with them. Confirm upstream.
                if bw >= 80:
                    oprint("Bandwidth larger than 80 in reordered matrix.",
                           cond=(VERB >= 2))
                    todo_next.append(cc[cc_sub])
                else:
                    ccs_ord.append(cc[cc_sub[permu]])

        todo_ccs = todo_next
        todo_next = []
        n_loop += 1

    return ccs_ord
项目:spectrassembler    作者:antrec    | 项目源码 | 文件源码
def reorder_mat_par(A, thr_list, opts):
    """Parallel variant of the component reordering.

    Parameters
    ----------
    A : scipy.sparse matrix (converted to CSR if needed)
    thr_list : sequence of thresholds; entry ``n_loop`` is used on pass
        number ``n_loop``
    opts : mapping providing at least 'N_PROC' and 'MIN_CC_LEN'; it is also
        forwarded to ``reord_submat``

    Returns
    -------
    ccs_ord : list collecting the first element of every pair returned by
        ``reord_submat``
    """
    # a quarter of the configured processes, but never fewer than one
    # (Pool rejects processes=0)
    N_PROC = max(1, int(opts['N_PROC']) // 4)
    min_cc_len = opts['MIN_CC_LEN']

    if not isspmatrix_csr(A):
        A = A.tocsr()
    # bind the worker after the conversion so that it receives the CSR matrix
    partial_reorder = partial(reord_submat, A=A, opts=opts)
    # Initialization.
    ccs_ord = []
    todo_next = []
    n_loop = 0

    # Create list of unordered connected components
    todo_ccs = []
    (ncs, lbls) = connected_components(A, directed=False, return_labels=True)
    for nc in range(ncs):
        cc_sub = np.argwhere(lbls == nc)[:, 0]
        if len(cc_sub) <= min_cc_len:
            # NOTE(review): the body of this `if` and the append below were
            # missing in the source; small components are assumed to be
            # skipped and the others queued.
            continue
        todo_ccs.append(cc_sub)

    while len(todo_ccs) > 0:
        thr_sub = thr_list[n_loop]  # starts at 0.4 for n_loop=0
        args_list = zip(repeat(thr_sub), todo_ccs)

        pool = Pool(processes=N_PROC)
        try:
            # NOTE(review): the source read `results =, args_list)`;
            # `pool.map(partial_reorder, ...)` is reconstructed from the two
            # otherwise unused names -- confirm upstream.
            results = pool.map(partial_reorder, args_list)
        finally:
            # release the worker processes on every pass
            pool.close()
            pool.join()

        for tple in results:
            (sub_ccs_ord, sub_todo_next) = tple
            ccs_ord += sub_ccs_ord
            todo_next += sub_todo_next

        todo_ccs = todo_next
        todo_next = []
        n_loop += 1
    return ccs_ord
项目:knowledge_linker    作者:glciampaglia    | 项目源码 | 文件源码
def maxmin(A, a=None, b=None, sparse=False):
    """
    Compute the max-min product of A with itself:

    [ AP ]_ij = max_k min ( [ A ]_ik, [ A ]_kj )

    Parameters
    ----------
    A : array_like
        A 2D square ndarray, matrix or sparse (CSR) matrix (see `scipy.sparse`).
        The sparse implementation will be used automatically for sparse
        matrices.
    a,b : integer
        optional; compute only the max-min product between A[a:b,:] and A.T
    sparse : bool
        if True, transforms A to CSR matrix format and use the sparse
        implementation.

    Returns
    -------
    A' : array_like
        The max-min product of A with itself. A CSR sparse matrix will be
        returned if the sparse implementation is used, otherwise a numpy matrix.

    Raises
    ------
    ValueError
        If A is not 2D or not square, if a or b lies outside [0, N], or if
        a > b.
    """
    if A.ndim != 2:
        raise ValueError('expecting 2D array or matrix')
    N, M = A.shape
    if N != M:
        raise ValueError('argument must be a square array')
    if a is not None:
        if (a < 0) or (a > N):
            raise ValueError('a cannot be smaller nor larger than axis dim')
    if b is not None:
        if (b < 0) or (b > N):
            raise ValueError('b cannot be smaller nor larger than axis dim')
    if (a is not None) and (b is not None):
        if a > b:
            raise ValueError('a must be less or equal b')
    if sp.isspmatrix(A) or sparse:
        if not sp.isspmatrix_csr(A):
            A = sp.csr_matrix(A)
        return maxmin_sparse(A, a, b)
    else:
        return np.matrix(maxmin_naive(A, a, b))

# Global variables used by _maxmin_worker (see below)