The following 50 code examples, extracted from open-source Python projects, illustrate how to use scipy.sparse.coo_matrix().
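Before the project examples, here is a minimal, self-contained sketch of the constructor they all share: coo_matrix((data, (row, col)), shape=...) builds a sparse matrix from three parallel arrays, and duplicate (row, col) pairs are summed on conversion. The values below are illustrative only, not taken from any of the projects.

import numpy as np
from scipy.sparse import coo_matrix

# Three stored entries; the two at (0, 1) are summed to 11 on conversion.
row = np.array([0, 0, 1])
col = np.array([1, 1, 2])
data = np.array([4.0, 7.0, 5.0])

M = coo_matrix((data, (row, col)), shape=(3, 3))
print(M.toarray())  # dense view: [[0, 11, 0], [0, 0, 5], [0, 0, 0]]
M_csr = M.tocsr()   # most examples convert to CSR for fast arithmetic/slicing
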
def make_ppmi_mat(old_mat, row_probs, col_probs, smooth, neg=1, normalize=False):
    prob_norm = old_mat.sum() + (old_mat.shape[0] * old_mat.shape[1]) * smooth
    old_mat = old_mat.tocoo()
    row_d = old_mat.row
    col_d = old_mat.col
    data_d = old_mat.data
    neg = np.log(neg)
    for i in range(len(old_mat.data)):  # xrange in the Python 2 original
        if data_d[i] == 0.0:
            continue
        joint_prob = (data_d[i] + smooth) / prob_norm
        denom = row_probs[row_d[i], 0] * col_probs[0, col_d[i]]
        if denom == 0.0:
            data_d[i] = 0
            continue
        data_d[i] = np.log(joint_prob / denom)
        data_d[i] = max(data_d[i] - neg, 0)
        if normalize:
            data_d[i] /= -1 * np.log(joint_prob)
    return coo_matrix((data_d, (row_d, col_d)))

def _getEdgeP(self, xEdge, yEdge, zEdge):
    if self.dim == 2:
        raise Exception('Not implemented')  # this should be a reordering of the face inner product?
    ind1, ind2, ind3 = [], [], []
    for ind in self._sortedCells:
        p = self._pointer(ind)
        w = self._levelWidth(p[-1])
        posX = [0, 0] if xEdge == 'eX0' else [w, 0] if xEdge == 'eX1' else [0, w] if xEdge == 'eX2' else [w, w]
        posY = [0, 0] if yEdge == 'eY0' else [w, 0] if yEdge == 'eY1' else [0, w] if yEdge == 'eY2' else [w, w]
        posZ = [0, 0] if zEdge == 'eZ0' else [w, 0] if zEdge == 'eZ1' else [0, w] if zEdge == 'eZ2' else [w, w]
        ind1.append(self._ex2i[self._index([p[0], p[1] + posX[0], p[2] + posX[1], p[3]])])
        ind2.append(self._ey2i[self._index([p[0] + posY[0], p[1], p[2] + posY[1], p[3]])] + self.ntEx)
        ind3.append(self._ez2i[self._index([p[0] + posZ[0], p[1] + posZ[1], p[2], p[3]])] + self.ntEx + self.ntEy)
    IND = np.r_[ind1, ind2, ind3]
    PXXX = sp.coo_matrix(
        (np.ones(self.dim * self.nC), (range(self.dim * self.nC), IND)),
        shape=(self.dim * self.nC, self.ntE)
    ).tocsr()
    Re = self._deflationMatrix('E')
    return PXXX * Re

def matixToRowColDataArr(X):
    """
    Convert sparse affinity/similarity matrix to numpy array format
    (row_array, col_array, data_array) so the cython update function
    can work efficiently on it.
    """
    # convert to coo format (from lil, csr, csc)
    if isinstance(X, coo_matrix):
        X_coo = X
    elif isinstance(X, csr_matrix) or isinstance(X, lil_matrix):
        X_coo = X.tocoo()
    else:  # others, like a numpy matrix, can be converted to a coo matrix
        X_coo = coo_matrix(X)
    # Upcast matrix to a floating point format (if necessary)
    X_coo = X_coo.asfptype()
    # get row_array, col_array, data_array in the correct dtype (for cython to work);
    # the original used np.int, which was removed from recent numpy releases
    row_array, col_array, data_array = X_coo.row.astype(int), X_coo.col.astype(int), X_coo.data
    return row_array, col_array, data_array

def loadMatrix(data_file, dataCutoff=None):
    """
    Load a similarity data file.
    If dataCutoff is not None, all values (affinity/similarity) below it are discarded.
    """
    # print('{0}, loading data'.format(datetime.now()))
    # pd.DataFrame.from_csv was removed from pandas; read_csv is the replacement
    simi = pd.read_csv(data_file, sep='\t', index_col=None)
    samples = sorted(list(set(simi.row) | set(simi.col)))
    samplesInd = {el: ind for ind, el in enumerate(samples)}
    row, col, data = (simi.row.map(lambda x: samplesInd[x]),
                      simi.col.map(lambda x: samplesInd[x]),
                      simi.data)
    if dataCutoff is not None:
        row_new, col_new, data_new = [], [], []
        for r, c, d in zip(row, col, data):
            if d > dataCutoff:
                row_new.append(r)
                col_new.append(c)
                data_new.append(d)
        simi_mat = coo_matrix((data_new, (row_new, col_new)),
                              shape=(len(samplesInd), len(samplesInd)))
    else:
        simi_mat = coo_matrix((data, (row, col)),
                              shape=(len(samplesInd), len(samplesInd)))
    return simi_mat

def makeDelta1(R):
    """Make the delta1 coboundary matrix

    :param R: Edge list, NEdges x 2. It is assumed that there is at least
        one edge incident on every vertex.
    """
    NEdges = R.shape[0]
    NVertices = int(np.max(R)) + 1
    # Make a list of edges for fast lookup
    Edges = []
    for i in range(NVertices):
        Edges.append({})
    for i in range(R.shape[0]):
        [a, b] = [int(R[i, 0]), int(R[i, 1])]
        Edges[a][b] = i
        Edges[b][a] = i
    tic = time.time()
    (I, J, V) = get3CliquesBrute(Edges)
    toc = time.time()
    print("Elapsed time 3 cliques brute: %g" % (toc - tic))
    [I, J, V] = [a.flatten() for a in [I, J, V]]
    TriNum = len(I) // 3  # integer division; len(I)/3 is a float in Python 3
    Delta1 = sparse.coo_matrix((V, (I, J)), shape=(TriNum, NEdges)).tocsr()
    return Delta1

def _create_main_design(self, **kwargs):
    r"""
    Create design matrix for main effects

    Keyword Args:
        * *df* (``DataFrame``): specify a new dataframe to create the design matrix from

    Returns:
        array_like: design matrix in sparse CSR format
    """
    df = kwargs.get('df', self.train_df)
    df.reset_index(drop=True, inplace=True)
    df['row_index'] = df.index
    df['intercept'] = 1.0  # assume intercept is always included
    id_cols = ['row_index']
    melted_df = pd.melt(df[id_cols + self.main_effects], id_cols)
    melted_df = melted_df.merge(self.main_map, on='variable')
    melted_df['col_index'] = melted_df['main_idx']
    row = melted_df.row_index
    col = melted_df.col_index
    data = melted_df.value
    return sparse.coo_matrix((data, (row, col)),
                             shape=(max(row) + 1, max(col) + 1)).tocsr()

def read_data(filename):
    """ Reads in the last.fm dataset, and returns a tuple of a pandas dataframe
    and a sparse matrix of artist/user/playcount """
    # read in triples of user/artist/playcount from the input dataset
    start = time.time()
    logging.debug("reading data from %s", filename)
    data = pandas.read_table(filename, usecols=[0, 2, 3],
                             names=['user', 'artist', 'plays'])

    # map each artist and user to a unique numeric value
    data['user'] = data['user'].astype("category")
    data['artist'] = data['artist'].astype("category")

    # create a sparse matrix of all the users/plays
    plays = coo_matrix((data['plays'].astype(numpy.float32),
                        (data['artist'].cat.codes.copy(),
                         data['user'].cat.codes.copy())))

    logging.debug("read data file in %s", time.time() - start)
    return data, plays

def bm25_weight(X, K1=100, B=0.8):
    """ Weighs each row of a sparse matrix X by BM25 weighting """
    # calculate idf per term (user)
    X = coo_matrix(X)
    N = float(X.shape[0])
    idf = log(N / (1 + bincount(X.col)))

    # calculate length_norm per document (artist)
    row_sums = numpy.ravel(X.sum(axis=1))
    average_length = row_sums.mean()
    length_norm = (1.0 - B) + B * row_sums / average_length

    # weight matrix rows by bm25
    X.data = X.data * (K1 + 1.0) / (K1 * length_norm[X.row] + X.data) * idf[X.col]
    return X

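A quick toy call to sanity-check the weighting above, assuming the module-level imports the snippet implies (import numpy, from numpy import log, bincount, plus coo_matrix from scipy.sparse); the play counts are illustrative only:

import numpy
from numpy import log, bincount
from scipy.sparse import coo_matrix

# 2 artists x 2 users with a few play counts
plays = coo_matrix(([3.0, 1.0, 5.0], ([0, 0, 1], [0, 1, 1])), shape=(2, 2))
weighted = bm25_weight(plays)
print(weighted.toarray())
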
def prepare_model(self, dataset):
    '''Load the data from the training file into a format
    adapted for the KNN methods.
    '''
    filename = dataset.dirname + 'data/train_set_triplets'
    if os.path.isfile(filename + '.npy'):
        file_content = np.load(filename + '.npy')
    else:
        file_content = np.loadtxt(filename)
        np.save(filename, file_content)
    # self.user_item = ssp.coo_matrix((file_content[:, 2], (file_content[:, 0], file_content[:, 1]))).tocsr()
    self.binary_user_item = ssp.coo_matrix(
        (np.ones(file_content.shape[0]), (file_content[:, 0], file_content[:, 1]))
    ).tocsr()
    del file_content
    self.n_items = self.binary_user_item.shape[1]
    self.n_users = self.binary_user_item.shape[0]

def GLGrid(train):
    '''
    Function for obtaining the optimal GraphLab Factorization Recommender
    parameters for a given dataset.

    train: scipy.sparse.coo_matrix
        The dataset used for grid searching the best parameters.

    Returns: dict
        Dictionary of the best GraphLab parameters for the given dataset.
    '''
    c = coo_matrix(train)
    sf = graphlab.SFrame({'row': c.row, 'col': c.col, 'data': c.data})
    sf_small = sf.dropna('data', how="all")
    folds = graphlab.cross_validation.KFold(sf_small, 3)
    params = dict([('target', 'data'), ('user_id', 'row'), ('item_id', 'col'),
                   ('num_factors', [10, 20]), ('sgd_step_size', [0.02, 10])])
    job = graphlab.grid_search.create(folds,
                                      graphlab.factorization_recommender.create,
                                      params)
    params = job.get_best_params()
    return params

def prepare_input_data(self, X):
    """
    Check to make sure that the input matrix and its mask of missing
    values are valid. Returns X and missing mask.
    """
    # TODO: separate out safety checks in _preprocess_sparse as well,
    # and include them here instead
    self._check_input(X)
    shape = X.shape
    coo = coo_matrix(X)
    row_id = coo.row
    col_id = coo.col
    self.missing_mask = row_id, col_id, shape
    self._check_max_rank(X)
    self._check_missing_value_mask()
    return X

def sparseDf(self, df, matrixType="csc"):
    """
    convert a pandas sparse df to a scipy sparse array
    :param df: pandas sparse df
    :param matrixType: csc or csr
    :return: scipy sparse array
    """
    columns = df.columns
    dat, rows = map(list, zip(*[(df[col].sp_values - df[col].fill_value,
                                 df[col].sp_index.to_int_index().indices)
                                for col in columns]))
    cols = [np.ones_like(a) * i for (i, a) in enumerate(dat)]
    datF, rowsF, colsF = np.concatenate(dat), np.concatenate(rows), np.concatenate(cols)
    arr = sparse.coo_matrix((datF, (rowsF, colsF)), df.shape, dtype=np.float64)
    if matrixType == "csc":
        return arr.tocsc()
    elif matrixType == "csr":
        return arr.tocsr()  # the original returned tocsc() here too, which was a bug
    else:
        raise ValueError("Only accept csc or csr")

def solveSingle(self, inputDF, outputDict, rho, beta_target):
    I, J, V, Y = [], [], [], []
    fd = {}  # mapping feature names to consecutive integers, starting with 0
    for i, (id, x) in enumerate(inputDF.items()):
        l = outputDict.get(id)
        for k, v in x.items():
            I.append(i)
            J.append(k)
            V.append(v)
            upd(fd, k)
        Y.append(l)
    J = [fd[k] for k in J]  # map() returns an iterator in Python 3; materialize it
    X = sparse.coo_matrix((V, (I, J)), shape=(I[-1] + 1, len(fd)))
    # ordered list of feature names according to their integer ids in fd
    fd_reverse = [k for k, v in sorted(fd.items(), key=lambda t: t[1])]
    # y_new = y - X . beta_target
    # converting a proximal least squares problem to a ridge regression
    ZmUl = np.array([beta_target.get(k, 0) for k in fd_reverse])
    y_new = np.array(Y) - X * ZmUl
    ridge = Ridge(alpha=rho, fit_intercept=False)
    ret = ridge.fit(X, y_new)
    # ret = self.lr.fit(X, y_new)
    return dict(zip(fd_reverse, (ret.coef_ + ZmUl).tolist()))

def split(self, X):
    X = sp.coo_matrix(X)
    rng = np.random.RandomState(self.random_state)
    shape = X.shape
    n_data = len(X.data)
    n_train = int(self.train_size * n_data)
    for it in range(self.n_iter):
        ind = rng.permutation(n_data)
        train_ind = ind[:n_train]
        test_ind = ind[n_train:]
        X_tr = sp.coo_matrix((X.data[train_ind],
                              (X.row[train_ind], X.col[train_ind])),
                             shape=shape)
        X_te = sp.coo_matrix((X.data[test_ind],
                              (X.row[test_ind], X.col[test_ind])),
                             shape=shape)
        yield X_tr, X_te

def generate_hamiltonian(m, basis):
    """
    Generates the (sparse) Hamiltonian

    Parameters
    ----------
    basis : Basis object
        Full basis for this specific number sector.
    """
    nbas = basis.len
    Us = m.Us
    if m.W is not None:
        Us = m.W + np.diag(m.Us)
    HDi = np.arange(nbas)
    HD = NumberSector.onsite_hamiltonian(m.Es, basis.vs) \
        + NumberSector.interaction_hamiltonian(Us, basis.vs)
    Hki, Hkj, Hkv = NumberSector.hopping_hamiltonian(basis, m.hopping, basis.vs)
    return sparse.coo_matrix((Hkv, (Hki, Hkj)), shape=(nbas, nbas)).tocsr() \
        + sparse.coo_matrix((HD, (HDi, HDi)), shape=(nbas, nbas)).tocsr()

def creation_operator(i, basis0, basis1):
    """
    Create a boson on site <i>

    Parameters
    ----------
    i : int
        Site index
    basis0 : list
        Initial basis
    basis1 : list
        Final basis
    """
    index0 = np.arange(basis0.len)
    mbasis = np.copy(basis0.vs)
    mbasis[:, i] += 1
    index1 = basis1.index(mbasis)
    return sparse.coo_matrix((np.sqrt(mbasis[:, i]), (index1, index0)),
                             shape=[basis1.len, basis0.len]).tocsr()

def reblocks_matrix(self, sparse_matrix):
    # Gets all non-zero coordinates and makes sure that they are considered
    froms = sparse_matrix.row
    tos = sparse_matrix.col
    data = sparse_matrix.data
    all_indices = np.hstack((froms, tos))
    indices = np.unique(all_indices)
    compact_shape = indices.shape[0]
    # Builds the hash mapping original indices to compact ones
    matrix_hash = {}
    titles = []
    for i in range(compact_shape):
        matrix_hash[indices[i]] = i
        froms[froms == indices[i]] = matrix_hash[indices[i]]
        tos[tos == indices[i]] = matrix_hash[indices[i]]
        titles.append(indices[i])
    matrix = coo_matrix((data, (froms, tos)),
                        shape=(compact_shape, compact_shape)).toarray().astype(np.float64)
    return matrix, matrix_hash, titles

def drop_tolerance(A, t):
    """
    Drops entries of `A` having absolute value lower than `t`.

    Args:
        A (coo_matrix): Given coo matrix.
        t (float): Tolerance threshold.

    Returns:
        A coo matrix.
    """
    A = A.tocoo()
    row = []
    col = []
    data = []
    for idx, (i, j) in enumerate(zip(A.row, A.col)):
        value = A.data[idx]
        if -t < value < t:  # drop entries with |value| < t; the original
            continue        # condition `value < t or value > -t` dropped everything
        row.append(i)
        col.append(j)
        data.append(value)
    A = coo_matrix((data, (row, col)), shape=A.shape, dtype=A.dtype)
    del row, col, data
    return A

def degree_reverse_rank_perm(A, reverse=False):
    """
    Computes a permutation that sorts nodes by degree.

    Args:
        A (coo_matrix): Given coo matrix.
        reverse (bool): If True, sorts in descending order.

    Returns:
        A permutation of node indices, where `(i -> j)` is denoted
        as `perm[i] = j`.
    """
    n, _ = A.shape
    degree = {i: 0 for i in range(n)}
    for i, j in zip(A.row, A.col):
        degree[j] += 1
    bottoms = sorted(degree, key=degree.get, reverse=reverse)
    perm = [0 for _ in range(n)]
    for i in range(n):
        perm[bottoms[i]] = i
    return perm

def reorder_matrix(A, perm, fix_row=False, fix_col=False):
    """
    Reorders a given coo matrix with a given permutation.
    You can fix either the rows or the columns.

    Args:
        A (coo_matrix): Given coo matrix.
        perm (list): List of node indices denoting the permutation.
        fix_row (bool): If True, reorders columns only.
        fix_col (bool): If True, reorders rows only.

    Returns:
        A coo matrix.
    """
    A = A.tocoo()
    if not fix_row:
        row = [perm[i] for i in A.row]
    else:
        row = A.row
    if not fix_col:
        col = [perm[j] for j in A.col]
    else:
        col = A.col
    A = coo_matrix((A.data, (row, col)), shape=A.shape, dtype=A.dtype)
    del row, col
    return A

def matrix_to_row_col_data(X):
    """Convert sparse affinity matrix to arrays.

    .. note:: Deprecated.
        It will be removed in icing 0.2. This is now done by check_array.
    """
    # convert to coo format (from lil, csr, csc)
    if isinstance(X, coo_matrix):
        X_coo = X
    elif isinstance(X, csr_matrix) or isinstance(X, lil_matrix):
        X_coo = X.tocoo()
    else:  # others, like a numpy matrix, can be converted to a coo matrix
        X_coo = coo_matrix(X)
    # Upcast matrix to a floating point format (if necessary)
    X_coo = X_coo.asfptype()
    # the original used np.int, which was removed from recent numpy releases
    return X_coo.row.astype(int), X_coo.col.astype(int), X_coo.data

def hess(self, *args, **kwargs):
    """Evaluate Lagrangian Hessian at (x, z)."""
    l_vals, l_rows, l_cols = super(SciPyAmplModel, self).hess(*args, **kwargs)

    # AMPL only returns the lower triangular part of the Hessian and
    # `scipy.coo_matrix` doesn't have a `symmetric` attribute, so we
    # need to copy the upper part of the matrix
    diag_idx = np.where(l_rows == l_cols)

    # the strict upper triangle of H is obtained by switching the row and
    # col indices and removing the values on the diagonal.
    u_rows = np.delete(l_cols, diag_idx)  # creates a copy
    u_cols = np.delete(l_rows, diag_idx)
    u_vals = np.delete(l_vals, diag_idx)

    H = sp.coo_matrix((np.concatenate((l_vals, u_vals)),
                       (np.concatenate((l_rows, u_rows)),
                        np.concatenate((l_cols, u_cols)))),
                      shape=(self.nvar, self.nvar))
    return H

def hess(self, *args, **kwargs):
    """Evaluate Lagrangian Hessian at (x, z)."""
    u_vals, u_rows, u_cols = super(SciPyAdolcModel, self).hess(*args, **kwargs)

    # ADOL-C only returns the upper triangular part of the Hessian and
    # `scipy.coo_matrix` doesn't have a `symmetric` attribute, so we
    # need to copy the upper part of the matrix into the lower triangle
    diag_idx = np.where(u_rows == u_cols)

    l_rows = np.delete(u_cols, diag_idx)  # creates a copy
    l_cols = np.delete(u_rows, diag_idx)
    l_vals = np.delete(u_vals, diag_idx)

    H = sp.coo_matrix((np.concatenate((l_vals, u_vals)),
                       (np.concatenate((l_rows, u_rows)),
                        np.concatenate((l_cols, u_cols)))),
                      shape=(self.nvar, self.nvar))
    return H

def construct_sparse_feature_matrix(dat, fea_map):
    num_row = 0
    row = []
    col = []
    val = []
    # keep the same order in iterating through the data set: 0, ..., N-1
    for doc_id, fea_dic in sorted(dat.items(), key=lambda x: x[0]):
        for f, v in fea_dic.items():
            row.append(num_row)
            col.append(fea_map[f])
            val.append(v)
        num_row += 1
    spmat = sp.coo_matrix((val, (row, col)), shape=(num_row, len(fea_map))).tocsr()
    return spmat

# construct K x N mask matrix: K = number of labeled features, N = number of data points
# ** we can skip the labeled documents, if any. needs further experiments **

def construct_feature_document_indicators(dat, labeled_features):
    if len(labeled_features) == 0:
        return sp.coo_matrix(([1.], ([0], [0])), shape=(1, len(dat))).tocsr()
    num_col = 0
    row = []
    col = []
    val = []
    for doc_id, fea_dic in sorted(dat.items(), key=lambda x: x[0]):
        num_row = 0
        for fea, label_dist in sorted(labeled_features.items(), key=lambda x: x[0]):
            if fea in fea_dic:
                row.append(num_row)
                col.append(num_col)
                val.append(1.)
            num_row += 1
        num_col += 1
    spmat = sp.coo_matrix((val, (row, col)),
                          shape=(len(labeled_features), len(dat))).tocsr()
    return spmat

# construct feature expected distribution, a K x C matrix:
# K = number of labeled features, C = number of classes

def construct_label_document_indicators(dat, labeled_instances):
    if len(labeled_instances) == 0:
        return sp.coo_matrix(([1.], ([0], [0])), shape=(1, len(dat))).tocsr()
    num_row = 0
    num_col = 0
    row = []
    col = []
    val = []
    for doc_id, fea_dic in sorted(dat.items(), key=lambda x: x[0]):
        if doc_id in labeled_instances:
            row.append(num_row)
            col.append(num_col)
            val.append(1.)
            num_row += 1
        num_col += 1
    spmat = sp.coo_matrix((val, (row, col)),
                          shape=(len(labeled_instances), len(dat))).tocsr()
    return spmat

# construct L x C matrix: L = number of labeled documents, C = number of classes

def load_recmat(self):
    assert self.complete()
    with self.output().open() as f:
        data = json.load(f)
    print('Data loaded')
    subreddit_remapping = {sid: ix for ix, sid in
                           enumerate(tqdm(data['subreddit_ids'].values(), desc='remapping sub'))}
    author_remapping = {aid: ix for ix, aid in
                        enumerate(tqdm(data['author_ids'].values(), desc='remapping author'))}
    i = np.asarray([author_remapping[auth] for sub, auth in data['edges']])
    j = np.asarray([subreddit_remapping[sub] for sub, auth in data['edges']])
    entries = np.ones(len(data['edges']))
    recmat = sp.coo_matrix((entries, (i, j)),
                           shape=[len(author_remapping), len(subreddit_remapping)])
    sub_mapping = {sub: subreddit_remapping[ix] for sub, ix in data['subreddit_ids'].items()}
    auth_mapping = {auth: author_remapping[ix] for auth, ix in data['author_ids'].items()}
    return sp.csr_matrix(recmat), sub_mapping, auth_mapping

def contruct_ellipse_parallel(pars):
    Coor, cm, A_i, Vr, dims, dist, max_size, min_size, d = pars
    dist_cm = coo_matrix(np.hstack([Coor[c].reshape(-1, 1) - cm[k]
                                    for k, c in enumerate(['x', 'y', 'z'][:len(dims)])]))
    Vr.append(dist_cm.T * spdiags(A_i.toarray().squeeze(), 0, d, d)
              * dist_cm / A_i.sum(axis=0))

    if np.sum(np.isnan(Vr)) > 0:
        raise Exception('You cannot pass empty (all zeros) components!')

    D, V = eig(Vr[-1])
    dkk = [np.min((max_size**2, np.max((min_size**2, dd.real)))) for dd in D]

    # search indexes for each component
    return np.sqrt(np.sum([(dist_cm * V[:, k])**2 / dkk[k]
                           for k in range(len(dkk))], 0)) <= dist

# %% threshold_components

def transform(self, X):
    """Encode categorical columns into a sparse matrix with one-hot encoding.

    Args:
        X (numpy.array): categorical columns to encode

    Returns:
        X_new (scipy.sparse.coo_matrix): sparse matrix encoding categorical
            variables into dummy variables
    """
    for col in range(X.shape[1]):
        X_col = self._transform_col(X[:, col], col)
        if X_col is not None:
            if col == 0:
                X_new = X_col
            else:
                X_new = sparse.hstack((X_new, X_col))
        logging.debug('{} --> {} features'.format(
            col, self.label_encoder.label_maxes[col]))
    return X_new

def Zpad(self, M, N, mode='center', dtype=np.dtype('complex64'), **kwargs):
    slc = []
    if mode == 'center':
        for m, n in zip(M, N):
            slc += [slice(m // 2 + int(np.ceil(-n / 2)),
                          m // 2 + int(np.ceil(n / 2))), ]
    elif mode == 'edge':
        for m, n in zip(M, N):
            slc.append(slice(n))

    x = np.arange(np.prod(M), dtype=int).reshape(M, order='F')
    rows = x[tuple(slc)].flatten(order='F')  # indexing with a list of slices is deprecated
    cols = np.arange(rows.size)
    ones = np.ones_like(cols)
    shape = np.prod(M), np.prod(N)
    M = spp.coo_matrix((ones, (rows, cols)), shape=shape, dtype=dtype)
    return self.SpMatrix(M, **kwargs)

def interp_mat(m, N, width, table, coord, backend):
    ndim = coord.shape[0]

    if ndim == 1:
        _interp_mat = _interp1_mat
    elif ndim == 2:
        _interp_mat = _interp2_mat
    elif ndim == 3:
        _interp_mat = _interp3_mat
    else:
        # the original passed ndim as a second argument to ValueError,
        # which never formats the %r placeholder
        raise ValueError('Number of dimensions can only be 1, 2 or 3, got %r' % ndim)

    row, col, ker = _interp_mat(m, N, width, table, coord)

    return sparse.coo_matrix((ker, (row, col)),
                             shape=(m, np.prod(N, dtype=int)))

def test_closureap():
    """ Correctness of all-pairs parallel closure. """
    np.random.seed(100)
    dt = DirTree('test', (2, 5, 10), root='test_parallel')
    N = 100
    thresh = 0.1
    A = sp.rand(N, N, thresh, 'csr')
    nnz = A.getnnz()
    sparsity = float(nnz) / N ** 2
    print('Number of nnz = {}, sparsity = {:g}'.format(nnz, sparsity))
    A = np.asarray(A.todense())
    clo.closureap(A, dt)
    coords = np.asarray(fromdirtree(dt, N), dtype=coo_dtype)
    coo = (coords['weight'], (coords['row'], coords['col']))
    B = np.asarray(sp.coo_matrix(coo, shape=(N, N)).todense())
    rows = []
    for row in range(N):  # xrange in the Python 2 original
        r, _ = clo.cclosuress(A, row)
        rows.append(r)
    C = np.asarray(rows)
    assert np.allclose(B, C)
    # cleanup
    for logpath in glob('closure-*.log'):
        os.remove(logpath)

def multi_hot_encode(x: Sequence[str], prefix: str) -> (coo_matrix, Dict[str, int]):
    """
    Return a sparse matrix encoding the categorical variables in x and a
    dictionary mapping column numbers back to categorical variables.

    Each record in x must be a single string with the categorical variables
    separated by a comma. The prefix is prepended to each categorical
    variable's name to prevent collisions.
    """
    data = []
    i = []
    j = []
    col = count()
    dummy_col = defaultdict(lambda: next(col))
    for row, cat_vars in enumerate(x):
        for cat_var in cat_vars.split(','):
            prepended = f'{prefix}_{cat_var}'
            data.append(1)
            i.append(row)
            j.append(dummy_col[prepended])
    return coo_matrix((data, (i, j))), {v: k for k, v in dummy_col.items()}

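A toy call showing the encoding above, assuming the imports the function needs (count from itertools, defaultdict from collections, coo_matrix from scipy.sparse, and the typing names); the category strings are illustrative only:

mat, col_names = multi_hot_encode(['red,blue', 'blue,green'], prefix='color')
print(mat.toarray())  # [[1, 1, 0], [0, 1, 1]]
print(col_names)      # {0: 'color_red', 1: 'color_blue', 2: 'color_green'}
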
def get_lshash(text: coo_matrix) -> List[str]:
    """ Return a list of cosine LSHs encoding text. """
    def cosine_LSH(vector, planes):
        """
        Return a single cosine LSH for a particular record and given planes.
        """
        sig = 0
        for plane in planes:
            sig <<= 1
            if vector.dot(plane) >= 0:
                sig |= 1
        return str(sig)

    bits = 512
    random_projections = np.random.randn(bits, text.shape[1])
    hashes = [cosine_LSH(text.getrow(idx), random_projections)
              for idx in range(text.shape[0])]
    return hashes

def load_data(path="data/cora/", dataset="cora"):
    """Load citation network dataset (cora only for now)"""
    print('Loading {} dataset...'.format(dataset))

    idx_features_labels = np.genfromtxt("{}{}.content".format(path, dataset),
                                        dtype=np.dtype(str))
    features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32)
    labels = encode_onehot(idx_features_labels[:, -1])

    # build graph
    idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
    idx_map = {j: i for i, j in enumerate(idx)}
    edges_unordered = np.genfromtxt("{}{}.cites".format(path, dataset),
                                    dtype=np.int32)
    edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
                     dtype=np.int32).reshape(edges_unordered.shape)
    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                        shape=(labels.shape[0], labels.shape[0]),
                        dtype=np.float32)

    # build symmetric adjacency matrix
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

    print('Dataset has {} nodes, {} edges, {} features.'.format(
        adj.shape[0], edges.shape[0], features.shape[1]))

    return features.todense(), adj, labels

def label2mnt(mnt_s_out, mnt_w_out, mnt_h_out, mnt_o_out, thresh=0.5):
    mnt_s_out = np.squeeze(mnt_s_out)
    mnt_w_out = np.squeeze(mnt_w_out)
    mnt_h_out = np.squeeze(mnt_h_out)
    mnt_o_out = np.squeeze(mnt_o_out)
    assert len(mnt_s_out.shape) == 2 and len(mnt_w_out.shape) == 3 \
        and len(mnt_h_out.shape) == 3 and len(mnt_o_out.shape) == 3
    # get cls results
    mnt_sparse = sparse.coo_matrix(mnt_s_out > thresh)
    # zip() returns an iterator in Python 3; materialize it before np.array
    mnt_list = np.array(list(zip(mnt_sparse.row, mnt_sparse.col)), dtype=np.int32)
    if mnt_list.shape[0] == 0:
        return np.zeros((0, 4))
    # get regression results
    mnt_w_out = np.argmax(mnt_w_out, axis=-1)
    mnt_h_out = np.argmax(mnt_h_out, axis=-1)
    mnt_o_out = np.argmax(mnt_o_out, axis=-1)  # TODO: use ori_highest_peak (np version)
    # get final mnt
    mnt_final = np.zeros((len(mnt_list), 4))
    mnt_final[:, 0] = mnt_sparse.col * 8 + mnt_w_out[mnt_list[:, 0], mnt_list[:, 1]]
    mnt_final[:, 1] = mnt_sparse.row * 8 + mnt_h_out[mnt_list[:, 0], mnt_list[:, 1]]
    mnt_final[:, 2] = (mnt_o_out[mnt_list[:, 0], mnt_list[:, 1]] * 2 - 89.) / 180 * np.pi
    mnt_final[mnt_final[:, 2] < 0.0, 2] = mnt_final[mnt_final[:, 2] < 0.0, 2] + 2 * np.pi
    mnt_final[:, 3] = mnt_s_out[mnt_list[:, 0], mnt_list[:, 1]]
    return mnt_final

def _build(self, num_rows, num_cols):
    """
    Allocate the matrix.

    Parameters
    ----------
    num_rows : int
        number of rows in the matrix.
    num_cols : int
        number of cols in the matrix.
    """
    data, rows, cols = self._build_sparse(num_rows, num_cols)
    metadata = self._metadata
    for key, (ind1, ind2, idxs, jac_type, factor) in iteritems(metadata):
        if idxs is None:
            metadata[key] = (slice(ind1, ind2), jac_type, factor)
        else:
            # store reverse indices to avoid copying subjac data during
            # update_submat.
            metadata[key] = (np.argsort(idxs) + ind1, jac_type, factor)
    self._matrix = coo_matrix((data, (rows, cols)), shape=(num_rows, num_cols))

def MasseyVector(self):
    """This function produces X'Wy."""
    idx = self.itemlist
    table = self.table
    pair = self.pair
    j = np.ravel(pair)
    i = np.repeat(np.arange(table.shape[0], dtype=np.int32), 2, axis=0)
    data = np.array([[1, -1]], dtype=np.float32)
    data = np.ravel(np.repeat(data, table.shape[0], axis=0))
    X = coo_matrix((data, (i, j)), shape=(table.shape[0], len(idx)))
    X = X.tocsr()
    W = np.require(table.iloc[:, 4].values, np.float32)
    y = table.iloc[:, 2].values - table.iloc[:, 3].values
    Wy = np.multiply(W, y)
    return X.T * Wy

def __getitem__(self, thing: Any) -> sparse.coo_matrix:
    if type(thing) is slice or type(thing) is np.ndarray or type(thing) is int:
        gm = GraphManager(None, axis=self.axis)
        for key, g in self.items():
            # Slice the graph matrix properly without making it dense
            (a, b, w) = (g.row, g.col, g.data)
            indices = np.arange(g.shape[0])[thing]
            mask = np.logical_and(np.in1d(a, indices), np.in1d(b, indices))
            a = a[mask]
            b = b[mask]
            w = w[mask]
            d = dict(zip(np.sort(indices), np.arange(indices.shape[0])))
            a = np.array([d[x] for x in a])
            b = np.array([d[x] for x in b])
            gm[key] = sparse.coo_matrix((w, (a, b)),
                                        shape=(len(indices), len(indices)))
        return gm
    else:
        return self.__getattr__(thing)

def __init__(self, *args):
    '''
    Initialization of the perceptron with given sizes.
    '''
    self.shape = args
    n = len(args)
    self.bp_times = []

    # Build layers
    self.layers = []
    # Input layer (+1 unit for bias)
    self.layers.append(sci_sp.csc_matrix(np.ones(self.shape[0] + 1)))
    # Hidden layer(s) + output layer
    for i in range(1, n):
        self.layers.append(sci_sp.csc_matrix(np.ones(self.shape[i])))

    # Build weights matrix (randomly)
    self.weights = []
    self.sparsifiers = []
    self.has_sparsified = False
    for i in range(n - 1):
        new_weights = (2 * (npr.random((self.layers[i].size,
                                        self.layers[i + 1].size))) - 1) * 0.00001
        self.weights.append(sci_sp.csc_matrix(new_weights))
        self.sparsifiers.append(sci_sp.coo_matrix(np.ones_like(new_weights)))

def sparsify(self):
    self.has_sparsified = True
    for i in range(len(self.weights) - 1):  # not the softmax layer
        # the 50th percentile of existing weights above 0
        # (left as np.percentile, not median, for experimentation)
        thresh = np.percentile(
            np.abs(self.weights[i].toarray()[np.abs(self.weights[i].toarray()) > 0]),
            50
        )
        # add to the sparsifier; killing based on the sparsifier happens
        # in the actual backprop
        new_sparsifier = self.sparsifiers[i].toarray()
        self.sparsifiers[i] = sci_sp.coo_matrix(
            np.logical_and(np.abs(self.weights[i].toarray()) > thresh, new_sparsifier))
        self.weights[i][np.abs(self.weights[i].toarray()) < thresh] = 0
        self.weights[i].eliminate_zeros()

def merge_nearest_neighbors(filenames, total_rows):
    """ Merge nearest neighbor adjacency matrix HDF files.

    Returns:
        A sparse adjacency matrix
    """
    nn = sp_sparse.coo_matrix((total_rows, total_rows))
    for filename in filenames:
        h5 = h5py.File(filename, 'r')
        nn += sp_sparse.coo_matrix((np.ones(len(h5['i'])),
                                    (h5['i'][:], h5['j'][:])),
                                   shape=nn.shape)
        h5.close()
    return nn

def tocoo(self):
    if type(self.m) is not sp_sparse.coo_matrix:
        self.m = self.m.tocoo()

def save_pkl_files(dsm_prefix, dsm, save_in_one_file=False):
    """
    Save the space to separate pkl files.
    :param dsm_prefix:
    :param dsm:
    """
    # Save in a single file (for small spaces)
    if save_in_one_file:
        io_utils.save(dsm, dsm_prefix + '.pkl')
    # Save in multiple files: npz for the matrix and pkl for the other data members of Space
    else:
        mat = coo_matrix(dsm.cooccurrence_matrix.get_mat())
        np.savez_compressed(dsm_prefix + 'cooc.npz',
                            data=mat.data, row=mat.row, col=mat.col, shape=mat.shape)
        with open(dsm_prefix + '_row2id.pkl', 'wb') as f_out:
            pickle.dump(dsm._row2id, f_out, 2)
        with open(dsm_prefix + '_id2row.pkl', 'wb') as f_out:
            pickle.dump(dsm._id2row, f_out, 2)
        with open(dsm_prefix + '_column2id.pkl', 'wb') as f_out:
            pickle.dump(dsm._column2id, f_out, 2)
        with open(dsm_prefix + '_id2column.pkl', 'wb') as f_out:
            pickle.dump(dsm._id2column, f_out, 2)

def load_pkl_files(dsm_prefix):
    """
    Load the space from either a single pkl file or numerous files.
    :param dsm_prefix:
    """
    # Check whether there is a single pickle file for the Space object
    if os.path.isfile(dsm_prefix + '.pkl'):
        return io_utils.load(dsm_prefix + '.pkl')
    # Load the multiple files: npz for the matrix and pkl for the other data members of Space
    with np.load(dsm_prefix + 'cooc.npz') as loader:
        coo = coo_matrix((loader['data'], (loader['row'], loader['col'])),
                         shape=loader['shape'])
    cooccurrence_matrix = SparseMatrix(csr_matrix(coo))
    with open(dsm_prefix + '_row2id.pkl', 'rb') as f_in:
        row2id = pickle.load(f_in)
    with open(dsm_prefix + '_id2row.pkl', 'rb') as f_in:
        id2row = pickle.load(f_in)
    with open(dsm_prefix + '_column2id.pkl', 'rb') as f_in:
        column2id = pickle.load(f_in)
    with open(dsm_prefix + '_id2column.pkl', 'rb') as f_in:
        id2column = pickle.load(f_in)
    return Space(cooccurrence_matrix, id2row, id2column,
                 row2id=row2id, column2id=column2id)