Python scipy.sparse 模块：csr_matrix() 实例源码

我们从 Python 开源项目中提取了以下 50 个代码示例，用于说明如何使用 scipy.sparse.csr_matrix()。

项目:recom-system    作者:tizot    | 项目源码 | 文件源码
def dataset_from_file(filename):
    """Load a dataset stored as a CSR-encoded ``.npz`` archive.

    The archive must provide the members ``num_entries``, ``data``,
    ``indices``, ``indptr``, ``shape`` and ``ngrams``.

    Args:
        filename (string): the name of the file from which extract the dataset

    Returns:
        tuple: the dataset as a 3-D np.ndarray of shape
            ``(rows // num_entries, num_entries, num_features)`` and the
            ``ngrams`` array read from the archive
    """
    # np.load returns a lazily-read NpzFile that keeps the file open, so all
    # members are materialised inside the ``with`` block before it closes.
    with np.load(filename) as loader:
        num_entries = int(loader['num_entries'][0])
        sp_dataset = sparse.csr_matrix(
            (loader['data'], loader['indices'], loader['indptr']),
            shape=tuple(int(dim) for dim in loader['shape']))
        ngrams = loader['ngrams']

    dataset = sp_dataset.toarray()
    samp_entries, num_features = dataset.shape
    # Integer floor division: the reshape dimension must be an exact int and
    # should not round-trip through a float.
    num_samples = samp_entries // num_entries
    return dataset.reshape(num_samples, num_entries, num_features), ngrams
项目:genomedisco    作者:kundajelab    | 项目源码 | 文件源码
def construct_csr_matrix_from_data_and_nodes(f,nodes,blacklisted_nodes,remove_diag=True):
    """Build a square CSR matrix from a gzipped, tab-separated edge list.

    Each line of ``f`` is read as ``<node1>\\t<node2>\\t<value>``; the two node
    names are translated to matrix indices through ``nodes[name]['idx']``.

    :param f: path of the gzip-compressed interaction file
    :param nodes: mapping from node name to a dict holding an ``'idx'`` entry
    :param blacklisted_nodes: passed through unchanged to ``filter_nodes``
    :param remove_diag: when true, the diagonal of the matrix is set to 0
    :return: whatever ``filter_nodes(csr_m, blacklisted_nodes)`` returns
    """
    # Parenthesised single-argument form prints the same text on Python 2 and 3.
    print("GenomeDISCO | "+strftime("%c")+" | processing: Loading interaction data from "+f)

    total_nodes=len(nodes)
    i=[]
    j=[]
    v=[]

    # The context manager closes the gzip handle even if a line is malformed.
    with gzip.open(f) as handle:
        for line in handle:
            # gzip yields bytes on Python 3; decode so the str split below works.
            if not isinstance(line, str):
                line=line.decode('utf-8')
            items=line.strip().split('\t')
            n1,n2,val=nodes[items[0]]['idx'],nodes[items[1]]['idx'],float(items[2])
            i.append(n1)
            j.append(n2)
            v.append(val)

    csr_m=csr_matrix( (v,(i,j)), shape=(total_nodes,total_nodes),dtype=float)
    if remove_diag:
        csr_m.setdiag(0)
    return filter_nodes(csr_m,blacklisted_nodes)
项目:genomedisco    作者:kundajelab    | 项目源码 | 文件源码
def to_transition(m):
    """Symmetrise ``m`` and row-normalise the result.

    The matrix is added to its own transpose (after the transpose's diagonal
    has been set to 0), and every non-zero entry of the sum is divided by its
    row total.  Entries in rows whose total is not positive become 0.

    :param m: sparse matrix supporting ``transpose``, ``+`` and ``nonzero``
    :return: csr_matrix of floats with the same shape as the symmetrised sum
    """
    upper = m
    lower = upper.transpose()
    lower.setdiag(0)
    combined = upper + lower
    row_totals = combined.sum(axis=1)
    nz_rows, nz_cols = combined.nonzero()

    rows = []
    cols = []
    weights = []
    for r, c in zip(nz_rows, nz_cols):
        rows.append(r)
        cols.append(c)
        total = row_totals[r, 0]
        if total > 0:
            weights.append(float(combined[r, c]) / float(total))
        else:
            weights.append(0)
    return csr_matrix((weights, (rows, cols)), shape=combined.get_shape(), dtype=float)
项目:deep_architect    作者:negrinho    | 项目源码 | 文件源码
def _compute_features(self, model):
    """Build a 1 x nfeats sparse feature row for ``model``.

    Column 0 holds the number of entries returned by ``model.repr_model()``;
    the remaining columns count occurrences of the n-grams registered in
    ``self.module_ngram_to_id`` (column ``1 + id``).

    :param model: object exposing ``repr_model()``; the first element of each
        returned item is used as that item's token
    :return: ``scipy.sparse.csr_matrix`` of shape (1, nfeats), dtype float32
    """
    bls = [b[0] for b in tuple(model.repr_model())]

    nfeats_other = 1
    nfeats_ngrams = len(self.module_ngram_to_id)
    nfeats = nfeats_other + nfeats_ngrams
    feats = sp.dok_matrix((1, nfeats), dtype=np.float32)

    # other features
    feats[0, 0] = len(bls)

    # ngrams features
    # ``range`` replaces the Python-2-only ``xrange`` and works on both.
    # NOTE(review): the bounds stop one short of ``ngram_maxlen`` and of the
    # last window position -- kept as-is; confirm whether that is intended.
    for k in range(1, self.ngram_maxlen):
        for i in range(len(bls) - k):
            ngram = tuple(bls[i:i + k])

            if ngram in self.module_ngram_to_id:
                ngram_i = self.module_ngram_to_id[ngram]
                feats_i = nfeats_other + ngram_i

                feats[0, feats_i] += 1.0

    return sp.csr_matrix(feats)
项目:hidi    作者:VEVO    | 项目源码 | 文件源码
def transform(self, df, **kwargs):
    """
    Takes a dataframe that has :code:`link_id`, :code:`item_id` and
    :code:`score` columns.

    Returns a SciPy :code:`csr_matrix` together with the merged keyword
    arguments. Rows follow the order in which each :code:`link_id` first
    appears, columns the order in which each :code:`item_id` first appears.

    :param df: The DataFrame to make a sparse matrix from. Must have
        :code:`link_id`, :code:`item_id`, and :code:`score` columns.
    :type df: pandas.DataFrame
    :rtype: tuple of (scipy.sparse.csr_matrix, dict)
    """
    link_u = list(df.link_id.unique())
    item_u = list(df.item_id.unique())
    # ``Series.as_matrix`` was removed from pandas; ``.values`` is available
    # in both old and current releases.
    data = df.score.values

    # ``astype('category', categories=...)`` was removed from pandas as well.
    # Mapping each id to its position in the unique list yields the same
    # integer codes without relying on the categorical API.
    link_pos = {link: pos for pos, link in enumerate(link_u)}
    item_pos = {item: pos for pos, item in enumerate(item_u)}
    row = df.link_id.map(link_pos).values
    col = df.item_id.map(item_pos).values

    outshape = (len(link_u), len(item_u))
    in_tuple = (data, (row, col))
    kwargs = self.merge_kwargs(dict(links=link_u, items=item_u), kwargs)

    return csr_matrix(in_tuple, shape=outshape), kwargs
项目:histwords    作者:williamleif    | 项目源码 | 文件源码
def calc_pmi(counts, cds):
    """
    Calculates e^PMI; PMI without the log().

    Row sums and column sums of ``counts`` are taken; the column sums are
    raised to the power ``cds`` when ``cds != 1``.  The counts are then scaled
    by the reciprocal row sums, the reciprocal column sums and finally by the
    total of the (possibly smoothed) column sums.
    """
    row_totals = np.array(counts.sum(axis=1))[:, 0]
    col_totals = np.array(counts.sum(axis=0))[0, :]
    if cds != 1:
        col_totals = col_totals ** cds
    grand_total = col_totals.sum()

    inv_rows = np.reciprocal(row_totals)
    inv_cols = np.reciprocal(col_totals)

    scaled = multiply_by_rows(csr_matrix(counts), inv_rows)
    scaled = multiply_by_columns(scaled, inv_cols)
    return scaled * grand_total
项目:feagen    作者:ianlini    | 项目源码 | 文件源码
def write_data(self, result_dict):
    """Write every entry of ``result_dict`` to ``self.h5f`` and flush it.

    Raises ``ValueError`` when an entry contains NaN and
    ``NotImplementedError`` when the key already exists in the file.
    CSC/CSR matrices are stored through ``h5sparse``; everything else goes
    through ``self.h5f.create_dataset``.
    """
    sparse_formats = (ss.csc_matrix, ss.csr_matrix)
    for key, result in six.iteritems(result_dict):
        # For sparse inputs only the stored values are checked for NaN.
        values = result.data if ss.isspmatrix(result) else result
        if np.isnan(values).any():
            raise ValueError("data {} have nan".format(key))

        message = "Writing generated data {} to hdf5 file".format(key)
        with SimpleTimer(message, end_in_new_line=False):
            if key in self.h5f:
                raise NotImplementedError("Overwriting not supported.")
            if isinstance(result, sparse_formats):
                h5sparse.Group(self.h5f).create_dataset(key, data=result)
            else:
                self.h5f.create_dataset(key, data=result)
    self.h5f.flush()
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def __deflationMatrix(self, theSet, theHang, theIndex, withHanging=True, asOnes=False):
    """Assemble the sparse matrix mapping reduced indices to full indices.

    Every element of ``theSet`` whose index (``theIndex[element]``) is not a
    key of ``theHang`` gets its own column with a single 1.0 entry.  When
    ``withHanging`` is true, each key of ``theHang`` additionally receives one
    entry per ``(index, weight)`` pair listed for it, placed in the column of
    the referenced index; the entry is 1.0 if ``asOnes`` else ``weight``.

    :return: csr_matrix of shape (len(theSet), number of reduced indices)
    """
    reducedInd = {}  # final reduced index
    I, J, V = [], [], []
    next_col = 0
    for fx in sorted(theSet):
        full_idx = theIndex[fx]
        if full_idx in theHang:
            continue
        reducedInd[full_idx] = next_col
        I.append(full_idx)
        J.append(next_col)
        V.append(1.0)
        next_col += 1

    if withHanging:
        for hfkey, hf in theHang.items():
            I.extend([hfkey] * len(hf))
            J.extend([reducedInd[entry[0]] for entry in hf])
            if asOnes:
                V.extend([1.0] * len(hf))
            else:
                V.extend([entry[1] for entry in hf])
    return sp.csr_matrix((V, (I, J)), shape=(len(theSet), len(reducedInd)))
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def getInterpolationMat(self, loc, locType='CC', zerosOutside=False):
    """Return the interpolation matrix for the requested locations.

    Public entry point that forwards all arguments unchanged to
    ``self._getInterpolationMat``.

    :param numpy.ndarray loc: Location of points to interpolate to
    :param str locType: What to interpolate (see below)
    :param bool zerosOutside: forwarded as-is to the implementation
    :rtype: scipy.sparse.csr_matrix
    :return: M, the interpolation matrix

    locType can be::

        'Ex'    -> x-component of field defined on edges
        'Ey'    -> y-component of field defined on edges
        'Ez'    -> z-component of field defined on edges
        'Fx'    -> x-component of field defined on faces
        'Fy'    -> y-component of field defined on faces
        'Fz'    -> z-component of field defined on faces
        'N'     -> scalar field defined on nodes
        'CC'    -> scalar field defined on cell centers
        'CCVx'  -> x-component of vector field defined on cell centers
        'CCVy'  -> y-component of vector field defined on cell centers
        'CCVz'  -> z-component of vector field defined on cell centers
    """
    interpolation = self._getInterpolationMat(loc, locType, zerosOutside)
    return interpolation
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def _getInnerProductDeriv(self, prop, projType, doFast=True, invProp=False, invMat=False):
    """Return the derivative of the inner product matrix.

    If the mesh provides ``_fastInnerProductDeriv`` and ``doFast`` is set,
    its result is returned whenever it is not ``None``.  Otherwise a function
    of ``v`` is returned that evaluates the derivative on demand.

    :param numpy.array prop: material property (tensor properties are possible) at each cell center (nC, (1, 3, or 6))
    :param str projType: 'F' for faces 'E' for edges
    :param bool doFast: do a faster implementation if available.
    :param bool invProp: inverts the material property
    :param bool invMat: inverts the matrix
    :rtype: scipy.sparse.csr_matrix
    :return: dMdm, the derivative of the inner product matrix (nE, nC*nA)
    :raises NotImplementedError: when no fast result is available and
        ``invProp`` or ``invMat`` is requested
    """
    if doFast and hasattr(self, '_fastInnerProductDeriv'):
        fast = self._fastInnerProductDeriv(projType, prop, invProp=invProp, invMat=invMat)
        if fast is not None:
            return fast

    # The general path below has no support for the inverted variants.
    if invProp or invMat:
        raise NotImplementedError('inverting the property or the matrix is not yet implemented for this mesh/tensorType. You should write it!')

    tensorType = TensorType(self, prop)
    P = self._getInnerProductProjectionMatrices(projType, tensorType=tensorType)

    def innerProductDeriv(v):
        return self._getInnerProductDerivFunction(tensorType, P, projType, v)

    return innerProductDeriv
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def test_FaceInnerProductAnisotropicDerivInvProp(self):
    """Derivative check of the face inner product with ``invProp=True``."""
    mesh = self.mesh

    def fun(x):
        # Three identical columns are built from the model vector.
        x_cols = np.repeat(np.atleast_2d(x), 3, axis=0).T
        x0_cols = np.repeat(self.x0, 3, axis=0).T

        # P is the block row [I, 0, I] applied to the derivative.
        blank = sp.csr_matrix((mesh.nC, mesh.nC))
        ident = sp.eye(mesh.nC)
        projector = sp.vstack([sp.hstack([ident, blank, ident])])

        MfSig = mesh.getFaceInnerProduct(x_cols, invProp=True)
        MfSigDeriv = mesh.getFaceInnerProductDeriv(x0_cols, invProp=True)
        return MfSig*self.face_vec, MfSigDeriv(self.face_vec) * projector.T

    print('Testing FaceInnerProduct Anisotropic InvProp')
    passed = Tests.checkDerivative(fun, self.x0, num=7, tolerance=TOLD,
                                   plotIt=False)
    return self.assertTrue(passed)
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def test_FaceInnerProductAnisotropicDerivInvMat(self):
    """Derivative check of the face inner product with ``invMat=True``."""
    mesh = self.mesh

    def fun(x):
        # Three identical columns are built from the model vector.
        x_cols = np.repeat(np.atleast_2d(x), 3, axis=0).T
        x0_cols = np.repeat(self.x0, 3, axis=0).T

        # P is the block row [I, 0, I] applied to the derivative.
        blank = sp.csr_matrix((mesh.nC, mesh.nC))
        ident = sp.eye(mesh.nC)
        projector = sp.vstack([sp.hstack([ident, blank, ident])])

        MfSig = mesh.getFaceInnerProduct(x_cols, invMat=True)
        MfSigDeriv = mesh.getFaceInnerProductDeriv(x0_cols, invMat=True)
        return MfSig*self.face_vec, MfSigDeriv(self.face_vec) * projector.T

    print('Testing FaceInnerProduct Anisotropic InvMat')
    passed = Tests.checkDerivative(fun, self.x0, num=7, tolerance=TOLD,
                                   plotIt=False)
    return self.assertTrue(passed)
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def test_FaceInnerProductAnisotropicDerivInvPropInvMat(self):
    """Derivative check of the face inner product with ``invProp`` and ``invMat``."""
    mesh = self.mesh

    def fun(x):
        # Three identical columns are built from the model vector.
        x_cols = np.repeat(np.atleast_2d(x), 3, axis=0).T
        x0_cols = np.repeat(self.x0, 3, axis=0).T

        # P is the block row [I, 0, I] applied to the derivative.
        blank = sp.csr_matrix((mesh.nC, mesh.nC))
        ident = sp.eye(mesh.nC)
        projector = sp.vstack([sp.hstack([ident, blank, ident])])

        MfSig = mesh.getFaceInnerProduct(x_cols, invProp=True, invMat=True)
        MfSigDeriv = mesh.getFaceInnerProductDeriv(x0_cols, invProp=True,
                                                   invMat=True)
        return MfSig*self.face_vec, MfSigDeriv(self.face_vec) * projector.T

    print('Testing FaceInnerProduct Anisotropic InvProp InvMat')
    passed = Tests.checkDerivative(fun, self.x0, num=7, tolerance=TOLD,
                                   plotIt=False)
    return self.assertTrue(passed)
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def test_EdgeInnerProductAnisotropicDeriv(self):
    """Derivative check of the edge inner product for a three-column property."""
    mesh = self.mesh

    def fun(x):
        # Three identical columns are built from the model vector.
        x_cols = np.repeat(np.atleast_2d(x), 3, axis=0).T
        x0_cols = np.repeat(self.x0, 3, axis=0).T

        # P is the block row [0, I, 0] applied to the derivative.
        blank = sp.csr_matrix((mesh.nC, mesh.nC))
        ident = sp.eye(mesh.nC)
        projector = sp.vstack([sp.hstack([blank, ident, blank])])

        MeSig = mesh.getEdgeInnerProduct(x_cols.reshape(mesh.nC, 3))
        MeSigDeriv = mesh.getEdgeInnerProductDeriv(x0_cols)
        return MeSig*self.edge_vec, MeSigDeriv(self.edge_vec) * projector.T

    print('Testing EdgeInnerProduct Anisotropic')
    passed = Tests.checkDerivative(fun, self.x0, num=7, tolerance=TOLD,
                                   plotIt=False)
    return self.assertTrue(passed)
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def test_EdgeInnerProductAnisotropicDerivInvProp(self):
    """Derivative check of the edge inner product with ``invProp=True``."""
    mesh = self.mesh

    def fun(x):
        # Three identical columns are built from the model vector.
        x_cols = np.repeat(np.atleast_2d(x), 3, axis=0).T
        x0_cols = np.repeat(self.x0, 3, axis=0).T

        # P is the block row [0, I, 0] applied to the derivative.
        blank = sp.csr_matrix((mesh.nC, mesh.nC))
        ident = sp.eye(mesh.nC)
        projector = sp.vstack([sp.hstack([blank, ident, blank])])

        MeSig = mesh.getEdgeInnerProduct(x_cols, invProp=True)
        MeSigDeriv = mesh.getEdgeInnerProductDeriv(x0_cols, invProp=True)
        return MeSig*self.edge_vec, MeSigDeriv(self.edge_vec) * projector.T

    print('Testing EdgeInnerProduct Anisotropic InvProp')
    passed = Tests.checkDerivative(fun, self.x0, num=7, tolerance=TOLD,
                                   plotIt=False)
    return self.assertTrue(passed)
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def test_EdgeInnerProductAnisotropicDerivInvPropInvMat(self):
    """Derivative check of the edge inner product with ``invProp`` and ``invMat``."""
    mesh = self.mesh

    def fun(x):
        # Three identical columns are built from the model vector.
        x_cols = np.repeat(np.atleast_2d(x), 3, axis=0).T
        x0_cols = np.repeat(self.x0, 3, axis=0).T

        # P is the block row [0, I, 0] applied to the derivative.
        blank = sp.csr_matrix((mesh.nC, mesh.nC))
        ident = sp.eye(mesh.nC)
        projector = sp.vstack([sp.hstack([blank, ident, blank])])

        MeSig = mesh.getEdgeInnerProduct(x_cols, invProp=True, invMat=True)
        MeSigDeriv = mesh.getEdgeInnerProductDeriv(x0_cols, invProp=True,
                                                   invMat=True)
        return MeSig*self.edge_vec, MeSigDeriv(self.edge_vec) * projector.T

    print('Testing EdgeInnerProduct Anisotropic InvProp InvMat')
    passed = Tests.checkDerivative(fun, self.x0, num=7, tolerance=TOLD,
                                   plotIt=False)
    return self.assertTrue(passed)
项目:wikilinks    作者:trovdimi    | 项目源码 | 文件源码
def zero_rows_norm_eff1(self,hypothesis, structur, k):
    """Fill the zero-sum rows of ``hypothesis`` from ``structur``.

    For every row of ``hypothesis`` whose sum is 0, the value ``k`` is spread
    evenly over the columns where the same row of ``structur`` is non-zero,
    and the resulting sparse matrix is added to ``hypothesis``.

    NOTE(review): the visible code rebinds the local ``hypothesis`` and then
    ends without returning it, so callers receive ``None``.  The snippet may
    be truncated -- confirm against the full source before relying on it.
    """
    # find zero sum rows in hypothesis
    norma = hypothesis.sum(axis=1)
    zero_rows = np.where(norma == 0)[0]
    # norm the structure matrix
    i_index = []
    j_index = []
    values = []
    for i in zero_rows:
        links = np.where(structur[i,:]!=0)[0]
        n_links = len(links)
        if n_links == 0:
            # No links means nothing to distribute; skipping also avoids the
            # ZeroDivisionError the division below would raise.
            continue
        value = k / n_links
        i_index.extend([i] * n_links)
        j_index.extend(links)
        values.extend([value] * n_links)
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy; the builtin is the exact replacement.
    hypothesis = hypothesis + csr_matrix((values, (i_index, j_index)),
                                         shape=hypothesis.shape, dtype=float)
项目:ltls    作者:kjasinska    | 项目源码 | 文件源码
def assign(self):
    """Pair classes with paths using two seriation orderings.

    ``self.seriation`` is applied to ``G * G^T`` (``G`` comes from
    ``self.path_edge_matrix(self.paths)``) and to ``Y^T * Y`` (``Y`` is
    ``self.Y`` as a float32 CSR matrix).  The i-th class of the second
    ordering is paired with the i-th path of the first.

    :return: tuple ``(classmap, pathmap)`` -- class -> path id and the
        inverse path id -> class mapping
    """
    G, path_index = self.path_edge_matrix(self.paths)
    H = np.dot(G, G.T)

    perm_paths = self.seriation(H)

    Y = sparse.csr_matrix(self.Y, dtype=np.float32)
    # ``np.dot`` is not sparse-aware; use the matrix's own ``dot`` for the
    # sparse product instead of relying on object-array fallback behaviour.
    D = Y.transpose().dot(Y)
    perm_classes = self.seriation(D)

    classmap = {}
    pathmap = {}

    # ``range`` replaces the Python-2-only ``xrange``.
    for i in range(len(perm_classes)):
        path_id = path_index[perm_paths[i]]
        classmap[perm_classes[i]] = path_id
        pathmap[path_id] = perm_classes[i]
    return classmap, pathmap
项目:ltls    作者:kjasinska    | 项目源码 | 文件源码
def predict_topk(self, X, k=5):
    """Predict the top-``k`` classes for every sample of ``X``.

    For each sample yielded by ``self.iterate_dataset(X)`` a ranking is
    obtained via ``self.create_ranking``; element ``[1]`` of each ranked item
    is translated to a class through ``self.path_to_class_map``.

    :param X: dataset; ``X.shape[0]`` gives the number of output rows
    :param k: number of ranks to produce
    :return: list of ``k`` csr_matrix objects of shape
        ``(X.shape[0], self.n_classes)``; matrix ``r`` has a 1 at
        ``(sample, class)`` for the class ranked ``r`` for that sample
    """
    # ``range``/``enumerate`` replace the Python-2-only ``xrange`` and the
    # hand-maintained counters.
    row = [[] for _ in range(k)]
    col = [[] for _ in range(k)]
    for i, x in enumerate(self.iterate_dataset(X)):
        edge_weight = self.evaluate_model(x, self.w)
        ranking = self.create_ranking(edge_weight, k)
        for rank, path in enumerate(ranking):
            yhat = self.path_to_class_map[path[1]]
            col[rank].append(yhat)
            row[rank].append(i)

    shape = (X.shape[0], self.n_classes)
    Yhats = []
    for rank_rows, rank_cols in zip(row, col):
        # Explicit integer dtypes keep the index arrays integral even when a
        # rank received no predictions (an empty list would become float64).
        row1 = np.array(rank_rows, dtype=int)
        col1 = np.array(rank_cols, dtype=int)
        data1 = np.ones(len(rank_rows), dtype=int)
        Yhats.append(sparse.csr_matrix((data1, (row1, col1)), shape=shape))
    return Yhats
项目:quoll    作者:LanguageMachines    | 项目源码 | 文件源码
def run(self):
    """Compute feature correlations and write them as tab-separated lines.

    Reads the CSR matrix stored in ``self.in_vectors()`` and the feature
    names in ``self.in_featurenames()``, asks ``vectorizer`` for the feature
    correlations and writes one line per result to
    ``self.out_feature_correlation()``: the two feature indices, their names
    and the three remaining values.
    """
    # open instances
    archive = numpy.load(self.in_vectors().path)
    csr_parts = (archive['data'], archive['indices'], archive['indptr'])
    instances = sparse.csr_matrix(csr_parts, shape = archive['shape'])

    # load feature names
    with open(self.in_featurenames().path,'r',encoding='utf-8') as infile:
        featurenames = infile.read().strip().split('\n')

    # calculate feature correlations
    feature_correlation = vectorizer.calculate_feature_correlation(instances)

    # write to file
    with open(self.out_feature_correlation().path,'w',encoding='utf-8') as out:
        rows = []
        for fc in feature_correlation:
            fields = [str(fc[0]), str(fc[1]), featurenames[fc[0]],
                      featurenames[fc[1]], str(fc[2]), str(fc[3]), str(fc[4])]
            rows.append('\t'.join(fields))
        out.write('\n'.join(rows))
项目:quoll    作者:LanguageMachines    | 项目源码 | 文件源码
def run(self):
    """Append each label to its instance vector and write the rows as CSV.

    Reads the CSR matrix stored in ``self.in_vectors()`` and one label per
    line from ``self.in_labels()``, then writes the densified rows (label
    appended last) to ``self.out_instances()``.  If the number of vectors and
    labels differs, a message is printed and nothing is written.
    """
    # load vectors
    loader = numpy.load(self.in_vectors().path)
    vectors = sparse.csr_matrix((loader['data'], loader['indices'], loader['indptr']), shape = loader['shape'])

    # load labels
    with open(self.in_labels().path,'r',encoding='utf-8') as file_in:
        labels = file_in.read().strip().split('\n')

    # combine vectors and labels
    if vectors.shape[0] != len(labels):
        print('instances and labels do not align, exiting program...')
        # Actually stop, as the message announces: continuing would either
        # raise an IndexError (more labels than vectors) or silently write
        # rows without a label (fewer labels than vectors).
        return
    instances_list = vectors.toarray().tolist()
    for i, label in enumerate(labels):
        instances_list[i].append(label)

    # write to file
    lw = linewriter.Linewriter(instances_list)
    lw.write_csv(self.out_instances().path)
项目:quoll    作者:LanguageMachines    | 项目源码 | 文件源码
def run(self):
    """Generate and store random vector and parameter populations.

    The number of columns of the CSR matrix in ``self.in_vectors()`` sets the
    vector dimensionality.  Every line of ``self.in_parameter_options()`` is
    turned into the list of indices of its whitespace-separated tokens.
    Both populations are saved in CSR component form with ``numpy.savez``.
    """
    def save_csr(path, matrix):
        # Store the four components needed to rebuild the CSR matrix.
        numpy.savez(path, data=matrix.data, indices=matrix.indices,
                    indptr=matrix.indptr, shape=matrix.shape)

    # read in vectors
    archive = numpy.load(self.in_vectors().path)
    csr_parts = (archive['data'], archive['indices'], archive['indptr'])
    instances = sparse.csr_matrix(csr_parts, shape = archive['shape'])
    num_dimensions = instances.shape[1]

    # generate vectorpopulation
    vector_population = ga_functions.random_vectorpopulation(num_dimensions, self.population_size)
    save_csr(self.out_vectorpopulation().path, vector_population)

    # read in parameter options
    with open(self.in_parameter_options().path) as infile:
        lines = infile.read().rstrip().split('\n')
        parameter_options = [list(range(len(line.split()))) for line in lines]

    # generate parameterpopulation
    parameter_population = ga_functions.random_parameterpopulation(parameter_options, self.population_size)
    save_csr(self.out_parameterpopulation().path, parameter_population)



################################################################################
###GA Iterator
################################################################################
项目:pysapc    作者:bioinfocao    | 项目源码 | 文件源码
def matixToRowColDataArr(X):
    """
    Convert sparse affinity/similarity matrix to numpy array format (row_array,col_array,data_array)
    So cython update function can work efficiently on it.
    """
    # convert to coo format (from lil,csr,csc)
    if isinstance(X, coo_matrix):
        X_coo=X
    elif (isinstance(X, csr_matrix)) or (isinstance(X, lil_matrix)):
        X_coo=X.tocoo()
    else: # others like numpy matrix could be convert to coo matrix
        X_coo=coo_matrix(X)
    # Upcast matrix to a floating point format (if necessary)
    X_coo=X_coo.asfptype() 
    # get row_array,col_array,data_array in their correct data type (for cython to work)
    row_array,col_array,data_array=X_coo.row.astype(np.int),X_coo.col.astype(np.int),X_coo.data

    return row_array,col_array,data_array
项目:pysapc    作者:bioinfocao    | 项目源码 | 文件源码
def denseToSparseAbvCutoff(self, denseMatrix, cutoff):
    """
    Keep only the entries of denseMatrix that are greater than or equal to
    cutoff and return them as a sparse matrix of the same shape.
    Parameters:
    ----------------------
    denseMatrix: dense numpy matrix

    cutoff: int or float

    Returns
    ----------------------
    Scipy csr_matrix

    """
    keep = denseMatrix >= cutoff
    kept_values = np.asarray(denseMatrix[keep]).reshape(-1)
    kept_positions = np.nonzero(keep)
    return csr_matrix((kept_values, kept_positions), shape=denseMatrix.shape)
项目:pysapc    作者:bioinfocao    | 项目源码 | 文件源码
def denseToSparseTopPercentage(self, denseMatrix, percentage=10.0):
    """
    Keep top percentage (such as 10%) of data points, remove all others. Convert into sparse matrix.
    Parameters:
    ----------------------
    denseMatrix: dense numpy matrix (a plain 2-D ndarray is accepted as well)

    percentage: float, default is 10.0
        percentage of top data points to keep. default is 10.0% that is for 10000 data points keep top 1000.
        At least one data point is always kept.

    Returns
    ----------------------
    Scipy csr_matrix

    """
    rowN, colN = denseMatrix.shape
    totalN = rowN*colN
    # Clamp to [1, totalN]: with topN == 0 the index ``[-0]`` would select the
    # smallest value as cutoff and therefore keep every data point.
    topN = max(1, min(int(totalN*(percentage/100.0)), totalN))
    # ``asarray(...).ravel()`` flattens both np.matrix and ndarray inputs.
    arr = np.asarray(denseMatrix).ravel()
    cutoff = np.sort(arr)[-topN]
    return self.denseToSparseAbvCutoff(denseMatrix, cutoff)
项目:keras    作者:GeekLiB    | 项目源码 | 文件源码
def test_sparse_dot(self):
    """Check that a sparse-by-dense ``dot`` matches the dense computation.

    Runs against KTF and, when ``KTH.th_sparse_module`` is truthy, KTH too.
    """
    values = np.array([0, 7, 2, 3], dtype=np.float32)
    row_idx = np.array([0, 2, 2, 3], dtype=np.int64)
    col_idx = np.array([4, 3, 2, 3], dtype=np.int64)

    x_sparse = sparse.csr_matrix((values, (row_idx, col_idx)), shape=(4, 5))
    x_dense = x_sparse.toarray()

    W = np.random.random((5, 4))

    # Theano has some dependency issues for sparse
    backends = [KTF] + ([KTH] if KTH.th_sparse_module else [])

    for K in backends:
        t_W = K.variable(W)
        sparse_out = K.eval(K.dot(K.variable(x_sparse), t_W))
        dense_out = K.eval(K.dot(K.variable(x_dense), t_W))

        assert sparse_out.shape == dense_out.shape
        assert_allclose(sparse_out, dense_out, atol=1e-05)
项目:fastxml    作者:Refefer    | 项目源码 | 文件源码
def stream(self, fn, no_features=False):
    """Yield encoded samples from the text file ``fn``.

    The first line is parsed as three integers (samples, features, classes).
    Every following line is passed to ``self.quantize``.  With
    ``no_features`` the generator yields ``({"labels": res}, res)``;
    otherwise it yields ``({"labels": y}, X, y)`` where ``X`` is a
    1 x n_feats float32 csr_matrix built from the quantized columns/values.

    NOTE(review): the line with enumerate index 0 (the first line after the
    header) is skipped -- confirm this matches the input format.
    """
    with open(fn, 'rt') as f:
        header = f.readline().split()
        n_samples, n_feats, n_classes = [int(token) for token in header]
        for i, line in enumerate(f):
            if i == 0:
                continue

            if self.verbose and i % 10000 == 0:
                print("%s docs encoded" % i)

            res = self.quantize(line, no_features)
            if no_features:
                yield {"labels": res}, res
                continue

            (c, d), y = res
            row_idx = [0] * len(d)
            features = sp.csr_matrix((d, (row_idx, c)),
                                     shape=(1, n_feats), dtype='float32')
            yield {"labels": y}, features, y
项目:fastxml    作者:Refefer    | 项目源码 | 文件源码
def predict(self, X, fmt='sparse'):
    """Run the predictor on every row of ``X``.

    With ``fmt='sparse'`` the per-row results are stacked with ``sp.vstack``.
    With ``fmt='dict'`` a list of OrderedDicts is returned, each mapping the
    result's indices to its data values in descending order of value.
    """
    assert fmt in ('sparse', 'dict')
    as_sparse = fmt == 'sparse'
    num = X.shape[0] if isinstance(X, sp.csr_matrix) else len(X)

    results = []
    for i in range(num):
        Xi = X[i]
        mean = self.predictor.predict(Xi.data, Xi.indices,
                                      self.blend, self.gamma, self.leaf_probs)
        if as_sparse:
            results.append(mean)
            continue

        # argsort is ascending, so walk it backwards for largest-first order
        ranked = OrderedDict()
        for idx in reversed(mean.data.argsort()):
            ranked[mean.indices[idx]] = mean.data[idx]
        results.append(ranked)

    return sp.vstack(results) if as_sparse else results
项目:diamond    作者:stitchfix    | 项目源码 | 文件源码
def custom_block_diag(blocks):
    """ create csr sparse block diagonal matrix from identically-sized blocks

    Blocks don't need to be identical, but they do need to be the same shape.
    The index construction uses the first block's number of rows for both
    dimensions of every block.
    """
    n_blocks = len(blocks)
    side = blocks[0].shape[0]

    flattened = [block.flatten() for block in blocks]
    block_len = flattened[0].shape[0]
    positions = np.arange(block_len)

    # lay the flattened blocks end to end in one float buffer
    values = np.zeros(n_blocks * block_len)
    for n, flat in enumerate(flattened):
        values[positions + n * block_len] = flat[positions]

    # now make the block diagonal array
    size = n_blocks * side
    row_idx = np.repeat(np.arange(size), side)
    cols_within_block = np.tile(np.tile(np.arange(side), side), n_blocks)
    block_offsets = np.repeat(np.arange(0, size, side), side * side)
    col_idx = cols_within_block + block_offsets

    return sparse.csr_matrix((values, (row_idx, col_idx)), shape=(size, size))
项目:StructEngPy    作者:zhuoju36    | 项目源码 | 文件源码
def solve_linear(model:Model.fem_model):
    """Solve the reduced linear system of ``model`` and expand the result.

    Reads ``model.K_``, ``model.F_``, ``model.index``, ``model.D``,
    ``model.DOF`` and ``model.node_count``.  On success ``model.is_solved``
    is set to True and the expanded displacement vector (with ``model.D``
    added) is returned.  Any exception raised while solving or expanding is
    printed and ``None`` is returned instead.
    """
    K_bar,F_bar,index=model.K_,model.F_,model.index
    Dvec=model.D
    Logger.info('Solving linear model with %d DOFs...'%model.DOF)
    n_nodes=model.node_count
    try:
        #sparse matrix solution
        # NOTE(review): if ``sl`` is scipy.sparse.linalg, ``spsolve`` has no
        # ``sym_pos`` keyword and this call would raise TypeError, which the
        # broad ``except`` below turns into a ``None`` result -- confirm.
        delta_bar = sl.spsolve(sp.csr_matrix(K_bar),F_bar,sym_pos=True)
        delta = delta_bar
        #fill original displacement vector
        # ``prev`` tracks the position just after the last handled entry of
        # ``index``; zeros are inserted wherever consecutive entries leave a gap.
        prev = 0
        for idx in index:
            gap=idx-prev
            if gap>0:
                delta=np.insert(delta,prev,[0]*gap)
            prev = idx + 1               
            # After the final entry, pad with zeros up to n_nodes*6 values.
            if idx==index[-1] and idx!=n_nodes-1:
                delta = np.insert(delta,prev, [0]*(n_nodes*6-prev))
        delta += Dvec
    except Exception as e:
        # NOTE(review): every failure is reduced to a printed message and a
        # None return; callers must check for None.
        print(e)
        return None
    model.is_solved=True
    return delta
项目:polara    作者:Evfro    | 项目源码 | 文件源码
def get_test_matrix(self, test_data, shape, user_slice=None):
    """Build the CSR test matrix, optionally for a slice of users.

    ``test_data`` is a ``(users, items, feedback)`` triple of coordinate
    arrays.  When ``user_slice`` (``(start, stop)``) is given, ``stop`` is
    capped at ``shape[0]`` and the data is narrowed through
    ``self._slice_test_data``; the matrix then has ``stop - start`` rows.

    :return: tuple ``(test_matrix, coo_data)`` with the coordinate data that
        was actually used
    """
    coo_data = test_data
    num_users = shape[0]
    if user_slice:
        start, stop = user_slice
        stop = min(stop, shape[0])
        num_users = stop - start
        coo_data = self._slice_test_data(test_data, start, stop)

    user_coo, item_coo, fdbk_coo = coo_data
    matrix_shape = (num_users, shape[1])
    test_matrix = csr_matrix((fdbk_coo, (user_coo, item_coo)),
                             shape=matrix_shape,
                             dtype=fdbk_coo.dtype)
    return test_matrix, coo_data
项目:implicit    作者:benfred    | 项目源码 | 文件源码
def test_all_pairs_knn(self):
    """Compare ``all_pairs_knn`` with a brute-force dot-product reference."""
    dense_counts = [[5, 1, 0, 9, 0, 0],
                    [0, 2, 1, 1, 0, 0],
                    [7, 0, 3, 0, 0, 0],
                    [1, 8, 0, 0, 0, 0],
                    [0, 0, 4, 4, 0, 0],
                    [0, 3, 0, 0, 0, 2],
                    [0, 0, 0, 0, 6, 0]]
    counts = csr_matrix(dense_counts, dtype=np.float64)
    counts = implicit.nearest_neighbours.tfidf_weight(counts).tocsr()

    # compute all neighbours using matrix dot product
    all_neighbours = counts.dot(counts.T).tocsr()
    K = 3
    knn = implicit.nearest_neighbours.all_pairs_knn(counts, K).tocsr()

    for rowid in range(counts.shape[0]):
        knn_row = knn[rowid]

        # make sure values match
        for colid, data in zip(knn_row.indices, knn_row.data):
            self.assertAlmostEqual(all_neighbours[rowid, colid], data)

        # make sure top K selected
        reference_row = all_neighbours[rowid]
        ranked = sorted(zip(reference_row.indices, reference_row.data),
                        key=lambda pair: -pair[1])
        expected = set(colid for colid, _ in ranked[:K])
        self.assertEqual(set(knn_row.indices), expected)
项目:skan    作者:jni    | 项目源码 | 文件源码
def _csrget(indices, indptr, data, row, col):
    """Look up a single value in CSR-format arrays.

    Parameters
    ----------
    indices, indptr, data : numpy arrays of int, int, float
        The CSR format data.
    row, col : int
        The matrix coordinates of the desired value.

    Returns
    -------
    dat: float
        The stored value at (row, col), or 0. when no entry is stored there.
    """
    # indptr[row]:indptr[row + 1] delimits the stored entries of this row
    pos = indptr[row]
    stop = indptr[row+1]
    while pos < stop:
        if indices[pos] == col:
            return data[pos]
        pos += 1
    return 0.
项目:zippy    作者:securesystemslab    | 项目源码 | 文件源码
def _represent_ZGate(self, basis, **options):
    """Represent this qubits in the computational basis (ZGate).

    The qubit values are read as a binary number (last value is the least
    significant bit) that selects the single non-zero entry of a column
    vector of length ``2**self.dimension``.  The ``format`` option chooses
    the container: 'sympy' (default), 'numpy' or 'scipy.sparse'.  Any other
    format yields ``None``.
    """
    fmt = options.get('format', 'sympy')

    definite_state = 0
    weight = 1
    for bit in reversed(self.qubit_values):
        definite_state += weight*bit
        weight = weight*2

    result = [0]*(2**self.dimension)
    result[int(definite_state)] = 1

    if fmt == 'sympy':
        return Matrix(result)
    if fmt == 'numpy':
        import numpy as np
        return np.matrix(result, dtype='complex').transpose()
    if fmt == 'scipy.sparse':
        from scipy import sparse
        return sparse.csr_matrix(result, dtype='complex').transpose()
项目:rTensor    作者:erichson    | 项目源码 | 文件源码
def nvecs(X, n, rank, do_flipsign=True, dtype=float):
    """
    Eigendecomposition of mode-n unfolding of a tensor

    Computes ``rank`` eigenvectors of ``Xn * Xn^T`` where ``Xn`` is
    ``X.unfold(n)``; sparse unfoldings use ``eigsh``, dense ones ``eigh``.
    The columns are reversed before being returned, and ``flipsign`` is
    applied when ``do_flipsign`` is true.

    ``dtype`` is only used to convert a sparse unfolding.  Its default is the
    builtin ``float``: ``np.float`` was merely an alias of it and has been
    removed from NumPy, which made the old default fail at definition time.
    """
    Xn = X.unfold(n)
    if issparse_mat(Xn):
        Xn = csr_matrix(Xn, dtype=dtype)
        Y = Xn.dot(Xn.T)
        _, U = eigsh(Y, rank, which='LM')
    else:
        Y = Xn.dot(Xn.T)
        N = Y.shape[0]
        # NOTE(review): recent SciPy releases replaced the ``eigvals`` keyword
        # with ``subset_by_index`` -- confirm the supported SciPy range.
        _, U = eigh(Y, eigvals=(N - rank, N - 1))
    # reverse order of eigenvectors such that eigenvalues are decreasing
    U = array(U[:, ::-1])
    # flip sign
    if do_flipsign:
        U = flipsign(U)
    return U
项目:Movie-Recommendation-System    作者:turq84    | 项目源码 | 文件源码
def get_item_representations(self, features=None):
    """
    Get the latent representations for items given model and features.
    Arguments
    ---------
    features: np.float32 csr_matrix of shape [n_items, n_item_features], optional
         Each row contains that item's weights over features.
         When omitted, the stored item biases and embeddings are returned
         unchanged.
    Returns
    -------
    (item_biases, item_embeddings):
            (np.float32 array of shape n_items,
             np.float32 array of shape [n_items, num_components]
        Biases and latent representations for items.
    """

    self._check_initialized()

    if features is not None:
        # project the per-feature parameters through the feature matrix
        weights = sp.csr_matrix(features, dtype=CYTHON_DTYPE)
        return weights * self.item_biases, weights * self.item_embeddings

    return self.item_biases, self.item_embeddings
项目:Movie-Recommendation-System    作者:turq84    | 项目源码 | 文件源码
def get_user_representations(self, features=None):
    """
    Get the latent representations for users given model and features.
    Arguments
    ---------
    features: np.float32 csr_matrix of shape [n_users, n_user_features], optional
         Each row contains that user's weights over features.
         When omitted, the stored user biases and embeddings are returned
         unchanged.
    Returns
    -------
    (user_biases, user_embeddings):
            (np.float32 array of shape n_users
             np.float32 array of shape [n_users, num_components]
        Biases and latent representations for users.
    """

    self._check_initialized()

    if features is not None:
        # project the per-feature parameters through the feature matrix
        weights = sp.csr_matrix(features, dtype=CYTHON_DTYPE)
        return weights * self.user_biases, weights * self.user_embeddings

    return self.user_biases, self.user_embeddings
项目:cellranger    作者:10XGenomics    | 项目源码 | 文件源码
def mult(A, x, t=False):
    """Multiply the matrix ``A`` with the vector ``x``.

    Args:
        A: a scipy sparse matrix, or any object with a ``dot`` method.
        x: the vector operand; for sparse ``A`` it is wrapped in a
            ``csr_matrix`` first (a 1-D input becomes a single row).
        t: when true compute ``x . A`` instead of ``A . x``.

    Returns:
        For sparse ``A``: the first column of the product, as a 1-D dense
        array. Otherwise whatever ``x.dot(A)`` / ``A.dot(x)`` returns.
    """
    if not sp.issparse(A):
        return x.dot(A) if t else A.dot(x)

    row_vector = sp.csr_matrix(x)
    if t:
        product = row_vector.dot(A).transpose()
    else:
        product = A.dot(row_vector.transpose())
    return product.toarray()[:, 0]
项目:recom-system    作者:tizot    | 项目源码 | 文件源码
def dataset_to_file(dataset, ngrams, filename='dataset'):
    """Store a 3-D dataset as a sparse matrix inside an ``.npz`` archive.

    Args:
        dataset (:class:`np.ndarray`): array of shape
            ``(num_samples, num_entries, num_features)``
        ngrams (list of strings): saved alongside the data under the key ``ngrams``
        filename (string): target passed to :func:`np.savez`

    The archive holds the keys ``num_entries``, ``data``, ``indices``,
    ``indptr``, ``shape`` and ``ngrams``.
    """
    n_samples, n_entries, n_features = dataset.shape
    # Collapse the first two axes: the CSR format is two-dimensional, so the
    # entries of all samples are stacked row after row. ``num_entries`` is
    # stored as well so that a reader can restore the 3-D layout.
    flattened = dataset.reshape(n_samples * n_entries, n_features)
    compressed = sparse.csr_matrix(flattened)
    np.savez(
        filename,
        num_entries=np.array([n_entries]),
        data=compressed.data,
        indices=compressed.indices,
        indptr=compressed.indptr,
        shape=compressed.shape,
        ngrams=ngrams,
    )
项目:SpicePy    作者:giaccone    | 项目源码 | 文件源码
def incidence_matrix(self):
    """
    'incidence_matrix' assembles the sparse node-by-branch incidence matrix

    Every element of self.nodes is unpacked as a pair (N1, N2) describing
    one branch. Node 0 is the ground and gets no row; any other node k is
    stored in row k - 1. Column b holds +1 for N1 and -1 for N2.

    :return: update self with self.A
    """
    values, rows, cols = [], [], []

    for branch, (n_from, n_to) in enumerate(self.nodes):
        # a grounded terminal contributes no entry
        if n_from == 0:
            terminals = ((n_to, -1),)
        elif n_to == 0:
            terminals = ((n_from, 1),)
        else:
            terminals = ((n_from, 1), (n_to, -1))

        for node, sign in terminals:
            values.append(sign)
            rows.append(node - 1)
            cols.append(branch)

    # the shape is left to csr_matrix, which infers it from the indices
    self.A = csr_matrix((values, (rows, cols)))
项目:genomedisco    作者:kundajelab    | 项目源码 | 文件源码
def subsample_to_depth(m,seq_depth):
    """Dispatch subsampling of ``m`` to the routine matching its exact type.

    ``csr_matrix`` inputs go to ``subsample_to_depth_csr_upperTri`` and
    ``np.ndarray`` inputs to ``subsample_to_depth_array_upperTri``. The
    check is on the exact type (subclasses do not match); any other input
    yields ``None``.
    """
    kind = type(m)
    if kind is csr_matrix:
        result = subsample_to_depth_csr_upperTri(m, seq_depth)
    elif kind is np.ndarray:
        result = subsample_to_depth_array_upperTri(m, seq_depth)
    else:
        result = None
    return result
项目:genomedisco    作者:kundajelab    | 项目源码 | 文件源码
def subsample_to_depth_csr_upperTri(m,seq_depth):
    """Binomially subsample the stored values of a CSR matrix.

    Each stored value ``v`` is replaced by one draw from
    ``Binomial(int(v), seq_depth / m.sum())`` taken from NumPy's global
    random state, so the expected total of the result is ``seq_depth``.

    Args:
        m: ``csr_matrix`` of non-negative counts.
        seq_depth: target total; must not exceed ``m.sum()``.

    Returns:
        An integer ``csr_matrix`` with the shape and sparsity structure
        (``indices``/``indptr``) of ``m`` and the subsampled values.

    Raises:
        AssertionError: if ``seq_depth`` is larger than ``m.sum()``.
    """
    depthm = m.sum()
    assert seq_depth <= depthm
    # float() forces true division: with an integer matrix and an integer
    # seq_depth, Python 2 would otherwise floor the probability to 0 or 1.
    # An all-zero matrix has nothing to sample, so avoid the 0/0 there.
    subsampling_prob = float(seq_depth) / depthm if depthm else 0.0

    # One vectorized draw instead of a Python-level loop over every stored
    # element. Values are truncated to integers explicitly, which is what the
    # scalar binomial call does with a float count.
    counts = np.asarray(m.data).astype(int)
    m_subsampled_data = np.random.binomial(counts, subsampling_prob)
    return csr_matrix((m_subsampled_data, m.indices, m.indptr), dtype=int, shape=m.shape)
项目:genomedisco    作者:kundajelab    | 项目源码 | 文件源码
def load_sparse_csr(filename):
    """Rebuild a CSR matrix from an ``.npz`` archive.

    The archive must provide the arrays ``data``, ``indices``, ``indptr``
    and ``shape``.

    Args:
        filename: path (or file object) handed to ``np.load``.

    Returns:
        The ``csr_matrix`` described by the stored arrays.
    """
    # np.load keeps the archive open until it is closed explicitly; the
    # context manager releases the file handle once the arrays are read.
    with np.load(filename) as loader:
        shape = tuple(int(dim) for dim in loader['shape'])
        return csr_matrix((loader['data'], loader['indices'], loader['indptr']),
                          shape=shape)
项目:genomedisco    作者:kundajelab    | 项目源码 | 文件源码
def filter_nodes(m,to_remove):
    """Drop the entries of ``m`` that connect two removed nodes.

    Args:
        m: scipy sparse matrix.
        to_remove: collection of node indices (anything supporting ``len``
            and iteration, e.g. a list, set or dict keyed by index).

    Returns:
        ``m`` itself when ``to_remove`` is empty; otherwise a new float
        ``csr_matrix`` of the same shape holding every non-zero entry whose
        row or column is not in ``to_remove``.

    NOTE(review): an entry is discarded only when BOTH its row and its
    column are listed in ``to_remove`` (this mirrors the original union of
    "row allowed" and "column allowed"). If the intent is to remove every
    interaction touching a listed node, the ``or`` below should be ``and``;
    confirm with the callers before changing it.
    """
    if len(to_remove)==0:
        return m

    # Build the lookup once; membership tests against a list are linear.
    removed = set(to_remove)
    coo_mat = m.tocoo()

    # Select positions directly in the COO arrays that are indexed below.
    # Positions taken from m.nonzero() skip explicitly stored zeros and are
    # therefore misaligned with coo_mat.data whenever such zeros exist.
    keep = [
        pos
        for pos, (row, col, val) in enumerate(zip(coo_mat.row, coo_mat.col, coo_mat.data))
        if val != 0 and (row not in removed or col not in removed)
    ]

    return csr_matrix((coo_mat.data[keep], (coo_mat.row[keep], coo_mat.col[keep])),
                      shape=m.shape, dtype=float)
项目:conec    作者:cod3licious    | 项目源码 | 文件源码
def _get_raw_context_matrix(self, sentences):
    """
    count, for every vocabulary word, the vocabulary words in its window
    the result is stored in self.featmat as a square csr_matrix with one
    row and one column per entry of self.index2word
    """
    vocab_size = len(self.index2word)
    counts = lil_matrix((vocab_size, vocab_size), dtype=float)
    for sentence_no, sentence in enumerate(sentences):
        if sentence_no % self.progress == 0:
            print("PROGRESS: at sentence #%i" % sentence_no)
        # unknown words turn into None: they keep their position in the
        # window but are never counted
        known = [word if word in self.word2index else None for word in sentence]
        # the backward pass is the forward pass over the reversed sentence
        directions = []
        if self.forward:
            directions.append(known)
        if self.backward:
            directions.append(known[::-1])
        for ordered in directions:
            for pos, center in enumerate(ordered[:-1]):
                if not center:
                    continue
                row = self.word2index[center]
                # slicing clips at the end of the sentence by itself
                for neighbor in ordered[pos + 1:pos + 1 + self.window]:
                    if neighbor:
                        counts[row, self.word2index[neighbor]] += 1.
    print("PROGRESS: through with all the sentences")
    self.featmat = csr_matrix(counts)
项目:conec    作者:cod3licious    | 项目源码 | 文件源码
def get_context_matrix(self, fill_diag=True, norm='count'):
    """
    build the (optionally normalized) context matrix from a copy of self.featmat
    Inputs:
        - fill_diag: if the diagonal should be overwritten with the word counts
                     from self.wcounts
        - norm: 'count' divides every row by the count of its word,
                'max' divides every row by its largest entry (rows whose
                maximum is 0 are left unscaled),
                anything else returns the matrix without normalization
    Returns:
        - featmat: n_voc x n_voc sparse matrix
    The matrix (after the optional diagonal fill) is asserted to be symmetric.
    """
    context = deepcopy(self.featmat)
    if fill_diag:
        # item assignment is done on a lil_matrix, then converted back
        context = lil_matrix(context)
        for idx, word in enumerate(self.index2word):
            context[idx, idx] = self.wcounts[word]
        context = csr_matrix(context)
    asymmetry = context - context.transpose()
    assert (asymmetry.data**2).sum() < 2.220446049250313e-16, "featmat not symmetric"
    # pick the per-row scaling factors
    if norm == 'count':
        print("normalizing feature matrix by word count")
        factors = [1. / self.wcounts[word] for word in self.index2word]
    elif norm == 'max':
        print("normalizing feature matrix by max counts")
        factors = [1. / v[0] if v[0] else 1. for v in context.max(axis=1).toarray()]
    else:
        return context
    # rows are scaled by multiplying with a diagonal matrix from the left
    scaling = lil_matrix(context.shape, dtype=float)
    scaling.setdiag(factors)
    return csr_matrix(scaling) * context
项目:conec    作者:cod3licious    | 项目源码 | 文件源码
def get_local_context_matrix(self, tokens, forward=True, backward=True):
    """
    compute a context matrix for a single token list. every distinct token gets a row,
    even if it is not part of the vocabulary; only vocabulary words are counted as context
    Inputs:
        - tokens: list of words
        - forward, backward: accepted but not consulted; the passes are
                             switched by self.forward and self.backward
    Returns:
        - local_featmat: size len(set(tokens)) x n_vocab, context words weighted by
                         1/distance and every row divided by its largest entry
        - tok_idx: {word: index} to map the words from the tokens list to a row of the featmat
    """
    # one row per distinct token is enough for the whole document
    tok_idx = {word: i for i, word in enumerate(set(tokens))}
    vocab = self.word2index
    local = lil_matrix((len(tok_idx), len(self.index2word)), dtype=float)
    # context words outside the vocabulary become None: they occupy a
    # position in the window but add nothing
    known_tokens = [word if word in vocab else None for word in tokens]
    # the backward pass is the forward pass over the reversed sequences
    passes = []
    if self.forward:
        passes.append((tokens, known_tokens))
    if self.backward:
        passes.append((tokens[::-1], known_tokens[::-1]))
    for centers, contexts in passes:
        for pos, center in enumerate(centers[:-1]):
            row = tok_idx[center]
            # the window starts right after the word; slicing clips at the end
            window = contexts[pos + 1:pos + 1 + self.window]
            for dist, neighbor in enumerate(window, 1):
                if neighbor:
                    local[row, vocab[neighbor]] += 1. / dist
    local = csr_matrix(local)
    # scale every row by the inverse of its maximum (rows with maximum 0 stay as they are)
    scaling = lil_matrix((local.shape[0], local.shape[0]), dtype=float)
    scaling.setdiag([1. / v[0] if v[0] else 1. for v in local.max(axis=1).toarray()])
    local = csr_matrix(scaling) * local
    return local, tok_idx
项目:qcqp    作者:cvxgrp    | 项目源码 | 文件源码
def dc_split(self, use_eigen_split=False):
    """Split this quadratic function into a pair (f1, f2) with P1 - P2 == self.P.

    f1 is built from (P1, self.q, self.r) and f2 from (P2, zero vector, 0).
    Presumably both P1 and P2 are meant to be positive semidefinite
    (difference-of-convex split) -- confirm against the callers.

    Args:
        use_eigen_split: when true, P1 collects the eigen-components of
            self.P with positive eigenvalue and P2 the negated components
            with negative eigenvalue. Otherwise, if the smallest eigenvalue
            ``lmb_min`` is negative, ``(1 - lmb_min) * I`` is added to
            self.P for P1 and used as P2; if not, P1 is self.P and P2 is zero.

    Returns:
        The tuple ``(f1, f2)`` of QuadraticFunction instances.
    """
    n = self.P.shape[0]

    if self.P.nnz == 0:  # P is zero
        P1, P2 = sp.csr_matrix((n, n)), sp.csr_matrix((n, n))
    # elif, not if: a separate `if` would run one of the branches below as
    # well and overwrite the result of the zero case.
    elif use_eigen_split:
        lmb, Q = LA.eigh(self.P.todense())
        pos_terms = [Q[:, i]*lmb[i]*Q[:, i].T for i in range(n) if lmb[i] > 0]
        neg_terms = [-Q[:, i]*lmb[i]*Q[:, i].T for i in range(n) if lmb[i] < 0]
        # sum() over an empty list is the int 0, not a matrix; fall back to
        # an explicit zero matrix like the other branches do.
        P1 = sum(pos_terms) if pos_terms else sp.csr_matrix((n, n))
        P2 = sum(neg_terms) if neg_terms else sp.csr_matrix((n, n))
        assert abs(np.sum(P1 - P2 - self.P)) < 1e-8
    else:
        lmb_min = np.min(LA.eigh(self.P.todense())[0])
        if lmb_min < 0:
            # shifting by (1 - lmb_min) * I moves the smallest eigenvalue to 1
            P1 = self.P + (1-lmb_min)*sp.identity(n)
            P2 = (1-lmb_min)*sp.identity(n)
        else:
            P1 = self.P
            P2 = sp.csr_matrix((n, n))
    f1 = QuadraticFunction(P1, self.q, self.r)
    f2 = QuadraticFunction(P2, sp.csc_matrix((n, 1)), 0)
    return (f1, f2)

    # Returns the one-variable function when regarding f(x)
    # as a quadratic expression in x[k].
    # f is an instance of QuadraticFunction
    # return value is an instance of OneVarQuadraticFunction
    # TODO: speedup
项目:ensemble_amazon    作者:kaz-Anova    | 项目源码 | 文件源码
def build_matrix(self, X, opt_y=None, weighting=None):
    """Wrap ``X`` (and optional labels / weights) in an ``xgb.DMatrix``.

    Args:
        X: feature matrix; converted with ``csr_matrix(X)``.
        opt_y: optional labels, passed on as ``np.array(opt_y)``.
        weighting: optional per-row weights. They are rescaled so that
            they sum to ``X.shape[0]``; the caller's object is left
            unmodified.

    Returns:
        The ``xgb.DMatrix``, created with ``missing=-999.0``.
    """
    dmatrix_kwargs = {"missing": -999.0}

    # `is not None` instead of `!= None`: comparing a NumPy array with
    # None is elementwise and has no single truth value.
    if opt_y is not None:
        dmatrix_kwargs["label"] = np.array(opt_y)

    if weighting is not None:
        # scale weight on a float copy, so integer weights are not
        # truncated and the argument is not changed behind the caller's back
        weights = np.asarray(weighting, dtype=float)
        sumtotal = float(X.shape[0])
        sumweights = np.sum(weights)
        dmatrix_kwargs["weight"] = weights * (sumtotal / sumweights)

    return xgb.DMatrix(csr_matrix(X), **dmatrix_kwargs)
项目:hidi    作者:VEVO    | 项目源码 | 文件源码
def dot(X, Y):
    """Matrix product of ``X`` and ``Y`` for sparse and dense operands.

    Two sparse matrices are multiplied directly. If exactly one operand
    is sparse, both are converted to ``csr_matrix`` first. Otherwise both
    are viewed as ``np.matrix`` so that ``*`` is the matrix product.
    """
    x_sparse = sparse.isspmatrix(X)
    y_sparse = sparse.isspmatrix(Y)

    if x_sparse and y_sparse:
        return X * Y
    if x_sparse or y_sparse:
        left = sparse.csr_matrix(X)
        right = sparse.csr_matrix(Y)
        return left * right
    return np.asmatrix(X) * np.asmatrix(Y)