Python scipy.sparse module: lil_matrix() example source code

We have extracted the following 49 code examples from open-source Python projects to illustrate how to use scipy.sparse.lil_matrix().
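Before the project examples, here is a minimal standalone sketch (not taken from any project below) of the typical lil_matrix workflow: build the matrix incrementally, then convert to CSR/CSC for arithmetic.

import numpy as np
from scipy.sparse import lil_matrix

# Build a 3x4 matrix incrementally; LIL supports cheap per-entry assignment.
M = lil_matrix((3, 4), dtype=np.float64)
M[0, 1] = 2.0
M[2, 3] += 1.0        # in-place updates of single entries are efficient in LIL
M.setdiag(1.0)        # fill the main diagonal with ones

# Convert to CSR before doing arithmetic or matrix-vector products.
M_csr = M.tocsr()
print(M_csr.toarray())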

Project: shalo    Author: henryre
def _get_data_batch(self, x_batch):
        # Construct LIL matrix
        X_lil = sparse.lil_matrix((len(x_batch), self.td))
        for j, x in enumerate(x_batch):
            for t in x:
                X_lil[j, t] += 1
        # Get batch data
        indices, ids, weights = [], [], []
        max_len = 0
        for i, (row, data) in enumerate(zip(X_lil.rows, X_lil.data)):
            # Dummy weight for all-zero row
            if len(row) == 0:
                indices.append((i, 0))
                ids.append(0)
                weights.append(0.0)
                continue
            # Update indices by position
            max_len = max(max_len, len(row))
            indices.extend((i, t) for t in xrange(len(row)))
            ids.extend(row)
            weights.extend(data)
        shape = (len(X_lil.rows), max_len)
        return [indices, shape, ids, weights], None
Project: pysapc    Author: bioinfocao
def matixToRowColDataArr(X):
    """
    Convert sparse affinity/similarity matrix to numpy array format (row_array,col_array,data_array)
    so that the Cython update function can work efficiently on it.
    """
    # convert to coo format (from lil,csr,csc)
    if isinstance(X, coo_matrix):
        X_coo=X
    elif (isinstance(X, csr_matrix)) or (isinstance(X, lil_matrix)):
        X_coo=X.tocoo()
    else: # others, like a numpy matrix, can be converted to a coo matrix
        X_coo=coo_matrix(X)
    # Upcast matrix to a floating point format (if necessary)
    X_coo=X_coo.asfptype() 
    # get row_array,col_array,data_array in their correct data type (for cython to work)
    row_array,col_array,data_array=X_coo.row.astype(np.int),X_coo.col.astype(np.int),X_coo.data

    return row_array,col_array,data_array
Project: MIT-Thesis    Author: alec-heif
def test_serialize(self):
        from scipy.sparse import lil_matrix
        lil = lil_matrix((4, 1))
        lil[1, 0] = 1
        lil[3, 0] = 2
        sv = SparseVector(4, {1: 1, 3: 2})
        self.assertEqual(sv, _convert_to_vector(lil))
        self.assertEqual(sv, _convert_to_vector(lil.tocsc()))
        self.assertEqual(sv, _convert_to_vector(lil.tocoo()))
        self.assertEqual(sv, _convert_to_vector(lil.tocsr()))
        self.assertEqual(sv, _convert_to_vector(lil.todok()))

        def serialize(l):
            return ser.loads(ser.dumps(_convert_to_vector(l)))
        self.assertEqual(sv, serialize(lil))
        self.assertEqual(sv, serialize(lil.tocsc()))
        self.assertEqual(sv, serialize(lil.tocsr()))
        self.assertEqual(sv, serialize(lil.todok()))
Project: StageDP    Author: EastonWang
def vectorize(features, vocab):
    """ Transform a features list into a numeric vector
        with a given vocab

    :type dpvocab: dict
    :param dpvocab: vocab for distributional representation

    :type projmat: scipy.lil_matrix
    :param projmat: projection matrix for disrep
    """
    vec = lil_matrix((1, len(vocab)))

    for feat in features:
        try:
            fidx = vocab[feat]
            vec[0, fidx] += 1.0
        except KeyError:
            pass
    # Normalization
    vec = normalize(vec)
    return vec
Project: sequence-based-recommendations    Author: rdevooght
def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None):
        ''' Receives a sequence of (id, rating), and produces k recommendations (as a list of ids)
        '''

        if exclude is None:
            exclude = []

        # Compile network if needed
        if not hasattr(self, 'predict_function'):
            self._compile_predict_function()

        # Prepare RNN input
        max_length_seq = sequence[-min(self.max_length, len(sequence)):]
        X = sp.lil_matrix((1, self.n_items), dtype=theano.config.floatX)
        for j in sequence:
            X[0, j[0]] = 1./len(sequence)**self.alpha

        # Run RNN
        if self.interactions_are_unique:
            should_exclude = [i[0] for i in sequence]
        else:
            should_exclude = []
        should_exclude.extend(exclude)
        return self.predict_function(X.tocsr(), k, should_exclude)
Project: hexmachina    Author: dnkrtz
def reduce_system(A, x, b, i):
    """Remove variable(s) i from system.
    Row(s) i of matrix 'b' must be set before this gets called."""

    # Convert everything to lil format.
    A = sparse.lil_matrix(A)
    x = sparse.lil_matrix(x.reshape((len(x),1)))
    b = sparse.lil_matrix(b.reshape((len(b),1)))

    # Update rhs b (absorbs vars).
    for vi in i:
        b = b - x[vi, 0] * A.getcol(vi)

    # Drop rows from the b vector.
    b = drop_rows(b, i) 
    # Drop rows from the x vector.
    x = drop_rows(x, i)
    # Drop rows from the A matrix.
    A = drop_rows(A, i)
    # Drop cols from the A matrix.
    A = drop_rows(A.transpose(), i)

    return A, x, b
Project: pbtranscript    Author: PacificBiosciences
def calc_sim_matrix(uc1, uc2):
    """
    Calculate the similarity matrix between two sets of clusters.
    uc1: a set of clusters, dict{cluster_id:set(), cluster_id:set(), ...}
    uc2: a set of clusters, dict{cluster_id:set(), cluster_id:set(), ...}

    return (S, keys1, keys2, m, n), where
            S_ij = jacard_sim(uc1_i, uc2_j)
            keys1, keys2 = cluster ids in uc1 and uc2
            m, n = number of clusters in uc1 and uc2
    """
    keys1 = uc1.keys()
    keys1.sort()
    keys2 = uc2.keys()
    keys2.sort()
    m, n = len(keys1), len(keys2)
    S = lil_matrix((m+1, n+1))
    for i in xrange(m):
        for j in xrange(n):
            c1 = uc1[keys1[i]]
            c2 = uc2[keys2[j]]
            sim = jacard_sim(c1, c2)
            S[i, j] = sim
    return S, keys1, keys2, m, n
Project: pbtranscript    Author: PacificBiosciences
def convert_graph_connectivity_to_sparse(G, nodes):
    """
        Given a networkx graph, return sparse adjacency matrix S and H
        S and H are different in that S's entries contain edge weights
        (if there are multiple edges, behavior is overwrite),
        and H just has a 1 for every non-zero entry.

        NOTE: for now just use H, so returns None,H
    """
    n = G.number_of_nodes()
#    S = sparse.lil_matrix((n,n))
    H = sparse.lil_matrix((n, n))
    nodes_to_index = dict(zip(nodes, range(n)))
    for e in G.edges_iter(data=True):
        i = nodes_to_index[e[0]]
        j = nodes_to_index[e[1]]
        H[i, j] = 1
        H[j, i] = 1
    # we do a lot of column-slicing, so convert to CSC for efficiency
    H = H.tocsc()

    return None, H
#    return S,H
Project: ANNarchy    Author: vitay
def sparse_random_matrix(pre, post, p, weight, delay=0):
    """
    Returns a sparse (lil) matrix to connect the pre and post populations with the probability p and the value weight.
    """
    try:
        from scipy.sparse import lil_matrix
    except ImportError:
        Global._warning("scipy is not installed, sparse matrices won't work")
        return None
    from random import sample
    W=lil_matrix((pre, post))
    for i in xrange(pre):
        k=np.random.binomial(post,p,1)[0]
        W.rows[i]=sample(xrange(post),k)
        W.data[i]=[weight]*k

    return W
Project: cebl    Author: idfah
def setIWMult(self, mult):
        m = mult / float(self.iwMult)

        if self.sparse:
            v = spsparse.lil_matrix((self.hw.shape[0], self.hw.shape[0]))
            v.setdiag(np.ones(v.shape[0], dtype=self.dtype))
            v.setdiag([m,]*(self.nIn+1))

            self.hw = v*self.hw

            # good for debugging above
            #w = self.hw.todense()
            #w[:self.nIn+1,:] *= m
            #self.hw = spsparse.csc_matrix(w)
        else:
            self.hw[:self.nIn+1,:] *= m

        self.iwMult = mult
Project: cebl    Author: idfah
def setRWScale(self, x, scale):
        # why does this method not work? XXX - idfah
        m = scale / float(self.rwScale)

        if self.sparse:
            v = spsparse.lil_matrix((self.hw.shape[0], self.hw.shape[0]))
            d = np.ones(v.shape[0], dtype=self.dtype)
            d[self.nIn+1:] = m
            v.setdiag(d)

            self.hw = v*self.hw

            # good for debugging above
            #w = self.hw.todense()
            #w[self.nIn+1:,:] *= m
            #self.hw = spsparse.csc_matrix(w)
        else:
            self.hw[self.nIn+1:,:] *= m

        self.scaleIW(x)
Project: vrec    Author: tn1031
def mc_logdet(train, mu=1., gamma=5, maxitr=2):
    m, n = train.shape
    nonzero_row, nonzero_col = train.nonzero()
    nonzero_index = list(zip(nonzero_row, nonzero_col))  # materialize so it can be reused across iterations
    prevX = train#.toarray()
    X = None
    Y = train#.toarray()
    Z = sp.lil_matrix(np.zeros((m, n)))

    for itr in range(maxitr):
        X = update_X(Y - Z / mu, mu / 2., 6)

        for idx in nonzero_index:
            X[idx[0], idx[1]] = train[idx[0], idx[1]]
        Y = maximum(X + (Z/mu), sp.lil_matrix(np.zeros(X.shape)))
        Z = Z + mu * (X - Y)
        mu *= gamma

        #err = np.sum((X-prevX)**2)/np.sum(prevX**2)
        #print err

        prevX = X

    return X
Project: icing    Author: slipguru
def matrix_to_row_col_data(X):
    """Convert sparse affinity matrix to arrays.

    .. note:: Deprecated.
          It will be removed in icing 0.2. This is now done by check_array from
          numpy.
    """
    # convert to coo format (from lil,csr,csc)
    if isinstance(X, coo_matrix):
        X_coo = X
    elif (isinstance(X, csr_matrix)) or (isinstance(X, lil_matrix)):
        X_coo = X.tocoo()
    else:  # others, like a numpy matrix, can be converted to a coo matrix
        X_coo = coo_matrix(X)
    # Upcast matrix to a floating point format (if necessary)
    X_coo = X_coo.asfptype()
    return X_coo.row.astype(np.int), X_coo.col.astype(np.int), X_coo.data
Project: histonets-cv    Author: sul-cidr
def grid_to_adjacency_matrix(grid, neighborhood=8):
    """Convert a boolean grid where 0's express holes and 1's connected pixel
    into a sparse adjacency matrix representing the grid-graph.
    Neighborhood for each pixel is calculated from its 4 or 8 more immediate
    surrounding neighbors (defaults to 8)."""
    coords = np.argwhere(grid)
    coords_x = coords[:, 0]
    coords_y = coords[:, 1]
    # lil is the most performant format for building a sparse matrix iteratively
    matrix = sparse.lil_matrix((0, coords.shape[0]), dtype=np.uint8)
    if neighborhood == 4:
        for px, py in coords:
            row = (((px == coords_x) & (np.abs(py - coords_y) == 1)) |
                   ((np.abs(px - coords_x) == 1) & (py == coords_y)))
            matrix = sparse.vstack([matrix, row])
    else:
        for px, py in coords:
            row = (np.abs(px - coords_x) <= 1) & (np.abs(py - coords_y) <= 1)
            matrix = sparse.vstack([matrix, row])
    matrix.setdiag(1)
    # Once built, we convert it to compressed sparse columns or rows
    return matrix.tocsc()  # or .tocsr()
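To make the neighborhood tests above concrete, here is a small standalone check (not part of histonets-cv) that evaluates the 4- and 8-neighborhood row expressions for a single pixel of a tiny grid:

import numpy as np

grid = np.array([[1, 1],
                 [0, 1]])
coords = np.argwhere(grid)          # [[0 0], [0 1], [1 1]]
px, py = coords[0]                  # pixel (0, 0)
row4 = (((px == coords[:, 0]) & (np.abs(py - coords[:, 1]) == 1)) |
        ((np.abs(px - coords[:, 0]) == 1) & (py == coords[:, 1])))
row8 = (np.abs(px - coords[:, 0]) <= 1) & (np.abs(py - coords[:, 1]) <= 1)
print(row4.astype(int))             # [0 1 0]: only (0, 1) is a 4-neighbour of (0, 0)
print(row8.astype(int))             # [1 1 1]: the 8-neighbourhood also catches (1, 1), plus the pixel itself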
Project: ConversationalQA    Author: btjhjeon
def process_text(text, dic, r, grams):
    """
    Return sparse feature matrix
    """
    X = lil_matrix((len(text), len(dic)))
    for i, l in enumerate(text):
        tokens = tokenize(l, grams)
        indexes = []
        for t in tokens:
            try:
                indexes += [dic[t]]
            except KeyError:
                pass
        indexes = list(set(indexes))
        indexes.sort()
        for j in indexes:
            X[i,j] = r[j]
    return csr_matrix(X)
Project: pyspark    Author: v-v-vishnevskiy
def test_serialize(self):
        from scipy.sparse import lil_matrix
        lil = lil_matrix((4, 1))
        lil[1, 0] = 1
        lil[3, 0] = 2
        sv = SparseVector(4, {1: 1, 3: 2})
        self.assertEqual(sv, _convert_to_vector(lil))
        self.assertEqual(sv, _convert_to_vector(lil.tocsc()))
        self.assertEqual(sv, _convert_to_vector(lil.tocoo()))
        self.assertEqual(sv, _convert_to_vector(lil.tocsr()))
        self.assertEqual(sv, _convert_to_vector(lil.todok()))

        def serialize(l):
            return ser.loads(ser.dumps(_convert_to_vector(l)))
        self.assertEqual(sv, serialize(lil))
        self.assertEqual(sv, serialize(lil.tocsc()))
        self.assertEqual(sv, serialize(lil.tocsr()))
        self.assertEqual(sv, serialize(lil.todok()))
Project: finite-element-course    Author: finite-element
def assemble(fs, f):
    """Assemble the finite element system for the Helmholtz problem given
    the function space in which to solve and the right hand side
    function."""

    raise NotImplementedError

    # Create an appropriate (complete) quadrature rule.

    # Tabulate the basis functions and their gradients at the quadrature points.

    # Create the left hand side matrix and right hand side vector.
    # This creates a sparse matrix because creating a dense one may
    # well run your machine out of memory!
    A = sp.lil_matrix((fs.node_count, fs.node_count))
    l = np.zeros(fs.node_count)

    # Now loop over all the cells and assemble A and l

    return A, l
Project: SFA    Author: Bollegala
def learnProjection(sourceDomain, targetDomain):
    """
    Learn the projection matrix and store it to a file. 
    """
    h = 50 # no. of latent dimensions.
    print "Loading the bipartite matrix...",
    coocData = sio.loadmat("../work/%s-%s/DSxDI.mat" % (sourceDomain, targetDomain))
    M = sp.lil_matrix(coocData['DSxDI'])
    (nDS, nDI) = M.shape
    print "Done."
    print "Computing the Laplacian...",
    D1 = sp.lil_matrix((nDS, nDS), dtype=np.float64)
    D2 = sp.lil_matrix((nDI, nDI), dtype=np.float64)
    for i in range(0, nDS):
        D1[i,i] = 1.0 / np.sqrt(np.sum(M[i,:].data[0]))
    for i in range(0, nDI):
        D2[i,i] = 1.0 / np.sqrt(np.sum(M[:,i].T.data[0]))
    B = (D1.tocsr().dot(M.tocsr())).dot(D2.tocsr())
    print "Done."
    print "Computing SVD...",
    ut, s, vt = sparsesvd(B.tocsc(), h)
    sio.savemat("../work/%s-%s/proj.mat" % (sourceDomain, targetDomain), {'proj':ut.T})
    print "Done."    
    pass
Project: pymake    Author: dtrckd
def rescal(X, K):

    ## Set logging to INFO to see RESCAL information
    #logging.basicConfig(level=logging.INFO)

    ## Load Matlab data and convert it to dense tensor format
    #T = loadmat('data/alyawarra.mat')['Rs']
    #X = [lil_matrix(T[:, :, k]) for k in range(T.shape[2])]

    X = [sp.sparse.csr_matrix(X)]
    A, R, fit, itr, exectimes = rescal_als(X, K, init='nvecs', lambda_A=10, lambda_R=10)

    theta =  A.dot(R).dot(A.T)
    Y = 1 / (1 + np.exp(-theta))
    Y =  Y[:,0,:]
    Y[Y <= 0.5] = 0
    Y[Y > 0.5] = 1
    #Y = sp.stats.bernoulli.rvs(Y)
    return Y
Project: chainer-graph-cnn    Author: pfnet-research
def create_laplacian(W, normalize=True):
    n = W.shape[0]
    W = ss.csr_matrix(W)
    WW_diag = W.dot(ss.csr_matrix(np.ones((n, 1)))).todense()
    if normalize:
        WWds = np.sqrt(WW_diag)
        # Let the inverse of zero entries become zero.
        WWds[WWds == 0] = np.float("inf")
        WW_diag_invroot = 1. / WWds
        D_invroot = ss.lil_matrix((n, n))
        D_invroot.setdiag(WW_diag_invroot)
        D_invroot = ss.csr_matrix(D_invroot)
        I = scipy.sparse.identity(W.shape[0], format='csr', dtype=W.dtype)
        L = I - D_invroot.dot(W.dot(D_invroot))
    else:
        D = ss.lil_matrix((n, n))
        D.setdiag(WW_diag)
        D = ss.csr_matrix(D)
        L = D - W

    return L.astype(W.dtype)
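The same normalized-Laplacian construction can be sketched standalone on a tiny graph (not project code; it uses a plain 1-D degree vector with setdiag instead of the dense column produced by W.dot above):

import numpy as np
import scipy.sparse as ss

W = ss.csr_matrix(np.array([[0., 1.],
                            [1., 0.]]))
deg = np.asarray(W.sum(axis=1)).ravel()      # node degrees; no isolated nodes in this toy graph
d_invroot = 1.0 / np.sqrt(deg)
D_invroot = ss.lil_matrix((2, 2))
D_invroot.setdiag(d_invroot)
D_invroot = ss.csr_matrix(D_invroot)
I = ss.identity(2, format='csr')
L = I - D_invroot.dot(W.dot(D_invroot))
print(L.toarray())                           # [[ 1. -1.], [-1.  1.]]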
Project: abc    Author: daemon
def _init_svd(self, dictionary, definitions):
    self.td_matrix = lil_matrix((len(dictionary), self.n_terms))
    for defn, i in zip(definitions, range(len(definitions))):
      if i % 100 == 0:
        print("Building term-document matrix: {} / {}".format(i, len(dictionary)), end="\r")
      self.td_matrix[i, :] = self.compute_freq_vec(dictionary[defn])
    self.td_matrix = self.td_matrix.transpose().tocsr()
    print()
    for i in range(self.n_terms):
      n = float(self.td_matrix[i, :].getnnz())
      if i % 100 == 0:
        print("Applying td-idf: {} / {}".format(i, self.n_terms), end="\r")
      if n > 0:
        self.td_matrix[i, :] *= np.log(len(dictionary) / n)
    print()
    print("Performing rank reduction...")
    self.u, self.s, self.vt = randomized_svd(self.td_matrix, 50, transpose=False)
    self.doc_matrix = np.matmul(np.diag(self.s), self.vt).transpose()
Project: Theano-Deep-learning    Author: GeekLiB
def random_lil(shape, dtype, nnz):
    rval = sp.lil_matrix(shape, dtype=dtype)
    huge = 2 ** 30
    for k in range(nnz):
        # set non-zeros in random locations (row x, col y)
        idx = numpy.random.randint(1, huge+1, size=2) % shape
        value = numpy.random.rand()
        # if dtype is *int*, the value would otherwise always round down to zero!
        if "int" in dtype:
            value = int(value * 100)
        # The call to tuple is needed as scipy 0.13.1 does not support
        # an ndarray of length 2 as the idx tuple.
        rval.__setitem__(
            tuple(idx),
            value)
    return rval
Project: Theano-Deep-learning    Author: GeekLiB
def random_lil(shape, dtype, nnz):
    rval = sp.lil_matrix(shape, dtype=dtype)
    huge = 2 ** 30
    for k in range(nnz):
        # set non-zeros in random locations (row x, col y)
        idx = numpy.random.randint(1, huge + 1, size=2) % shape
        value = numpy.random.rand()
        # if dtype is *int*, the value would otherwise always round down to zero!
        if "int" in dtype:
            value = int(value * 100)
        # The call to tuple is needed as scipy 0.13.1 does not support
        # an ndarray of length 2 as the idx tuple.
        rval.__setitem__(
            tuple(idx),
            value)
    return rval
Project: cluster_paraphrases    Author: acocos
def load_pickled_vecs(filename, returnpp=False):
    '''
    Load word vecs from word-paraphrase matrix
    :param filename: str
    :return: dict, int (and dict if returnpp is True)
    '''
    with open(filename, 'rb') as fin:
        word2ind, ordered_vocab, w2p = pickle.load(fin)

    word_vecs = {}
    N = len(word2ind.keys())

    for w, d in w2p.iteritems():
        lil_v = sparse.lil_matrix((1,N), dtype='float')
        for p, sc in d.iteritems():
            try:
                lil_v[0,word2ind[p]] = sc
            except KeyError:
                print 'Error loading vector:', w, p, sc
        word_vecs[w] = sparse.csr_matrix(lil_v)  # is this slow?
    if returnpp:
        return word_vecs, N, w2p
    else:
        return word_vecs, N
Project: orange3-recommendation    Author: biolab
def compute_mrr(self, data, users, queries=None):

        # Check data type
        if isinstance(data, lil_matrix):
            pass
        elif isinstance(data, Table):
            # Preprocess Orange.data.Table and transform it to sparse
            data, order, shape = preprocess(data)
            data = table2sparse(data, shape, order, m_type=lil_matrix)
        else:
            raise TypeError('Invalid data type')

        # Make predictions
        y_pred = self(users)

        # Get relevant items for the user[i]
        if queries is None:
            queries = []
            add_items = queries.append
            for u in users:
                add_items(np.asarray(data.rows[u]))

        # Compute Mean Reciprocal Rank (MRR)
        mrr = MeanReciprocalRank(results=y_pred, query=queries)
        return mrr, queries
Project: Parallel-SGD    Author: angadgill
def test_scikit_vs_scipy():
    # Test scikit linkage with full connectivity (i.e. unstructured) vs scipy
    n, p, k = 10, 5, 3
    rng = np.random.RandomState(0)

    # Not using a lil_matrix here, just to check that non-sparse
    # matrices are well handled
    connectivity = np.ones((n, n))
    for linkage in _TREE_BUILDERS.keys():
        for i in range(5):
            X = .1 * rng.normal(size=(n, p))
            X -= 4. * np.arange(n)[:, np.newaxis]
            X -= X.mean(axis=1)[:, np.newaxis]

            out = hierarchy.linkage(X, method=linkage)

            children_ = out[:, :2].astype(np.int)
            children, _, n_leaves, _ = _TREE_BUILDERS[linkage](X, connectivity)

            cut = _hc_cut(k, children, n_leaves)
            cut_ = _hc_cut(k, children_, n_leaves)
            assess_same_labelling(cut, cut_)

    # Test error management in _hc_cut
    assert_raises(ValueError, _hc_cut, n_leaves + 1, children, n_leaves)
Project: Parallel-SGD    Author: angadgill
def test_label_binarize_multilabel():
    y_ind = np.array([[0, 1, 0], [1, 1, 1], [0, 0, 0]])
    classes = [0, 1, 2]
    pos_label = 2
    neg_label = 0
    expected = pos_label * y_ind
    y_sparse = [sparse_matrix(y_ind)
                for sparse_matrix in [coo_matrix, csc_matrix, csr_matrix,
                                      dok_matrix, lil_matrix]]

    for y in [y_ind] + y_sparse:
        yield (check_binarized_results, y, classes, pos_label, neg_label,
               expected)

    assert_raises(ValueError, label_binarize, y, classes, neg_label=-1,
                  pos_label=pos_label, sparse_output=True)
Project: Parallel-SGD    Author: angadgill
def test_score_samples():
    # Test score_samples (pseudo-likelihood) method.
    # Assert that pseudo-likelihood is computed without clipping.
    # See Fabian's blog, http://bit.ly/1iYefRk
    rng = np.random.RandomState(42)
    X = np.vstack([np.zeros(1000), np.ones(1000)])
    rbm1 = BernoulliRBM(n_components=10, batch_size=2,
                        n_iter=10, random_state=rng)
    rbm1.fit(X)
    assert_true((rbm1.score_samples(X) < -300).all())

    # Sparse vs. dense should not affect the output. Also test sparse input
    # validation.
    rbm1.random_state = 42
    d_score = rbm1.score_samples(X)
    rbm1.random_state = 42
    s_score = rbm1.score_samples(lil_matrix(X))
    assert_almost_equal(d_score, s_score)

    # Test numerical stability (#2785): would previously generate infinities
    # and crash with an exception.
    with np.errstate(under='ignore'):
        rbm1.score_samples([np.arange(1000) * 100])
Project: VERSE    Author: jakelever
def doSplitAcrossSentences(self,examples,featureNamesOnly):
        if featureNamesOnly:
            return ["splitAcrossSentences"]

        out = lil_matrix((len(examples),1))
        for i,example in enumerate(examples):
            sentenceid0,_ = example.arguments[0]
            sameSentence = True
            for thisSID,_ in example.arguments:
                if sentenceid0 != thisSID:
                    sameSentence = False
                    break

            if not sameSentence:
                out[i,0] = 1

        return coo_matrix(out)
Project: cellranger    Author: 10XGenomics
def __init__(self, genes, bcs, dtype='int32'):
        self.genes = list(genes)
        self.genes_dim = len(self.genes)
        self.gene_ids_map = {gene.id:i for i, gene in enumerate(self.genes)}

        self.bcs = list(bcs)
        self.bcs_dim = len(self.bcs)
        self.bcs_map = {bc:i for i, bc in enumerate(self.bcs)}

        self.dtype = dtype
        self.m = sp_sparse.lil_matrix((self.genes_dim, self.bcs_dim), dtype=dtype)
Project: cellranger    Author: 10XGenomics
def tolil(self):
        if type(self.m) is not sp_sparse.lil_matrix:
            self.m = self.m.tolil()
Project: IDPanel    Author: CylanceSPEAR
def vectorize_with_sparse_features(sparse_feature_set, feature_count, c2_data):
    vector = lil_matrix((1, feature_count), dtype=np.float)
    for index, (offset, code, ssdeep_hash) in sparse_feature_set:
        if offset not in c2_data:
            continue
        if c2_data[offset]["code"] == code:
            d = ssdeep.compare(c2_data[offset]["content_ssdeep"], ssdeep_hash)
            d = float(d) / float(100.0)
            vector[0, index] = d

    return vector
项目:conec    作者:cod3licious    | 项目源码 | 文件源码
def _get_raw_context_matrix(self, sentences):
        """
        compute the raw context matrix with weighted counts
        it has an entry for every word in the vocabulary
        """
        # make the feature matrix
        featmat = lil_matrix((len(self.index2word), len(self.index2word)), dtype=float)
        for sentence_no, sentence in enumerate(sentences):
            if not sentence_no % self.progress:
                print("PROGRESS: at sentence #%i" % sentence_no)
            sentence = [word if word in self.word2index else None for word in sentence]
            # forward pass
            if self.forward:
                for i, word in enumerate(sentence[:-1]):
                    if word:
                        # get all words in the forward window
                        wwords = sentence[i + 1:min(i + 1 + self.window, len(sentence))]
                        for j, w in enumerate(wwords, 1):
                            if w:
                                featmat[self.word2index[word], self.word2index[w]] += 1.  # /j
            # backwards pass
            if self.backward:
                sentence_back = sentence[::-1]
                for i, word in enumerate(sentence_back[:-1]):
                    if word:
                        # get all words in the forward window of the backwards sentence
                        wwords = sentence_back[i + 1:min(i + 1 + self.window, len(sentence_back))]
                        for j, w in enumerate(wwords, 1):
                            if w:
                                featmat[self.word2index[word], self.word2index[w]] += 1.  # /j
        print("PROGRESS: through with all the sentences")
        self.featmat = csr_matrix(featmat)
Project: conec    Author: cod3licious
def get_context_matrix(self, fill_diag=True, norm='count'):
        """
        for every word in the sentences, create a vector that contains the counts of its context words
        (weighted by the distance to it with a max distance of window)
        Inputs:
            - norm: if the feature matrix should be normalized to contain ones on the diagonal
                    (--> average context vectors)
            - fill_diag: if diagonal of featmat should be filled with word counts
        Returns:
            - featmat: n_voc x n_voc sparse array with weighted context word counts for every word
        """
        featmat = deepcopy(self.featmat)
        # fill up the diagonals with the total counts of each word --> similarity matrix
        if fill_diag:
            featmat = lil_matrix(featmat)
            for i, word in enumerate(self.index2word):
                featmat[i, i] = self.wcounts[word]
            featmat = csr_matrix(featmat)
        assert ((featmat - featmat.transpose()).data**2).sum() < 2.220446049250313e-16, "featmat not symmetric"
        # possibly normalize by the max counts
        if norm == 'count':
            print("normalizing feature matrix by word count")
            normmat = lil_matrix(featmat.shape, dtype=float)
            normmat.setdiag([1. / self.wcounts[word] for word in self.index2word])
            featmat = csr_matrix(normmat) * featmat
        elif norm == 'max':
            print("normalizing feature matrix by max counts")
            normmat = lil_matrix(featmat.shape, dtype=float)
            normmat.setdiag([1. / v[0] if v[0] else 1. for v in featmat.max(axis=1).toarray()])
            featmat = csr_matrix(normmat) * featmat
        return featmat
Project: conec    Author: cod3licious
def get_local_context_matrix(self, tokens, forward=True, backward=True):
        """
        compute a local context matrix. it has an entry for every token, even if it is not present in the vocabulary
        Inputs:
            - tokens: list of words
        Returns:
            - local_featmat: size len(set(tokens)) x n_vocab
            - tok_idx: {word: index} to map the words from the tokens list to an index of the featmat
        """
        # for every token we still only need one representation per document
        tok_idx = {word: i for i, word in enumerate(set(tokens))}
        featmat = lil_matrix((len(tok_idx), len(self.index2word)), dtype=float)
        # clean out context words we don't know
        known_tokens = [word if word in self.word2index else None for word in tokens]
        # forward pass
        if self.forward:
            for i, word in enumerate(tokens[:-1]):
                # get all words in the forward window
                wwords = known_tokens[i + 1:min(i + 1 + self.window, len(known_tokens))]
                for j, w in enumerate(wwords, 1):
                    if w:
                        featmat[tok_idx[word], self.word2index[w]] += 1. / j
        # backwards pass
        if self.backward:
            tokens_back = tokens[::-1]
            known_tokens_back = known_tokens[::-1]
            for i, word in enumerate(tokens_back[:-1]):
                # get all words in the forward window of the backwards sentence, incl. word itself
                wwords = known_tokens_back[i + 1:min(i + 1 + self.window, len(known_tokens_back))]
                for j, w in enumerate(wwords, 1):
                    if w:
                        featmat[tok_idx[word], self.word2index[w]] += 1. / j
        featmat = csr_matrix(featmat)
        # normalize matrix
        normmat = lil_matrix((featmat.shape[0], featmat.shape[0]), dtype=float)
        normmat.setdiag([1. / v[0] if v[0] else 1. for v in featmat.max(axis=1).toarray()])
        featmat = csr_matrix(normmat) * featmat
        return featmat, tok_idx
Project: seqhawkes    Author: mlukasik
def infections_count(
    infecting_node,
    infected_node,
    infecting_vec,
    infected_vec,
    D,
    ):
    '''
    For each pair of nodes, count the infections between them.

    infecting_node - vector, mapping events to nodes of events that infected 
        them
    infected_node - vector of integers, mapping events to nodes where they 
        occurred
    infecting_vec - vector, mapping events to ids of events that infected them
    infected_vec - vector of integers, mapping events to ids
    D - number of nodes

    returns: matrix of counts
    '''

    infections_mat = sp.lil_matrix((D, D), dtype=np.int)
    for (infected_u, infecting_u, infected_e, infecting_e) in \
        izip(infected_node, infecting_node, infected_vec,
             infecting_vec):
        if infected_e != infecting_e:
            infections_mat[infecting_u, infected_u] += 1
    return infections_mat
Project: seqhawkes    Author: mlukasik
def one_hot_sparse(index_array, num_values):
    m = sp.lil_matrix((num_values, index_array.shape[0]), dtype=np.bool)
    for i in range(index_array.shape[0]):
        m[index_array[i], i] = 1
    return m.tocsr()
Project: gae    Author: tkipf
def load_data(dataset):
    # load the data: x, tx, allx, graph
    names = ['x', 'tx', 'allx', 'graph']
    objects = []
    for i in range(len(names)):
        objects.append(pkl.load(open("data/ind.{}.{}".format(dataset, names[i]))))
    x, tx, allx, graph = tuple(objects)
    test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range-min(test_idx_range), :] = tx
        tx = tx_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    return adj, features
Project: Large-scale-bundle-adjustment-in-scipy    Author: bachmmmar
def bundle_adjustment_sparsity(n_cameras, n_points, camera_indices, point_indices):
    m = camera_indices.size * 2
    n = n_cameras * 9 + n_points * 3
    A = lil_matrix((m, n), dtype=int)

    i = np.arange(camera_indices.size)
    for s in range(9):
        A[2 * i, camera_indices * 9 + s] = 1
        A[2 * i + 1, camera_indices * 9 + s] = 1

    for s in range(3):
        A[2 * i, n_cameras * 9 + point_indices * 3 + s] = 1
        A[2 * i + 1, n_cameras * 9 + point_indices * 3 + s] = 1

    return A
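This sparsity pattern matches the one used in SciPy's large-scale bundle adjustment example, where it is handed to least_squares as the Jacobian sparsity structure. A hedged usage sketch follows; fun, x0 and points_2d (and the index arrays) are placeholders assumed to be defined elsewhere in the project:

from scipy.optimize import least_squares

A = bundle_adjustment_sparsity(n_cameras, n_points, camera_indices, point_indices)
res = least_squares(fun, x0, jac_sparsity=A, verbose=2, x_scale='jac', ftol=1e-4,
                    method='trf',
                    args=(n_cameras, n_points, camera_indices, point_indices, points_2d))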
Project: pysapc    Author: bioinfocao
def fit(self, X, preference=None):
        """
        Apply Sparse Affinity Propagation (SAP) to precomputed sparse affinity/similarity matrix X

        Parameters
        ----------------------
        X: coo_matrix,csr_matrix,lil_matrix, precomputed sparse affinity/similarity matrix
           (affinity/similarity could be cosine, pearson, euclidean distance, or others).
           Please note that the affinity/similarity matrix doesn't need to be symmetric: s(A,B) can differ from s(B,A).
           In fact, s(A,B) may exist while s(B,A) does not in the sparse affinity/similarity matrix.

        preference: a numeric scalar(float), or a str of 'min'/'median', or a list/numpy 1D array(length of samples)
            the preference of a datapoint K, p(K), which will be set as the affinity/similarity matrix entry s(K,K), is the
            a priori suitability of datapoint K to serve as an exemplar (cluster center). Higher values of preference lead to more exemplars (cluster centers).
            A good initial choice is the minimum('min') or median('median') of the full dense affinity/similarity matrix.
            Please note that the minimum('min') or median('median') of the sparse affinity/similarity matrix,
            which holds only the top entries of the full dense affinity/similarity matrix, is not a good choice.

        Notes
        ----------------------
        After fitting, the clustering result (exemplars / cluster centers) can be accessed via the exemplars_ attribute,
        or use the fit_predict function, which returns a list of exemplars (row indices of the affinity/similarity matrix).
        """
        if (self.preference is None) and (preference is None):
            raise ValueError("Preference should be a numeric scalar, or a string of 'min' / 'median',\
            or a list/np 1D array(length of samples).\n Your input preference is: {0}".format(str(preference)))
        if preference is not None:
            preference_input=preference
        else:
            preference_input=self.preference
        row_array,col_array,data_array=matixToRowColDataArr(X)
        self.exemplars_=sparseAffinityPropagation(row_array,col_array,data_array,\
                            preference=preference_input,convergence_iter=self.convergence_iter,\
                            convergence_percentage=self.convergence_percentage,\
                            max_iter=self.max_iter,damping=self.damping,verboseIter=self.verboseIter,parallel=self.parallel)
        return self
Project: pysapc    Author: bioinfocao
def fit_predict(self, X, preference=None):
        """
        Apply Sparse Affinity Propagation (SAP) to precomputed sparse affinity/similarity matrix X

        Parameters
        ----------------------
        X: coo_matrix,csr_matrix,lil_matrix, precomputed sparse affinity/similarity matrix
           (affinity/similarity could be cosine, pearson, euclidean distance, or others).
           Please note that the affinity/similarity matrix doesn't need to be symmetric: s(A,B) can differ from s(B,A).
           In fact, s(A,B) may exist while s(B,A) does not in the sparse affinity/similarity matrix.

        preference: a numeric scalar(float), or a str of 'min'/'median', or a list/numpy 1D array(length of samples)
            the preference of a datapoint K, p(K), which will be set as the affinity/similarity matrix entry s(K,K), is the
            a priori suitability of datapoint K to serve as an exemplar (cluster center). Higher values of preference lead to more exemplars (cluster centers).
            A good initial choice is the minimum('min') or median('median') of the full dense affinity/similarity matrix.
            Please note that the minimum('min') or median('median') of the sparse affinity/similarity matrix,
            which holds only the top entries of the full dense affinity/similarity matrix, is not a good choice.

        Returns
        ----------------------
        The exemplar (cluster center) for each datapoint, given as a row index of the affinity/similarity matrix.
        """
        if (self.preference is None) and (preference is None):
            raise ValueError("Preference should be a numeric scalar, or a string of 'min' / 'median',\
            or a list/np 1D array(length of samples).\n Your input preference is: {0}".format(str(preference)))
        if preference is not None:
            preference_input=preference
        else:
            preference_input=self.preference
        row_array,col_array,data_array=matixToRowColDataArr(X)
        self.exemplars_=sparseAffinityPropagation(row_array,col_array,data_array,\
                            preference=preference_input,convergence_iter=self.convergence_iter,\
                            convergence_percentage=self.convergence_percentage,\
                            max_iter=self.max_iter,damping=self.damping,verboseIter=self.verboseIter,parallel=self.parallel)
        return self.exemplars_
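A hedged usage sketch (not from the pysapc source): it assumes sap is an instance of the enclosing class, which is not shown in this snippet, and that X is a precomputed sparse affinity/similarity matrix:

import numpy as np
from scipy.sparse import csr_matrix

# Toy 3x3 affinity matrix (e.g. negative distances); the diagonal is left unset so that
# the chosen preference fills s(K, K).
X = csr_matrix(np.array([[ 0., -2., -5.],
                         [-2.,  0., -1.],
                         [-5., -1.,  0.]]))
exemplars = sap.fit_predict(X, preference=-5.0)   # exemplar row index for each of the 3 samples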
Project: scikit-kge    Author: mnick
def to_tensor(xs, ys, sz):
    T = [sp.lil_matrix((sz[0], sz[1])) for _ in range(sz[2])]
    for n in range(len(xs)):
        i, j, k = xs[n]
        T[k][i, j] = ys[n]
    return T
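A small check (not from scikit-kge) of the coordinate-to-tensor conversion above; it assumes scipy.sparse is imported as sp, as the function expects:

xs = [(0, 1, 0), (1, 0, 1)]          # (row, col, slice) coordinate triples
ys = [0.5, 2.0]                      # corresponding values
T = to_tensor(xs, ys, (2, 2, 2))
print(T[0].toarray())                # [[0.  0.5], [0.  0. ]]
print(T[1].toarray())                # [[0.  0. ], [2.  0. ]]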
Project: MIT-Thesis    Author: alec-heif
def test_dot(self):
        from scipy.sparse import lil_matrix
        lil = lil_matrix((4, 1))
        lil[1, 0] = 1
        lil[3, 0] = 2
        dv = DenseVector(array([1., 2., 3., 4.]))
        self.assertEqual(10.0, dv.dot(lil))
Project: MIT-Thesis    Author: alec-heif
def test_squared_distance(self):
        from scipy.sparse import lil_matrix
        lil = lil_matrix((4, 1))
        lil[1, 0] = 3
        lil[3, 0] = 2
        dv = DenseVector(array([1., 2., 3., 4.]))
        sv = SparseVector(4, {0: 1, 1: 2, 2: 3, 3: 4})
        self.assertEqual(15.0, dv.squared_distance(lil))
        self.assertEqual(15.0, sv.squared_distance(lil))
Project: MIT-Thesis    Author: alec-heif
def scipy_matrix(self, size, values):
        """Create a column SciPy matrix from a dictionary of values"""
        from scipy.sparse import lil_matrix
        lil = lil_matrix((size, 1))
        for key, value in values.items():
            lil[key, 0] = value
        return lil
Project: HSISeg    Author: HSISeg
def nonlocal_grad(W_sqrt, u):
    r = u.shape[0]
    diagu = sparse.lil_matrix((r,r),dtype=np.float64)
    diagu.setdiag(u)
    p1 = W_sqrt * diagu
    p2 = diagu * W_sqrt
    p = p1 - p2
    return p
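A small standalone check (not from HSISeg): each entry (i, j) of the result equals W_sqrt[i, j] * (u[j] - u[i]), i.e. the weighted nonlocal gradient of u along edge (i, j):

import numpy as np
from scipy import sparse

W_sqrt = sparse.csr_matrix(np.array([[0., 2.],
                                     [2., 0.]]))
u = np.array([1., 3.])
print(nonlocal_grad(W_sqrt, u).toarray())   # [[ 0.  4.], [-4.  0.]]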
Project: HSISeg    Author: HSISeg
def project_p(p,r):
    tempp = sparse.csr_matrix(p, copy=False)
    tempp.data **= 2
    coe = np.array((tempp.sum(axis=1)))**0.5
    coe = np.amax(coe,axis=1)
    coe[coe<1.0] = 1
    diagcoe = sparse.lil_matrix((r,r),dtype=np.float64)
    diagcoe.setdiag(1/coe)
    p = diagcoe*p
    return p
Project: HSISeg    Author: HSISeg
def nonlocal_grad(W_sqrt, u):
    r = u.shape[0]
    diagu = sparse.lil_matrix((r,r),dtype=np.float64)
    diagu.setdiag(u)
    p1 = W_sqrt * diagu
    p2 = diagu * W_sqrt
    p = p1 - p2
    return p
Project: HSISeg    Author: HSISeg
def project_p(p,r):
    tempp = sparse.csr_matrix(p, copy=False)
    tempp.data **= 2
    coe = np.array((tempp.sum(axis=1)))**0.5
    coe = np.amax(coe,axis=1)
    coe[coe<1.0] = 1
    diagcoe = sparse.lil_matrix((r,r),dtype=np.float64)
    diagcoe.setdiag(1/coe)
    p = diagcoe*p
    return p