The following are 49 code examples extracted from Python open source projects, illustrating how to use scipy.sparse.lil_matrix().
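Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the sizes and variable names are illustrative) of the pattern most of them follow: build the matrix incrementally in LIL format, which supports cheap element- and row-wise assignment, then convert to CSR/CSC for arithmetic.

import numpy as np
from scipy.sparse import lil_matrix

# Build a small sparse matrix incrementally; LIL is efficient for row-wise construction.
n_rows, n_cols = 4, 5                # illustrative sizes
m = lil_matrix((n_rows, n_cols), dtype=np.float64)
m[0, 1] = 2.0                        # assign single entries
m[2, :3] = [1.0, 0.0, 3.0]           # row slices work too
m.setdiag(1.0)                       # fill the main diagonal

# Convert to CSR before doing arithmetic or matrix-vector products.
csr = m.tocsr()
print(csr.toarray())
print(csr.dot(np.ones(n_cols)))
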
def _get_data_batch(self, x_batch):
    # Construct LIL matrix
    X_lil = sparse.lil_matrix((len(x_batch), self.td))
    for j, x in enumerate(x_batch):
        for t in x:
            X_lil[j, t] += 1
    # Get batch data
    indices, ids, weights = [], [], []
    max_len = 0
    for i, (row, data) in enumerate(zip(X_lil.rows, X_lil.data)):
        # Dummy weight for all-zero row
        if len(row) == 0:
            indices.append((i, 0))
            ids.append(0)
            weights.append(0.0)
            continue
        # Update indices by position
        max_len = max(max_len, len(row))
        indices.extend((i, t) for t in xrange(len(row)))
        ids.extend(row)
        weights.extend(data)
    shape = (len(X_lil.rows), max_len)
    return [indices, shape, ids, weights], None

def matixToRowColDataArr(X):
    """
    Convert sparse affinity/similarity matrix to numpy array format (row_array, col_array, data_array)
    so the cython update function can work efficiently on it.
    """
    # convert to coo format (from lil, csr, csc)
    if isinstance(X, coo_matrix):
        X_coo = X
    elif (isinstance(X, csr_matrix)) or (isinstance(X, lil_matrix)):
        X_coo = X.tocoo()
    else:  # others like numpy matrix could be converted to coo matrix
        X_coo = coo_matrix(X)
    # Upcast matrix to a floating point format (if necessary)
    X_coo = X_coo.asfptype()
    # get row_array, col_array, data_array in their correct data type (for cython to work)
    row_array, col_array, data_array = X_coo.row.astype(np.int), X_coo.col.astype(np.int), X_coo.data
    return row_array, col_array, data_array

def test_serialize(self):
    from scipy.sparse import lil_matrix
    lil = lil_matrix((4, 1))
    lil[1, 0] = 1
    lil[3, 0] = 2
    sv = SparseVector(4, {1: 1, 3: 2})
    self.assertEqual(sv, _convert_to_vector(lil))
    self.assertEqual(sv, _convert_to_vector(lil.tocsc()))
    self.assertEqual(sv, _convert_to_vector(lil.tocoo()))
    self.assertEqual(sv, _convert_to_vector(lil.tocsr()))
    self.assertEqual(sv, _convert_to_vector(lil.todok()))

    def serialize(l):
        return ser.loads(ser.dumps(_convert_to_vector(l)))

    self.assertEqual(sv, serialize(lil))
    self.assertEqual(sv, serialize(lil.tocsc()))
    self.assertEqual(sv, serialize(lil.tocsr()))
    self.assertEqual(sv, serialize(lil.todok()))

def vectorize(features, vocab):
    """ Transform a features list into a numeric vector
        with a given vocab
    :type dpvocab: dict
    :param dpvocab: vocab for distributional representation
    :type projmat: scipy.lil_matrix
    :param projmat: projection matrix for disrep
    """
    vec = lil_matrix((1, len(vocab)))
    for feat in features:
        try:
            fidx = vocab[feat]
            vec[0, fidx] += 1.0
        except KeyError:
            pass
    # Normalization
    vec = normalize(vec)
    return vec

def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None):
    ''' Receives a sequence of (id, rating), and produces k recommendations (as a list of ids)
    '''
    if exclude is None:
        exclude = []

    # Compile network if needed
    if not hasattr(self, 'predict_function'):
        self._compile_predict_function()

    # Prepare RNN input
    max_length_seq = sequence[-min(self.max_length, len(sequence)):]
    X = sp.lil_matrix((1, self.n_items), dtype=theano.config.floatX)
    for j in sequence:
        X[0, j[0]] = 1. / len(sequence)**self.alpha

    # Run RNN
    if self.interactions_are_unique:
        should_exclude = [i[0] for i in sequence]
    else:
        should_exclude = []
    should_exclude.extend(exclude)
    return self.predict_function(X.tocsr(), k, should_exclude)

def reduce_system(A, x, b, i):
    """Remove variable(s) i from the system.
    Row(s) i of matrix 'b' must be set before this gets called."""
    # Convert all to the lil format.
    A = sparse.lil_matrix(A)
    x = sparse.lil_matrix(x.reshape((len(x), 1)))
    b = sparse.lil_matrix(b.reshape((len(b), 1)))
    # Update rhs b (absorbs vars).
    for var_i in i:
        b = b - x[var_i, 0] * A.getcol(var_i)
    # Drop rows from the b vector.
    b = drop_rows(b, i)
    # Drop rows from the x vector.
    x = drop_rows(x, i)
    # Drop rows from the A matrix.
    A = drop_rows(A, i)
    # Drop cols from the A matrix.
    A = drop_rows(A.transpose(), i)
    return A, x, b

def calc_sim_matrix(uc1, uc2):
    """
    Calculate matrix similarity between two sets of clusters.
    uc1: a set of clusters, dict{cluster_id:set(), cluster_id:set(), ...}
    uc2: a set of clusters, dict{cluster_id:set(), cluster_id:set(), ...}
    return (S, keys1, keys2, m, n), where
        S_ij = jacard_sim(uc1_i, uc2_j)
        keys1, keys2 = cluster ids in uc1 and uc2
        m, n = number of clusters in uc1 and uc2
    """
    keys1 = uc1.keys()
    keys1.sort()
    keys2 = uc2.keys()
    keys2.sort()
    m, n = len(keys1), len(keys2)
    S = lil_matrix((m+1, n+1))
    for i in xrange(m):
        for j in xrange(n):
            c1 = uc1[keys1[i]]
            c2 = uc2[keys2[j]]
            sim = jacard_sim(c1, c2)
            S[i, j] = sim
    return S, keys1, keys2, m, n

def convert_graph_connectivity_to_sparse(G, nodes):
    """
    Given a networkx graph, return sparse adjacency matrix S and H.

    S and H are different in that S's entries contain edge weights
    (if there are multiple edges, behavior is overwrite), and H just
    has a 1 for every non-zero entry.

    NOTE: for now just use H, so returns None, H
    """
    n = G.number_of_nodes()
    # S = sparse.lil_matrix((n,n))
    H = sparse.lil_matrix((n, n))
    nodes_to_index = dict(zip(nodes, range(n)))
    for e in G.edges_iter(data=True):
        i = nodes_to_index[e[0]]
        j = nodes_to_index[e[1]]
        H[i, j] = 1
        H[j, i] = 1
    # we do a lot of column-slicing, so convert to CSC for efficiency
    H = H.tocsr()
    return None, H  # return S,H

def sparse_random_matrix(pre, post, p, weight, delay=0):
    """
    Returns a sparse (lil) matrix to connect the pre and post populations
    with the probability p and the value weight.
    """
    try:
        from scipy.sparse import lil_matrix
    except:
        Global._warning("scipy is not installed, sparse matrices won't work")
        return None
    from random import sample
    W = lil_matrix((pre, post))
    for i in xrange(pre):
        k = np.random.binomial(post, p, 1)[0]
        W.rows[i] = sample(xrange(post), k)
        W.data[i] = [weight] * k
    return W

def setIWMult(self, mult):
    m = mult / float(self.iwMult)

    if self.sparse:
        v = spsparse.lil_matrix((self.hw.shape[0], self.hw.shape[0]))
        v.setdiag(np.ones(v.shape[0], dtype=self.dtype))
        v.setdiag([m, ] * (self.nIn + 1))
        self.hw = v * self.hw

        # good for debugging above
        #w = self.hw.todense()
        #w[:self.nIn+1,:] *= m
        #self.hw = spsparse.csc_matrix(w)
    else:
        self.hw[:self.nIn+1, :] *= m

    self.iwMult = mult

def setRWScale(self, x, scale):
    # why does this method not work? XXX - idfah
    m = scale / float(self.rwScale)

    if self.sparse:
        v = spsparse.lil_matrix((self.hw.shape[0], self.hw.shape[0]))
        d = np.ones(v.shape[0], dtype=self.dtype)
        d[self.nIn+1:] = m
        v.setdiag(d)
        self.hw = v * self.hw

        # good for debugging above
        #w = self.hw.todense()
        #w[self.nIn+1:,:] *= m
        #self.hw = spsparse.csc_matrix(w)
    else:
        self.hw[self.nIn+1:, :] *= m

    self.scaleIW(x)

def mc_logdet(train, mu=1., gamma=5, maxitr=2):
    m, n = train.shape
    nonzero_row, nonzero_col = train.nonzero()
    nonzero_index = zip(nonzero_row, nonzero_col)
    prevX = train  #.toarray()
    X = None
    Y = train  #.toarray()
    Z = sp.lil_matrix(np.zeros((m, n)))
    for itr in range(maxitr):
        X = update_X(Y - Z / mu, mu / 2., 6)
        for idx in nonzero_index:
            X[idx[0], idx[1]] = train[idx[0], idx[1]]
        Y = maximum(X + (Z / mu), sp.lil_matrix(np.zeros(X.shape)))
        Z = Z + mu * (X - Y)
        mu *= gamma
        #err = np.sum((X-prevX)**2)/np.sum(prevX**2)
        #print err
        prevX = X
    return X

def matrix_to_row_col_data(X):
    """Convert sparse affinity matrix to arrays.

    .. note:: Deprecated.
        It will be removed in icing 0.2. This is now done by check_array from numpy.
    """
    # convert to coo format (from lil, csr, csc)
    if isinstance(X, coo_matrix):
        X_coo = X
    elif (isinstance(X, csr_matrix)) or (isinstance(X, lil_matrix)):
        X_coo = X.tocoo()
    else:  # others like numpy matrix could be converted to coo matrix
        X_coo = coo_matrix(X)
    # Upcast matrix to a floating point format (if necessary)
    X_coo = X_coo.asfptype()
    return X_coo.row.astype(np.int), X_coo.col.astype(np.int), X_coo.data

def grid_to_adjacency_matrix(grid, neighborhood=8):
    """Convert a boolean grid where 0's express holes and 1's connected pixels
    into a sparse adjacency matrix representing the grid-graph.
    Neighborhood for each pixel is calculated from its 4 or 8 more immediate
    surrounding neighbors (defaults to 8)."""
    coords = np.argwhere(grid)
    coords_x = coords[:, 0]
    coords_y = coords[:, 1]
    # lil is the most performant format to build a sparse matrix iteratively
    matrix = sparse.lil_matrix((0, coords.shape[0]), dtype=np.uint8)
    if neighborhood == 4:
        for px, py in coords:
            row = (((px == coords_x) & (np.abs(py - coords_y) == 1)) |
                   ((np.abs(px - coords_x) == 1) & (py == coords_y)))
            matrix = sparse.vstack([matrix, row])
    else:
        for px, py in coords:
            row = (np.abs(px - coords_x) <= 1) & (np.abs(py - coords_y) <= 1)
            matrix = sparse.vstack([matrix, row])
    matrix.setdiag(1)
    # Once built, we convert it to compressed sparse columns or rows
    return matrix.tocsc()  # or .tocsr()

def process_text(text, dic, r, grams):
    """
    Return sparse feature matrix
    """
    X = lil_matrix((len(text), len(dic)))
    for i, l in enumerate(text):
        tokens = tokenize(l, grams)
        indexes = []
        for t in tokens:
            try:
                indexes += [dic[t]]
            except KeyError:
                pass
        indexes = list(set(indexes))
        indexes.sort()
        for j in indexes:
            X[i, j] = r[j]
    return csr_matrix(X)

def assemble(fs, f):
    """Assemble the finite element system for the Helmholtz problem given
    the function space in which to solve and the right hand side
    function."""
    raise NotImplementedError

    # Create an appropriate (complete) quadrature rule.

    # Tabulate the basis functions and their gradients at the quadrature points.

    # Create the left hand side matrix and right hand side vector.
    # This creates a sparse matrix because creating a dense one may
    # well run your machine out of memory!
    A = sp.lil_matrix((fs.node_count, fs.node_count))
    l = np.zeros(fs.node_count)

    # Now loop over all the cells and assemble A and l

    return A, l

def learnProjection(sourceDomain, targetDomain):
    """
    Learn the projection matrix and store it to a file.
    """
    h = 50  # no. of latent dimensions.
    print "Loading the bipartite matrix...",
    coocData = sio.loadmat("../work/%s-%s/DSxDI.mat" % (sourceDomain, targetDomain))
    M = sp.lil_matrix(coocData['DSxDI'])
    (nDS, nDI) = M.shape
    print "Done."
    print "Computing the Laplacian...",
    D1 = sp.lil_matrix((nDS, nDS), dtype=np.float64)
    D2 = sp.lil_matrix((nDI, nDI), dtype=np.float64)
    for i in range(0, nDS):
        D1[i, i] = 1.0 / np.sqrt(np.sum(M[i, :].data[0]))
    for i in range(0, nDI):
        D2[i, i] = 1.0 / np.sqrt(np.sum(M[:, i].T.data[0]))
    B = (D1.tocsr().dot(M.tocsr())).dot(D2.tocsr())
    print "Done."
    print "Computing SVD...",
    ut, s, vt = sparsesvd(B.tocsc(), h)
    sio.savemat("../work/%s-%s/proj.mat" % (sourceDomain, targetDomain), {'proj': ut.T})
    print "Done."
    pass

def rescal(X, K):
    ## Set logging to INFO to see RESCAL information
    #logging.basicConfig(level=logging.INFO)

    ## Load Matlab data and convert it to dense tensor format
    #T = loadmat('data/alyawarra.mat')['Rs']
    #X = [lil_matrix(T[:, :, k]) for k in range(T.shape[2])]

    X = [sp.sparse.csr_matrix(X)]
    A, R, fit, itr, exectimes = rescal_als(X, K, init='nvecs', lambda_A=10, lambda_R=10)
    theta = A.dot(R).dot(A.T)
    Y = 1 / (1 + np.exp(-theta))
    Y = Y[:, 0, :]
    Y[Y <= 0.5] = 0
    Y[Y > 0.5] = 1
    #Y = sp.stats.bernoulli.rvs(Y)
    return Y

def create_laplacian(W, normalize=True):
    n = W.shape[0]
    W = ss.csr_matrix(W)
    WW_diag = W.dot(ss.csr_matrix(np.ones((n, 1)))).todense()
    if normalize:
        WWds = np.sqrt(WW_diag)
        # Let the inverse of zero entries become zero.
        WWds[WWds == 0] = np.float("inf")
        WW_diag_invroot = 1. / WWds
        D_invroot = ss.lil_matrix((n, n))
        D_invroot.setdiag(WW_diag_invroot)
        D_invroot = ss.csr_matrix(D_invroot)
        I = scipy.sparse.identity(W.shape[0], format='csr', dtype=W.dtype)
        L = I - D_invroot.dot(W.dot(D_invroot))
    else:
        D = ss.lil_matrix((n, n))
        D.setdiag(WW_diag)
        D = ss.csr_matrix(D)
        L = D - W
    return L.astype(W.dtype)

def _init_svd(self, dictionary, definitions):
    self.td_matrix = lil_matrix((len(dictionary), self.n_terms))
    for defn, i in zip(definitions, range(len(definitions))):
        if i % 100 == 0:
            print("Building term-document matrix: {} / {}".format(i, len(dictionary)), end="\r")
        self.td_matrix[i, :] = self.compute_freq_vec(dictionary[defn])
    self.td_matrix = self.td_matrix.transpose().tocsr()
    print()
    for i in range(self.n_terms):
        n = float(self.td_matrix[i, :].getnnz())
        if i % 100 == 0:
            print("Applying td-idf: {} / {}".format(i, self.n_terms), end="\r")
        if n > 0:
            self.td_matrix[i, :] *= np.log(len(dictionary) / n)
    print()
    print("Performing rank reduction...")
    self.u, self.s, self.vt = randomized_svd(self.td_matrix, 50, transpose=False)
    self.doc_matrix = np.matmul(np.diag(self.s), self.vt).transpose()

def random_lil(shape, dtype, nnz):
    rval = sp.lil_matrix(shape, dtype=dtype)
    huge = 2 ** 30
    for k in range(nnz):
        # set non-zeros in random locations (row x, col y)
        idx = numpy.random.randint(1, huge + 1, size=2) % shape
        value = numpy.random.rand()
        # if dtype *int*, value will always be zeros!
        if "int" in dtype:
            value = int(value * 100)
        # The call to tuple is needed as scipy 0.13.1 does not support
        # an ndarray of length 2 as an idx tuple.
        rval.__setitem__(
            tuple(idx),
            value)
    return rval

def load_pickled_vecs(filename, returnpp=False):
    '''
    Load word vecs from word-paraphrase matrix
    :param filename: str
    :return: dict, list, dict
    '''
    with open(filename, 'rb') as fin:
        word2ind, ordered_vocab, w2p = pickle.load(fin)
    word_vecs = {}
    N = len(word2ind.keys())
    for w, d in w2p.iteritems():
        lil_v = sparse.lil_matrix((1, N), dtype='float')
        for p, sc in d.iteritems():
            try:
                lil_v[0, word2ind[p]] = sc
            except KeyError:
                print 'Error loading vector:', w, p, sc
        word_vecs[w] = sparse.csr_matrix(lil_v)  # is this slow?
    if returnpp:
        return word_vecs, N, w2p
    else:
        return word_vecs, N

def compute_mrr(self, data, users, queries=None):
    # Check data type
    if isinstance(data, lil_matrix):
        pass
    elif isinstance(data, Table):
        # Preprocess Orange.data.Table and transform it to sparse
        data, order, shape = preprocess(data)
        data = table2sparse(data, shape, order, m_type=lil_matrix)
    else:
        raise TypeError('Invalid data type')

    # Make predictions
    y_pred = self(users)

    # Get relevant items for the user[i]
    if queries is None:
        queries = []
        add_items = queries.append
        for u in users:
            add_items(np.asarray(data.rows[u]))

    # Compute Mean Reciprocal Rank (MRR)
    mrr = MeanReciprocalRank(results=y_pred, query=queries)
    return mrr, queries

def test_scikit_vs_scipy():
    # Test scikit linkage with full connectivity (i.e. unstructured) vs scipy
    n, p, k = 10, 5, 3
    rng = np.random.RandomState(0)

    # Not using a lil_matrix here, just to check that non sparse
    # matrices are well handled
    connectivity = np.ones((n, n))
    for linkage in _TREE_BUILDERS.keys():
        for i in range(5):
            X = .1 * rng.normal(size=(n, p))
            X -= 4. * np.arange(n)[:, np.newaxis]
            X -= X.mean(axis=1)[:, np.newaxis]

            out = hierarchy.linkage(X, method=linkage)
            children_ = out[:, :2].astype(np.int)
            children, _, n_leaves, _ = _TREE_BUILDERS[linkage](X, connectivity)
            cut = _hc_cut(k, children, n_leaves)
            cut_ = _hc_cut(k, children_, n_leaves)
            assess_same_labelling(cut, cut_)

    # Test error management in _hc_cut
    assert_raises(ValueError, _hc_cut, n_leaves + 1, children, n_leaves)

def test_label_binarize_multilabel():
    y_ind = np.array([[0, 1, 0], [1, 1, 1], [0, 0, 0]])
    classes = [0, 1, 2]
    pos_label = 2
    neg_label = 0
    expected = pos_label * y_ind
    y_sparse = [sparse_matrix(y_ind)
                for sparse_matrix in [coo_matrix, csc_matrix, csr_matrix,
                                      dok_matrix, lil_matrix]]

    for y in [y_ind] + y_sparse:
        yield (check_binarized_results, y, classes, pos_label, neg_label,
               expected)

    assert_raises(ValueError, label_binarize, y, classes, neg_label=-1,
                  pos_label=pos_label, sparse_output=True)

def test_score_samples():
    # Test score_samples (pseudo-likelihood) method.
    # Assert that pseudo-likelihood is computed without clipping.
    # See Fabian's blog, http://bit.ly/1iYefRk
    rng = np.random.RandomState(42)
    X = np.vstack([np.zeros(1000), np.ones(1000)])
    rbm1 = BernoulliRBM(n_components=10, batch_size=2,
                        n_iter=10, random_state=rng)
    rbm1.fit(X)
    assert_true((rbm1.score_samples(X) < -300).all())

    # Sparse vs. dense should not affect the output. Also test sparse input
    # validation.
    rbm1.random_state = 42
    d_score = rbm1.score_samples(X)
    rbm1.random_state = 42
    s_score = rbm1.score_samples(lil_matrix(X))
    assert_almost_equal(d_score, s_score)

    # Test numerical stability (#2785): would previously generate infinities
    # and crash with an exception.
    with np.errstate(under='ignore'):
        rbm1.score_samples([np.arange(1000) * 100])

def doSplitAcrossSentences(self, examples, featureNamesOnly):
    if featureNamesOnly:
        return ["splitAcrossSentences"]

    out = lil_matrix((len(examples), 1))
    for i, example in enumerate(examples):
        sentenceid0, _ = example.arguments[0]
        sameSentence = True
        for thisSID, _ in example.arguments:
            if sentenceid0 != thisSID:
                sameSentence = False
                break
        if not sameSentence:
            out[i, 0] = 1
    return coo_matrix(out)

def __init__(self, genes, bcs, dtype='int32'):
    self.genes = list(genes)
    self.genes_dim = len(self.genes)
    self.gene_ids_map = {gene.id: i for i, gene in enumerate(self.genes)}

    self.bcs = list(bcs)
    self.bcs_dim = len(self.bcs)
    self.bcs_map = {bc: i for i, bc in enumerate(self.bcs)}

    self.dtype = dtype
    self.m = sp_sparse.lil_matrix((self.genes_dim, self.bcs_dim), dtype=dtype)

def tolil(self):
    if type(self.m) is not sp_sparse.lil_matrix:
        self.m = self.m.tolil()

def vectorize_with_sparse_features(sparse_feature_set, feature_count, c2_data):
    vector = lil_matrix((1, feature_count), dtype=np.float)
    for index, (offset, code, ssdeep_hash) in sparse_feature_set:
        if offset not in c2_data:
            continue
        if c2_data[offset]["code"] == code:
            d = ssdeep.compare(c2_data[offset]["content_ssdeep"], ssdeep_hash)
            d = float(d) / float(100.0)
            vector[0, index] = d
    return vector

def _get_raw_context_matrix(self, sentences):
    """
    compute the raw context matrix with weighted counts
    it has an entry for every word in the vocabulary
    """
    # make the feature matrix
    featmat = lil_matrix((len(self.index2word), len(self.index2word)), dtype=float)
    for sentence_no, sentence in enumerate(sentences):
        if not sentence_no % self.progress:
            print("PROGRESS: at sentence #%i" % sentence_no)
        sentence = [word if word in self.word2index else None for word in sentence]
        # forward pass
        if self.forward:
            for i, word in enumerate(sentence[:-1]):
                if word:
                    # get all words in the forward window
                    wwords = sentence[i + 1:min(i + 1 + self.window, len(sentence))]
                    for j, w in enumerate(wwords, 1):
                        if w:
                            featmat[self.word2index[word], self.word2index[w]] += 1.  # /j
        # backwards pass
        if self.backward:
            sentence_back = sentence[::-1]
            for i, word in enumerate(sentence_back[:-1]):
                if word:
                    # get all words in the forward window of the backwards sentence
                    wwords = sentence_back[i + 1:min(i + 1 + self.window, len(sentence_back))]
                    for j, w in enumerate(wwords, 1):
                        if w:
                            featmat[self.word2index[word], self.word2index[w]] += 1.  # /j
    print("PROGRESS: through with all the sentences")
    self.featmat = csr_matrix(featmat)

def get_context_matrix(self, fill_diag=True, norm='count'):
    """
    for every word in the sentences, create a vector that contains the counts of its context words
    (weighted by the distance to it with a max distance of window)
    Inputs:
        - norm: if the feature matrix should be normalized to contain ones on the diagonal
          (--> average context vectors)
        - fill_diag: if diagonal of featmat should be filled with word counts
    Returns:
        - featmat: n_voc x n_voc sparse array with weighted context word counts for every word
    """
    featmat = deepcopy(self.featmat)
    # fill up the diagonals with the total counts of each word --> similarity matrix
    if fill_diag:
        featmat = lil_matrix(featmat)
        for i, word in enumerate(self.index2word):
            featmat[i, i] = self.wcounts[word]
        featmat = csr_matrix(featmat)
    assert ((featmat - featmat.transpose()).data**2).sum() < 2.220446049250313e-16, "featmat not symmetric"
    # possibly normalize by the max counts
    if norm == 'count':
        print("normalizing feature matrix by word count")
        normmat = lil_matrix(featmat.shape, dtype=float)
        normmat.setdiag([1. / self.wcounts[word] for word in self.index2word])
        featmat = csr_matrix(normmat) * featmat
    elif norm == 'max':
        print("normalizing feature matrix by max counts")
        normmat = lil_matrix(featmat.shape, dtype=float)
        normmat.setdiag([1. / v[0] if v[0] else 1. for v in featmat.max(axis=1).toarray()])
        featmat = csr_matrix(normmat) * featmat
    return featmat

def get_local_context_matrix(self, tokens, forward=True, backward=True):
    """
    compute a local context matrix. it has an entry for every token,
    even if it is not present in the vocabulary
    Inputs:
        - tokens: list of words
    Returns:
        - local_featmat: size len(set(tokens)) x n_vocab
        - tok_idx: {word: index} to map the words from the tokens list to an index of the featmat
    """
    # for every token we still only need one representation per document
    tok_idx = {word: i for i, word in enumerate(set(tokens))}
    featmat = lil_matrix((len(tok_idx), len(self.index2word)), dtype=float)
    # clean out context words we don't know
    known_tokens = [word if word in self.word2index else None for word in tokens]
    # forward pass
    if self.forward:
        for i, word in enumerate(tokens[:-1]):
            # get all words in the forward window
            wwords = known_tokens[i + 1:min(i + 1 + self.window, len(known_tokens))]
            for j, w in enumerate(wwords, 1):
                if w:
                    featmat[tok_idx[word], self.word2index[w]] += 1. / j
    # backwards pass
    if self.backward:
        tokens_back = tokens[::-1]
        known_tokens_back = known_tokens[::-1]
        for i, word in enumerate(tokens_back[:-1]):
            # get all words in the forward window of the backwards sentence, incl. word itself
            wwords = known_tokens_back[i + 1:min(i + 1 + self.window, len(known_tokens_back))]
            for j, w in enumerate(wwords, 1):
                if w:
                    featmat[tok_idx[word], self.word2index[w]] += 1. / j
    featmat = csr_matrix(featmat)
    # normalize matrix
    normmat = lil_matrix((featmat.shape[0], featmat.shape[0]), dtype=float)
    normmat.setdiag([1. / v[0] if v[0] else 1. for v in featmat.max(axis=1).toarray()])
    featmat = csr_matrix(normmat) * featmat
    return featmat, tok_idx

def infections_count(
        infecting_node,
        infected_node,
        infecting_vec,
        infected_vec,
        D,
        ):
    '''
    For each pair of nodes counts infections between them.

    infecting_node - vector, mapping events to nodes of events that infected them
    infected_node - vector of integers, mapping events to nodes where they occurred
    infecting_vec - vector, mapping events to ids of events that infected them
    infected_vec - vector of integers, mapping events to ids
    D - number of nodes

    returns: matrix of counts
    '''
    infections_mat = sp.lil_matrix((D, D), dtype=np.int)
    for (infected_u, infecting_u, infected_e, infecting_e) in \
            izip(infected_node, infecting_node, infected_vec, infecting_vec):
        if infected_e != infecting_e:
            infections_mat[infecting_u, infected_u] += 1
    return infections_mat

def one_hot_sparse(index_array, num_values):
    m = sp.lil_matrix((num_values, index_array.shape[0]), dtype=np.bool)
    for i in range(index_array.shape[0]):
        m[index_array[i], i] = 1
    return m.tocsr()

def load_data(dataset):
    # load the data: x, tx, allx, graph
    names = ['x', 'tx', 'allx', 'graph']
    objects = []
    for i in range(len(names)):
        objects.append(pkl.load(open("data/ind.{}.{}".format(dataset, names[i]))))
    x, tx, allx, graph = tuple(objects)
    test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    return adj, features

def bundle_adjustment_sparsity(n_cameras, n_points, camera_indices, point_indices):
    m = camera_indices.size * 2
    n = n_cameras * 9 + n_points * 3
    A = lil_matrix((m, n), dtype=int)

    i = np.arange(camera_indices.size)
    for s in range(9):
        A[2 * i, camera_indices * 9 + s] = 1
        A[2 * i + 1, camera_indices * 9 + s] = 1

    for s in range(3):
        A[2 * i, n_cameras * 9 + point_indices * 3 + s] = 1
        A[2 * i + 1, n_cameras * 9 + point_indices * 3 + s] = 1

    return A

def fit(self, X, preference=None):
    """
    Apply Sparse Affinity Propagation (SAP) to a precomputed sparse affinity/similarity matrix X.

    Parameters
    ----------------------
    X: coo_matrix, csr_matrix or lil_matrix, precomputed sparse affinity/similarity matrix
        (affinity/similarity could be cosine, pearson, euclidean distance, or others).
        Note that the affinity/similarity matrix doesn't need to be symmetric: s(A,B) can be
        different from s(B,A), and s(A,B) may exist while s(B,A) does not exist in the sparse
        affinity/similarity matrix.

    preference: a numeric scalar (float), a str of 'min'/'median', or a list/numpy 1D array (length of samples)
        The preference of a datapoint K, p(K), which is set as the affinity/similarity matrix entry s(K,K),
        is the a priori suitability of datapoint K to serve as an exemplar (cluster center).
        Higher values of preference will lead to more exemplars (cluster centers).
        A good initial choice is the minimum ('min') or median ('median') of the full dense
        affinity/similarity matrix. Note that the minimum ('min') or median ('median') of the
        sparse affinity/similarity matrix, which is the top part of the full dense affinity/similarity
        matrix, is not a good choice.

    Notes
    ----------------------
    After fitting, the clustering result (exemplars/cluster centers) can be accessed through the
    exemplars_ attribute, or use the fit_predict function, which returns a list of exemplars
    (row indices of the affinity/similarity matrix).
    """
    if (self.preference is None) and (preference is None):
        raise ValueError("Preference should be a numeric scalar, or a string of 'min' / 'median', "
                         "or a list/np 1D array (length of samples).\n"
                         "Your input preference is: {0}".format(str(preference)))
    if preference is not None:
        preference_input = preference
    else:
        preference_input = self.preference

    row_array, col_array, data_array = matixToRowColDataArr(X)
    self.exemplars_ = sparseAffinityPropagation(
        row_array, col_array, data_array,
        preference=preference_input, convergence_iter=self.convergence_iter,
        convergence_percentage=self.convergence_percentage,
        max_iter=self.max_iter, damping=self.damping,
        verboseIter=self.verboseIter, parallel=self.parallel)
    return self

def fit_predict(self, X, preference=None):
    """
    Apply Sparse Affinity Propagation (SAP) to a precomputed sparse affinity/similarity matrix X.

    Parameters
    ----------------------
    X: coo_matrix, csr_matrix or lil_matrix, precomputed sparse affinity/similarity matrix
        (affinity/similarity could be cosine, pearson, euclidean distance, or others).
        Note that the affinity/similarity matrix doesn't need to be symmetric: s(A,B) can be
        different from s(B,A), and s(A,B) may exist while s(B,A) does not exist in the sparse
        affinity/similarity matrix.

    preference: a numeric scalar (float), a str of 'min'/'median', or a list/numpy 1D array (length of samples)
        The preference of a datapoint K, p(K), which is set as the affinity/similarity matrix entry s(K,K),
        is the a priori suitability of datapoint K to serve as an exemplar (cluster center).
        Higher values of preference will lead to more exemplars (cluster centers).
        A good initial choice is the minimum ('min') or median ('median') of the full dense
        affinity/similarity matrix. Note that the minimum ('min') or median ('median') of the
        sparse affinity/similarity matrix, which is the top part of the full dense affinity/similarity
        matrix, is not a good choice.

    Returns
    ----------------------
    The exemplars (cluster centers) for each datapoint. Exemplars are indices
    (row indices of the matrix) of the cluster centers for each datapoint.
    """
    if (self.preference is None) and (preference is None):
        raise ValueError("Preference should be a numeric scalar, or a string of 'min' / 'median', "
                         "or a list/np 1D array (length of samples).\n"
                         "Your input preference is: {0}".format(str(preference)))
    if preference is not None:
        preference_input = preference
    else:
        preference_input = self.preference

    row_array, col_array, data_array = matixToRowColDataArr(X)
    # use the resolved preference so the argument overrides the constructor value
    self.exemplars_ = sparseAffinityPropagation(
        row_array, col_array, data_array,
        preference=preference_input, convergence_iter=self.convergence_iter,
        convergence_percentage=self.convergence_percentage,
        max_iter=self.max_iter, damping=self.damping,
        verboseIter=self.verboseIter, parallel=self.parallel)
    return self.exemplars_

def to_tensor(xs, ys, sz):
    T = [sp.lil_matrix((sz[0], sz[1])) for _ in range(sz[2])]
    # use a separate loop index so the unpacked coordinate i does not clobber it
    for n in range(len(xs)):
        i, j, k = xs[n]
        T[k][i, j] = ys[n]
    return T

def test_dot(self):
    from scipy.sparse import lil_matrix
    lil = lil_matrix((4, 1))
    lil[1, 0] = 1
    lil[3, 0] = 2
    dv = DenseVector(array([1., 2., 3., 4.]))
    self.assertEqual(10.0, dv.dot(lil))

def test_squared_distance(self):
    from scipy.sparse import lil_matrix
    lil = lil_matrix((4, 1))
    lil[1, 0] = 3
    lil[3, 0] = 2
    dv = DenseVector(array([1., 2., 3., 4.]))
    sv = SparseVector(4, {0: 1, 1: 2, 2: 3, 3: 4})
    self.assertEqual(15.0, dv.squared_distance(lil))
    self.assertEqual(15.0, sv.squared_distance(lil))

def scipy_matrix(self, size, values):
    """Create a column SciPy matrix from a dictionary of values"""
    from scipy.sparse import lil_matrix
    lil = lil_matrix((size, 1))
    for key, value in values.items():
        lil[key, 0] = value
    return lil

def nonlocal_grad(W_sqrt, u):
    r = u.shape[0]
    diagu = sparse.lil_matrix((r, r), dtype=np.float64)
    diagu.setdiag(u)
    p1 = W_sqrt * diagu
    p2 = diagu * W_sqrt
    p = p1 - p2
    return p

def project_p(p, r):
    tempp = sparse.csr_matrix(p, copy=False)
    tempp.data **= 2
    coe = np.array((tempp.sum(axis=1)))**0.5
    coe = np.amax(coe, axis=1)
    coe[coe < 1.0] = 1
    diagcoe = sparse.lil_matrix((r, r), dtype=np.float64)
    diagcoe.setdiag(1 / coe)
    p = diagcoe * p
    return p