The following 15 code examples, drawn from open-source Python projects, illustrate how to use scipy.sparse.find(). They assume the usual aliases (import numpy as np; import scipy.sparse as sp, sometimes ss or sparse; from scipy.sparse import find), plus project-specific imports (networkx, mosek, collections.deque, scipy.sparse.csgraph.connected_components) that the original projects define elsewhere.
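Before the examples, a minimal sketch of what scipy.sparse.find() itself does: it returns three parallel arrays (row indices, column indices, values), one entry per stored nonzero; explicitly stored zeros are filtered out. The toy matrix below is illustrative only.

import numpy as np
import scipy.sparse as sp

A = sp.csr_matrix(np.array([[0.0, 2.0],
                            [3.0, 0.0]]))
rows, cols, vals = sp.find(A)
for r, c, v in zip(rows, cols, vals):
    print(r, c, v)  # prints each nonzero entry as (row, col, value)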
def make_symmetric(A):
    '''Transform a matrix, not necessarily triangular, into a symmetric one.

    Parameters
    ----------
    A : array_like
        The input matrix.

    Returns
    -------
    Gsym : scipy.sparse.csr_matrix
        The symmetric matrix.
    '''
    G = sp.csr_matrix(A)
    n = G.shape[0]
    G2 = G.transpose()
    G3 = G + G2
    # Entries present in both directions get a geometric-mean correction
    i, j, v = sp.find(G.multiply(G2))
    v = np.sqrt(v)
    N = sp.csr_matrix((v, (i, j)), shape=(n, n))
    Gsym = G3 - N
    return Gsym
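A quick sanity check on a made-up toy matrix (assuming the numpy/scipy aliases noted above): one-directional entries are mirrored unchanged, while a pair a_ij, a_ji becomes a_ij + a_ji - sqrt(a_ij * a_ji) on both sides.

A = np.array([[0, 4, 0],
              [1, 0, 2],
              [0, 0, 0]])
print(make_symmetric(A).toarray())
# [[0. 3. 0.]
#  [3. 0. 2.]
#  [0. 2. 0.]]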
def fit(self, X, Y):
    n_samples = X.shape[0]
    F = self.firing_threshold
    decay = self.decay
    hierarchy = self.hierarchy
    coef_ = np.zeros(shape=(X.shape[1]), dtype=np.float64)
    fired_ = np.zeros(shape=(X.shape[1]), dtype=np.bool_)
    _, I, V = sp.find(Y)
    # Accumulate per-label mass; np.add.at handles duplicate indices in I
    np.add.at(coef_, I, V / n_samples)
    markers = deque(I)
    while markers:
        i = markers.popleft()
        if coef_[i] >= F and not fired_[i]:
            # fire node i and spread activation to its neighbors
            fired_[i] = True
            for j in hierarchy.neighbors(i):
                if self.use_weights:
                    coef_[j] += coef_[i] * decay * hierarchy[i][j]['weight']
                else:
                    coef_[j] += coef_[i] * decay
                if coef_[j] >= F:
                    coef_[j] = F
                    markers.append(j)
    self.coef_ = coef_
    return self
def BuildAttributeFromSPMatrix(self, sp_matrix, n, m):
    res = []
    for _ in range(n):
        res.append([])
    (row, col, value) = find(sp_matrix)
    for r, c, v in zip(row, col, value):
        res[r].append([c, float(v)])
    return res
def get_fiedler_julia(mat, julia_path, julia_fiedler_script):
    # Extract sparse coordinates (the Julia script builds the Laplacian)
    (iis, jjs, vvs) = find(mat)
    n = mat.shape[0]
    # Add a random number to the filenames to avoid inconsistent
    # writes and reads under multiprocessing
    randn = randrange(1000)
    # write to csv
    itempf = 'mat_coords_iis_%d_%d.csv' % (n, randn)
    iis.tofile(itempf, sep=',')
    jtempf = 'mat_coords_jjs_%d_%d.csv' % (n, randn)
    jjs.tofile(jtempf, sep=',')
    vtempf = 'mat_data_%d_%d.csv' % (n, randn)
    vvs.tofile(vtempf, sep=',')
    outf = 'temp_fidvec_%d_%d.csv' % (n, randn)
    # call julia
    cmd = [julia_path, julia_fiedler_script, itempf, jtempf, vtempf, outf]
    subprocess.call(cmd)
    # remove temporary files
    os.remove(itempf)
    os.remove(jtempf)
    os.remove(vtempf)
    # check the output looks OK and return the permutation
    if os.path.exists(outf):
        myperm = np.fromfile(outf, dtype=int, sep=',')
        myperm = myperm - 1  # Julia indexing is 1-based
        os.remove(outf)
        if len(myperm) == mat.shape[0]:
            return myperm
        else:
            # output identity permutation if something went wrong
            return np.arange(n)
    else:
        return np.arange(n)
def embed(X, E):
    """
    This is effectively X @ E, just slower... by foot.

    Arguments:
    - X -- (n_samples, n_features)
    - E -- (n_features, n_dims)

    Returns:
    - X @ E -- (n_samples, n_dims)
    """
    raise DeprecationWarning("This is slow, use X @ syn0 instead.")
    # Unreachable reference implementation, kept for documentation:
    embedded = np.zeros((X.shape[0], E.shape[1]), dtype=E.dtype)
    for (row, col, val) in zip(*sp.find(X)):
        update = val * E[col, :]
        embedded[row, :] += update
    return embedded
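The docstring's claim, that the loop computes X @ E, is easy to verify on toy data. A sketch (since embed() raises immediately, the loop body is reproduced inline):

import numpy as np
import scipy.sparse as sp

X = sp.random(4, 3, density=0.5, format='csr', random_state=0)
E = np.arange(6, dtype=float).reshape(3, 2)
out = np.zeros((X.shape[0], E.shape[1]))
for row, col, val in zip(*sp.find(X)):
    out[row] += val * E[col]    # the same accumulation as embed()
assert np.allclose(out, X @ E)  # matches the matrix product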
def load_csmatrix(path, fmt='csr'):
    """
    Return a CSR/CSC matrix with data structures memory-mapped onto disk.

    Parameters
    ==========
    path : str
        Path to a location on disk under which to find the
        {data/indices/indptr/shape}.npy files.
    fmt : str
        Either 'csr' or 'csc'. Default: 'csr'.

    Returns
    =======
    adj : `scipy.sparse.csr_matrix` or `scipy.sparse.csc_matrix`
        A compressed sparse row/column matrix.

    Notes
    =====
    The structures data/indices/indptr are `numpy.memmap` objects opened
    in copy-on-write mode (changes take place in memory but are not
    written back to disk, see `numpy.memmap`).
    """
    if fmt not in ['csc', 'csr']:
        raise ValueError("expecting either csc or csr: {}".format(fmt))
    # memory-map the three CSR/CSC arrays
    data = np.load(os.path.join(path, 'data.npy'), mmap_mode='c')
    indices = np.load(os.path.join(path, 'indices.npy'), mmap_mode='c')
    indptr = np.load(os.path.join(path, 'indptr.npy'), mmap_mode='c')
    shape = np.load(os.path.join(path, 'shape.npy'))
    if fmt == 'csr':
        A = sp.csr_matrix((data, indices, indptr), shape=shape)
    else:
        A = sp.csc_matrix((data, indices, indptr), shape=shape)
    A.has_sorted_indices = True
    return A
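load_csmatrix expects four .npy files under `path`. The original project's writer isn't shown; a hypothetical counterpart producing that layout (save_csmatrix is a made-up name) could look like:

import os
import numpy as np

def save_csmatrix(path, A):
    # Hypothetical writer matching the layout load_csmatrix() expects.
    os.makedirs(path, exist_ok=True)
    np.save(os.path.join(path, 'data.npy'), A.data)
    np.save(os.path.join(path, 'indices.npy'), A.indices)
    np.save(os.path.join(path, 'indptr.npy'), A.indptr)
    np.save(os.path.join(path, 'shape.npy'), np.asarray(A.shape))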
def make_task(A, b, obj, intflag, integer_solver):
    vprint('starting mosek environment...')
    env = mosek.Env()
    env.set_Stream(mosek.streamtype.log, streamprinter)
    vprint('creating mosek task...')
    task = env.Task(0, 0)
    task.set_Stream(mosek.streamtype.log, streamprinter)
    num_constraints, num_vars = A.shape
    # set problem size
    task.appendcons(num_constraints)
    task.appendvars(num_vars)
    vprint('creating objective...')
    task.putclist(range(num_vars), obj)
    vprint('creating bounds...')
    for j in range(num_vars):
        # 0 <= x_j <= 1
        task.putbound(mosek.accmode.var, j, mosek.boundkey.ra, 0, 1)
    vprint('creating equalities...')
    boundkeys = [mosek.boundkey.fx] * num_constraints
    task.putboundlist(mosek.accmode.con, range(num_constraints),
                      boundkeys, b, b)
    i_s, j_s, v_s = ss.find(A)
    task.putaijlist(i_s, j_s, v_s)
    # we want to maximize
    task.putobjsense(mosek.objsense.maximize)
    if integer_solver:
        # define the integer variables
        task.putvartypelist(intflag,
                            [mosek.variabletype.type_int] * len(intflag))
        # turn off basis identification
        task.putintparam(mosek.iparam.intpnt_basis, mosek.basindtype.never)
    return task
def close(self):
    i, j, v = sparse.find(self.data)
    log.info("Glove matrix has %d entries", len(i))
def transform(self, X):
    F = self.firing_threshold
    hierarchy = self.hierarchy
    decay = self.decay
    if self.verbose:
        print("[SA] %.4f concepts per sample." % (float(X.getnnz()) / X.shape[0]))
        print("[SA] Starting Spreading Activation")
    X_out = sp.lil_matrix(X.shape, dtype=X.dtype)
    fired = sp.lil_matrix(X.shape, dtype=np.bool_)
    I, J, V = sp.find(X)
    X_out[I, J] = V
    markers = deque(zip(I, J))
    while markers:
        i, j = markers.popleft()
        if X_out[i, j] >= F and not fired[i, j]:
            fired[i, j] = True
            for target in hierarchy.predecessors(j):
                if self.weighting:
                    X_out[i, target] += X_out[i, j] * decay * hierarchy[target][j]['weight']
                else:
                    X_out[i, target] += X_out[i, j] * decay
                if X_out[i, target] >= F:
                    if self.strict:
                        X_out[i, target] = F
                    markers.append((i, target))
    if self.verbose:
        print("[SA] %.4f fired per sample." % (float(fired.getnnz()) / X.shape[0]))
    return sp.csr_matrix(X_out)
def transform(self, X):
    hierarchy = self.hierarchy
    decay = self.decay
    threshold = self.child_threshold
    verbose = self.verbose
    n_hops = 0
    if verbose:
        print("[OneHopActivation]")
    X_out = sp.lil_matrix(X.shape, dtype=X.dtype)
    I, J, _ = sp.find(X)
    for i, j in zip(I, J):
        n_children = 0
        sum_children = 0
        for child in hierarchy.successors(j):
            if X[i, child] > 0:  # same row i
                n_children += 1
                sum_children += X[i, child]
        if n_children >= threshold:
            if verbose:
                print("Hop", end=" ")
            n_hops += 1
            X_out[i, j] = X[i, j] + sum_children * decay
        else:
            X_out[i, j] = X[i, j]
    if verbose:
        print("\n[OneHopActivation] %d hops." % n_hops)
    return sp.csr_matrix(X_out)
def transform(self, X, y=None):
    '''From each value in the feature matrix, traverse upwards in the
    hierarchy (including multiple parents in DAGs), and set all
    ancestor nodes to one.'''
    hierarchy = self.hierarchy
    X_out = np.zeros(X.shape, dtype=np.bool_)
    samples, relevant_topics, _ = sp.find(X)
    for sample, topic in zip(samples, relevant_topics):
        X_out[sample, topic] = 1
        ancestors = nx.ancestors(hierarchy, topic)
        for ancestor in ancestors:
            X_out[sample, ancestor] = 1
    return X_out
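The same find-then-propagate idea, stripped of the class for illustration. The three-node hierarchy here is made up, and the integer node labels keep the node-to-column mapping trivial:

import networkx as nx
import numpy as np
import scipy.sparse as sp

hierarchy = nx.DiGraph([(0, 1), (1, 2)])  # 0 -> 1 -> 2
X = sp.csr_matrix(np.array([[0, 0, 1]]))  # one sample, topic 2 only
X_out = np.zeros(X.shape, dtype=np.bool_)
for sample, topic, _ in zip(*sp.find(X)):
    X_out[sample, topic] = 1
    for ancestor in nx.ancestors(hierarchy, topic):
        X_out[sample, ancestor] = 1
print(X_out)  # [[ True  True  True]]: topic 2 plus ancestors 1 and 0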
def fit(self, X, Y):
    self.n_topics = Y.shape[1]
    # number of nonzero label assignments
    ones = len(sp.find(Y)[2])
    self.mu = ones / X.shape[0]
    for clf in self.clfs:
        clf.fit(X, Y)
    return self
def remove_bridge_reads(a_mat):
    """
    Remove some edges from the similarity graph.

    If the set of neighbors N(i) of a node i is disconnected once i is
    removed from the graph, the edges between i and j are cut for all j
    outside the largest connected group within N(i).

    Parameters
    ----------
    a_mat : scipy.sparse matrix
        Similarity matrix.

    Returns
    -------
    a_clr : scipy.sparse matrix
        Preprocessed similarity matrix.
    """
    Ikill = []
    Jkill = []
    if not isspmatrix_csr(a_mat):
        a_mat = a_mat.tocsr()
    for i in range(a_mat.shape[0]):
        (_, J, _) = find(a_mat[i, :])
        if len(J) == 0:
            continue
        Jl = list(set(J))
        # restrict the graph to the neighbors of i
        a_r = a_mat[Jl, :].tocsc()
        a_r = a_r[:, Jl]
        Jl = np.array(Jl)
        (n_c, lbl) = connected_components(a_r, directed=False,
                                          return_labels=True)
        if n_c > 1:
            # keep only the largest connected group among N(i)
            sizeccs = np.zeros(n_c)
            for ncc in range(n_c):
                sizeccs[ncc] = sum(lbl == ncc)
            ccmax = np.argmax(sizeccs)
            away_idx = np.where(lbl != ccmax)[0]
            away_nbrs = list(Jl[away_idx])
            Ikill.extend([i] * len(away_nbrs))
            Jkill.extend(away_nbrs)
    Ikill = np.array(Ikill)
    Jkill = np.array(Jkill)
    Vkill = np.ones(Ikill.size)
    kill_mat = coo_matrix((Vkill, (Ikill, Jkill)),
                          shape=a_mat.shape, dtype=int).tocsr()
    kill_mat = sym_max(kill_mat)
    kill_mat = kill_mat.multiply(a_mat)
    a_clr = a_mat - kill_mat
    if not isspmatrix_csr(a_clr):
        a_clr = a_clr.tocsr()
    return a_clr
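remove_bridge_reads calls an external sym_max() helper that isn't shown; assuming it is the elementwise maximum of a matrix and its transpose, and that the snippet's scipy imports (find, coo_matrix, isspmatrix_csr, connected_components) are in scope, a toy run on a made-up graph (two triangles joined by a single bridge edge) looks like this:

import numpy as np
from scipy.sparse import csr_matrix

def sym_max(m):
    # Assumed behavior, not from the original project.
    return m.maximum(m.T)

A = np.zeros((6, 6))
for i, j in [(0, 1), (0, 2), (1, 2), (2, 3), (3, 4), (3, 5), (4, 5)]:
    A[i, j] = A[j, i] = 1.0
A_clr = remove_bridge_reads(csr_matrix(A))
print(A - A_clr.toarray())  # only the bridge edge 2-3 is removed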
###############################################################################
###### Spectral ordering related functions (gets coarse-grained layout) ######
###############################################################################


def reorder_mat(A, thr_list, min_cc_len, VERB):
    if not isspmatrix_csr(A):
        A = A.tocsr()
    # Initialization
    ccs_ord = []
    # Create list of unordered connected components
    todo_ccs = [np.arange(A.shape[0])]
    todo_next = []
    n_loop = 0
    while len(todo_ccs) > 0:
        thr_sub = thr_list[n_loop]  # starts at 0.4 for n_loop == 0
        # Reorder each of them
        for cc in todo_ccs:
            # Skip the thresholding on the first pass so the preprocessing
            # is not done twice; it could also be removed from the pipeline
            # and done here.
            if n_loop > 0:
                A_sub = A[cc, :][:, cc]
                A_sub = remove_bridge_reads(A_sub.multiply(A_sub > thr_sub))
            else:
                A_sub = A
            # Compute connected components
            (n_cc, labels) = connected_components(A_sub, directed=False,
                                                  return_labels=True)
            # Reorder each cc with spectral and keep the ordering if it looks OK
            for i_cc in range(n_cc):
                cc_sub = np.argwhere(labels == i_cc)[:, 0]
                if len(cc_sub) <= min_cc_len:
                    continue
                msg = " Running spectral algorithm in connected "\
                      "component of size %d..." % (len(cc_sub))
                oprint(msg, cond=(VERB >= 2))
                (_, fidvec) = get_fiedler(A_sub[cc_sub, :][:, cc_sub])
                permu = np.argsort(fidvec)
                (ii, jj, _) = find(A_sub[cc_sub[permu], :][:, cc_sub[permu]])
                bw = max(abs(ii - jj))
                if bw >= 80:
                    oprint("Bandwidth larger than 80 in reordered matrix.",
                           cond=(VERB >= 2))
                    todo_next.append(cc[cc_sub])
                else:
                    ccs_ord.append(cc[cc_sub[permu]])
        todo_ccs = todo_next
        todo_next = []
        n_loop += 1
    return ccs_ord
def reord_submat(in_tuple, A, opts):
    (thr_sub, cc) = in_tuple
    min_len = int(opts['MIN_CC_LEN'])
    verb = int(opts['VERB'])
    JULIA_PATH = opts['JULIA_PATH']
    JULIA_SCRIPT = opts['JULIA_SCRIPT']
    sub_todo_next = []
    sub_ccs_ord = []
    A_sub = A[cc, :][:, cc]
    A_sub = remove_bridge_reads(A_sub.multiply(A_sub > thr_sub))
    # Compute connected components
    (n_cc, labels) = connected_components(A_sub, directed=False,
                                          return_labels=True)
    # Reorder each cc with spectral and keep the ordering if it looks OK
    for i_cc in range(n_cc):
        cc_sub = np.argwhere(labels == i_cc)[:, 0]
        if len(cc_sub) <= min_len:
            continue
        msg = " Running spectral algorithm in connected "\
              "component of size %d..." % (len(cc_sub))
        oprint(msg, cond=(verb >= 2))
        t1 = time()
        # Use the Julia solver for large components if it is available
        if JULIA_PATH and (len(cc_sub) > 3000):
            permu = get_fiedler_julia(A_sub[cc_sub, :][:, cc_sub],
                                      JULIA_PATH, JULIA_SCRIPT)
        else:
            (_, fidvec) = get_fiedler(A_sub[cc_sub, :][:, cc_sub])
            permu = np.argsort(fidvec)
        oprint("Done in %3.6fs" % (time() - t1), cond=(verb >= 2))
        (ii, jj, _) = find(A_sub[cc_sub[permu], :][:, cc_sub[permu]])
        bw = max(abs(ii - jj))
        if bw >= 80:
            oprint("Bandwidth larger than 80 in reordered matrix.",
                   cond=(verb >= 2))
            sub_todo_next.append(cc[cc_sub])
        else:
            sub_ccs_ord.append(cc[cc_sub[permu]])
    return sub_ccs_ord, sub_todo_next