Python scipy.sparse 模块，find() 实例源码

我们从Python开源项目中，提取了以下16个代码示例，用于说明如何使用scipy.sparse.find()。

项目：knowledge_linker 作者：glciampaglia | 项目源码 | 文件源码

def make_symmetric(A):
    '''
    Build a symmetric sparse matrix from a matrix that need not be triangular.

    The result is ``G + G.T - sqrt(G * G.T)``, where ``G`` is ``A`` converted
    to CSR and ``*`` is the element-wise product.  Entries present on only
    one side of the diagonal are mirrored; entries present on both sides are
    corrected by the square root of their product.

    Parameters
    ----------
    A : array_like
        The matrix.  The correction term is built with shape ``(n, n)``,
        where ``n`` is the number of rows of ``A``.

    Returns
    -------
    Gsym : sparse matrix
        The symmetric matrix.
    '''
    mat = sp.csr_matrix(A)
    mat_t = mat.transpose()
    size = mat.shape[0]
    # Positions where both (i, j) and (j, i) are stored have a nonzero
    # element-wise product; those are the entries counted twice in the sum.
    rows, cols, products = sp.find(mat.multiply(mat_t))
    overlap = sp.csr_matrix((np.sqrt(products), (rows, cols)),
                            shape=(size, size))
    return (mat + mat_t) - overlap

项目：Quadflor 作者：quadflor | 项目源码 | 文件源码

def fit(self, X, Y):
        """
        Compute spreading-activation coefficients from the nonzeros of Y.

        Every nonzero value of ``Y`` is divided by the number of rows of
        ``X`` and accumulated into the coefficient of its column.  Each node
        whose coefficient reaches ``self.firing_threshold`` then fires once,
        adding ``coefficient * self.decay`` (times the edge weight when
        ``self.use_weights`` is set) to every node returned by
        ``self.hierarchy.neighbors``.  A neighbour that reaches the threshold
        is clamped to it and queued.

        Parameters
        ----------
        X : matrix with a ``shape`` attribute
            Only ``X.shape`` is used: rows for the normalisation, columns for
            the length of the coefficient vector.
        Y : sparse matrix or array_like
            Its column indices index the coefficient vector.
            NOTE(review): this assumes Y has at most ``X.shape[1]`` columns
            -- confirm against callers.

        Returns
        -------
        self, with the result stored in ``self.coef_``.
        """
        n_samples = X.shape[0]
        n_nodes = X.shape[1]
        F = self.firing_threshold
        decay = self.decay
        hierarchy = self.hierarchy
        coef_ = np.zeros(shape=(n_nodes), dtype=np.float64)
        fired_ = np.zeros(shape=(n_nodes), dtype=np.bool_)
        _, I, V = sp.find(Y)
        # I may repeat a column once per nonzero row; ufunc.at accumulates
        # every occurrence, whereas a fancy-indexed ``+=`` would keep one.
        np.add.at(coef_, I, np.divide(V, n_samples))

        markers = deque(I)
        while markers:
            i = markers.popleft()
            if coef_[i] >= F and not fired_[i]:
                # Mark before spreading so that every node fires at most
                # once, which guarantees the queue eventually drains.
                fired_[i] = True
                for j in hierarchy.neighbors(i):
                    if self.use_weights:
                        coef_[j] += coef_[i] * decay * hierarchy[i][j]['weight']
                    else:
                        coef_[j] += coef_[i] * decay
                    if coef_[j] >= F:
                        coef_[j] = F
                        markers.append(j)

        self.coef_ = coef_
        return self

项目：DeepFM 作者：dwt0317 | 项目源码 | 文件源码

def BuildAttributeFromSPMatrix(self, sp_matrix, n, m):
        """
        Convert a sparse matrix into per-row lists of ``[column, value]``.

        Parameters: ``sp_matrix`` is the matrix whose nonzeros are listed,
        ``n`` is the number of row buckets to create, and ``m`` is accepted
        but not used here.

        Returns a list of ``n`` lists; entry ``r`` holds one
        ``[col, float(value)]`` pair per nonzero found in row ``r``.
        """
        buckets = [[] for _ in range(n)]
        for r, c, v in zip(*find(sp_matrix)):
            buckets[r].append([c, float(v)])
        return buckets

项目：spectrassembler 作者：antrec | 项目源码 | 文件源码

def get_fiedler_julia(mat, julia_path, julia_fiedler_script):
    """
    Run an external Julia script on the nonzeros of ``mat`` and read back a
    permutation.

    The row indices, column indices and values returned by ``find(mat)`` are
    written as comma-separated text files.  The script is invoked as
    ``julia_path julia_fiedler_script <rows> <cols> <values> <out>`` and the
    comma-separated integers found in ``<out>`` are read back, with 1
    subtracted from each (presumably converting Julia's 1-based indices --
    confirm against the script).

    All files live in a private temporary directory, so concurrent calls
    cannot clash on file names, and the directory is removed even when the
    call fails.

    Parameters
    ----------
    mat : sparse matrix
    julia_path : str
        Executable used to run the script.
    julia_fiedler_script : str
        Path of the script to run.

    Returns
    -------
    numpy.ndarray
        The permutation read from the script output, or the identity
        permutation ``np.arange(n)`` when no output file was produced or
        its length differs from ``mat.shape[0]``.
    """
    import shutil
    import tempfile

    (iis, jjs, vvs) = find(mat)
    n = mat.shape[0]

    workdir = tempfile.mkdtemp(prefix='fiedler_julia_')
    try:
        # write to csv
        itempf = os.path.join(workdir, 'mat_coords_iis_%d.csv' % n)
        iis.tofile(itempf, sep=',')
        jtempf = os.path.join(workdir, 'mat_coords_jjs_%d.csv' % n)
        jjs.tofile(jtempf, sep=',')
        vtempf = os.path.join(workdir, 'mat_data_%d.csv' % n)
        vvs.tofile(vtempf, sep=',')
        outf = os.path.join(workdir, 'temp_fidvec_%d.csv' % n)

        # call julia
        cmd = [julia_path, julia_fiedler_script, itempf, jtempf, vtempf, outf]
        subprocess.call(cmd)

        # output identity permutation if something went wrong
        if not os.path.exists(outf):
            return np.arange(n)
        myperm = np.fromfile(outf, dtype=int, sep=',') - 1
    finally:
        # remove temporary files, whatever happened above
        shutil.rmtree(workdir, ignore_errors=True)

    # check output looks OK and return permutation
    if len(myperm) == n:
        return myperm
    return np.arange(n)

项目：vec4ir 作者：lgalke | 项目源码 | 文件源码

def embed(X, E):
    """
    Compute ``X @ E`` one nonzero entry at a time (deprecated, slow).

    Arguments:
        - X  -- (n_samples, n_features) matrix accepted by ``sp.find``
        - E  -- (n_features, n_dims) array
    Returns:
        - (n_samples, n_dims) array with the dtype of ``E``

    Emits a ``DeprecationWarning``; the matrix product should be used
    instead.
    """
    import warnings

    # Warn rather than raise: raising made the implementation below
    # unreachable, so the function could never return a result.
    warnings.warn("This is slow, use X @ syn0 instead.",
                  DeprecationWarning, stacklevel=2)
    embedded = np.zeros((X.shape[0], E.shape[1]), dtype=E.dtype)
    for (row, col, val) in zip(*sp.find(X)):
        # each nonzero X[row, col] adds a scaled copy of E's row ``col``
        embedded[row, :] += val * E[col, :]
    return embedded

项目：OpenLearning4DeepRecsys 作者：Leavingseason | 项目源码 | 文件源码

def BuildAttributeFromSPMatrix(self, sp_matrix, n, m):
        """
        List the nonzeros of ``sp_matrix`` grouped by row.

        ``n`` fixes how many row lists are created; ``m`` is accepted but
        not used here.  The returned list has ``n`` entries, and entry ``r``
        contains a ``[col, float(value)]`` pair for each nonzero of row
        ``r``.
        """
        row_ids, col_ids, values = find(sp_matrix)
        grouped = [[] for _ in range(n)]
        for row_id, col_id, value in zip(row_ids, col_ids, values):
            pair = [col_id, float(value)]
            grouped[row_id].append(pair)
        return grouped

项目：knowledge_linker 作者：glciampaglia | 项目源码 | 文件源码

def load_csmatrix(path, fmt='csr'):
    """
    Return a CSR/CSC matrix with data structures memory-mapped onto disk.

    Parameters
    ==========

    path : str
        Path to a location on disk under which to find the
        {data/indices/indptr/shape}.npy files.

    fmt : str
        any of 'csr' or 'csc'. Default: csr.

    Returns
    =======

    adj : `scipy.sparse.csr_matrix` or `scipy.sparse.csc_matrix` A compressed
    sparse row/column matrix.

    Raises
    ======
    ValueError
        If `fmt` is neither 'csr' nor 'csc'.

    Notes
    =====
    The structures data/indices/indptr are loaded with `mmap_mode='c'`,
    i.e. copy-on-write (changes take place in memory but are not written
    back to disk, see `numpy.memmap`).  The shape file is loaded normally.
    The returned matrix is flagged as having sorted indices without checking.
    """
    if fmt not in ('csc', 'csr'):
        raise ValueError("expecting either csc or csr: {}".format(fmt))

    def _mapped(stem):
        # copy-on-write memory map of <path>/<stem>.npy
        return np.load(os.path.join(path, stem + '.npy'), mmap_mode='c')

    data = _mapped('data')
    indices = _mapped('indices')
    indptr = _mapped('indptr')
    shape = np.load(os.path.join(path, 'shape.npy'))

    build = sp.csr_matrix if fmt == 'csr' else sp.csc_matrix
    A = build((data, indices, indptr), shape=shape)
    A.has_sorted_indices = True
    return A

项目：TerpreT 作者：51alg | 项目源码 | 文件源码

def make_task(A, b, obj, intflag, integer_solver):
    """
    Build a mosek task that maximizes ``obj`` subject to ``A x = b``.

    Every variable is bounded to the range ``[0, 1]`` and every constraint
    is fixed to the matching entry of ``b``.  When ``integer_solver`` is
    truthy, the variables listed in ``intflag`` are declared integer.

    Parameters
    ----------
    A : sparse matrix or array_like, shape (num_constraints, num_vars)
        Constraint coefficients; its nonzeros are read with ``ss.find``.
    b : sequence
        Used as both lower and upper bound of the constraints.
    obj : sequence
        Objective coefficients, one per variable.
    intflag : sequence
        Indices of the variables to declare integer.
    integer_solver : bool
        Whether to apply ``intflag``.

    Returns
    -------
    The configured mosek task.
    """
    vprint('starting mosek environment...')
    env = mosek.Env()
    env.set_Stream(mosek.streamtype.log, streamprinter)

    vprint('creating mosek task...')
    task = env.Task(0, 0)
    task.set_Stream(mosek.streamtype.log, streamprinter)

    # set problem size
    n_cons, n_vars = A.shape
    task.appendcons(n_cons)
    task.appendvars(n_vars)

    vprint('creating objective...')
    task.putclist(range(n_vars), obj)

    vprint('creating bounds...')
    # NOTE(review): xrange exists only on Python 2.
    for var_idx in xrange(n_vars):
        # 0 <= x_j <= 1
        task.putbound(mosek.accmode.var, var_idx, mosek.boundkey.ra, 0, 1)

    vprint('creating equalities...')
    fixed_keys = [mosek.boundkey.fx] * n_cons
    task.putboundlist(mosek.accmode.con, range(n_cons), fixed_keys, b, b)
    rows, cols, vals = ss.find(A)
    task.putaijlist(rows, cols, vals)

    # we want to maximize
    task.putobjsense(mosek.objsense.maximize)

    if integer_solver:
        # define the integer variables
        int_types = [mosek.variabletype.type_int] * len(intflag)
        task.putvartypelist(intflag, int_types)

    # turn off basis identification
    task.putintparam(mosek.iparam.intpnt_basis, mosek.basindtype.never)
    return task

项目：nlp 作者：Shmuma | 项目源码 | 文件源码

def close(self):
        """Log how many nonzero entries ``self.data`` contains."""
        nonzero_rows = sparse.find(self.data)[0]
        log.info("Glove matrix has %d entries", len(nonzero_rows))

项目：Quadflor 作者：quadflor | 项目源码 | 文件源码

def transform(self, X):
        """
        Spread activation from the nonzeros of X along the hierarchy.

        Every entry ``(i, j)`` whose value reaches ``self.firing_threshold``
        fires once.  Firing adds ``value * self.decay`` (times the edge
        weight ``hierarchy[target][j]['weight']`` when ``self.weighting`` is
        set) to entry ``(i, target)`` for each ``target`` returned by
        ``hierarchy.predecessors(j)``.  A target that reaches the threshold
        is queued, and is first clamped to the threshold when
        ``self.strict`` is set.

        Parameters
        ----------
        X : sparse matrix, shape (n_samples, n_concepts)

        Returns
        -------
        scipy.sparse.csr_matrix of the same shape with the spread activations.
        """
        F = self.firing_threshold
        hierarchy = self.hierarchy
        decay = self.decay
        if self.verbose: print("[SA] %.4f concepts per sample."%(float(X.getnnz()) / X.shape[0]))
        if self.verbose: print("[SA] Starting Spreading Activation")
        X_out = sp.lil_matrix(X.shape,dtype=X.dtype)
        fired = sp.lil_matrix(X.shape,dtype=np.bool_)
        I, J, V = sp.find(X)
        X_out[I,J] = V
        markers = deque(zip(I,J))
        while markers:
            i, j = markers.popleft()
            if X_out[i,j] >= F and not fired[i,j]:
                # each (sample, concept) pair fires at most once
                fired[i,j] = True
                for target in hierarchy.predecessors(j):
                    if self.weighting:
                        X_out[i,target] += X_out[i,j] * decay * hierarchy[target][j]['weight']
                    else:
                        X_out[i,target] += X_out[i,j] * decay

                    if X_out[i, target] >= F:
                        # strict mode caps the activation at the threshold
                        if self.strict: X_out[i,target] = F
                        markers.append((i,target))

        if self.verbose: print("[SA] %.4f fired per sample."%(float(fired.getnnz()) / X.shape[0]))
        return sp.csr_matrix(X_out)

项目：Quadflor 作者：quadflor | 项目源码 | 文件源码

def transform(self, X):
        """
        Boost each nonzero entry of X by the activation of its children.

        For every nonzero ``X[i, j]``, the values ``X[i, child]`` are read
        for each ``child`` in ``hierarchy.successors(j)``.  If at least
        ``self.child_threshold`` of them are positive, the output entry is
        ``X[i, j] + sum(positive child values) * self.decay``; otherwise it
        is ``X[i, j]`` unchanged.

        Parameters
        ----------
        X : sparse matrix, shape (n_samples, n_concepts)

        Returns
        -------
        scipy.sparse.csr_matrix of the same shape.
        """
        graph = self.hierarchy
        decay = self.decay
        min_children = self.child_threshold
        verbose = self.verbose

        if verbose: print("[OneHopActivation]")
        boosted = sp.lil_matrix(X.shape, dtype=X.dtype)
        hop_count = 0
        rows, cols, _ = sp.find(X)
        for row, col in zip(rows, cols):
            # activations of the children found in the same row
            child_values = (X[row, child] for child in graph.successors(col))
            active = [value for value in child_values if value > 0]
            if len(active) < min_children:
                boosted[row, col] = X[row, col]
                continue
            if verbose: print("Hop", end=" ")
            hop_count += 1
            boosted[row, col] = X[row, col] + sum(active) * decay

        if verbose: print("\n[OneHopActivation] %d hops." % hop_count)

        return sp.csr_matrix(boosted)

项目：Quadflor 作者：quadflor | 项目源码 | 文件源码

def transform(self, X, y=None):
        ''' Mark every nonzero entry of the feature matrix together with all
        of its ancestors in the hierarchy (as returned by ``nx.ancestors``,
        so multiple parents in DAGs are included).

        Returns a dense boolean array of shape ``X.shape``; ``y`` is accepted
        but not used. '''
        graph = self.hierarchy
        marked = np.zeros(X.shape, dtype=np.bool_)
        rows, cols, _ = sp.find(X)
        for row, col in zip(rows, cols):
            marked[row, col] = True
            for parent in nx.ancestors(graph, col):
                marked[row, parent] = True

        return marked

项目：Quadflor 作者：quadflor | 项目源码 | 文件源码

def fit(self, X, Y):
        """
        Record label statistics and fit every wrapped classifier.

        Sets ``self.n_topics`` to the number of columns of ``Y`` and
        ``self.mu`` to the number of nonzeros of ``Y`` divided by the number
        of rows of ``X``, then calls ``fit(X, Y)`` on each element of
        ``self.clfs``.  Returns ``self``.
        """
        self.n_topics = Y.shape[1]
        _, _, nonzero_values = sp.find(Y)
        self.mu = len(nonzero_values) / X.shape[0]
        for estimator in self.clfs:
            estimator.fit(X, Y)
        return self

项目：spectrassembler 作者：antrec | 项目源码 | 文件源码

def remove_bridge_reads(a_mat):
    """ Remove some edges from the similarity graph.

    When the set of neighbors N(i) of a node i is not connected once node i
    is removed from the graph, the edges between i and j are cut for every j
    that is not in the largest connected group among N(i).

    Parameters
    ----------
    a_mat : scipy.sparse matrix (similarity matrix)

    Returns
    ----------
    a_clr : scipy.sparse matrix in CSR format (pre-processed similarity
        matrix)

    """
    if not isspmatrix_csr(a_mat):
        a_mat = a_mat.tocsr()

    Ikill = []
    Jkill = []
    for i in range(a_mat.shape[0]):
        (_, J, _) = find(a_mat[i, :])
        if len(J) == 0:
            continue
        Jl = list(set(J))
        # sub-graph induced by the neighbors of i (i itself left out unless
        # it has a self-loop)
        a_r = a_mat[Jl, :].tocsc()[:, Jl]
        (n_c, lbl) = connected_components(a_r, directed=False, return_labels=True)
        if n_c <= 1:
            continue
        # bincount gives the size of every component in a single pass
        ccmax = np.argmax(np.bincount(lbl, minlength=n_c))
        away_nbrs = np.asarray(Jl)[lbl != ccmax].tolist()
        Ikill.extend([i] * len(away_nbrs))
        Jkill.extend(away_nbrs)

    # explicit integer dtype so that empty lists still give valid indices
    Ikill = np.array(Ikill, dtype=int)
    Jkill = np.array(Jkill, dtype=int)
    Vkill = np.ones(Ikill.size)
    kill_mat = coo_matrix((Vkill, (Ikill, Jkill)), shape=a_mat.shape, dtype=int).tocsr()
    # sym_max is defined elsewhere; presumably it symmetrizes the mask so
    # that an edge is cut in both directions -- confirm.
    kill_mat = sym_max(kill_mat)
    kill_mat = kill_mat.multiply(a_mat)
    a_clr = a_mat - kill_mat
    if not isspmatrix_csr(a_clr):
        a_clr = a_clr.tocsr()

    return a_clr


###############################################################################
###### Spectral ordering related functions (gets coarse-grained layout) #######
###############################################################################

项目：spectrassembler 作者：antrec | 项目源码 | 文件源码

def reorder_mat(A, thr_list, min_cc_len, VERB):
    """
    Order the connected components of a similarity matrix spectrally,
    retrying badly ordered components with increasing thresholds.

    On each pass, every pending component is split into connected
    components; each one larger than ``min_cc_len`` is ordered by sorting
    the vector returned by ``get_fiedler``.  An ordering is kept when the
    bandwidth of the reordered sub-matrix is below 80; otherwise the
    component is retried on the next pass, after thresholding its
    sub-matrix with the next value of ``thr_list`` and applying
    ``remove_bridge_reads``.

    Parameters
    ----------
    A : scipy.sparse matrix (similarity matrix)
    thr_list : sequence
        ``thr_list[k]`` is the threshold applied on pass ``k`` (the first
        one is read but not applied, since pass 0 uses ``A`` as is).
    min_cc_len : int
        Components of this size or smaller are skipped.
    VERB : int
        Verbosity; messages are printed when ``VERB >= 2``.

    Returns
    -------
    list of index arrays, one per accepted component, in spectral order.
    """
    if not isspmatrix_csr(A):
        A = A.tocsr()
    # Initialization.
    ccs_ord = []
    # Create list of unordered connected components
    todo_ccs = [np.arange(A.shape[0])]
    todo_next = []
    n_loop = 0

    while todo_ccs:
        # NOTE(review): raises IndexError if components are still pending
        # after every threshold in thr_list has been used.
        thr_sub = thr_list[n_loop]
        # Reorder each of them
        for cc in todo_ccs:
            # The first pass works on A as is, in order not to make the
            # preprocessing twice. We could also remove the preprocessing
            # from the pipeline and do it here.
            if n_loop > 0:
                A_sub = A[cc, :][:, cc]
                A_sub = remove_bridge_reads(A_sub.multiply(A_sub > thr_sub))
            else:
                A_sub = A

            # Compute connected components
            (n_cc, labels) = connected_components(A_sub, directed=False, return_labels=True)

            # Reorder each cc with spectral and keep the ordering if it looks OK
            for i_cc in range(n_cc):
                cc_sub = np.argwhere(labels == i_cc)[:, 0]
                if len(cc_sub) <= min_cc_len:
                    continue
                msg = " Running spectral algorithm in connected "\
                      "component of size %d..." % (len(cc_sub))
                oprint(msg, cond=(VERB >= 2))
                (_, fidvec) = get_fiedler(A_sub[cc_sub, :][:, cc_sub])
                permu = np.argsort(fidvec)
                (ii, jj, _) = find(A_sub[cc_sub[permu], :][:, cc_sub[permu]])
                # bandwidth: largest distance of a nonzero from the diagonal
                # (0 when the sub-matrix has no nonzero at all)
                bw = np.abs(ii - jj).max() if len(ii) else 0
                if bw >= 80:
                    oprint("Bandwidth larger than 80 in reordered matrix.",
                           cond=(VERB >= 2))
                    todo_next.append(cc[cc_sub])
                else:
                    ccs_ord.append(cc[cc_sub[permu]])

        todo_ccs = todo_next
        todo_next = []
        n_loop += 1

    return ccs_ord

项目：spectrassembler 作者：antrec | 项目源码 | 文件源码

def reord_submat(in_tuple, A, opts):
    """
    Threshold one component of the similarity matrix and order its
    connected sub-components spectrally.

    The sub-matrix of ``A`` restricted to ``cc`` is thresholded with
    ``thr_sub`` and passed through ``remove_bridge_reads``.  Each resulting
    connected component larger than ``opts['MIN_CC_LEN']`` is ordered,
    using ``get_fiedler_julia`` when ``opts['JULIA_PATH']`` is set and the
    component has more than 3000 nodes, and ``get_fiedler`` otherwise.  An
    ordering is kept when the bandwidth of the reordered sub-matrix is
    below 80.

    Parameters
    ----------
    in_tuple : tuple
        ``(thr_sub, cc)``: the threshold and the index array of the component.
    A : scipy.sparse matrix (similarity matrix)
    opts : dict
        Reads the keys 'MIN_CC_LEN', 'VERB', 'JULIA_PATH' and 'JULIA_SCRIPT'.

    Returns
    -------
    (sub_ccs_ord, sub_todo_next) : two lists of index arrays, the accepted
    orderings and the components that still need reordering.
    """
    (thr_sub, cc) = in_tuple
    min_len = int(opts['MIN_CC_LEN'])
    verb = int(opts['VERB'])
    JULIA_PATH = opts['JULIA_PATH']
    JULIA_SCRIPT = opts['JULIA_SCRIPT']

    sub_todo_next = []
    sub_ccs_ord = []

    A_sub = A[cc, :][:, cc]
    A_sub = remove_bridge_reads(A_sub.multiply(A_sub > thr_sub))
    # Compute connected components
    (n_cc, labels) = connected_components(A_sub, directed=False, return_labels=True)

    # Reorder each cc with spectral and keep the ordering if it looks OK
    for i_cc in range(n_cc):
        cc_sub = np.argwhere(labels == i_cc)[:, 0]
        if len(cc_sub) <= min_len:
            continue
        msg = " Running spectral algorithm in connected "\
              "component of size %d..." % (len(cc_sub))
        oprint(msg, cond=(verb >= 2))
        t1 = time()

        if JULIA_PATH and (len(cc_sub) > 3000):
            permu = get_fiedler_julia(A_sub[cc_sub, :][:, cc_sub], JULIA_PATH, JULIA_SCRIPT)
        else:
            (_, fidvec) = get_fiedler(A_sub[cc_sub, :][:, cc_sub])
            permu = np.argsort(fidvec)

        oprint("Done in %3.6fs" % (time()-t1), cond=(verb>=2))

        (ii, jj, _) = find(A_sub[cc_sub[permu], :][:, cc_sub[permu]])
        # bandwidth: largest distance of a nonzero from the diagonal
        # (0 when the sub-matrix has no nonzero at all)
        bw = np.abs(ii - jj).max() if len(ii) else 0
        if bw >= 80:
            oprint("Bandwidth larger than 80 in reordered matrix.",
                   cond=(verb >= 2))
            sub_todo_next.append(cc[cc_sub])
        else:
            sub_ccs_ord.append(cc[cc_sub[permu]])

    return sub_ccs_ord, sub_todo_next