项目:py_jive    作者:idc9
def fix_scipy_svds(scipy_svds):
    """
    Reverse the ordering of a scipy.sparse.linalg.svds result.

    The original author notes that svds orders the singular values
    "backwards"; this helper flips the triple so the values come out in
    decreasing order (assuming svds returned them in increasing order).

    Parameters
    ----------
    scipy_svds: the output from scipy.sparse.linalg.svds, i.e. a triple
        (U, D, V) where V holds one singular vector per ROW.

    Returns
    -------
    U, D, V
    ordered in decreasing singular values: columns of U, entries of D and
    rows of V are all reversed consistently.
    """
    U, D, V = scipy_svds

    # Reverse all three along the component axis so they stay aligned.
    U = U[:, ::-1]
    D = D[::-1]
    V = V[::-1, :]

    return U, D, V
项目:Deep-subspace-clustering-networks    作者:panji1990
def post_proC(C, K, d, alpha):
    """Cluster samples from a coefficient matrix via spectral clustering.

    C: coefficient matrix (square), K: number of clusters,
    d: dimension of each subspace, alpha: element-wise exponent applied to
    the affinity before clustering.

    Returns (grp, L): cluster labels shifted to start at 1, and the
    symmetric affinity matrix that was passed to SpectralClustering.
    """
    C = 0.5*(C + C.T)
    r = d*K + 1
    # Fixed start vector keeps the ARPACK run repeatable.
    U, S, _ = svds(C, r, v0=np.ones(C.shape[0]))
    # Flip so the components run in the opposite order to what svds returned.
    U = U[:, ::-1]
    S = np.sqrt(S[::-1])
    S = np.diag(S)
    # Scale each singular direction by the square root of its singular value.
    U = U.dot(S)
    U = normalize(U, norm='l2', axis=1)
    # Gram matrix of the row-normalised embedding; negatives are clipped to 0.
    Z = U.dot(U.T)
    Z = Z * (Z > 0)
    L = np.abs(Z ** alpha)
    L = L/L.max()
    L = 0.5 * (L + L.T)
    spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack', affinity='precomputed',assign_labels='discretize')
    grp = spectral.fit_predict(L) + 1
    return grp, L
项目:Deep-subspace-clustering-networks    作者:panji1990
项目:Deep-subspace-clustering-networks    作者:panji1990
def post_proC(C, K, d, alpha):
    """Cluster samples from a coefficient matrix via spectral clustering.

    C: coefficient matrix (square), K: number of clusters,
    d: dimension of each subspace, alpha: element-wise exponent applied to
    the affinity before clustering.

    Returns (grp, L): cluster labels shifted to start at 1, and the
    symmetric affinity matrix that was passed to SpectralClustering.
    """
    n = C.shape[0]
    C = 0.5*(C + C.T)
    # for sparse C, this step will make the algorithm more numerically stable
    # (the diagonal is replaced by ones)
    C = C - np.diag(np.diag(C)) + np.eye(n, n)
    r = d*K + 1
    # Fixed start vector keeps the ARPACK run repeatable.
    U, S, _ = svds(C, r, v0=np.ones(n))
    # Flip so the components run in the opposite order to what svds returned.
    U = U[:, ::-1]
    S = np.sqrt(S[::-1])
    S = np.diag(S)
    # Scale each singular direction by the square root of its singular value.
    U = U.dot(S)
    U = normalize(U, norm='l2', axis=1)
    # Gram matrix of the row-normalised embedding; negatives are clipped to 0.
    Z = U.dot(U.T)
    Z = Z * (Z > 0)
    L = np.abs(Z ** alpha)
    L = L/L.max()
    L = 0.5 * (L + L.T)
    spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack', affinity='precomputed', assign_labels='discretize')
    grp = spectral.fit_predict(L) + 1
    return grp, L
项目:Deep-subspace-clustering-networks    作者:panji1990
项目:100knock2016    作者:tmu-nlp
def dimension_reduction():
    """Reduce the PPMI word vectors to 300 dimensions with a truncated SVD.

    Reads the mapping returned by PPMI_matrix() (word -> feature dict,
    presumably; verify against PPMI_matrix), vectorises it with
    DictVectorizer and returns a dict mapping each word to its row of
    U * diag(S).
    """
    X = PPMI_matrix()
    word_list = list()
    vecdict_list = list()
    # Words are visited in sorted order so rows of the matrix and entries of
    # word_list stay aligned.
    for word, vector in sorted(X.items()):
        word_list.append(word)
        vecdict_list.append(vector)
    Dic2Vec = DictVectorizer(sparse=True)
    vector_list = Dic2Vec.fit_transform(vecdict_list)

    X_svd = svds(vector_list, 300)
    # Left singular vectors scaled by their singular values.
    X_pca = np.dot(X_svd[0], np.diag(X_svd[1]))
    word_matrix = dict()
    for word, vector in zip(word_list, X_pca):
        word_matrix[word] = vector

    return word_matrix
项目:100knock2016    作者:tmu-nlp
def dim_reduction():
  """Reduce the PPMI token vectors to 300 dimensions with a truncated SVD.

  Reads the mapping returned by getPPMI(), converts each token's contexts
  to a dict, vectorises them with DictVectorizer and returns a dict mapping
  each token to its row of U * diag(S).
  """
  dic2vec = DictVectorizer(sparse=True)
  PPMI = getPPMI()
  tc = list()
  token_list = list()
  # Tokens are visited in sorted order so matrix rows and token_list align.
  for token, contexts in sorted(PPMI.items()):
    contexts = dict(contexts)
    token_list.append(token)
    tc.append(contexts)

  tc_vec = dic2vec.fit_transform(tc)
  tc_svd = svds(tc_vec, 300)
  # Left singular vectors scaled by their singular values.
  tc_pca = np.dot(tc_svd[0], np.diag(tc_svd[1]))

  word_vec = dict()
  for token, vec in zip(token_list, tc_pca):
    word_vec[token] = vec

  return word_vec
项目:vrec    作者:tn1031
def __init__(self, ratings, sideinfo, n_factor=10, reg=0.1):
        """Initialise the model from ratings plus item side information.

        ratings is forwarded to the parent constructor; sideinfo is copied,
        a small random matrix Z is drawn, the hyper parameters are stored,
        and the right singular vectors of the side information are kept
        in self.V.
        """
        super(LoCo, self).__init__(ratings)

        # private copy so later changes by the caller do not leak in
        self.sideinfo = sideinfo.copy()

        # auxiliary variable, drawn uniformly from [-0.001, 0.001)
        z_shape = (self.n_item, n_factor)
        self.Z = np.random.uniform(low=-.001, high=.001, size=z_shape)

        # hyper parameters
        self.n_factor = n_factor
        self.reg = reg

        # truncated SVD of the side information; only the third factor is kept
        _, _, right_factors = svds(self.sideinfo, n_factor)
        self.V = right_factors   # n_factor x n_attr
项目:GEM    作者:palash1992
def learn_embedding(self, graph=None, edge_f=None,
                        is_weighted=False, no_python=False):
        """Embed the graph using the smallest singular vectors of (I - A).

        Either graph or edge_f (an edge-list file to load) must be given;
        otherwise an Exception is raised. The graph is made undirected, its
        adjacency matrix is row-normalised (l1) in place, and self._d + 1
        singular triplets of I - A are requested with which='SM'. The first
        column of the transposed right factor is dropped.

        Returns (embedding, elapsed_seconds). is_weighted and no_python are
        not used by this method.
        """
        if not (graph or edge_f):
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
        graph = graph.to_undirected()
        started = time()
        adjacency = nx.to_scipy_sparse_matrix(graph)
        # copy=False: the normalisation is applied to adjacency itself
        normalize(adjacency, norm='l1', axis=1, copy=False)
        identity = sp.eye(graph.number_of_nodes())
        residual = identity - adjacency
        _, _, right_t = lg.svds(residual, k=self._d + 1, which='SM')
        finished = time()
        self._X = right_t.T
        self._X = self._X[:, 1:]
        return self._X, (finished - started)
项目:py_jive    作者:idc9
def svd_wrapper(X, rank = None):
    """
    Computes the (possibly partial) SVD of a matrix. Handles the case where
    X is either dense or sparse.

    Parameters
    ----------
    X: either dense or sparse (or a LinearOperator)
    rank: rank of the desired SVD (required for sparse matrices)

    Returns
    -------
    U, D, V
    the columns of U are the left singular vectors
    the COLUMNS of V are the right singular vectors
    """
    if isinstance(X, LinearOperator):
        scipy_svds = svds(convert2scipy(X), rank)
        U, D, V = fix_scipy_svds(scipy_svds)
        V = V.T

    elif issparse(X):
        scipy_svds = svds(X, rank)
        U, D, V = fix_scipy_svds(scipy_svds)
        V = V.T

    else:
        # Dense input: compute the thin SVD and truncate afterwards.
        # TODO: implement partial SVD
        U, D, V = full_svd(X, full_matrices=False)
        V = V.T

        if rank:
            U = U[:, :rank]
            D = D[:rank]
            V = V[:, :rank]

    return U, D, V
项目:polara    作者:Evfro
def build(self, operator=None):
        """Compute item factors from a truncated SVD and store them.

        The matrix is taken from self.operator, then the operator argument,
        then self.get_training_matrix(dtype=np.float64), using the first
        truthy one. Only the 'vh' factor is requested from svds; its rows
        are reversed and the result is stored transposed in
        self._items_factors.
        """
        svd_matrix = self.operator or operator or self.get_training_matrix(dtype=np.float64)

        with Timer(self.method):
            _, _, vh = svds(svd_matrix, k=self.rank, return_singular_vectors='vh')

        # reverse the component order, make the buffer contiguous, then transpose
        reversed_vh = np.ascontiguousarray(vh[::-1, :])
        self._items_factors = reversed_vh.T
项目:factorix    作者:gbouchar    | 项目源码 | 文件源码
项目:factorix    作者:gbouchar
    exact approximation of a matrix using square loss an fully observed entries
        y_mat: input matrix to approximate
        rank: rank of the approximation
        return_embeddings: boolean. If True, it returns the embeddings instead of the approximate matrix

        approximate matrix of the specified rank

        >>> np.random.seed(1)
        >>> mat = toy_factorization_problem(5, 4)
        >>> svd_factorize_matrix(mat, 2)
        array([[ 3.492,  0.148,  1.681,  1.545],
               [ 2.356, -0.032,  1.273,  0.648],
               [ 6.038,  0.099,  3.074,  2.198],
               [ 3.338, -0.508,  2.295, -0.472],
               [ 0.09 ,  0.148, -0.11 ,  0.473]])
    from scipy.sparse.linalg import svds
    u1_mat, d1_vec, v1_matt = svds(y_mat, rank)
    d1_diag_matrix = np.zeros((rank, rank))
    for i in range(rank):
        d1_diag_matrix[i, i] = np.sqrt(d1_vec[i])
    u =, d1_diag_matrix)
    v =, d1_diag_matrix)
    if return_embeddings:
        return u, v
        return, v.T)
项目:vrec    作者:tn1031
def update_X(X, mu, k=6):
    """Shrink the top-k singular values of X and rebuild the matrix.

    For each of the k largest singular values S[t] a quadratic with
    coefficients [1, 1 - S[t], 1/(2*mu) - S[t]] is formed. Its larger real
    root (if admissible) becomes the new singular value, otherwise 0.
    The result U * diag(sigma_star) * VT is returned as a LIL sparse matrix
    with entries below 1e-10 in magnitude zeroed.

    X: matrix accepted by scipy.sparse.linalg.svds
    mu: positive weight of the quadratic penalty
    k: number of singular triplets to use
    """
    #U, S, VT = svdp(X, k=k)
    U, S, VT = svds(X, k=k, which='LM')
    P = np.c_[np.ones((k, 1)), 1-S, 1./2./mu-S]
    sigma_star = np.zeros(k)
    for t in range(k):
        p = P[t, :]
        delta = p[1]**2 - 4 * p[0] * p[2]
        if delta <= 0:
            # No two distinct real roots: the singular value is dropped.
            sigma_star[t] = 0.
            continue

        # Sort ascending so solution[1] is the larger root.
        # NOTE(review): the original ordering step is not visible here;
        # ascending order is what the branch logic below relies on - confirm.
        solution = np.sort(np.roots(p).real)
        if solution[0] * solution[1] <= 0:
            # roots of opposite sign (or one is zero): keep the larger one
            sigma_star[t] = solution[1]
        elif solution[1] < 0:
            # both roots negative
            sigma_star[t] = 0.
        else:
            # both roots positive: keep the larger root only if it does not
            # cost more than shrinking to zero
            f = np.log(1 + solution[1]) + mu * (solution[1] - S[t])**2
            if f > mu * S[t]**2:
                sigma_star[t] = 0.
            else:
                sigma_star[t] = solution[1]

    sigma_star = sp.csr_matrix(np.diag(sigma_star))
    sigma_star = safe_sparse_dot(safe_sparse_dot(U, sigma_star), VT)
    sigma_star[abs(sigma_star)<1e-10] = 0
    return sp.lil_matrix(sigma_star)
项目:TensorGlue    作者:Evfro
def train_model(self, model, svd_rank=10, tensor_ranks=(13, 8, 12)):
        """Train one of the supported recommenders and store its factors.

        model: 'svd', 'i2i' or 'tensor' (case-insensitive).
        svd_rank: number of singular triplets for the 'svd' model.
        tensor_ranks: ranks passed to tucker_als for the 'tensor' model.

        Raises NotImplementedError for an unknown model name, and for 'i2i'
        when a context field is configured.
        """
        userid, itemid, contextid, values = self.fields
        model_name = model.lower()
        if model_name == 'svd':
            self._get_recommendations = self.svd_recommender
            # (row, col) index pair for the user-item matrix
            svd_idx = (self.train[userid].values,
                       self.train[itemid].values)
            #TODO: needs to be more failproof with self.arrange_by and contextid
            if contextid:
                svd_val = self._contextualize(self.train).values
            else:
                svd_val = self.train[values].values
            #the data is reindexed - no need to specify shape
            #svd_shp = self.train[[userid, itemid]].max()+1
            #.tocsr() will accumulate duplicates values (having different context)
            svd_matrix = sp.sparse.coo_matrix((svd_val, svd_idx),
                                              dtype=np.float64).tocsr() #shape=svd_shp

            _, _, items_factors = svds(svd_matrix, k=svd_rank, return_singular_vectors='vh')
            # reverse the component order returned by svds
            self._items_factors = np.ascontiguousarray(items_factors[::-1, :])

        elif model_name == 'i2i':
            if contextid:
                raise NotImplementedError

            self._get_recommendations = self.i2i_recommender
            i2i_matrix = self._build_i2i_matrix()
            self._i2i_matrix = i2i_matrix

        elif model_name == 'tensor':
            self._get_recommendations = self.tensor_recommender
            idx, val, shp = self._to_coo()
            _, items_factors, context_factors, _ = tucker_als(idx, val, shp, tensor_ranks, growth_tol=0.001)
            self._items_factors = items_factors
            self._context_factors = context_factors

        else:
            raise NotImplementedError
项目:tensorly    作者:tensorly
def test_partial_svd():
    """Test for partial_svd"""
    sizes = [(100, 100), (100, 5), (10, 10), (5, 100)]
    n_eigenvecs = [10, 4, 5, 4]

    # Compare with sparse SVD
    for s, n in zip(sizes, n_eigenvecs):
        matrix = np.random.random(s)
        fU, fS, fV = T.partial_svd(T.tensor(matrix), n_eigenvecs=n)
        U, S, V = svds(matrix, k=n, which='LM')
        # svds output is reversed before comparing
        U, S, V = U[:, ::-1], S[::-1], V[::-1, :]
        # Absolute values: singular vectors are only defined up to sign.
        T.assert_array_almost_equal(np.abs(S), T.abs(fS))
        T.assert_array_almost_equal(np.abs(U), T.abs(fU))
        T.assert_array_almost_equal(np.abs(V), T.abs(fV))

    # Compare with standard SVD
    sizes = [(100, 100), (100, 5), (10, 10), (10, 4), (5, 100)]
    n_eigenvecs = [10, 4, 5, 4, 4]
    for s, n in zip(sizes, n_eigenvecs):
        matrix = np.random.random(s)
        fU, fS, fV = T.partial_svd(T.tensor(matrix), n_eigenvecs=n)

        U, S, V = svd(matrix)
        U, S, V = U[:, :n], S[:n], V[:n, :]
        # Test for SVD
        T.assert_array_almost_equal(np.abs(S), T.abs(fS))
        T.assert_array_almost_equal(np.abs(U), T.abs(fU))
        T.assert_array_almost_equal(np.abs(V), T.abs(fV))

    # A third-order tensor is not a matrix; the context manager expects
    # partial_svd to reject it with ValueError.
    with T.assert_raises(ValueError):
        tensor = T.tensor(np.random.random((3, 3, 3)))
        T.partial_svd(tensor, n_eigenvecs=2)
项目:Machine_Learning_Playground    作者:yao23
def _CFSVD(self, ratingsMat):
        """Predict all ratings from a rank-10 SVD of the row-centred matrix.

        ratingsMat: 2-D array with one row per user. Both dimensions must
        exceed 10 because svds is called with k=10.

        Stores the reconstruction (with each user's mean added back) in
        self.all_user_predicted_ratings; returns nothing.
        """
        user_ratings_mean = np.mean(ratingsMat, axis=1)  # mean over user ratings
        R_demeaned = ratingsMat - user_ratings_mean.reshape(-1, 1)
        from scipy.sparse.linalg import svds
        U, sigma, Vt = svds(R_demeaned, k=10)
        sigma = np.diag(sigma)
        # U * diag(sigma) * Vt is the rank-10 approximation of the centred data.
        self.all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) + user_ratings_mean.reshape(-1, 1)
项目:text-analytics-with-python    作者:dipanjanS
def low_rank_svd(matrix, singular_count=2):
    """Return the (u, s, vt) triple of a truncated SVD of matrix.

    singular_count is passed to svds as k, the number of singular triplets.
    """
    left, values, right_t = svds(matrix, k=singular_count)
    return left, values, right_t
项目:multilingual-joint-embeddings    作者:dcferreira
def gen_repM(counts, freqs, total_counts):
    """
    Receives a dictionary with the frequency of each word, and generates LSA representations with the thresholds defined in src.config

    The 'pmi' matrix from gen_mat is factorised with a truncated SVD of
    config.NCOLS components; the representation is U * diag(D ** DIAG_EXP).

    Returns (repM, word_to_index).
    """
    M = gen_mat(counts, freqs, total_counts, 'pmi')

    U, D, _ = slinalg.svds(M, config.NCOLS)
    # Left singular vectors weighted by the singular values raised to DIAG_EXP.
    repM = U.dot(np.diag(D**config.DIAG_EXP))

    # NOTE(review): word_to_index is not assigned in this function; it is
    # presumably a module-level name - verify.
    return repM, word_to_index
项目:GEM    作者:palash1992
def learn_embedding(self, graph=None, edge_f=None,
                        is_weighted=False, no_python=False):
        """Embed the graph from an SVD of S = inv(I - beta*A) * (beta*A).

        Either graph or edge_f (an edge-list file to load) must be given;
        otherwise an Exception is raised. self._d // 2 singular triplets of
        S are computed; the embedding is the concatenation of
        U * sqrt(diag(s)) and V * sqrt(diag(s)).

        Returns (embedding, elapsed_seconds). The Frobenius error of the
        low-rank reconstruction is printed. is_weighted and no_python are
        not used by this method.
        """
        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)

        t1 = time()
        # A = nx.to_scipy_sparse_matrix(graph)
        # I = sp.eye(graph.number_of_nodes())
        # M_g = I - self._beta*A
        # M_l = self._beta*A
        A = nx.to_numpy_matrix(graph)
        M_g = np.eye(graph.number_of_nodes()) - self._beta * A
        M_l = self._beta * A
        S = np.dot(np.linalg.inv(M_g), M_l)

        u, s, vt = lg.svds(S, k=self._d // 2)
        # Split the singular values evenly between the two halves.
        X1 = np.dot(u, np.diag(np.sqrt(s)))
        X2 = np.dot(vt.T, np.diag(np.sqrt(s)))
        t2 = time()
        self._X = np.concatenate((X1, X2), axis=1)

        # Low-rank reconstruction u * diag(s) * vt, used only for reporting.
        p_d_p_t = np.dot(u, np.dot(np.diag(s), vt))
        eig_err = np.linalg.norm(p_d_p_t - S)
        print('SVD error (low rank): %f' % eig_err)
        return self._X, (t2 - t1)
项目:TextAsGraphClassification    作者:NightmareNyx
def fit(self, graphs, y=None):
        """Fit the kernel approximation on a random subset of graphs.

        A random permutation picks n_components basis graphs (capped at the
        number of samples, with a warning). The kernel matrix on the basis
        is factorised with svds and its inverse square root is stored in
        self.normalization_.

        graphs: sequence of training graphs (indexable, with len()).
        y: ignored.

        Returns self.
        """
        rnd = check_random_state(self.random_state)
        n_samples = len(graphs)

        # get basis vectors
        if self.n_components > n_samples:
            # XXX should we just bail?
            n_components = n_samples
            warnings.warn("n_components > n_samples. This is not possible.\n"
                          "n_components was set to n_samples, which results"
                          " in inefficient evaluation of the full kernel.")

        else:
            n_components = self.n_components
        n_components = min(n_samples, n_components)
        inds = rnd.permutation(n_samples)
        basis_inds = inds[:n_components]
        basis = [graphs[ind] for ind in basis_inds]

        basis_kernel = self.kernel(basis, basis, **self._get_kernel_params())

        # sqrt of kernel matrix on basis vectors
        # U, S, V = svd(basis_kernel)
        # svds requires k strictly below the smallest dimension, hence the -1.
        U, S, V = svds(basis_kernel, k=min(basis_kernel.shape) - 1)
        # Floor the singular values to avoid dividing by zero below.
        S = np.maximum(S, 1e-12)
        self.normalization_ = np.dot(U * 1. / np.sqrt(S), V)
        self.components_ = basis
        self.component_indices_ = inds
        return self
项目:pca    作者:vighneshbirodkar
def _svd(method, X, rank, tol, **args):
    """Dispatch to one of three SVD back-ends and return (u, s, v).

    method: "approximate" (fbpca.pca), "exact" (np.linalg.svd, rank is not
        applied) or "sparse" (svds with its output reversed, falling back to
        np.linalg.svd when rank is not below the smallest dimension).
    rank: requested rank, clipped to the smallest dimension of X.
    tol: tolerance, used by the "sparse" back-end only.
    args: extra keyword arguments for the "approximate"/"exact" back-ends.

    Raises ValueError for any other method.
    """
    smallest_dim = np.min(X.shape)
    rank = min(rank, smallest_dim)

    if method == "approximate":
        return fbpca.pca(X, k=rank, raw=True, **args)

    if method == "exact":
        return np.linalg.svd(X, full_matrices=False, **args)

    if method != "sparse":
        raise ValueError("invalid SVD method")

    if rank >= smallest_dim:
        return np.linalg.svd(X, full_matrices=False)

    left, values, right = svds(X, k=rank, tol=tol)
    # reverse the component order returned by svds
    return left[:, ::-1], values[::-1], right[::-1, :]
项目:kaggle-quora-question-pairs    作者:stys
def compute_svd(X, method, **options):
    """Run a truncated SVD of X and return (S, VT).

    Only method == 'ARPACK' is handled; any other value returns None.
    Recognised options: k (default 10), maxiter (default None) and
    tol (default 1e-4, converted to float). The left factor is not
    requested from svds.
    """
    if method != 'ARPACK':
        return None

    n_components = options.get('k', 10)
    iteration_cap = options.get('maxiter')
    tolerance = float(options.get('tol', 1.e-4))

    _, S, VT = svds(
        X,
        k=n_components,
        tol=tolerance,
        maxiter=iteration_cap,
        return_singular_vectors='vh',
    )
    return S, VT
项目:conceptnet5    作者:ymmah
def build_ppmi(input_path, output_path):
    """Build 300-dimensional PPMI embeddings from parallel text and save them.

    input_path: location of the parallel text handed to
        sparse_from_parallel_text.
    output_path: destination passed to save_hdf.

    The count matrix is converted to PPMI, factorised with a truncated SVD,
    and the sum of left and right singular vectors (scaled by the square
    root of the singular values) is L2-normalised row-wise and saved.
    """
    # NOTE(review): the first argument of this call is not visible in the
    # original text; input_path is the only unused input - confirm the
    # expected type (str vs. pathlib.Path) against sparse_from_parallel_text.
    spmat, index = sparse_from_parallel_text(
        input_path,
        ['de', 'en', 'es', 'fa', 'fr', 'it', 'pt']
    )
    ppmi = counts_to_ppmi(spmat)
    u, s, vT = linalg.svds(ppmi, 300)
    v = vT.T
    values = (u + v) * (s ** 0.5)
    ppmi_frame = l2_normalize_rows(pd.DataFrame(values, index=index))
    save_hdf(ppmi_frame, output_path)
项目:conceptnet5    作者:ymmah
def build_ppmi(conceptnet_filename, ndim=300):
    """Return a DataFrame of ndim-dimensional PPMI embeddings.

    The table in conceptnet_filename is turned into a sparse count matrix,
    converted to PPMI and factorised with a truncated SVD. Each row is the
    sum of the left and right singular vectors, scaled by the square root
    of the singular values, and labelled with the index from the table.
    """
    sparse_csr, index = build_from_conceptnet_table(conceptnet_filename)
    left, singular_values, right_t = linalg.svds(counts_to_ppmi(sparse_csr), ndim)
    scale = singular_values ** 0.5
    embedding = (left + right_t.T) * scale
    return pd.DataFrame(embedding, index=index)
项目:untwist    作者:IoSR-Surrey
def process(self, X):
        # Iteratively splits the (transposed) input into two matrices, A and E,
        # using repeated SVDs with singular-value shrinkage.
        # NOTE(review): this looks like a low-rank (A) + sparse (E) decomposition;
        # confirm against the class documentation.
        # NOTE(review): several statements below are incomplete in this copy of
        # the file (flagged inline); the block is not valid Python as shown.
        X = X.T
        Y = X.copy()
        (m, n) = Y.shape
        n = Y.shape[1]
        # Rescales self.l in place, so the attribute changes on every call.
        self.l = self.l / np.sqrt(m)
        # Largest singular value of Y (k=1, which="LM").
        u,s,v = svds(Y,1,which="LM")
        norm_two = s[0]
        norm_inf = norm(Y.ravel(), np.inf) / self.l
        dual_norm = np.max([norm_two, norm_inf])
        Y = Y / dual_norm
        A = np.zeros(Y.shape)
        E = np.zeros(Y.shape)
        d_norm = norm(X, 'fro')
        # NOTE(review): the numerator of this expression is missing.
        mu = / norm_two
        mu_bar = mu * 1e7
        sv = 10

        for i in range(self.iterations):
            temp_T = X - A + (1 / mu) * Y
            # Element-wise shrinkage of temp_T toward zero by self.l / mu.
            E = np.maximum(temp_T - self.l / mu, 0)
            E += np.minimum(temp_T + self.l / mu, 0)
            sparse_svd = self.choosvd( n, sv)
            # NOTE(review): as written, the dense svd on the second line always
            # overwrites the svds result; an `else:` was probably intended.
            if sparse_svd:
                U, S, V = svds(X - E + (1 / mu) * Y, sv, which= "LM")
                U, S, V = svd(X - E + (1 / mu) * Y, full_matrices = False)
            # Number of singular values exceeding 1 / mu.
            svp = len(np.where(S > (1 / mu))[0])
            # NOTE(review): the second assignment overrides the first as written;
            # an `else:` was probably intended.
            if svp < sv:
                sv = int(np.min([svp+1, n]))
                sv = int(np.min([svp + np.round(0.05 * n), n]))
            # NOTE(review): both right-hand sides below are truncated; they index
            # the last svp components in one case and the first svp in the other.
            if sparse_svd:
                A =[:,-svp:], np.diag(S[-svp:] -1/mu)), V[-svp:,:])
                A =[:, :svp], np.diag(S[:svp] - 1 / mu)), V[:svp, :])
            Z = X - A - E
            Y = Y + mu * Z
            # mu grows by the factor self.rho, capped at mu_bar.
            mu = np.min([mu * self.rho, mu_bar])
            # Frobenius norm of the residual relative to that of X.
            err = norm(Z, 'fro') / d_norm
            print(i, err)
            # NOTE(review): the body of this `if` is missing (presumably an early
            # exit from the loop - verify).
            if self.threshold is not None and err < self.threshold:
        # NOTE(review): the constructors/first arguments of the returned pair are
        # missing; only the sample_rate/window_size/hop_size arguments survive.
        return (, X.sample_rate, X.window_size, X.hop_size),
      , X.sample_rate, X.window_size, X.hop_size)
项目:ro_sgns    作者:AlexGrinch
def projector_splitting(self, eta=5e-6, d=100, 
                            MAX_ITER=1, from_iter=0, display=0, 
                            init=(False, None, None), save=(False, None)):
        """
        Projector splitting algorithm for word2vec matrix factorization.

        eta: step size applied to the gradient F.
        d: rank of the factorisation (number of rows of C and W).
        MAX_ITER: number of iterations to run, starting at from_iter.
        display: when truthy, print the iteration number.
        init: (use_init, C, W); when use_init is truthy C and W are taken
            from the tuple, otherwise they are drawn at random.
        save: (do_save, target); when do_save is truthy self.save_CW is
            called with target after every iteration (and once before the
            first one when from_iter == 0).

        Updates self.C and self.W in place; returns nothing.
        """

        # Initialization
        if (init[0]):
            self.C = init[1]
            self.W = init[2]
        else:
            self.C = np.random.rand(d, self.D.shape[0])
            self.W = np.random.rand(d, self.D.shape[1]) 

        if (save[0] and from_iter==0):
                self.save_CW(save[1], 0)

        # X = C^T W has shape (D.shape[0], D.shape[1]).
        X = (self.C).T.dot(self.W)
        # range/print() keep this loop runnable on both Python 2 and 3.
        for it in range(from_iter, from_iter+MAX_ITER):

            if (display):
                print("Iter #: %s" % (it+1,))

            U, S, V = svds(X, d)
            S = np.diag(S)
            V = V.T

            # Split the singular values evenly between the two factors;
            # the transpose restores the (d, D.shape[0]) layout of C.
            self.C = U.dot(np.sqrt(S)).T
            self.W = np.sqrt(S).dot(V.T)

            if (save[0]):
                self.save_CW(save[1], it+1)

            F = self.grad_MF(self.C, self.W)
            #mask = np.random.binomial(1, .5, size=F.shape)
            #F = F * mask

            # Two QR steps: first refresh the left basis, then the right
            # basis together with the small core matrix S.
            U, _ = qr((X + eta*F).dot(V))
            V, S = qr((X + eta*F).T.dot(U))
            V = V.T
            S = S.T

            X = U.dot(S).dot(V)
项目:Parallel-SGD    作者:angadgill
def fit_transform(self, X, y=None):
        """Fit LSI model to X and perform dimensionality reduction on X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Training data.
        y : ignored.

        Returns
        -------
        X_new : array, shape (n_samples, n_components)
            Reduced version of X. This will always be a dense array.

        Raises
        ------
        ValueError
            If self.algorithm is neither "arpack" nor "randomized", or if
            n_components >= n_features with the "randomized" algorithm.
        """
        X = as_float_array(X, copy=False)
        random_state = check_random_state(self.random_state)

        # If sparse and not csr or csc, convert to csr
        if sp.issparse(X) and X.getformat() not in ["csr", "csc"]:
            X = X.tocsr()

        if self.algorithm == "arpack":
            U, Sigma, VT = svds(X, k=self.n_components, tol=self.tol)
            # svds doesn't abide by scipy.linalg.svd/randomized_svd
            # conventions, so reverse its outputs.
            Sigma = Sigma[::-1]
            U, VT = svd_flip(U[:, ::-1], VT[::-1])

        elif self.algorithm == "randomized":
            k = self.n_components
            n_features = X.shape[1]
            if k >= n_features:
                raise ValueError("n_components must be < n_features;"
                                 " got %d >= %d" % (k, n_features))
            # NOTE(review): the trailing arguments of this call were not
            # visible; n_iter is assumed to be an attribute of the
            # estimator - confirm.
            U, Sigma, VT = randomized_svd(X, self.n_components,
                                          n_iter=self.n_iter,
                                          random_state=random_state)
        else:
            raise ValueError("unknown algorithm %r" % self.algorithm)

        self.components_ = VT

        # Calculate explained variance & explained variance ratio
        X_transformed = np.dot(U, np.diag(Sigma))
        self.explained_variance_ = exp_var = np.var(X_transformed, axis=0)
        if sp.issparse(X):
            _, full_var = mean_variance_axis(X, axis=0)
            full_var = full_var.sum()
        else:
            full_var = np.var(X, axis=0).sum()
        self.explained_variance_ratio_ = exp_var / full_var
        return X_transformed