The following code examples, extracted from open-source Python projects, illustrate how to use scipy.sparse.linalg.svds().
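Before the project snippets, a minimal self-contained sketch of the basic call pattern may be useful. The toy matrix, its shape, and the chosen k below are illustrative assumptions, not taken from any of the projects that follow.

import numpy as np
from scipy.sparse import random as sparse_random
from scipy.sparse.linalg import svds

# Toy sparse matrix (shape and density are illustrative only).
A = sparse_random(100, 50, density=0.05, random_state=0)

# Compute the k largest singular triplets.
U, s, Vt = svds(A, k=10)

# svds returns singular values in ascending order; reverse them
# to get the usual descending convention.
U, s, Vt = U[:, ::-1], s[::-1], Vt[::-1, :]

# Rank-10 reconstruction of A.
A_approx = U @ np.diag(s) @ Vt

Note that svds orders its outputs by increasing singular value, which is why several of the examples below reverse U, S, and V after the call.
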
def fix_scipy_svds(scipy_svds):
    """
    scipy.sparse.linalg.svds orders the singular values backwards;
    this function reorders them and returns the singular values in
    decreasing order.

    Parameters
    ----------
    scipy_svds: the output from scipy.sparse.linalg.svds

    Output
    ------
    U, D, V ordered in decreasing singular values
    """
    U, D, V = scipy_svds
    U = U[:, ::-1]
    D = D[::-1]
    V = V[::-1, :]
    return U, D, V

def post_proC(C, K, d, alpha):
    # C: coefficient matrix, K: number of clusters, d: dimension of each subspace
    C = 0.5 * (C + C.T)
    r = d * K + 1
    U, S, _ = svds(C, r, v0=np.ones(C.shape[0]))
    U = U[:, ::-1]
    S = np.sqrt(S[::-1])
    S = np.diag(S)
    U = U.dot(S)
    U = normalize(U, norm='l2', axis=1)
    Z = U.dot(U.T)
    Z = Z * (Z > 0)
    L = np.abs(Z ** alpha)
    L = L / L.max()
    L = 0.5 * (L + L.T)
    spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack',
                                          affinity='precomputed',
                                          assign_labels='discretize')
    spectral.fit(L)
    grp = spectral.fit_predict(L) + 1
    return grp, L

def post_proC(C, K, d, alpha):
    # C: coefficient matrix, K: number of clusters, d: dimension of each subspace
    C = 0.5 * (C + C.T)
    r = min(d * K + 1, C.shape[0] - 1)
    U, S, _ = svds(C, r, v0=np.ones(C.shape[0]))
    U = U[:, ::-1]
    S = np.sqrt(S[::-1])
    S = np.diag(S)
    U = U.dot(S)
    U = normalize(U, norm='l2', axis=1)
    Z = U.dot(U.T)
    Z = Z * (Z > 0)
    L = np.abs(Z ** alpha)
    L = L / L.max()
    L = 0.5 * (L + L.T)
    spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack',
                                          affinity='precomputed',
                                          assign_labels='discretize')
    spectral.fit(L)
    grp = spectral.fit_predict(L) + 1
    return grp, L

def post_proC(C, K, d, alpha):
    # C: coefficient matrix, K: number of clusters, d: dimension of each subspace
    n = C.shape[0]
    C = 0.5 * (C + C.T)
    # for sparse C, this step makes the algorithm more numerically stable
    C = C - np.diag(np.diag(C)) + np.eye(n, n)
    r = d * K + 1
    U, S, _ = svds(C, r, v0=np.ones(n))
    U = U[:, ::-1]
    S = np.sqrt(S[::-1])
    S = np.diag(S)
    U = U.dot(S)
    U = normalize(U, norm='l2', axis=1)
    Z = U.dot(U.T)
    Z = Z * (Z > 0)
    L = np.abs(Z ** alpha)
    L = L / L.max()
    L = 0.5 * (L + L.T)
    spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack',
                                          affinity='precomputed',
                                          assign_labels='discretize')
    spectral.fit(L)
    grp = spectral.fit_predict(L) + 1
    return grp, L

def dimension_reduction():
    X = PPMI_matrix()
    word_list = list()
    vecdict_list = list()
    for word, vector in sorted(X.items()):
        word_list.append(word)
        vecdict_list.append(dict(vector))
    Dic2Vec = DictVectorizer(sparse=True)
    vector_list = Dic2Vec.fit_transform(vecdict_list)
    X_svd = svds(vector_list, 300)
    X_pca = np.dot(X_svd[0], np.diag(X_svd[1]))
    word_matrix = dict()
    for word, vector in zip(word_list, X_pca):
        word_matrix[word] = vector
    return word_matrix

def dim_reduction():
    dic2vec = DictVectorizer(sparse=True)
    PPMI = getPPMI()
    tc = list()
    token_list = list()
    for token, contexts in sorted(PPMI.items()):
        token_list.append(token)
        contexts = dict(contexts)
        tc.append(contexts)
    tc_vec = dic2vec.fit_transform(tc)
    tc_svd = svds(tc_vec, 300)
    tc_pca = np.dot(tc_svd[0], np.diag(tc_svd[1]))
    word_vec = dict()
    for token, vec in zip(token_list, tc_pca):
        word_vec[token] = vec
    return word_vec

def __init__(self, ratings, sideinfo, n_factor=10, reg=0.1):
    super(LoCo, self).__init__(ratings)

    # side information
    self.sideinfo = sideinfo.copy()

    # auxiliary variables
    self.Z = np.random.uniform(low=-.001, high=.001,
                               size=(self.n_item, n_factor))

    # hyper parameters
    self.n_factor = n_factor
    self.reg = reg

    # svd
    u, s, v = svds(self.sideinfo, n_factor)
    self.V = v  # n_factor x n_attr

def learn_embedding(self, graph=None, edge_f=None,
                    is_weighted=False, no_python=False):
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    graph = graph.to_undirected()

    t1 = time()
    A = nx.to_scipy_sparse_matrix(graph)
    normalize(A, norm='l1', axis=1, copy=False)
    I_n = sp.eye(graph.number_of_nodes())
    I_min_A = I_n - A

    u, s, vt = lg.svds(I_min_A, k=self._d + 1, which='SM')
    t2 = time()

    self._X = vt.T
    self._X = self._X[:, 1:]
    return self._X, (t2 - t1)

def svd_wrapper(X, rank=None):
    """
    Computes the (possibly partial) SVD of a matrix.
    Handles the case where X is either dense or sparse.

    Parameters
    ----------
    X: either dense or sparse
    rank: rank of the desired SVD (required for sparse matrices)

    Output
    ------
    U, D, V
    the columns of U are the left singular vectors
    the columns of V are the right singular vectors
    """
    if isinstance(X, LinearOperator):
        scipy_svds = svds(convert2scipy(X), rank)
        U, D, V = fix_scipy_svds(scipy_svds)
        V = V.T
    elif issparse(X):
        scipy_svds = svds(X, rank)
        U, D, V = fix_scipy_svds(scipy_svds)
        V = V.T
    else:
        # TODO: implement partial SVD
        U, D, V = full_svd(X, full_matrices=False)
        V = V.T
        if rank:
            U = U[:, :rank]
            D = D[:rank]
            V = V[:, :rank]
    return U, D, V

def build(self, operator=None):
    svd_matrix = self.operator or operator or self.get_training_matrix(dtype=np.float64)

    with Timer(self.method):
        _, _, items_factors = svds(svd_matrix, k=self.rank,
                                   return_singular_vectors='vh')

    self._items_factors = np.ascontiguousarray(items_factors[::-1, :]).T

def svd_factorize_matrix(y_mat, rank, return_embeddings=False):
    """
    Exact approximation of a matrix using square loss and fully observed entries.

    Args:
        y_mat: input matrix to approximate
        rank: rank of the approximation
        return_embeddings: boolean. If True, returns the embeddings instead of
            the approximate matrix

    Returns:
        approximate matrix of the specified rank

    Example:
        >>> np.random.seed(1)
        >>> mat = toy_factorization_problem(5, 4)
        >>> svd_factorize_matrix(mat, 2)
        array([[ 3.492,  0.148,  1.681,  1.545],
               [ 2.356, -0.032,  1.273,  0.648],
               [ 6.038,  0.099,  3.074,  2.198],
               [ 3.338, -0.508,  2.295, -0.472],
               [ 0.09 ,  0.148, -0.11 ,  0.473]])
    """
    from scipy.sparse.linalg import svds
    u1_mat, d1_vec, v1_matt = svds(y_mat, rank)
    d1_diag_matrix = np.zeros((rank, rank))
    for i in range(rank):
        d1_diag_matrix[i, i] = np.sqrt(d1_vec[i])
    u = np.dot(u1_mat, d1_diag_matrix)
    v = np.dot(v1_matt.T, d1_diag_matrix)
    if return_embeddings:
        return u, v
    else:
        return np.dot(u, v.T)

def update_X(X, mu, k=6):
    #U, S, VT = svdp(X, k=k)
    U, S, VT = svds(X, k=k, which='LM')

    P = np.c_[np.ones((k, 1)), 1 - S, 1. / 2. / mu - S]
    sigma_star = np.zeros(k)
    for t in range(k):
        p = P[t, :]
        delta = p[1] ** 2 - 4 * p[0] * p[2]
        if delta <= 0:
            sigma_star[t] = 0.
        else:
            solution = np.roots(p)
            solution = solution.tolist()
            solution.sort(key=abs)
            solution = np.array(solution)
            if solution[0] * solution[1] <= 0:
                sigma_star[t] = solution[1]
            elif solution[1] < 0:
                sigma_star[t] = 0.
            else:
                f = np.log(1 + solution[1]) + mu * (solution[1] - S[t]) ** 2
                if f > mu * S[t] ** 2:
                    sigma_star[t] = 0.
                else:
                    sigma_star[t] = solution[1]

    sigma_star = sp.csr_matrix(np.diag(sigma_star))
    sigma_star = safe_sparse_dot(safe_sparse_dot(U, sigma_star), VT)
    sigma_star[abs(sigma_star) < 1e-10] = 0
    return sp.lil_matrix(sigma_star)

def train_model(self, model, svd_rank=10, tensor_ranks=(13, 8, 12)):
    userid, itemid, contextid, values = self.fields
    if model.lower() == 'svd':
        self._get_recommendations = self.svd_recommender
        svd_idx = (self.train[userid].values, self.train[itemid].values)
        #TODO: needs to be more failproof with self.arrange_by and contextid
        if contextid:
            svd_val = self._contextualize(self.train).values
        else:
            svd_val = self.train[values].values
        #the data is reindexed - no need to specify shape
        #svd_shp = self.train[[userid, itemid]].max()+1
        #.tocsr() will accumulate duplicate values (having different context)
        svd_matrix = sp.sparse.coo_matrix((svd_val, svd_idx),
                                          dtype=np.float64).tocsr()  #shape=svd_shp
        _, _, items_factors = svds(svd_matrix, k=svd_rank,
                                   return_singular_vectors='vh')
        self._items_factors = np.ascontiguousarray(items_factors[::-1, :])
    elif model.lower() == 'i2i':
        if contextid:
            raise NotImplementedError
        self._get_recommendations = self.i2i_recommender
        i2i_matrix = self._build_i2i_matrix()
        self._i2i_matrix = i2i_matrix
    elif model.lower() == 'tensor':
        self._get_recommendations = self.tensor_recommender
        idx, val, shp = self._to_coo()
        _, items_factors, context_factors, _ = tucker_als(idx, val, shp,
                                                          tensor_ranks,
                                                          growth_tol=0.001)
        self._items_factors = items_factors
        self._context_factors = context_factors
    else:
        raise NotImplementedError

def test_partial_svd():
    """Test for partial_svd"""
    sizes = [(100, 100), (100, 5), (10, 10), (5, 100)]
    n_eigenvecs = [10, 4, 5, 4]

    # Compare with sparse SVD
    for s, n in zip(sizes, n_eigenvecs):
        matrix = np.random.random(s)
        fU, fS, fV = T.partial_svd(T.tensor(matrix), n_eigenvecs=n)
        U, S, V = svds(matrix, k=n, which='LM')
        U, S, V = U[:, ::-1], S[::-1], V[::-1, :]
        T.assert_array_almost_equal(np.abs(S), T.abs(fS))
        T.assert_array_almost_equal(np.abs(U), T.abs(fU))
        T.assert_array_almost_equal(np.abs(V), T.abs(fV))

    # Compare with standard SVD
    sizes = [(100, 100), (100, 5), (10, 10), (10, 4), (5, 100)]
    n_eigenvecs = [10, 4, 5, 4, 4]
    for s, n in zip(sizes, n_eigenvecs):
        matrix = np.random.random(s)
        fU, fS, fV = T.partial_svd(T.tensor(matrix), n_eigenvecs=n)
        U, S, V = svd(matrix)
        U, S, V = U[:, :n], S[:n], V[:n, :]
        # Test for SVD
        T.assert_array_almost_equal(np.abs(S), T.abs(fS))
        T.assert_array_almost_equal(np.abs(U), T.abs(fU))
        T.assert_array_almost_equal(np.abs(V), T.abs(fV))

    with T.assert_raises(ValueError):
        tensor = T.tensor(np.random.random((3, 3, 3)))
        T.partial_svd(tensor)

def _CFSVD(self, ratingsMat):
    user_ratings_mean = np.mean(ratingsMat, axis=1)  # mean over user ratings
    R_demeaned = ratingsMat - user_ratings_mean.reshape(-1, 1)

    from scipy.sparse.linalg import svds
    U, sigma, Vt = svds(R_demeaned, k=10)
    sigma = np.diag(sigma)
    self.all_user_predicted_ratings = (np.dot(np.dot(U, sigma), Vt)
                                       + user_ratings_mean.reshape(-1, 1))

def low_rank_svd(matrix, singular_count=2):
    u, s, vt = svds(matrix, k=singular_count)
    return u, s, vt

def gen_repM(counts, freqs, total_counts):
    """
    Receives a dictionary with the frequency of each word and generates
    LSA representations using the thresholds defined in src.config.
    """
    M = gen_mat(counts, freqs, total_counts, 'pmi')
    U, D, _ = slinalg.svds(M, config.NCOLS)
    repM = U.dot(np.diag(D ** config.DIAG_EXP))
    return repM, word_to_index

def learn_embedding(self, graph=None, edge_f=None,
                    is_weighted=False, no_python=False):
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)

    t1 = time()
    # A = nx.to_scipy_sparse_matrix(graph)
    # I = sp.eye(graph.number_of_nodes())
    # M_g = I - self._beta*A
    # M_l = self._beta*A
    A = nx.to_numpy_matrix(graph)
    M_g = np.eye(graph.number_of_nodes()) - self._beta * A
    M_l = self._beta * A
    S = np.dot(np.linalg.inv(M_g), M_l)

    u, s, vt = lg.svds(S, k=self._d // 2)
    X1 = np.dot(u, np.diag(np.sqrt(s)))
    X2 = np.dot(vt.T, np.diag(np.sqrt(s)))
    t2 = time()
    self._X = np.concatenate((X1, X2), axis=1)

    p_d_p_t = np.dot(u, np.dot(np.diag(s), vt))
    eig_err = np.linalg.norm(p_d_p_t - S)
    print('SVD error (low rank): %f' % eig_err)
    return self._X, (t2 - t1)

def fit(self, graphs, y=None):
    rnd = check_random_state(self.random_state)
    n_samples = len(graphs)

    # get basis vectors
    if self.n_components > n_samples:
        # XXX should we just bail?
        n_components = n_samples
        warnings.warn("n_components > n_samples. This is not possible.\n"
                      "n_components was set to n_samples, which results"
                      " in inefficient evaluation of the full kernel.")
    else:
        n_components = self.n_components
    n_components = min(n_samples, n_components)
    inds = rnd.permutation(n_samples)
    basis_inds = inds[:n_components]
    basis = []
    for ind in basis_inds:
        basis.append(graphs[ind])

    basis_kernel = self.kernel(basis, basis, **self._get_kernel_params())

    # sqrt of kernel matrix on basis vectors
    # U, S, V = svd(basis_kernel)
    U, S, V = svds(basis_kernel, k=min(basis_kernel.shape) - 1)
    S = np.maximum(S, 1e-12)
    self.normalization_ = np.dot(U * 1. / np.sqrt(S), V)
    self.components_ = basis
    self.component_indices_ = inds
    return self

def _svd(method, X, rank, tol, **args):
    rank = min(rank, np.min(X.shape))
    if method == "approximate":
        return fbpca.pca(X, k=rank, raw=True, **args)
    elif method == "exact":
        return np.linalg.svd(X, full_matrices=False, **args)
    elif method == "sparse":
        if rank >= np.min(X.shape):
            return np.linalg.svd(X, full_matrices=False)
        u, s, v = svds(X, k=rank, tol=tol)
        u, s, v = u[:, ::-1], s[::-1], v[::-1, :]
        return u, s, v
    raise ValueError("invalid SVD method")

def compute_svd(X, method, **options):
    if method == 'ARPACK':
        k = options.get('k', 10)
        maxiter = options.get('maxiter')
        tol = float(options.get('tol', 1.e-4))
        U, S, VT = svds(X, k=k, tol=tol, maxiter=maxiter,
                        return_singular_vectors='vh')
    return S, VT

def build_ppmi(input_path, output_path):
    spmat, index = sparse_from_parallel_text(
        pathlib.Path(input_path),
        ['de', 'en', 'es', 'fa', 'fr', 'it', 'pt']
    )
    ppmi = counts_to_ppmi(spmat)
    u, s, vT = linalg.svds(ppmi, 300)
    v = vT.T
    values = (u + v) * (s ** 0.5)
    ppmi_frame = l2_normalize_rows(pd.DataFrame(values, index=index))
    save_hdf(ppmi_frame, output_path)

def build_ppmi(conceptnet_filename, ndim=300):
    sparse_csr, index = build_from_conceptnet_table(conceptnet_filename)
    ppmi = counts_to_ppmi(sparse_csr)
    u, s, vT = linalg.svds(ppmi, ndim)
    v = vT.T
    values = (u + v) * (s ** 0.5)
    return pd.DataFrame(values, index=index)

def process(self, X):
    X = X.T
    Y = X.copy()
    (m, n) = Y.shape
    n = Y.shape[1]
    self.l = self.l / np.sqrt(m)

    u, s, v = svds(Y, 1, which="LM")
    norm_two = s[0]
    norm_inf = norm(Y.ravel(), np.inf) / self.l
    dual_norm = np.max([norm_two, norm_inf])
    Y = Y / dual_norm

    A = np.zeros(Y.shape)
    E = np.zeros(Y.shape)
    d_norm = norm(X, 'fro')
    mu = self.mu / norm_two
    mu_bar = mu * 1e7
    sv = 10

    for i in range(self.iterations):
        temp_T = X - A + (1 / mu) * Y
        E = np.maximum(temp_T - self.l / mu, 0)
        E += np.minimum(temp_T + self.l / mu, 0)

        sparse_svd = self.choosvd(n, sv)
        if sparse_svd:
            U, S, V = svds(X - E + (1 / mu) * Y, sv, which="LM")
        else:
            U, S, V = svd(X - E + (1 / mu) * Y, full_matrices=False)

        svp = len(np.where(S > (1 / mu))[0])
        if svp < sv:
            sv = int(np.min([svp + 1, n]))
        else:
            sv = int(np.min([svp + np.round(0.05 * n), n]))

        if sparse_svd:
            A = np.dot(np.dot(U[:, -svp:], np.diag(S[-svp:] - 1 / mu)), V[-svp:, :])
        else:
            A = np.dot(np.dot(U[:, :svp], np.diag(S[:svp] - 1 / mu)), V[:svp, :])

        Z = X - A - E
        Y = Y + mu * Z
        mu = np.min([mu * self.rho, mu_bar])

        err = norm(Z, 'fro') / d_norm
        print(i, err)
        if self.threshold is not None and err < self.threshold:
            break

    return (data.audio.Spectrogram(A.T, X.sample_rate, X.window_size, X.hop_size),
            data.audio.Spectrogram(E.T, X.sample_rate, X.window_size, X.hop_size))

def projector_splitting(self, eta=5e-6, d=100, MAX_ITER=1, from_iter=0,
                        display=0, init=(False, None, None), save=(False, None)):
    """
    Projector splitting algorithm for word2vec matrix factorization.
    """
    # Initialization
    if init[0]:
        self.C = init[1]
        self.W = init[2]
    else:
        self.C = np.random.rand(d, self.D.shape[0])
        self.W = np.random.rand(d, self.D.shape[1])

    if save[0] and from_iter == 0:
        self.save_CW(save[1], 0)

    X = (self.C).T.dot(self.W)
    for it in range(from_iter, from_iter + MAX_ITER):
        if display:
            print("Iter #:", it + 1)

        U, S, V = svds(X, d)
        S = np.diag(S)
        V = V.T

        self.C = U.dot(np.sqrt(S)).T
        self.W = np.sqrt(S).dot(V.T)

        if save[0]:
            self.save_CW(save[1], it + 1)

        F = self.grad_MF(self.C, self.W)
        #mask = np.random.binomial(1, .5, size=F.shape)
        #F = F * mask

        U, _ = qr((X + eta * F).dot(V))
        V, S = qr((X + eta * F).T.dot(U))
        V = V.T

        X = U.dot(S).dot(V)

def fit_transform(self, X, y=None):
    """Fit LSI model to X and perform dimensionality reduction on X.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.

    Returns
    -------
    X_new : array, shape (n_samples, n_components)
        Reduced version of X. This will always be a dense array.
    """
    X = as_float_array(X, copy=False)
    random_state = check_random_state(self.random_state)

    # If sparse and not csr or csc, convert to csr
    if sp.issparse(X) and X.getformat() not in ["csr", "csc"]:
        X = X.tocsr()

    if self.algorithm == "arpack":
        U, Sigma, VT = svds(X, k=self.n_components, tol=self.tol)
        # svds doesn't abide by scipy.linalg.svd/randomized_svd
        # conventions, so reverse its outputs.
        Sigma = Sigma[::-1]
        U, VT = svd_flip(U[:, ::-1], VT[::-1])
    elif self.algorithm == "randomized":
        k = self.n_components
        n_features = X.shape[1]
        if k >= n_features:
            raise ValueError("n_components must be < n_features;"
                             " got %d >= %d" % (k, n_features))
        U, Sigma, VT = randomized_svd(X, self.n_components,
                                      n_iter=self.n_iter,
                                      random_state=random_state)
    else:
        raise ValueError("unknown algorithm %r" % self.algorithm)

    self.components_ = VT

    # Calculate explained variance & explained variance ratio
    X_transformed = np.dot(U, np.diag(Sigma))
    self.explained_variance_ = exp_var = np.var(X_transformed, axis=0)
    if sp.issparse(X):
        _, full_var = mean_variance_axis(X, axis=0)
        full_var = full_var.sum()
    else:
        full_var = np.var(X, axis=0).sum()
    self.explained_variance_ratio_ = exp_var / full_var

    return X_transformed