The following 29 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.cholesky().
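Before the project examples, here is a minimal sketch of the call pattern most of them share: factor a symmetric positive-definite matrix with tf.cholesky and reuse the factor for solves. This sketch assumes the TensorFlow 1.x API (where tf.cholesky and tf.cholesky_solve are available as top-level functions); the matrix and variable names are illustrative only and do not come from the projects below.

import numpy as np
import tensorflow as tf  # TensorFlow 1.x API assumed

# Build a symmetric positive-definite matrix A = X X^T + jitter * I.
X = np.random.randn(4, 4)
A = tf.constant(X @ X.T + 1e-6 * np.eye(4))

L = tf.cholesky(A)            # lower-triangular L with L L^T = A
b = tf.constant(np.random.randn(4, 1))
x = tf.cholesky_solve(L, b)   # solves A x = b by reusing the factor L

with tf.Session() as sess:
    L_val, x_val = sess.run([L, x])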
def test_whiten(self):
    """
    Make sure that predicting using the whitened representation is the same
    as the non-whitened one.
    """
    with self.test_context() as sess:
        Xs, X, F, k, num_data, feed_dict = self.prepare()
        k.compile(session=sess)

        K = k.K(X) + tf.eye(num_data, dtype=settings.float_type) * 1e-6
        L = tf.cholesky(K)
        V = tf.matrix_triangular_solve(L, F, lower=True)
        Fstar_mean, Fstar_var = gpflow.conditionals.conditional(Xs, X, k, F)
        Fstar_w_mean, Fstar_w_var = gpflow.conditionals.conditional(Xs, X, k, V, white=True)

        mean1, var1 = sess.run([Fstar_w_mean, Fstar_w_var], feed_dict=feed_dict)
        mean2, var2 = sess.run([Fstar_mean, Fstar_var], feed_dict=feed_dict)

        # TODO: should tolerance be type dependent?
        assert_allclose(mean1, mean2)
        assert_allclose(var1, var2)
def _build_predict(self, Xnew, full_cov=False):
    """
    Xnew is a data matrix, point at which we want to predict

    This method computes

        p(F* | Y)

    where F* are points on the GP at Xnew, Y are noisy observations at X.
    """
    Kx = self.kern.K(self.X, Xnew)
    K = self.kern.K(self.X) + tf.eye(tf.shape(self.X)[0], dtype=settings.float_type) * self.likelihood.variance
    L = tf.cholesky(K)
    A = tf.matrix_triangular_solve(L, Kx, lower=True)
    V = tf.matrix_triangular_solve(L, self.Y - self.mean_function(self.X))
    fmean = tf.matmul(A, V, transpose_a=True) + self.mean_function(Xnew)
    if full_cov:
        fvar = self.kern.K(Xnew) - tf.matmul(A, A, transpose_a=True)
        shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
        fvar = tf.tile(tf.expand_dims(fvar, 2), shape)
    else:
        fvar = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
        fvar = tf.tile(tf.reshape(fvar, (-1, 1)), [1, tf.shape(self.Y)[1]])
    return fmean, fvar
def _build_common_terms(self):
    num_inducing = len(self.feature)
    err = self.Y - self.mean_function(self.X)  # size N x R
    Kdiag = self.kern.Kdiag(self.X)
    Kuf = self.feature.Kuf(self.kern, self.X)
    Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)

    Luu = tf.cholesky(Kuu)  # => Luu Luu^T = Kuu
    V = tf.matrix_triangular_solve(Luu, Kuf)  # => V^T V = Qff = Kuf^T Kuu^-1 Kuf

    diagQff = tf.reduce_sum(tf.square(V), 0)
    nu = Kdiag - diagQff + self.likelihood.variance

    B = tf.eye(num_inducing, dtype=settings.float_type) + tf.matmul(V / nu, V, transpose_b=True)
    L = tf.cholesky(B)
    beta = err / tf.expand_dims(nu, 1)  # size N x R
    alpha = tf.matmul(V, beta)  # size N x R

    gamma = tf.matrix_triangular_solve(L, alpha, lower=True)  # size N x R

    return err, nu, Luu, L, alpha, beta, gamma
def _define_full_covariance_probs(self, shard_id, shard):
    """Defines the full covariance probabilities per example in a class.

    Updates a matrix with dimension num_examples X num_classes.

    Args:
      shard_id: id of the current shard.
      shard: current data shard, 1 X num_examples X dimensions.
    """
    diff = shard - self._means
    cholesky = tf.cholesky(self._covs + self._min_var)
    log_det_covs = 2.0 * tf.reduce_sum(tf.log(tf.matrix_diag_part(cholesky)), 1)
    x_mu_cov = tf.square(
        tf.matrix_triangular_solve(
            cholesky, tf.transpose(diff, perm=[0, 2, 1]), lower=True))
    diag_m = tf.transpose(tf.reduce_sum(x_mu_cov, 1))
    self._probs[shard_id] = -0.5 * (
        diag_m + tf.to_float(self._dimensions) * tf.log(2 * np.pi) +
        log_det_covs)
def build_backward_variance(self, Yvar): """ Additional method for scaling variance backward (used in :class:`.Normalizer`). Can process both the diagonal variances returned by predict_f, as well as full covariance matrices. :param Yvar: size N x N x P or size N x P :return: Yvar scaled, same rank and size as input """ rank = tf.rank(Yvar) # Because TensorFlow evaluates both fn1 and fn2, the transpose can't be in the same line. If a full cov # matrix is provided fn1 turns it into a rank 4, then tries to transpose it as a rank 3. # Splitting it in two steps however works fine. Yvar = tf.cond(tf.equal(rank, 2), lambda: tf.matrix_diag(tf.transpose(Yvar)), lambda: Yvar) Yvar = tf.cond(tf.equal(rank, 2), lambda: tf.transpose(Yvar, perm=[1, 2, 0]), lambda: Yvar) N = tf.shape(Yvar)[0] D = tf.shape(Yvar)[2] L = tf.cholesky(tf.square(tf.transpose(self.A))) Yvar = tf.reshape(Yvar, [N * N, D]) scaled_var = tf.reshape(tf.transpose(tf.cholesky_solve(L, tf.transpose(Yvar))), [N, N, D]) return tf.cond(tf.equal(rank, 2), lambda: tf.reduce_sum(scaled_var, axis=1), lambda: scaled_var)
def normal_sample(mean, var, full_cov=False):
    if full_cov is False:
        z = tf.random_normal(tf.shape(mean), dtype=float_type)
        return mean + z * var ** 0.5
    else:
        S, N, D = shape_as_list(mean)  # var is SNND
        mean = tf.transpose(mean, (0, 2, 1))  # SND -> SDN
        var = tf.transpose(var, (0, 3, 1, 2))  # SNND -> SDNN
        # I = jitter * tf.eye(N, dtype=float_type)[None, None, :, :]  # 11NN
        chol = tf.cholesky(var)  # (+ I) SDNN; should be ok without, as var already has jitter
        z = tf.random_normal([S, D, N, 1], dtype=float_type)
        f = mean + tf.matmul(chol, z)[:, :, :, 0]  # SDN(1)
        return tf.transpose(f, (0, 2, 1))  # SND
def test_whiten(self):
    """
    Make sure that predicting using the whitened representation is the same
    as the non-whitened one.
    """
    with self.test_context() as sess:
        rng = np.random.RandomState(0)
        Xs, X, F, k, num_data, feed_dict = self.prepare()
        k.compile(session=sess)

        F_sqrt = tf.placeholder(settings.float_type, [num_data, 1])
        F_sqrt_data = rng.rand(num_data, 1)
        feed_dict[F_sqrt] = F_sqrt_data

        K = k.K(X)
        L = tf.cholesky(K)
        V = tf.matrix_triangular_solve(L, F, lower=True)
        V_chol = tf.matrix_triangular_solve(L, tf.diag(F_sqrt[:, 0]), lower=True)
        V_sqrt = tf.expand_dims(V_chol, 2)

        Fstar_mean, Fstar_var = gpflow.conditionals.conditional(
            Xs, X, k, F, q_sqrt=F_sqrt)
        Fstar_w_mean, Fstar_w_var = gpflow.conditionals.conditional(
            Xs, X, k, V, q_sqrt=V_sqrt, white=True)

        mean_difference = sess.run(Fstar_w_mean - Fstar_mean, feed_dict=feed_dict)
        var_difference = sess.run(Fstar_w_var - Fstar_var, feed_dict=feed_dict)

        assert_allclose(mean_difference, 0, atol=4)
        assert_allclose(var_difference, 0, atol=4)
def mvnquad(func, means, covs, H, Din, Dout=()):
    """
    Computes N Gaussian expectation integrals of a single function 'f'
    using Gauss-Hermite quadrature.
    :param f: integrand function. Takes one input of shape ?xD.
    :param means: NxD
    :param covs: NxDxD
    :param H: Number of Gauss-Hermite evaluation points.
    :param Din: Number of input dimensions. Needs to be known at call-time.
    :param Dout: Number of output dimensions. Defaults to (). Dout is assumed
        to leave out the item index, i.e. f actually maps (?xD)->(?x*Dout).
    :return: quadratures (N,*Dout)
    """
    xn, wn = mvhermgauss(H, Din)
    N = tf.shape(means)[0]

    # transform points based on Gaussian parameters
    cholXcov = tf.cholesky(covs)  # NxDxD
    Xt = tf.matmul(cholXcov, tf.tile(xn[None, :, :], (N, 1, 1)), transpose_b=True)  # NxDxH**D
    X = 2.0 ** 0.5 * Xt + tf.expand_dims(means, 2)  # NxDxH**D
    Xr = tf.reshape(tf.transpose(X, [2, 0, 1]), (-1, Din))  # (H**D*N)xD

    # perform quadrature
    fX = tf.reshape(func(Xr), (H ** Din, N,) + Dout)
    wr = np.reshape(wn * np.pi ** (-Din * 0.5),
                    (-1,) + (1,) * (1 + len(Dout)))
    return tf.reduce_sum(fX * wr, 0)
def _build_likelihood(self):
    """
    Construct a tensorflow function to compute the likelihood.

        \log p(Y | theta).
    """
    K = self.kern.K(self.X) + tf.eye(tf.shape(self.X)[0], dtype=settings.float_type) * self.likelihood.variance
    L = tf.cholesky(K)
    m = self.mean_function(self.X)

    return multivariate_normal(self.Y, m, L)
def _build_likelihood(self):
    """
    q_alpha, q_lambda are variational parameters, size N x R

    This method computes the variational lower bound on the likelihood,
    which is:

        E_{q(F)} [ \log p(Y|F) ] - KL[ q(F) || p(F)]

    with

        q(f) = N(f | K alpha + mean, [K^-1 + diag(square(lambda))]^-1) .
    """
    K = self.kern.K(self.X)
    K_alpha = tf.matmul(K, self.q_alpha)
    f_mean = K_alpha + self.mean_function(self.X)

    # compute the variance for each of the outputs
    I = tf.tile(tf.expand_dims(tf.eye(self.num_data, dtype=settings.float_type), 0),
                [self.num_latent, 1, 1])
    A = I + tf.expand_dims(tf.transpose(self.q_lambda), 1) * \
        tf.expand_dims(tf.transpose(self.q_lambda), 2) * K
    L = tf.cholesky(A)
    Li = tf.matrix_triangular_solve(L, I)
    tmp = Li / tf.expand_dims(tf.transpose(self.q_lambda), 1)
    f_var = 1. / tf.square(self.q_lambda) - tf.transpose(tf.reduce_sum(tf.square(tmp), 1))

    # some statistics about A are used in the KL
    A_logdet = 2.0 * tf.reduce_sum(tf.log(tf.matrix_diag_part(L)))
    trAi = tf.reduce_sum(tf.square(Li))

    KL = 0.5 * (A_logdet + trAi - self.num_data * self.num_latent +
                tf.reduce_sum(K_alpha * self.q_alpha))

    v_exp = self.likelihood.variational_expectations(f_mean, f_var, self.Y)
    return tf.reduce_sum(v_exp) - KL
def _build_predict(self, Xnew, full_cov=False):
    """
    The posterior variance of F is given by

        q(f) = N(f | K alpha + mean, [K^-1 + diag(lambda**2)]^-1)

    Here we project this to F*, the values of the GP at Xnew, which is given by

        q(F*) = N ( F* | K_{*F} alpha + mean,
                    K_{**} - K_{*f}[K_{ff} + diag(lambda**-2)]^-1 K_{f*} )
    """
    # compute kernel things
    Kx = self.kern.K(self.X, Xnew)
    K = self.kern.K(self.X)

    # predictive mean
    f_mean = tf.matmul(Kx, self.q_alpha, transpose_a=True) + self.mean_function(Xnew)

    # predictive var
    A = K + tf.matrix_diag(tf.transpose(1. / tf.square(self.q_lambda)))
    L = tf.cholesky(A)
    Kx_tiled = tf.tile(tf.expand_dims(Kx, 0), [self.num_latent, 1, 1])
    LiKx = tf.matrix_triangular_solve(L, Kx_tiled)
    if full_cov:
        f_var = self.kern.K(Xnew) - tf.matmul(LiKx, LiKx, transpose_a=True)
    else:
        f_var = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(LiKx), 1)
    return f_mean, tf.transpose(f_var)
def _build_likelihood(self):
    """
    Construct a tf function to compute the likelihood of a general GP model.

        \log p(Y, V | theta).
    """
    K = self.kern.K(self.X)
    L = tf.cholesky(
        K + tf.eye(tf.shape(self.X)[0], dtype=settings.float_type) * settings.numerics.jitter_level)
    F = tf.matmul(L, self.V) + self.mean_function(self.X)

    return tf.reduce_sum(self.likelihood.logp(F, self.Y))
def predict_f_samples(self, Xnew, num_samples):
    """
    Produce samples from the posterior latent function(s) at the points Xnew.
    """
    mu, var = self._build_predict(Xnew, full_cov=True)
    jitter = tf.eye(tf.shape(mu)[0], dtype=settings.float_type) * settings.numerics.jitter_level
    samples = []
    for i in range(self.num_latent):
        L = tf.cholesky(var[:, :, i] + jitter)
        shape = tf.stack([tf.shape(L)[0], num_samples])
        V = tf.random_normal(shape, dtype=settings.float_type)
        samples.append(mu[:, i:i + 1] + tf.matmul(L, V))
    return tf.transpose(tf.stack(samples))
def compute_upper_bound(self):
    num_data = tf.cast(tf.shape(self.Y)[0], settings.float_type)

    Kdiag = self.kern.Kdiag(self.X)
    Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
    Kuf = self.feature.Kuf(self.kern, self.X)

    L = tf.cholesky(Kuu)
    LB = tf.cholesky(Kuu + self.likelihood.variance ** -1.0 * tf.matmul(Kuf, Kuf, transpose_b=True))

    LinvKuf = tf.matrix_triangular_solve(L, Kuf, lower=True)
    # Using the Trace bound, from Titsias' presentation
    c = tf.reduce_sum(Kdiag) - tf.reduce_sum(LinvKuf ** 2.0)
    # Kff = self.kern.K(self.X)
    # Qff = tf.matmul(Kuf, LinvKuf, transpose_a=True)

    # Alternative bound on max eigenval:
    # c = tf.reduce_max(tf.reduce_sum(tf.abs(Kff - Qff), 0))
    corrected_noise = self.likelihood.variance + c

    const = -0.5 * num_data * tf.log(2 * np.pi * self.likelihood.variance)
    logdet = tf.reduce_sum(tf.log(tf.diag_part(L))) - tf.reduce_sum(tf.log(tf.diag_part(LB)))

    LC = tf.cholesky(Kuu + corrected_noise ** -1.0 * tf.matmul(Kuf, Kuf, transpose_b=True))
    v = tf.matrix_triangular_solve(LC, corrected_noise ** -1.0 * tf.matmul(Kuf, self.Y), lower=True)
    quad = -0.5 * corrected_noise ** -1.0 * tf.reduce_sum(self.Y ** 2.0) + 0.5 * tf.reduce_sum(v ** 2.0)

    return const + logdet + quad
def _build_predict(self, Xnew, full_cov=False):
    """
    Compute the mean and variance of the latent function at some new points
    Xnew. For a derivation of the terms in here, see the associated SGPR
    notebook.
    """
    num_inducing = len(self.feature)
    err = self.Y - self.mean_function(self.X)
    Kuf = self.feature.Kuf(self.kern, self.X)
    Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
    Kus = self.feature.Kuf(self.kern, Xnew)
    sigma = tf.sqrt(self.likelihood.variance)
    L = tf.cholesky(Kuu)
    A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
    B = tf.matmul(A, A, transpose_b=True) + tf.eye(num_inducing, dtype=settings.float_type)
    LB = tf.cholesky(B)
    Aerr = tf.matmul(A, err)
    c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma
    tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
    tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
    mean = tf.matmul(tmp2, c, transpose_a=True)
    if full_cov:
        var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
              - tf.matmul(tmp1, tmp1, transpose_a=True)
        shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 2), shape)
    else:
        var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
              - tf.reduce_sum(tf.square(tmp1), 0)
        shape = tf.stack([1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 1), shape)
    return mean + self.mean_function(Xnew), var
def _build_predict(self, Xnew, full_cov=False):
    """
    Compute the mean and variance of the latent function at some new points.
    Note that this is very similar to the SGPR prediction, for which there
    are notes in the SGPR notebook.
    :param Xnew: Point to predict at.
    """
    num_inducing = tf.shape(self.Z)[0]
    psi1 = self.kern.eKxz(self.Z, self.X_mean, self.X_var)
    psi2 = tf.reduce_sum(self.kern.eKzxKxz(self.Z, self.X_mean, self.X_var), 0)
    Kuu = self.kern.K(self.Z) + tf.eye(num_inducing, dtype=settings.float_type) * settings.numerics.jitter_level
    Kus = self.kern.K(self.Z, Xnew)
    sigma2 = self.likelihood.variance
    sigma = tf.sqrt(sigma2)
    L = tf.cholesky(Kuu)

    A = tf.matrix_triangular_solve(L, tf.transpose(psi1), lower=True) / sigma
    tmp = tf.matrix_triangular_solve(L, psi2, lower=True)
    AAT = tf.matrix_triangular_solve(L, tf.transpose(tmp), lower=True) / sigma2
    B = AAT + tf.eye(num_inducing, dtype=settings.float_type)
    LB = tf.cholesky(B)
    c = tf.matrix_triangular_solve(LB, tf.matmul(A, self.Y), lower=True) / sigma
    tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
    tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
    mean = tf.matmul(tmp2, c, transpose_a=True)
    if full_cov:
        var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
              - tf.matmul(tmp1, tmp1, transpose_a=True)
        shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 2), shape)
    else:
        var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
              - tf.reduce_sum(tf.square(tmp1), 0)
        shape = tf.stack([1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 1), shape)
    return mean + self.mean_function(Xnew), var
def eKxz(self, Z, Xmu, Xcov):
    """
    Also known as phi_1: <K_{x, Z}>_{q(x)}.
    :param Z: MxD inducing inputs
    :param Xmu: X mean (NxD)
    :param Xcov: NxDxD
    :return: NxM
    """
    # use only active dimensions
    Xcov = self._slice_cov(Xcov)
    Z, Xmu = self._slice(Z, Xmu)
    D = tf.shape(Xmu)[1]
    if self.ARD:
        lengthscales = self.lengthscales
    else:
        lengthscales = tf.zeros((D,), dtype=settings.float_type) + self.lengthscales

    vec = tf.expand_dims(Xmu, 2) - tf.expand_dims(tf.transpose(Z), 0)  # NxDxM
    chols = tf.cholesky(tf.expand_dims(tf.matrix_diag(lengthscales ** 2), 0) + Xcov)
    Lvec = tf.matrix_triangular_solve(chols, vec)
    q = tf.reduce_sum(Lvec ** 2, [1])

    chol_diags = tf.matrix_diag_part(chols)  # N x D
    half_log_dets = tf.reduce_sum(tf.log(chol_diags), 1) - tf.reduce_sum(tf.log(lengthscales))  # N,
    return self.variance * tf.exp(-0.5 * q - tf.expand_dims(half_log_dets, 1))
def Linear_RBF_eKxzKzx(self, Ka, Kb, Z, Xmu, Xcov):
    Xcov = self._slice_cov(Xcov)
    Z, Xmu = self._slice(Z, Xmu)
    lin, rbf = (Ka, Kb) if isinstance(Ka, Linear) else (Kb, Ka)
    if not isinstance(lin, Linear):
        raise TypeError("{in_lin} is not {linear}".format(in_lin=str(type(lin)), linear=str(Linear)))
    if not isinstance(rbf, RBF):
        raise TypeError("{in_rbf} is not {rbf}".format(in_rbf=str(type(rbf)), rbf=str(RBF)))
    if lin.ARD or type(lin.active_dims) is not slice or type(rbf.active_dims) is not slice:
        raise NotImplementedError("Active dims and/or Linear ARD not implemented. "
                                  "Switching to quadrature.")
    D = tf.shape(Xmu)[1]
    M = tf.shape(Z)[0]
    N = tf.shape(Xmu)[0]

    if rbf.ARD:
        lengthscales = rbf.lengthscales
    else:
        lengthscales = tf.zeros((D,), dtype=settings.float_type) + rbf.lengthscales

    lengthscales2 = lengthscales ** 2.0
    const = rbf.variance * lin.variance * tf.reduce_prod(lengthscales)
    gaussmat = Xcov + tf.matrix_diag(lengthscales2)[None, :, :]  # NxDxD
    det = tf.matrix_determinant(gaussmat) ** -0.5  # N

    cgm = tf.cholesky(gaussmat)  # NxDxD
    tcgm = tf.tile(cgm[:, None, :, :], [1, M, 1, 1])
    vecmin = Z[None, :, :] - Xmu[:, None, :]  # NxMxD
    d = tf.matrix_triangular_solve(tcgm, vecmin[:, :, :, None])  # NxMxDx1
    exp = tf.exp(-0.5 * tf.reduce_sum(d ** 2.0, [2, 3]))  # NxM
    # exp = tf.Print(exp, [tf.shape(exp)])

    vecplus = (Z[None, :, :, None] / lengthscales2[None, None, :, None] +
               tf.matrix_solve(Xcov, Xmu[:, :, None])[:, None, :, :])  # NxMxDx1
    mean = tf.cholesky_solve(
        tcgm, tf.matmul(tf.tile(Xcov[:, None, :, :], [1, M, 1, 1]), vecplus))
    mean = mean[:, :, :, 0] * lengthscales2[None, None, :]  # NxMxD
    a = tf.matmul(tf.tile(Z[None, :, :], [N, 1, 1]),
                  mean * exp[:, :, None] * det[:, None, None] * const,
                  transpose_b=True)
    return a + tf.transpose(a, [0, 2, 1])
def quad_eKzx1Kxz2(self, Ka, Kb, Z, Xmu, Xcov):
    # Quadrature for Cov[(Kzx1 - eKzx1)(kxz2 - eKxz2)]
    self._check_quadrature()
    warnings.warn("gpflow.ekernels.Sum: Using numerical quadrature for kernel expectation cross terms.")
    Xmu, Z = self._slice(Xmu, Z)
    Xcov = self._slice_cov(Xcov)
    N, M, HpowD = tf.shape(Xmu)[0], tf.shape(Z)[0], self.num_gauss_hermite_points ** self.input_dim
    xn, wn = mvhermgauss(self.num_gauss_hermite_points, self.input_dim)

    # transform points based on Gaussian parameters
    cholXcov = tf.cholesky(Xcov)  # NxDxD
    Xt = tf.matmul(cholXcov, tf.tile(xn[None, :, :], (N, 1, 1)), transpose_b=True)  # NxDxH**D

    X = 2.0 ** 0.5 * Xt + tf.expand_dims(Xmu, 2)  # NxDxH**D
    Xr = tf.reshape(tf.transpose(X, [2, 0, 1]), (-1, self.input_dim))  # (H**D*N)xD

    cKa, cKb = [tf.reshape(
        k.K(tf.reshape(Xr, (-1, self.input_dim)), Z, presliced=False),
        (HpowD, N, M)
    ) - k.eKxz(Z, Xmu, Xcov)[None, :, :] for k in (Ka, Kb)]  # Centred Kxz
    eKa, eKb = Ka.eKxz(Z, Xmu, Xcov), Kb.eKxz(Z, Xmu, Xcov)

    wr = wn * nppi ** (-self.input_dim * 0.5)
    cc = tf.reduce_sum(cKa[:, :, None, :] * cKb[:, :, :, None] * wr[:, None, None, None], 0)
    cm = eKa[:, None, :] * eKb[:, :, None]
    return cc + tf.transpose(cc, [0, 2, 1]) + cm + tf.transpose(cm, [0, 2, 1])
def base_conditional(Kmn, Kmm, Knn, f, *, full_cov=False, q_sqrt=None, white=False):
    # compute kernel stuff
    num_func = tf.shape(f)[1]  # K
    Lm = tf.cholesky(Kmm)

    # Compute the projection matrix A
    A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)

    # compute the covariance due to the conditioning
    if full_cov:
        fvar = Knn - tf.matmul(A, A, transpose_a=True)
        shape = tf.stack([num_func, 1, 1])
    else:
        fvar = Knn - tf.reduce_sum(tf.square(A), 0)
        shape = tf.stack([num_func, 1])
    fvar = tf.tile(tf.expand_dims(fvar, 0), shape)  # K x N x N or K x N

    # another backsubstitution in the unwhitened case
    if not white:
        A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False)

    # construct the conditional mean
    fmean = tf.matmul(A, f, transpose_a=True)

    if q_sqrt is not None:
        if q_sqrt.get_shape().ndims == 2:
            LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2)  # K x M x N
        elif q_sqrt.get_shape().ndims == 3:
            L = tf.matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1, 0)  # K x M x M
            A_tiled = tf.tile(tf.expand_dims(A, 0), tf.stack([num_func, 1, 1]))
            LTA = tf.matmul(L, A_tiled, transpose_a=True)  # K x M x N
        else:  # pragma: no cover
            raise ValueError("Bad dimension for q_sqrt: %s" %
                             str(q_sqrt.get_shape().ndims))
        if full_cov:
            fvar = fvar + tf.matmul(LTA, LTA, transpose_a=True)  # K x N x N
        else:
            fvar = fvar + tf.reduce_sum(tf.square(LTA), 1)  # K x N
    fvar = tf.transpose(fvar)  # N x K or N x N x K

    return fmean, fvar
def sim_multitask_GP(times, length, noise_vars, K_f, trainfrac):
    """
    Draw from a multitask GP.

    We continue to assume for now that the dim of the input space is 1, i.e. just time.

    M: number of tasks (labs/vitals/time series)
    train_frac: proportion of full M x N data matrix Y to include
    """
    M = np.shape(K_f)[0]
    N = len(times)
    n = N * M
    K_t = OU_kernel_np(length, times)  # just a correlation function
    Sigma = np.diag(noise_vars)

    K = np.kron(K_f, K_t) + np.kron(Sigma, np.eye(N)) + 1e-6 * np.eye(n)
    L_K = np.linalg.cholesky(K)

    y = np.dot(L_K, np.random.normal(0, 1, n))  # draw normal

    # get indices of which time series and which time point, for each element in y
    ind_kf = np.tile(np.arange(M), (N, 1)).flatten('F')  # vec by column
    ind_kx = np.tile(np.arange(N), (M, 1)).flatten()

    # randomly dropout some fraction of fully observed time series
    perm = np.random.permutation(n)
    n_train = int(trainfrac * n)
    train_inds = perm[:n_train]

    y_ = y[train_inds]
    ind_kf_ = ind_kf[train_inds]
    ind_kx_ = ind_kx[train_inds]

    return y_, ind_kf_, ind_kx_
def cholesky(kron_a):
    """Computes the Cholesky decomposition of a given Kronecker-factorized matrix.

    Args:
      kron_a: `TensorTrain` object containing a matrix of size N x N,
        factorized into a Kronecker product of square matrices (all tt-ranks
        are 1 and all tt-cores are square). All the cores must be symmetric
        positive-definite.

    Returns:
      `TensorTrain` object, containing a TT-matrix of size N x N.

    Raises:
      ValueError if the tt-cores of the provided matrix are not square, or the
      tt-ranks are not 1.
    """
    if not _is_kron(kron_a):
        raise ValueError('The argument should be a Kronecker product '
                         '(tt-ranks should be 1)')
    shapes_defined = kron_a.get_shape().is_fully_defined()
    if shapes_defined:
        i_shapes = kron_a.get_raw_shape()[0]
        j_shapes = kron_a.get_raw_shape()[1]
    else:
        i_shapes = ops.raw_shape(kron_a)[0]
        j_shapes = ops.raw_shape(kron_a)[1]

    if shapes_defined:
        if i_shapes != j_shapes:
            raise ValueError('The argument should be a Kronecker product of square '
                             'matrices (tt-cores must be square)')

    cho_cores = []
    for core_idx in range(kron_a.ndims()):
        core = kron_a.tt_cores[core_idx]
        core_cho = tf.cholesky(core[0, :, :, 0])
        cho_cores.append(tf.expand_dims(tf.expand_dims(core_cho, 0), -1))
    res_ranks = kron_a.get_tt_ranks()
    res_shape = kron_a.get_raw_shape()
    return TensorTrain(cho_cores, res_shape, res_ranks)
def _build_entropy(self, weights, means, covars):
    # First build half a square matrix of normals. This avoids re-computing symmetric normals.
    log_normal_probs = util.init_list(0.0, [self.num_components, self.num_components])
    for i in xrange(self.num_components):
        for j in xrange(i, self.num_components):
            for k in xrange(self.num_latent):
                if self.diag_post:
                    normal = util.DiagNormal(means[i, k, :], covars[i, k, :] +
                                             covars[j, k, :])
                else:
                    if i == j:
                        # Compute chol(2S) = sqrt(2)*chol(S).
                        covars_sum = tf.sqrt(2.0) * covars[i, k, :, :]
                    else:
                        # TODO(karl): Can we just stay in cholesky space somehow?
                        covars_sum = tf.cholesky(util.mat_square(covars[i, k, :, :]) +
                                                 util.mat_square(covars[j, k, :, :]))
                    normal = util.CholNormal(means[i, k, :], covars_sum)
                log_normal_probs[i][j] += normal.log_prob(means[j, k, :])

    # Now compute the entropy.
    entropy = 0.0
    for i in xrange(self.num_components):
        weighted_log_probs = util.init_list(0.0, [self.num_components])
        for j in xrange(self.num_components):
            if i <= j:
                weighted_log_probs[j] = tf.log(weights[j]) + log_normal_probs[i][j]
            else:
                weighted_log_probs[j] = tf.log(weights[j]) + log_normal_probs[j][i]
        entropy -= weights[i] * util.logsumexp(tf.stack(weighted_log_probs))

    return entropy
def test_Cholesky(self):
    t = tf.cholesky(np.array(3 * [8, 3, 3, 8]).reshape(3, 2, 2).astype("float32"))
    self.check(t)
def build_backward(self, Y):
    """
    TensorFlow implementation of the inverse mapping
    """
    L = tf.cholesky(tf.transpose(self.A))
    XT = tf.cholesky_solve(L, tf.transpose(Y - self.b))
    return tf.transpose(XT)
def _build_likelihood(self):
    """
    Construct a tensorflow function to compute the bound on the marginal
    likelihood.
    """
    num_inducing = tf.shape(self.Z)[0]
    psi0 = tf.reduce_sum(self.kern.eKdiag(self.X_mean, self.X_var), 0)
    psi1 = self.kern.eKxz(self.Z, self.X_mean, self.X_var)
    psi2 = tf.reduce_sum(self.kern.eKzxKxz(self.Z, self.X_mean, self.X_var), 0)
    Kuu = self.kern.K(self.Z) + tf.eye(num_inducing, dtype=settings.float_type) * settings.numerics.jitter_level
    L = tf.cholesky(Kuu)
    sigma2 = self.likelihood.variance
    sigma = tf.sqrt(sigma2)

    # Compute intermediate matrices
    A = tf.matrix_triangular_solve(L, tf.transpose(psi1), lower=True) / sigma
    tmp = tf.matrix_triangular_solve(L, psi2, lower=True)
    AAT = tf.matrix_triangular_solve(L, tf.transpose(tmp), lower=True) / sigma2
    B = AAT + tf.eye(num_inducing, dtype=settings.float_type)
    LB = tf.cholesky(B)
    log_det_B = 2. * tf.reduce_sum(tf.log(tf.matrix_diag_part(LB)))
    c = tf.matrix_triangular_solve(LB, tf.matmul(A, self.Y), lower=True) / sigma

    # KL[q(x) || p(x)]
    dX_var = self.X_var if len(self.X_var.get_shape()) == 2 else tf.matrix_diag_part(self.X_var)
    NQ = tf.cast(tf.size(self.X_mean), settings.float_type)
    D = tf.cast(tf.shape(self.Y)[1], settings.float_type)
    KL = -0.5 * tf.reduce_sum(tf.log(dX_var)) \
         + 0.5 * tf.reduce_sum(tf.log(self.X_prior_var)) \
         - 0.5 * NQ \
         + 0.5 * tf.reduce_sum((tf.square(self.X_mean - self.X_prior_mean) + dX_var) / self.X_prior_var)

    # compute log marginal bound
    ND = tf.cast(tf.size(self.Y), settings.float_type)
    bound = -0.5 * ND * tf.log(2 * np.pi * sigma2)
    bound += -0.5 * D * log_det_B
    bound += -0.5 * tf.reduce_sum(tf.square(self.Y)) / sigma2
    bound += 0.5 * tf.reduce_sum(tf.square(c))
    bound += -0.5 * D * (tf.reduce_sum(psi0) / sigma2 -
                         tf.reduce_sum(tf.matrix_diag_part(AAT)))
    bound -= KL
    return bound
def likelihood(self, hyp, X_batch, y_batch, monitor=False):
    M = self.M
    Z = self.Z
    m = self.m
    S = self.S

    jitter = self.jitter
    jitter_cov = self.jitter_cov

    N = tf.shape(X_batch)[0]

    logsigma_n = hyp[-1]
    sigma_n = tf.exp(logsigma_n)

    # Compute K_u_inv
    K_u = kernel_tf(Z, Z, hyp[:-1])
    L = tf.cholesky(K_u + np.eye(M) * jitter_cov)
    K_u_inv = tf.matrix_triangular_solve(tf.transpose(L),
                                         tf.matrix_triangular_solve(L, np.eye(M), lower=True),
                                         lower=False)

    K_u_inv_op = self.K_u_inv.assign(K_u_inv)

    # Compute mu
    psi = kernel_tf(Z, X_batch, hyp[:-1])
    K_u_inv_m = tf.matmul(K_u_inv, m)
    MU = tf.matmul(tf.transpose(psi), K_u_inv_m)

    # Compute cov
    Alpha = tf.matmul(K_u_inv, psi)
    COV = kernel_tf(X_batch, X_batch, hyp[:-1]) - tf.matmul(tf.transpose(psi), tf.matmul(K_u_inv, psi)) + \
          tf.matmul(tf.transpose(Alpha), tf.matmul(S, Alpha))

    # Compute COV_inv
    LL = tf.cholesky(COV + tf.eye(N, dtype=tf.float64) * sigma_n + tf.eye(N, dtype=tf.float64) * jitter)
    COV_inv = tf.matrix_triangular_solve(tf.transpose(LL),
                                         tf.matrix_triangular_solve(LL, tf.eye(N, dtype=tf.float64), lower=True),
                                         lower=False)

    # Compute cov(Z, X)
    cov_ZX = tf.matmul(S, Alpha)

    # Update m and S
    alpha = tf.matmul(COV_inv, tf.transpose(cov_ZX))
    m_new = m + tf.matmul(cov_ZX, tf.matmul(COV_inv, y_batch - MU))
    S_new = S - tf.matmul(cov_ZX, alpha)

    if monitor == False:
        m_op = self.m.assign(m_new)
        S_op = self.S.assign(S_new)

    # Compute NLML
    K_u_inv_m = tf.matmul(K_u_inv, m_new)

    NLML = 0.5 * tf.matmul(tf.transpose(m_new), K_u_inv_m) + \
           tf.reduce_sum(tf.log(tf.diag_part(L))) + \
           0.5 * np.log(2. * np.pi) * tf.cast(M, tf.float64)

    train = self.optimizer.minimize(NLML)

    nlml_op = self.nlml.assign(NLML[0, 0])

    return tf.group(*[train, m_op, S_op, nlml_op, K_u_inv_op])
def _build_graph(self, raw_weights, raw_means, raw_covars, raw_inducing_inputs,
                 train_inputs, train_outputs, num_train, test_inputs):
    # First transform all raw variables into their internal form.
    # Use softmax(raw_weights) to keep all weights normalized.
    weights = tf.exp(raw_weights) / tf.reduce_sum(tf.exp(raw_weights))

    if self.diag_post:
        # Use exp(raw_covars) so as to guarantee the diagonal matrix remains positive definite.
        covars = tf.exp(raw_covars)
    else:
        # Use vec_to_tri(raw_covars) so as to only optimize over the lower triangular portion.
        # We note that we will always operate over the cholesky space internally.
        covars_list = [None] * self.num_components
        for i in xrange(self.num_components):
            mat = util.vec_to_tri(raw_covars[i, :, :])
            diag_mat = tf.matrix_diag(tf.matrix_diag_part(mat))
            exp_diag_mat = tf.matrix_diag(tf.exp(tf.matrix_diag_part(mat)))
            covars_list[i] = mat - diag_mat + exp_diag_mat
        covars = tf.stack(covars_list, 0)
    # Both inducing inputs and the posterior means can vary freely so don't change them.
    means = raw_means
    inducing_inputs = raw_inducing_inputs

    # Build the matrices of covariances between inducing inputs.
    kernel_mat = [self.kernels[i].kernel(inducing_inputs[i, :, :])
                  for i in xrange(self.num_latent)]
    kernel_chol = tf.stack([tf.cholesky(k) for k in kernel_mat], 0)

    # Now build the objective function.
    entropy = self._build_entropy(weights, means, covars)
    cross_ent = self._build_cross_ent(weights, means, covars, kernel_chol)
    ell = self._build_ell(weights, means, covars, inducing_inputs,
                          kernel_chol, train_inputs, train_outputs)
    batch_size = tf.to_float(tf.shape(train_inputs)[0])
    nelbo = -((batch_size / num_train) * (entropy + cross_ent) + ell)

    # Build the leave one out loss function.
    loo_loss = self._build_loo_loss(weights, means, covars, inducing_inputs,
                                    kernel_chol, train_inputs, train_outputs)

    # Finally, build the prediction function.
    predictions = self._build_predict(weights, means, covars, inducing_inputs,
                                      kernel_chol, test_inputs)

    return nelbo, loo_loss, predictions