The following 50 code examples, extracted from open source Python projects, illustrate how to use tensorflow.diag().
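For orientation, a minimal usage sketch (not one of the extracted examples) is shown first. It assumes a TensorFlow 1.x environment, where tf.diag is available (the op was removed in TensorFlow 2.x; tf.linalg.diag covers the common vector-to-matrix case): given a vector, tf.diag returns a square matrix with that vector on the main diagonal and zeros elsewhere.

import tensorflow as tf  # TF 1.x assumed

# Build a 3x3 matrix with [1., 2., 3.] on the main diagonal.
d = tf.diag(tf.constant([1., 2., 3.]))

with tf.Session() as sess:
    print(sess.run(d))
    # [[1. 0. 0.]
    #  [0. 2. 0.]
    #  [0. 0. 3.]]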
def _symmetric_matrix_square_root(mat, eps=1e-10):
    """Compute square root of a symmetric matrix.

    Note that this is different from an elementwise square root. We want to
    compute M' where M' = sqrt(mat) such that M' * M' = mat.

    Also note that this method **only** works for symmetric matrices.

    Args:
      mat: Matrix to take the square root of.
      eps: Small epsilon such that any element less than eps will not be square
        rooted to guard against numerical instability.

    Returns:
      Matrix square root of mat.
    """
    # Unlike numpy, tensorflow's return order is (s, u, v)
    s, u, v = tf.svd(mat)
    # sqrt is unstable around 0, just use 0 in such case
    si = tf.where(tf.less(s, eps), s, tf.sqrt(s))
    # Note that the v returned by Tensorflow is v = V
    # (when referencing the equation A = U S V^T)
    # This is unlike Numpy which returns v = V^T
    return tf.matmul(tf.matmul(u, tf.diag(si)), v, transpose_b=True)
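A quick sanity check (not part of the original project) is to square the returned factor and compare it to the input; the sketch below assumes the helper above is in scope and that TensorFlow 1.x is used.

import numpy as np
import tensorflow as tf  # TF 1.x assumed

a = np.random.randn(4, 4).astype(np.float32)
mat = a @ a.T  # a small symmetric positive semi-definite matrix

sqrt_mat = _symmetric_matrix_square_root(tf.constant(mat))
with tf.Session() as sess:
    s = sess.run(sqrt_mat)
print(np.max(np.abs(s @ s - mat)))  # should be close to zero (float32 tolerance)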
def transition(h):
    # compute A, B, o linearization matrices
    with tf.variable_scope("trans"):
        for l in range(2):
            h = ReLU(h, 100, "aggregate_loss" + str(l))
        with tf.variable_scope("A"):
            v, r = tf.split(1, 2, linear(h, z_dim * 2))
            v1 = tf.expand_dims(v, -1)  # (batch, z_dim, 1)
            rT = tf.expand_dims(r, 1)   # (batch, 1, z_dim)
            I = tf.diag([1.] * z_dim)
            A = (I + tf.batch_matmul(v1, rT))  # (z_dim, z_dim) + (batch, z_dim, 1)*(batch, 1, z_dim) (I is broadcasted)
        with tf.variable_scope("B"):
            B = linear(h, z_dim * u_dim)
            B = tf.reshape(B, [-1, z_dim, u_dim])
        with tf.variable_scope("o"):
            o = linear(h, z_dim)
        return A, B, o, v, r
def transition(h, share=None):
    # compute A, B, o linearization matrices
    with tf.variable_scope("trans", reuse=share):
        for l in range(2):
            h = ReLU(h, 100, "aggregate_loss" + str(l))
        with tf.variable_scope("A"):
            v, r = tf.split(1, 2, linear(h, z_dim * 2))
            v1 = tf.expand_dims(v, -1)  # (batch, z_dim, 1)
            rT = tf.expand_dims(r, 1)   # (batch, 1, z_dim)
            I = tf.diag([1.] * z_dim)
            A = (I + tf.batch_matmul(v1, rT))  # (z_dim, z_dim) + (batch, z_dim, 1)*(batch, 1, z_dim) (I is broadcasted)
        with tf.variable_scope("B"):
            B = linear(h, z_dim * u_dim)
            B = tf.reshape(B, [-1, z_dim, u_dim])
        with tf.variable_scope("o"):
            o = linear(h, z_dim)
        return A, B, o, v, r
def prepare(self):
    num_latent = 2
    num_data = 3
    k = gpflow.kernels.Matern32(1) + gpflow.kernels.White(1)
    k.white.variance = 0.01
    X = tf.placeholder(settings.float_type)
    mu = tf.placeholder(settings.float_type)
    Xs = tf.placeholder(settings.float_type)
    sqrt = tf.placeholder(settings.float_type, shape=[num_data, num_latent])

    rng = np.random.RandomState(0)
    X_data = rng.randn(num_data, 1)
    mu_data = rng.randn(num_data, num_latent)
    sqrt_data = rng.randn(num_data, num_latent)
    Xs_data = rng.randn(50, 1)

    feed_dict = {X: X_data, Xs: Xs_data, mu: mu_data, sqrt: sqrt_data}
    k.compile()

    # the chols are diagonal matrices, with the same entries as the diag representation.
    chol = tf.stack([tf.diag(sqrt[:, i]) for i in range(num_latent)])
    chol = tf.transpose(chol, perm=[1, 2, 0])
    return Xs, X, k, mu, sqrt, chol, feed_dict
def setUp(self):
    with self.test_session():
        N = 4
        M = 5
        self.mu = tf.placeholder(settings.float_type, [M, N])
        self.sqrt = tf.placeholder(settings.float_type, [M, N])
        self.K = tf.placeholder(settings.float_type, [M, M])

        self.rng = np.random.RandomState(0)
        self.mu_data = self.rng.randn(M, N)
        self.sqrt_data = self.rng.randn(M, N)
        Ksqrt = self.rng.randn(M, M)
        self.K_data = squareT(Ksqrt) + 1e-6 * np.eye(M)

        self.feed_dict = {
            self.mu: self.mu_data,
            self.sqrt: self.sqrt_data,
            self.K: self.K_data,
        }

        # the chols are diagonal matrices, with the same entries as the diag representation.
        self.chol = tf.stack([tf.diag(self.sqrt[:, i]) for i in range(N)])
        self.chol = tf.transpose(self.chol, perm=[1, 2, 0])
def setUp(self):
    with self.test_session():
        N = 4
        M = 5
        self.mu = tf.placeholder(settings.float_type, [M, N])
        self.sqrt = tf.placeholder(settings.float_type, [M, N])
        self.chol = tf.placeholder(settings.float_type, [M, M, N])
        self.K = tf.placeholder(settings.float_type, [M, M])
        self.Kdiag = tf.placeholder(settings.float_type, [M, M])

        self.rng = np.random.RandomState(0)
        self.mu_data = self.rng.randn(M, N)
        sqrt_diag = self.rng.randn(M)
        self.sqrt_data = np.array([sqrt_diag for _ in range(N)]).T
        sqrt_chol = np.tril(self.rng.randn(M, M))
        self.chol_data = np.rollaxis(np.array([sqrt_chol for _ in range(N)]), 0, 3)

        self.feed_dict = {
            self.mu: np.zeros((M, N)),
            self.sqrt: self.sqrt_data,
            self.chol: self.chol_data,
            self.K: squareT(sqrt_chol),
            self.Kdiag: np.diag(sqrt_diag ** 2),
        }
def _covariance(x, diag):
    """Defines the covariance operation of a matrix.

    Args:
      x: a matrix Tensor. Dimension 0 should contain the number of examples.
      diag: if True, it computes the diagonal covariance.

    Returns:
      A Tensor representing the covariance of x. In the case of
      diagonal matrix just the diagonal is returned.
    """
    num_points = tf.to_float(tf.shape(x)[0])
    x -= tf.reduce_mean(x, 0, keep_dims=True)
    if diag:
        cov = tf.reduce_sum(tf.square(x), 0, keep_dims=True) / (num_points - 1)
    else:
        cov = tf.matmul(x, x, transpose_a=True) / (num_points - 1)
    return cov
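The full-covariance branch above matches NumPy's sample covariance with the same (N - 1) normalization, which gives a simple, purely illustrative way to check it (assuming the function above is in scope and TensorFlow 1.x):

import numpy as np
import tensorflow as tf  # TF 1.x assumed

x_np = np.random.randn(100, 3).astype(np.float32)
cov_tf = _covariance(tf.constant(x_np), diag=False)
with tf.Session() as sess:
    print(np.max(np.abs(sess.run(cov_tf) - np.cov(x_np, rowvar=False))))  # small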
def covar_loss(self, top_states):
    """"""

    n_dims = len(top_states.get_shape().as_list())
    hidden_size = top_states.get_shape().as_list()[-1]
    n_tokens = tf.to_float(self.n_tokens)
    I = tf.diag(tf.ones([hidden_size]))

    if n_dims == 3:
        top_states = top_states * self.tokens_to_keep3D
        n_tokens = self.n_tokens
    elif n_dims == 4:
        top_states = top_states * tf.expand_dims(self.tokens_to_keep3D, 1) * tf.expand_dims(self.tokens_to_keep3D, 2)
        n_tokens = self.n_tokens**2

    top_states = tf.reshape(top_states * self.tokens_to_keep3D, [-1, hidden_size])
    means = tf.reduce_sum(top_states, 0, keep_dims=True) / n_tokens
    centered_states = top_states - means
    covar_mat = tf.matmul(centered_states, centered_states, transpose_a=True) / n_tokens
    off_diag_covar_mat = covar_mat * (1 - I)
    return tf.nn.l2_loss(off_diag_covar_mat)

#=============================================================
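The penalty above keeps only the off-diagonal covariance entries by multiplying with (1 - I); a purely illustrative NumPy version of the same masking (tf.nn.l2_loss(t) computes sum(t**2) / 2):

import numpy as np

covar = np.cov(np.random.randn(100, 6), rowvar=False)
off_diag = covar * (1.0 - np.eye(6))   # zero out the diagonal
penalty = 0.5 * np.sum(off_diag ** 2)  # same quantity as tf.nn.l2_loss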
def update_scipy_svd(self):
    sess = u.get_default_session()
    target0 = sess.run(self.target)

    # A = u.diag(s).v', singular vectors are columns
    # TODO: catch "ValueError: array must not contain infs or NaNs"
    try:
        u0, s0, vt0 = linalg.svd(target0)
        v0 = vt0.T
    except Exception as e:
        print("Got error %s" % (repr(e),))
        if DUMP_BAD_SVD:
            dump32(target0, "badsvd")
        print("gesdd failed, trying gesvd")
        u0, s0, vt0 = linalg.svd(target0, lapack_driver="gesvd")
        v0 = vt0.T

    feed_dict = {self.holder.u: u0,
                 self.holder.v: v0,
                 self.holder.s: s0}
    sess.run(self.update_external_op, feed_dict=feed_dict)
def transition(h):
    # compute A, B, o linearization matrices
    with tf.variable_scope("trans"):
        for l in range(2):
            h = ReLU(h, 100, "l" + str(l))
        with tf.variable_scope("A"):
            v, r = tf.split(1, 2, linear(h, z_dim * 2))
            v1 = tf.expand_dims(v, -1)  # (batch, z_dim, 1)
            rT = tf.expand_dims(r, 1)   # (batch, 1, z_dim)
            I = tf.diag([1.] * z_dim)
            A = (I + tf.batch_matmul(v1, rT))  # (z_dim, z_dim) + (batch, z_dim, 1)*(batch, 1, z_dim) (I is broadcasted)
        with tf.variable_scope("B"):
            B = linear(h, z_dim * u_dim)
            B = tf.reshape(B, [-1, z_dim, u_dim])
        with tf.variable_scope("o"):
            o = linear(h, z_dim)
        return A, B, o, v, r
def transition(h, share=None):
    # compute A, B, o linearization matrices
    with tf.variable_scope("trans", reuse=share):
        for l in range(2):
            h = ReLU(h, 100, "l" + str(l))
        with tf.variable_scope("A"):
            v, r = tf.split(1, 2, linear(h, z_dim * 2))
            v1 = tf.expand_dims(v, -1)  # (batch, z_dim, 1)
            rT = tf.expand_dims(r, 1)   # (batch, 1, z_dim)
            I = tf.diag([1.] * z_dim)
            A = (I + tf.batch_matmul(v1, rT))  # (z_dim, z_dim) + (batch, z_dim, 1)*(batch, 1, z_dim) (I is broadcasted)
        with tf.variable_scope("B"):
            B = linear(h, z_dim * u_dim)
            B = tf.reshape(B, [-1, z_dim, u_dim])
        with tf.variable_scope("o"):
            o = linear(h, z_dim)
        return A, B, o, v, r
def _build_cross_ent(self, weights, means, covars, kernel_chol):
    cross_ent = 0.0
    for i in xrange(self.num_components):
        sum_val = 0.0
        for j in xrange(self.num_latent):
            if self.diag_post:
                # TODO(karl): this is a bit inefficient since we're not making use of the fact
                # that covars is diagonal. A solution most likely involves a custom tf op.
                trace = tf.trace(tf.cholesky_solve(kernel_chol[j, :, :],
                                                   tf.diag(covars[i, j, :])))
            else:
                trace = tf.reduce_sum(util.diag_mul(
                    tf.cholesky_solve(kernel_chol[j, :, :], covars[i, j, :, :]),
                    tf.transpose(covars[i, j, :, :])))

            sum_val += (util.CholNormal(means[i, j, :], kernel_chol[j, :, :]).log_prob(0.0) -
                        0.5 * trace)

        cross_ent += weights[i] * sum_val

    return cross_ent
def build_decoder(self):
    """Inference Network. p(X|h)"""
    with tf.variable_scope("decoder"):
        R = tf.get_variable("R", [self.reader.vocab_size, self.h_dim])
        b = tf.get_variable("b", [self.reader.vocab_size])

        x_i = tf.diag([1.] * self.reader.vocab_size)

        e = -tf.matmul(tf.matmul(self.h, R, transpose_b=True), x_i) + b
        self.p_x_i = tf.squeeze(tf.nn.softmax(e))
def test_whiten(self):
    """
    make sure that predicting using the whitened representation is the
    same as the non-whitened one.
    """
    with self.test_context() as sess:
        rng = np.random.RandomState(0)
        Xs, X, F, k, num_data, feed_dict = self.prepare()
        k.compile(session=sess)

        F_sqrt = tf.placeholder(settings.float_type, [num_data, 1])
        F_sqrt_data = rng.rand(num_data, 1)
        feed_dict[F_sqrt] = F_sqrt_data

        K = k.K(X)
        L = tf.cholesky(K)
        V = tf.matrix_triangular_solve(L, F, lower=True)
        V_chol = tf.matrix_triangular_solve(L, tf.diag(F_sqrt[:, 0]), lower=True)
        V_sqrt = tf.expand_dims(V_chol, 2)

        Fstar_mean, Fstar_var = gpflow.conditionals.conditional(
            Xs, X, k, F, q_sqrt=F_sqrt)
        Fstar_w_mean, Fstar_w_var = gpflow.conditionals.conditional(
            Xs, X, k, V, q_sqrt=V_sqrt, white=True)

        mean_difference = sess.run(Fstar_w_mean - Fstar_mean, feed_dict=feed_dict)
        var_difference = sess.run(Fstar_w_var - Fstar_var, feed_dict=feed_dict)

        assert_allclose(mean_difference, 0, atol=4)
        assert_allclose(var_difference, 0, atol=4)
def _define_distance_to_clusters(self, data):
    """Defines the Mahalanobis distance to the assigned Gaussian."""
    # TODO(xavigonzalvo): reuse (input - mean) * cov^-1 * (input -
    # mean) from log probability function.
    self._all_scores = []
    for shard in data:
        all_scores = []
        shard = tf.expand_dims(shard, 0)
        for c in xrange(self._num_classes):
            if self._covariance_type == FULL_COVARIANCE:
                cov = self._covs[c, :, :]
            elif self._covariance_type == DIAG_COVARIANCE:
                cov = tf.diag(self._covs[c, :])
            inverse = tf.matrix_inverse(cov + self._min_var)
            inv_cov = tf.tile(
                tf.expand_dims(inverse, 0),
                tf.pack([self._num_examples, 1, 1]))
            diff = tf.transpose(shard - self._means[c, :, :], perm=[1, 0, 2])
            m_left = tf.batch_matmul(diff, inv_cov)
            all_scores.append(tf.sqrt(tf.batch_matmul(
                m_left, tf.transpose(diff, perm=[0, 2, 1]))))
        self._all_scores.append(tf.reshape(
            tf.concat(1, all_scores),
            tf.pack([self._num_examples, self._num_classes])))

    # Distance to the associated class.
    self._all_scores = tf.concat(0, self._all_scores)
    assignments = tf.concat(0, self.assignments())
    rows = tf.to_int64(tf.range(0, self._num_examples))
    indices = tf.concat(1, [tf.expand_dims(rows, 1),
                            tf.expand_dims(assignments, 1)])
    self._scores = tf.gather_nd(self._all_scores, indices)
def _define_distance_to_clusters(self, data):
    """Defines the Mahalanobis distance to the assigned Gaussian."""
    # TODO(xavigonzalvo): reuse (input - mean) * cov^-1 * (input -
    # mean) from log probability function.
    self._all_scores = []
    for shard in data:
        all_scores = []
        shard = tf.expand_dims(shard, 0)
        for c in xrange(self._num_classes):
            if self._covariance_type == FULL_COVARIANCE:
                cov = self._covs[c, :, :]
            elif self._covariance_type == DIAG_COVARIANCE:
                cov = tf.diag(self._covs[c, :])
            inverse = tf.matrix_inverse(cov + self._min_var)
            inv_cov = tf.tile(
                tf.expand_dims(inverse, 0),
                tf.stack([self._num_examples, 1, 1]))
            diff = tf.transpose(shard - self._means[c, :, :], perm=[1, 0, 2])
            m_left = tf.batch_matmul(diff, inv_cov)
            all_scores.append(tf.sqrt(tf.batch_matmul(
                m_left, tf.transpose(diff, perm=[0, 2, 1]))))
        self._all_scores.append(
            tf.reshape(
                tf.concat(1, all_scores),
                tf.stack([self._num_examples, self._num_classes])))

    # Distance to the associated class.
    self._all_scores = tf.concat(0, self._all_scores)
    assignments = tf.concat(0, self.assignments())
    rows = tf.to_int64(tf.range(0, self._num_examples))
    indices = tf.concat(1, [tf.expand_dims(rows, 1),
                            tf.expand_dims(assignments, 1)])
    self._scores = tf.gather_nd(self._all_scores, indices)
def build_model(self):
    batch_size, input_noise_size, seq_size, vocab_size = \
        self.batch_size, self.input_noise_size, \
        self.seq_size, self.vocab_size

    embedding = tf.diag(np.ones((vocab_size, ), dtype=np.float32))
    self.embedding = embedding

    input_noise = tf.placeholder(tf.float32, [batch_size, input_noise_size])
    input_noise_one_sent = tf.placeholder(tf.float32, [1, input_noise_size])
    self.input_noise = input_noise
    self.input_noise_one_sent = input_noise_one_sent

    real_sent = tf.placeholder(tf.int32, [batch_size, seq_size])
    input_sentence = tf.nn.embedding_lookup(embedding, real_sent)
    self.real_sent = real_sent

    _, gen_vars = self.build_generator(input_noise, is_train = True)
    generated_sent, _ = self.build_generator(input_noise, reuse = True)
    sent_generator, _ = self.build_generator(input_noise_one_sent, reuse = True)
    self.gen_vars = gen_vars
    self.generated_sent = generated_sent
    self.sent_generator = sent_generator

    _, disc_vars = self.build_discriminator(input_sentence, is_train = True)
    desc_decision_fake, _ = self.build_discriminator(generated_sent, reuse = True)
    disc_decision_real, _ = self.build_discriminator(input_sentence, reuse = True)
    self.disc_vars = disc_vars
    self.desc_decision_fake = desc_decision_fake
    self.disc_decision_real = disc_decision_real

    self.gen_cost = 1. - desc_decision_fake
    self.disc_cost = 1. - disc_decision_real * (1. - desc_decision_fake)
def compute_loss_reg(self, sim_reg_mat, offset_label):
    sim_score_mat, p_reg_mat, l_reg_mat = tf.split(2, 3, sim_reg_mat)
    sim_score_mat = tf.reshape(sim_score_mat, [self.batch_size, self.batch_size])
    l_reg_mat = tf.reshape(l_reg_mat, [self.batch_size, self.batch_size])
    p_reg_mat = tf.reshape(p_reg_mat, [self.batch_size, self.batch_size])

    # unit matrix scaled by -2
    I_2 = tf.diag(tf.constant(-2.0, shape=[self.batch_size]))
    all1 = tf.constant(1.0, shape=[self.batch_size, self.batch_size])
    #              | -1  1  1 ... |
    #   mask_mat = |  1 -1  1 ... |
    #              |  1  1 -1 ... |
    mask_mat = tf.add(I_2, all1)

    # loss cls, not considering iou
    I = tf.diag(tf.constant(1.0, shape=[self.batch_size]))
    I_half = tf.diag(tf.constant(0.5, shape=[self.batch_size]))
    batch_para_mat = tf.constant(self.alpha, shape=[self.batch_size, self.batch_size])
    para_mat = tf.add(I, batch_para_mat)

    loss_mat = tf.log(tf.add(all1, tf.exp(tf.mul(mask_mat, sim_score_mat))))
    loss_mat = tf.mul(loss_mat, para_mat)
    loss_align = tf.reduce_mean(loss_mat)

    # regression loss
    l_reg_diag = tf.matmul(tf.mul(l_reg_mat, I), tf.constant(1.0, shape=[self.batch_size, 1]))
    p_reg_diag = tf.matmul(tf.mul(p_reg_mat, I), tf.constant(1.0, shape=[self.batch_size, 1]))
    offset_pred = tf.concat(1, (p_reg_diag, l_reg_diag))
    loss_reg = tf.reduce_mean(tf.abs(tf.sub(offset_pred, offset_label)))

    loss = tf.add(tf.mul(self.lambda_regression, loss_reg), loss_align)
    return loss, offset_pred, loss_reg
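The mask construction above is a diagonal trick: tf.diag of a constant -2.0 vector added to an all-ones matrix yields +1 everywhere except -1 on the diagonal. A NumPy sketch of the same idea (batch size 3, purely illustrative):

import numpy as np

mask = np.diag([-2.0] * 3) + np.ones((3, 3))
# [[-1.  1.  1.]
#  [ 1. -1.  1.]
#  [ 1.  1. -1.]]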
def pseudo_inverse(mat, eps=1e-10):
    """Computes pseudo-inverse of mat, treating eigenvalues below eps as 0."""
    s, u, v = tf.svd(mat)
    eps = 1e-10  # zero threshold for eigenvalues
    si = tf.where(tf.less(s, eps), s, 1. / s)
    return u @ tf.diag(si) @ tf.transpose(v)
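For a well-conditioned input the result should agree with np.linalg.pinv; a quick, purely illustrative check (assuming the function above is in scope and TensorFlow 1.x):

import numpy as np
import tensorflow as tf  # TF 1.x assumed

m = np.random.randn(5, 5).astype(np.float32)
pinv_tf = pseudo_inverse(tf.constant(m))
with tf.Session() as sess:
    print(np.max(np.abs(sess.run(pinv_tf) - np.linalg.pinv(m))))  # small (float32 tolerance)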
def symsqrt(mat, eps=1e-7):
    """Symmetric square root."""
    s, u, v = tf.svd(mat)
    # sqrt is unstable around 0, just use 0 in such case
    print("Warning, cutting off at eps")
    si = tf.where(tf.less(s, eps), s, tf.sqrt(s))
    return u @ tf.diag(si) @ tf.transpose(v)
def pseudo_inverse_sqrt(mat, eps=1e-7):
    """half pseudo-inverse"""
    s, u, v = tf.svd(mat)
    # zero threshold for eigenvalues
    si = tf.where(tf.less(s, eps), s, 1. / tf.sqrt(s))
    return u @ tf.diag(si) @ tf.transpose(v)
def pseudo_inverse_sqrt2(svd, eps=1e-7):
    """half pseudo-inverse, accepting existing values"""
    # zero threshold for eigenvalues
    if svd.__class__.__name__ == 'SvdTuple':
        (s, u, v) = (svd.s, svd.u, svd.v)
    elif svd.__class__.__name__ == 'SvdWrapper':
        (s, u, v) = (svd.s, svd.u, svd.v)
    else:
        assert False, "Unknown type"
    si = tf.where(tf.less(s, eps), s, 1. / tf.sqrt(s))
    return u @ tf.diag(si) @ tf.transpose(v)
def pseudo_inverse2(svd, eps=1e-7):
    """pseudo-inverse, accepting existing values"""
    # use float32 machine precision as cut-off (works for MKL)
    # https://www.wolframcloud.com/objects/927b2aa5-de9c-46f5-89fe-c4a58aa4c04b
    if svd.__class__.__name__ == 'SvdTuple':
        (s, u, v) = (svd.s, svd.u, svd.v)
    elif svd.__class__.__name__ == 'SvdWrapper':
        (s, u, v) = (svd.s, svd.u, svd.v)
    else:
        assert False, "Unknown type"
    max_eigen = tf.reduce_max(s)
    si = tf.where(s / max_eigen < eps, 0. * s, 1. / s)
    return u @ tf.diag(si) @ tf.transpose(v)
def regularized_inverse2(svd, L=1e-3):
    """Regularized inverse, working from SVD"""
    if svd.__class__.__name__ == 'SvdTuple' or svd.__class__.__name__ == 'SvdWrapper':
        (s, u, v) = (svd.s, svd.u, svd.v)
    else:
        assert False, "Unknown type"
    max_eigen = tf.reduce_max(s)
    # max_eigen = tf.Print(max_eigen, [max_eigen], "max_eigen")
    # si = 1/(s + L*tf.ones_like(s)/max_eigen)
    si = 1 / (s + L * tf.ones_like(s))
    return u @ tf.diag(si) @ tf.transpose(v)
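For a symmetric positive-definite matrix, rebuilding with 1/(s + L) on the diagonal is the SVD form of the Tikhonov-regularized inverse (M + L*I)^-1; a purely illustrative NumPy check of that identity:

import numpy as np

a = np.random.randn(4, 4)
mat = a @ a.T + np.eye(4)  # symmetric positive definite
L = 1e-3
u, s, vt = np.linalg.svd(mat)
reg_inv = u @ np.diag(1.0 / (s + L)) @ vt
print(np.max(np.abs(reg_inv - np.linalg.inv(mat + L * np.eye(4)))))  # close to zero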
def regularized_inverse3(svd, L=1e-3):
    """Unbiased version of regularized_inverse2"""
    if svd.__class__.__name__ == 'SvdTuple' or svd.__class__.__name__ == 'SvdWrapper':
        (s, u, v) = (svd.s, svd.u, svd.v)
    else:
        assert False, "Unknown type"
    if L.__class__.__name__ == 'Var':
        L = L.var
    max_eigen = tf.reduce_max(s)
    # max_eigen = tf.Print(max_eigen, [max_eigen], "max_eigen")
    # si = 1/(s + L*tf.ones_like(s)/max_eigen)
    si = (1 + L * tf.ones_like(s)) / (s + L * tf.ones_like(s))
    return u @ tf.diag(si) @ tf.transpose(v)
def regularized_inverse4(svd, L=1e-3):
    """Uses relative norm"""
    if svd.__class__.__name__ == 'SvdTuple' or svd.__class__.__name__ == 'SvdWrapper':
        (s, u, v) = (svd.s, svd.u, svd.v)
    else:
        assert False, "Unknown type"
    if L.__class__.__name__ == 'Var':
        L = L.var
    max_eigen = tf.reduce_max(s)
    L = L / max_eigen
    si = (1 + L * tf.ones_like(s)) / (s + L * tf.ones_like(s))
    # si = tf.ones_like(s)
    return u @ tf.diag(si) @ tf.transpose(v)
def Identity(n, dtype=None, name=None):
    """Identity matrix of size n."""
    if hasattr(n, "shape"):  # got a Tensor
        nn = fix_shape(n.shape)
        assert nn[0] == nn[1]
        n = nn[0]
    if not dtype:
        dtype = default_dtype
    return tf.diag(tf.ones((n,), dtype=dtype), name=name)
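The construction above reduces to tf.diag of an all-ones vector, i.e. an identity matrix; a minimal check (assuming TensorFlow 1.x):

import numpy as np
import tensorflow as tf  # TF 1.x assumed

eye5 = tf.diag(tf.ones((5,), dtype=tf.float32))
with tf.Session() as sess:
    assert np.array_equal(sess.run(eye5), np.eye(5, dtype=np.float32))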
def pseudo_inverse_stable(svd, eps=1e-7):
    """pseudo-inverse, accepting existing values"""
    # use float32 machine precision as cut-off (works for MKL)
    # https://www.wolframcloud.com/objects/927b2aa5-de9c-46f5-89fe-c4a58aa4c04b
    if svd.__class__.__name__ == 'SvdTuple':
        (s, u, v) = (svd.s, svd.u, svd.v)
    elif svd.__class__.__name__ == 'SvdWrapper':
        (s, u, v) = (svd.s, svd.u, svd.v)
    else:
        assert False, "Unknown type"
    max_eigen = tf.reduce_max(s)
    si = tf.where(s / max_eigen < eps, 0. * s, tf.pow(s, -0.9))
    return u @ tf.diag(si) @ tf.transpose(v)

# todo: rename l to L
def cachedGpuIdentityRegularizer(n, Lambda):
    global regularizer_cache
    n = int(n)
    if (n, Lambda) not in regularizer_cache:
        numpy_diag = Lambda * np.diag(np.ones([n]))
        numpy_diag = numpy_diag.astype(default_np_dtype)
        with tf.device("/gpu:0"):
            regularizer_cache[(n, Lambda)] = tf.constant(numpy_diag)
    return regularizer_cache[(n, Lambda)]

# helper utilities