The following 38 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.matrix_diag_part().
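Before the project examples, here is a minimal sketch of the op itself (TensorFlow 1.x naming; in TensorFlow 2.x the same op lives at tf.linalg.diag_part). The input values below are made up for illustration.

import tensorflow as tf

# Minimal sketch (TF 1.x name; tf.linalg.diag_part in TF 2.x).
# For a batched input of shape [..., M, N], the op returns the main
# diagonal of each innermost matrix, shape [..., min(M, N)].
x = tf.constant([[1., 2.],
                 [3., 4.]])
diag = tf.matrix_diag_part(x)

with tf.Session() as sess:
    print(sess.run(diag))  # [1. 4.]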
def top_K_loss(self, sentence, image, K=50, margin=0.4):
    sim_matrix = tf.matmul(sentence, image, transpose_b=True)
    s_square = tf.reduce_sum(tf.square(sentence), axis=1)
    im_square = tf.reduce_sum(tf.square(image), axis=1)
    d = tf.reshape(s_square, [-1, 1]) - 2 * sim_matrix + tf.reshape(im_square, [1, -1])
    positive = tf.stack([tf.matrix_diag_part(d)] * K, axis=1)
    length = tf.shape(d)[-1]
    d = tf.matrix_set_diag(d, 8 * tf.ones([length]))
    sen_loss_K, _ = tf.nn.top_k(-1.0 * d, K, sorted=False)  # note: this is negative value
    im_loss_K, _ = tf.nn.top_k(tf.transpose(-1.0 * d), K, sorted=False)  # note: this is negative value
    sentence_center_loss = tf.nn.relu(positive + sen_loss_K + margin)
    image_center_loss = tf.nn.relu(positive + im_loss_K + margin)
    self.d_neg = tf.reduce_mean((sen_loss_K + im_loss_K) / -2.0)
    self.d_pos = tf.reduce_mean(positive)
    self.endpoint['debug/im_loss_topK'] = -1.0 * im_loss_K
    self.endpoint['debug/sen_loss_topK'] = -1.0 * sen_loss_K
    self.endpoint['debug/d_Matrix'] = d
    self.endpoint['debug/positive'] = positive
    self.endpoint['debug/s_center_loss'] = sentence_center_loss
    self.endpoint['debug/i_center_loss'] = image_center_loss
    self.endpoint['debug/S'] = sim_matrix
    self.endpoint['debug/sentence_square'] = s_square
    self.endpoint['debug/image_square'] = im_square
    return tf.reduce_sum(sentence_center_loss), tf.reduce_sum(image_center_loss)
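Most of the top_K_loss variants in this section share one pattern: build a pairwise squared-distance matrix d[i, j] = ||s_i||^2 - 2 s_i . v_j + ||v_j||^2 from dot products, read the matched (positive) pair distances off its diagonal with tf.matrix_diag_part, then overwrite the diagonal so top_k only picks mismatched (negative) pairs. A hedged standalone sketch of just that step, with illustrative shapes rather than the original model code:

import tensorflow as tf

# Sketch of the shared pattern (assumed batch of 8, feature dim 16).
sentence = tf.random_normal([8, 16])
image = tf.random_normal([8, 16])

sim = tf.matmul(sentence, image, transpose_b=True)
s_sq = tf.reduce_sum(tf.square(sentence), axis=1)
i_sq = tf.reduce_sum(tf.square(image), axis=1)
# d[i, j] = ||s_i||^2 - 2 s_i . v_j + ||v_j||^2, so the diagonal d[i, i]
# holds the squared distances of the matched (positive) pairs.
d = tf.reshape(s_sq, [-1, 1]) - 2 * sim + tf.reshape(i_sq, [1, -1])

positives = tf.matrix_diag_part(d)  # matched-pair distances
# Push the diagonal to a large constant so top_k of -d skips positives.
d_no_diag = tf.matrix_set_diag(d, 8 * tf.ones([tf.shape(d)[-1]]))
hard_negatives, _ = tf.nn.top_k(-d_no_diag, k=5)  # 5 closest mismatched pairs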
def top_K_loss(self, sentence, image, K=50, margin=0.3):
    sim_matrix = tf.matmul(sentence, image, transpose_b=True)
    s_square = tf.reduce_sum(tf.square(sentence), axis=1)
    im_square = tf.reduce_sum(tf.square(image), axis=1)
    d = tf.reshape(s_square, [-1, 1]) - 2 * sim_matrix + tf.reshape(im_square, [1, -1])
    positive = tf.stack([tf.matrix_diag_part(d)] * K, axis=1)
    length = tf.shape(d)[-1]
    d = tf.matrix_set_diag(d, 8 * tf.ones([length]))
    sen_loss_K, _ = tf.nn.top_k(-1.0 * d, K, sorted=False)  # note: this is negative value
    im_loss_K, _ = tf.nn.top_k(tf.transpose(-1.0 * d), K, sorted=False)  # note: this is negative value
    sentence_center_loss = tf.nn.relu(positive + sen_loss_K + margin)
    image_center_loss = tf.nn.relu(positive + im_loss_K + margin)
    self.d_neg = tf.reduce_mean((sen_loss_K + im_loss_K) / -2.0)
    self.d_pos = tf.reduce_mean(positive)
    self.endpoint['debug/im_loss_topK'] = -1.0 * im_loss_K
    self.endpoint['debug/sen_loss_topK'] = -1.0 * sen_loss_K
    self.endpoint['debug/d_Matrix'] = d
    self.endpoint['debug/positive'] = positive
    self.endpoint['debug/s_center_loss'] = sentence_center_loss
    self.endpoint['debug/i_center_loss'] = image_center_loss
    self.endpoint['debug/S'] = sim_matrix
    self.endpoint['debug/sentence_square'] = s_square
    self.endpoint['debug/image_square'] = im_square
    return tf.reduce_sum(sentence_center_loss), tf.reduce_sum(image_center_loss)
def top_K_loss_margin(self, sentence, image, K=50, margin=0.3):
    sim_matrix = tf.matmul(sentence, image, transpose_b=True)
    s_square = tf.reduce_sum(tf.square(sentence), axis=1)
    im_square = tf.reduce_sum(tf.square(image), axis=1)
    d = tf.reshape(s_square, [-1, 1]) - 2 * sim_matrix + tf.reshape(im_square, [1, -1])
    positive = tf.stack([tf.matrix_diag_part(d)] * K, axis=1)
    length = tf.shape(d)[-1]
    d = tf.matrix_set_diag(d, 8 * tf.ones([length]))
    flag = 8 - 7 * tf.sign(tf.nn.relu(self.sen_margin - self.sen_similarity))
    sen_loss_K, _ = tf.nn.top_k(-1.0 * d * flag, K, sorted=False)  # note: this is negative value
    im_loss_K, _ = tf.nn.top_k(
        tf.transpose(-1.0 * d * flag) * tf.sign(tf.nn.relu(self.image_margin - self.im_similarity)),
        K, sorted=False)  # note: this is negative value
    sentence_center_loss = tf.nn.relu(positive + sen_loss_K + margin)
    image_center_loss = tf.nn.relu(positive + im_loss_K + margin)
    self.d_neg = tf.reduce_mean((sen_loss_K + im_loss_K) / -2.0)
    self.d_pos = tf.reduce_mean(positive)
    self.endpoint['debug/im_loss_topK'] = -1.0 * im_loss_K
    self.endpoint['debug/sen_loss_topK'] = -1.0 * sen_loss_K
    self.endpoint['debug/d_Matrix'] = d
    self.endpoint['debug/positive'] = positive
    self.endpoint['debug/s_center_loss'] = sentence_center_loss
    self.endpoint['debug/i_center_loss'] = image_center_loss
    self.endpoint['debug/S'] = sim_matrix
    self.endpoint['debug/sentence_square'] = s_square
    self.endpoint['debug/image_square'] = im_square
    return tf.reduce_sum(sentence_center_loss), tf.reduce_sum(image_center_loss)
def top_K_loss(self, sentence, image, K=30, margin=0.3):
    sim_matrix = tf.matmul(sentence, image, transpose_b=True)
    bs = tf.shape(sim_matrix)[0]
    s_square = tf.reduce_sum(tf.square(sentence), axis=1)
    im_square = tf.reduce_sum(tf.square(image), axis=1)
    d = tf.reshape(s_square, [-1, 1]) - 2 * sim_matrix + tf.reshape(im_square, [1, -1])
    positive = tf.stack([tf.matrix_diag_part(d)] * K, 1)
    length = tf.shape(d)[-1]
    d = tf.matrix_set_diag(d, 100 * tf.ones([length]))
    sen_loss_K, _ = tf.nn.top_k(-d, K, sorted=False)
    im_loss_K, _ = tf.nn.top_k(tf.transpose(-d), K, sorted=False)
    sentence_center_loss = tf.nn.relu(sen_loss_K + positive + margin)
    image_center_loss = tf.nn.relu(im_loss_K + positive + margin)
    self.d_neg = tf.reduce_mean(-sen_loss_K - im_loss_K) / 2
    self.d_pos = tf.reduce_mean(positive)
    self.endpoint['debug/sentence_center_loss'] = sentence_center_loss
    self.endpoint['debug/image_center_loss'] = image_center_loss
    self.endpoint['debug/sim_matrix'] = sim_matrix
    self.endpoint['debug/sen_loss_K'] = -sen_loss_K
    self.endpoint['debug/image_loss_K'] = -im_loss_K
    self.endpoint['debug/distance'] = d
    self.endpoint['debug/positive'] = positive
    return tf.reduce_sum(sentence_center_loss), tf.reduce_sum(image_center_loss)
def top_K_loss_margin(self, sentence, image, K=50, margin=0.3):
    sim_matrix = tf.matmul(sentence, image, transpose_b=True)
    s_square = tf.reduce_sum(tf.square(sentence), axis=1)
    im_square = tf.reduce_sum(tf.square(image), axis=1)
    d = tf.reshape(s_square, [-1, 1]) - 2 * sim_matrix + tf.reshape(im_square, [1, -1])
    positive = tf.stack([tf.matrix_diag_part(d)] * K, axis=1)
    length = tf.shape(d)[-1]
    d = tf.matrix_set_diag(d, 8 * tf.ones([length]))
    flag = 8 - 7 * tf.sign(tf.nn.relu(self.sen_margin - self.sen_similarity))
    sen_loss_K, _ = tf.nn.top_k(-1.0 * d * flag, K, sorted=False)  # note: this is negative value
    im_loss_K, _ = tf.nn.top_k(tf.transpose(-1.0 * d * flag), K, sorted=False)  # note: this is negative value
    sentence_center_loss = tf.nn.relu(positive + sen_loss_K + margin)
    image_center_loss = tf.nn.relu(positive + im_loss_K + margin)
    self.d_neg = tf.reduce_mean((sen_loss_K + im_loss_K) / -2.0)
    self.d_pos = tf.reduce_mean(positive)
    self.endpoint['debug/im_loss_topK'] = -1.0 * im_loss_K
    self.endpoint['debug/sen_loss_topK'] = -1.0 * sen_loss_K
    self.endpoint['debug/d_Matrix'] = d
    self.endpoint['debug/positive'] = positive
    self.endpoint['debug/s_center_loss'] = sentence_center_loss
    self.endpoint['debug/i_center_loss'] = image_center_loss
    self.endpoint['debug/S'] = sim_matrix
    self.endpoint['debug/sentence_square'] = s_square
    self.endpoint['debug/image_square'] = im_square
    return tf.reduce_sum(sentence_center_loss), tf.reduce_sum(image_center_loss)
def top_K_loss(self, sentence, image, K=30, margin=0.5):
    sim_matrix = tf.matmul(sentence, image, transpose_b=True)
    s_square = tf.reduce_sum(tf.square(sentence), axis=1)
    im_square = tf.reduce_sum(tf.square(image), axis=1)
    d = tf.reshape(s_square, [-1, 1]) - 2 * sim_matrix + tf.reshape(im_square, [1, -1])
    positive = tf.stack([tf.matrix_diag_part(d)] * K, axis=1)
    length = tf.shape(d)[-1]
    d = tf.matrix_set_diag(d, 8 * tf.ones([length]))
    sen_loss_K, _ = tf.nn.top_k(-1.0 * d, K, sorted=False)  # note: this is negative value
    im_loss_K, _ = tf.nn.top_k(tf.transpose(-1.0 * d), K, sorted=False)  # note: this is negative value
    sentence_center_loss = tf.nn.relu(positive + sen_loss_K + margin)
    image_center_loss = tf.nn.relu(positive + im_loss_K + margin)
    self.d_neg = (sen_loss_K + im_loss_K) / -2.0
    self.d_pos = positive
    self.endpoint['debug/im_loss_topK'] = -1.0 * im_loss_K
    self.endpoint['debug/sen_loss_topK'] = -1.0 * sen_loss_K
    self.endpoint['debug/d_Matrix'] = d
    self.endpoint['debug/positive'] = positive
    self.endpoint['debug/s_center_loss'] = sentence_center_loss
    self.endpoint['debug/i_center_loss'] = image_center_loss
    self.endpoint['debug/S'] = sim_matrix
    self.endpoint['debug/sentence_square'] = s_square
    self.endpoint['debug/image_square'] = im_square
    return tf.reduce_sum(sentence_center_loss), tf.reduce_sum(image_center_loss)
def top_K_loss(self, sentence, image, K=50, margin=0.1):
    sim_matrix = tf.matmul(sentence, image, transpose_b=True)
    s_square = tf.reduce_sum(tf.square(sentence), axis=1)
    im_square = tf.reduce_sum(tf.square(image), axis=1)
    d = tf.reshape(s_square, [-1, 1]) - 2 * sim_matrix + tf.reshape(im_square, [1, -1])
    positive = tf.stack([tf.matrix_diag_part(d)] * K, axis=1)
    length = tf.shape(d)[-1]
    d = tf.matrix_set_diag(d, 8 * tf.ones([length]))
    sen_loss_K, _ = tf.nn.top_k(-1.0 * d, K, sorted=False)  # note: this is negative value
    im_loss_K, _ = tf.nn.top_k(tf.transpose(-1.0 * d), K, sorted=False)  # note: this is negative value
    sentence_center_loss = tf.nn.relu(positive + sen_loss_K + margin)
    image_center_loss = tf.nn.relu(positive + im_loss_K + margin)
    self.d_neg = tf.reduce_mean((sen_loss_K + im_loss_K) / -2.0)
    self.d_pos = tf.reduce_mean(positive)
    self.endpoint['debug/im_loss_topK'] = -1.0 * im_loss_K
    self.endpoint['debug/sen_loss_topK'] = -1.0 * sen_loss_K
    self.endpoint['debug/d_Matrix'] = d
    self.endpoint['debug/positive'] = positive
    self.endpoint['debug/s_center_loss'] = sentence_center_loss
    self.endpoint['debug/i_center_loss'] = image_center_loss
    self.endpoint['debug/S'] = sim_matrix
    self.endpoint['debug/sentence_square'] = s_square
    self.endpoint['debug/image_square'] = im_square
    return tf.reduce_sum(sentence_center_loss), tf.reduce_sum(image_center_loss)
def top_K_loss_margin(self, sentence, image, K=50, margin=0.2):
    sim_matrix = tf.matmul(sentence, image, transpose_b=True)
    s_square = tf.reduce_sum(tf.square(sentence), axis=1)
    im_square = tf.reduce_sum(tf.square(image), axis=1)
    d = 1 - tf.sigmoid(sim_matrix)
    positive = tf.stack([tf.matrix_diag_part(d)] * K, axis=1)
    length = tf.shape(d)[-1]
    dd = tf.matrix_set_diag(d, 8 * tf.ones([length]))
    flag = 8 - 7 * tf.sign(tf.nn.relu(self.sen_margin - self.sen_similarity))
    sen_loss_K, _ = tf.nn.top_k(-1.0 * dd * flag, K, sorted=False)  # note: this is negative value
    im_loss_K, _ = tf.nn.top_k(-tf.transpose(1.0 * dd * flag), K, sorted=False)  # note: this is negative value
    sentence_center_loss = -tf.log(1 - positive + 1e-12) - tf.log(-sen_loss_K + 1e-12)
    image_center_loss = -tf.log(1 - positive + 1e-12) - tf.log(-im_loss_K + 1e-12)
    self.d_neg = tf.reduce_mean((sen_loss_K + im_loss_K) / -2.0)
    self.d_pos = tf.reduce_mean(positive)
    self.endpoint['debug/im_loss_topK'] = -1.0 * im_loss_K
    self.endpoint['debug/sen_loss_topK'] = -1.0 * sen_loss_K
    self.endpoint['debug/d_Matrix'] = d
    self.endpoint['debug/positive'] = positive
    self.endpoint['debug/s_center_loss'] = sentence_center_loss
    self.endpoint['debug/i_center_loss'] = image_center_loss
    self.endpoint['debug/S'] = sim_matrix
    self.endpoint['debug/sentence_square'] = s_square
    self.endpoint['debug/image_square'] = im_square
    return tf.reduce_sum(sentence_center_loss), tf.reduce_sum(image_center_loss)
def multivariate_normal(x, mu, L):
    """
    L is the Cholesky decomposition of the covariance.

    x and mu are either vectors (ndim=1) or matrices. In the matrix case, we
    assume independence over the *columns*: the number of rows must match the
    size of L.
    """
    d = x - mu
    alpha = tf.matrix_triangular_solve(L, d, lower=True)
    num_col = 1 if tf.rank(x) == 1 else tf.shape(x)[1]
    num_col = tf.cast(num_col, settings.float_type)
    num_dims = tf.cast(tf.shape(x)[0], settings.float_type)
    ret = -0.5 * num_dims * num_col * np.log(2 * np.pi)
    ret += -num_col * tf.reduce_sum(tf.log(tf.matrix_diag_part(L)))
    ret += -0.5 * tf.reduce_sum(tf.square(alpha))
    return ret
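The function above relies on the identity log N(x | mu, LL^T) = -(D/2) log 2*pi - sum(log diag(L)) - 0.5 * ||L^{-1}(x - mu)||^2. As a hedged sanity check, here is a standalone version with explicit float64 and fixed test data in place of the GPflow settings.float_type configuration (the numbers are made up for illustration):

import numpy as np
import tensorflow as tf

# Standalone sketch of the same log-density computation.
cov = np.array([[2.0, 0.3], [0.3, 1.0]])
L = tf.constant(np.linalg.cholesky(cov))          # Cholesky factor of cov
x = tf.constant([[1.0], [0.5]], dtype=tf.float64)
mu = tf.zeros([2, 1], dtype=tf.float64)

alpha = tf.matrix_triangular_solve(L, x - mu, lower=True)
logp = (-0.5 * 2 * np.log(2 * np.pi)
        - tf.reduce_sum(tf.log(tf.matrix_diag_part(L)))
        - 0.5 * tf.reduce_sum(tf.square(alpha)))
# Should agree with scipy.stats.multivariate_normal.logpdf([1.0, 0.5], cov=cov).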
def _define_full_covariance_probs(self, shard_id, shard):
    """Defines the full covariance probabilities per example in a class.

    Updates a matrix with dimension num_examples X num_classes.

    Args:
      shard_id: id of the current shard.
      shard: current data shard, 1 X num_examples X dimensions.
    """
    diff = shard - self._means
    cholesky = tf.cholesky(self._covs + self._min_var)
    log_det_covs = 2.0 * tf.reduce_sum(tf.log(tf.matrix_diag_part(cholesky)), 1)
    x_mu_cov = tf.square(
        tf.matrix_triangular_solve(
            cholesky, tf.transpose(diff, perm=[0, 2, 1]), lower=True))
    diag_m = tf.transpose(tf.reduce_sum(x_mu_cov, 1))
    self._probs[shard_id] = -0.5 * (
        diag_m + tf.to_float(self._dimensions) * tf.log(2 * np.pi) + log_det_covs)
def _chollogdet(L):
    """Log det of a Cholesky factor, where L is (..., D, D)."""
    ldiag = pos(tf.matrix_diag_part(L))  # keep > 0, and no vanishing gradient
    logdet = 2. * tf.reduce_sum(tf.log(ldiag))
    return logdet
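The identity behind this helper (and the log_det_covs line in the previous example), log|A| = 2 * sum(log diag(L)) for A = LL^T, can be checked with a tiny self-contained snippet (illustrative matrix, not from the source project):

import numpy as np
import tensorflow as tf

# Check: for A = L L^T (Cholesky), log|A| = 2 * sum(log(diag(L))).
A = np.array([[4.0, 1.0], [1.0, 3.0]])
L = tf.constant(np.linalg.cholesky(A))
logdet = 2.0 * tf.reduce_sum(tf.log(tf.matrix_diag_part(L)))
# np.log(np.linalg.det(A)) = log(11) ~ 2.3979, matching logdet.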
def get_L_sym(self, L_vec_var):
    L = tf.reshape(L_vec_var, (-1, self._action_dim, self._action_dim))
    return tf.matrix_band_part(L, -1, 0) - \
        tf.matrix_diag(tf.matrix_diag_part(L)) + \
        tf.matrix_diag(tf.exp(tf.matrix_diag_part(L)))
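This is the usual trick for optimizing over Cholesky factors: keep the strict lower triangle of an unconstrained matrix and exponentiate its diagonal so the diagonal stays strictly positive. A hedged sketch with assumed shapes (action_dim=2 and the input values are made up, not from the original class):

import tensorflow as tf

# Parameterize an unconstrained vector as a valid Cholesky factor.
action_dim = 2
L_vec = tf.constant([[0.5, -1.0, 0.3, 0.7]])  # one flattened 2x2 matrix
L = tf.reshape(L_vec, (-1, action_dim, action_dim))
lower = tf.matrix_band_part(L, -1, 0)          # keep lower triangle
L_chol = (lower
          - tf.matrix_diag(tf.matrix_diag_part(lower))   # drop raw diagonal
          + tf.matrix_diag(tf.exp(tf.matrix_diag_part(lower))))  # exp() > 0
# tf.matmul(L_chol, L_chol, transpose_b=True) is then positive definite.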
def get_e_A_sym(self, P_var, mu_var, policy_mu_var, policy_sigma_var):
    e_A_var1 = self.get_A_sym(P_var, mu_var, policy_mu_var)
    e_A_var2 = -0.5 * tf.reduce_sum(tf.matrix_diag_part(
        tf.matmul(P_var, policy_sigma_var)), 1)
    # e_A_var2 = -0.5 * tf.trace(tf.matmul(P_var, policy_sigma_var))
    return e_A_var1 + e_A_var2
def softmax_topK_loss(self, sentence, image, K=50, margin=0.2):
    sim_matrix = []
    self.sparse_loss = tf.reduce_sum(
        2 - (tf.reduce_sum(tf.nn.top_k(sentence, k=20, sorted=False)[0], axis=1)
             + tf.reduce_sum(tf.nn.top_k(image, k=20, sorted=False)[0], axis=1)))
    with tf.device('cpu:0'):
        for i in range(self.batch_size):
            sim_matrix.append(tf.reduce_sum(tf.abs(sentence - image[i, :]), axis=1))
        d = tf.stack(sim_matrix, axis=1)
    positive = tf.stack([tf.matrix_diag_part(d)] * K, axis=1)
    length = tf.shape(d)[-1]
    d = tf.matrix_set_diag(d, 8 * tf.ones([length]))
    sen_loss_K, _ = tf.nn.top_k(-1.0 * d, K, sorted=False)  # note: this is negative value
    im_loss_K, _ = tf.nn.top_k(tf.transpose(-1.0 * d), K, sorted=False)  # note: this is negative value
    sentence_center_loss = tf.nn.relu(positive + sen_loss_K + margin)
    image_center_loss = tf.nn.relu(positive + im_loss_K + margin)
    self.d_neg = tf.reduce_mean((sen_loss_K + im_loss_K) / -2.0)
    self.d_pos = tf.reduce_mean(positive)
    self.endpoint['debug/im_loss_topK'] = -1.0 * im_loss_K
    self.endpoint['debug/sen_loss_topK'] = -1.0 * sen_loss_K
    self.endpoint['debug/d_Matrix'] = d
    self.endpoint['debug/positive'] = positive
    self.endpoint['debug/s_center_loss'] = sentence_center_loss
    self.endpoint['debug/i_center_loss'] = image_center_loss
    self.endpoint['debug/S'] = sim_matrix
    return tf.reduce_sum(sentence_center_loss), tf.reduce_sum(image_center_loss), self.sparse_loss
def top_K_loss(self, sentence, image, K=50, margin=0.3):
    # change: K=300, but I choose indices 25 to 75 for training,
    # so the real 'K' is 50
    sim_matrix = tf.matmul(sentence, image, transpose_b=True)
    s_square = tf.reduce_sum(tf.square(sentence), axis=1)
    im_square = tf.reduce_sum(tf.square(image), axis=1)
    d = tf.reshape(s_square, [-1, 1]) - 2 * sim_matrix + tf.reshape(im_square, [1, -1])
    positive = tf.stack([tf.matrix_diag_part(d)] * K, axis=1)
    length = tf.shape(d)[-1]
    d = tf.matrix_set_diag(d, 8 * tf.ones([length]))
    sen_loss_K, _ = tf.nn.top_k(-1.0 * d, K, sorted=False)  # note: this is negative value
    im_loss_K, _ = tf.nn.top_k(tf.transpose(-1.0 * d), K, sorted=False)  # note: this is negative value
    #sen_loss_K = sen_loss_K[:, 25:75]
    #im_loss_K = im_loss_K[:, 25:75]
    sentence_center_loss = tf.nn.relu(positive + sen_loss_K + margin)
    image_center_loss = tf.nn.relu(positive + im_loss_K + margin)
    self.d_neg = (sen_loss_K + im_loss_K) / -2.0
    self.d_pos = positive
    self.endpoint['debug/im_distance_topK'] = -1.0 * im_loss_K
    self.endpoint['debug/sen_distance_topK'] = -1.0 * sen_loss_K
    self.endpoint['debug/d_Matrix'] = d
    self.endpoint['debug/positive'] = positive
    self.endpoint['debug/s_center_loss'] = sentence_center_loss
    self.endpoint['debug/i_center_loss'] = image_center_loss
    self.endpoint['debug/S'] = sim_matrix
    self.endpoint['debug/sentence_square'] = s_square
    self.endpoint['debug/image_square'] = im_square
    return tf.reduce_sum(sentence_center_loss), tf.reduce_sum(image_center_loss)
def top_K_loss(self, sentence, image, K=50, margin=0.5, img_input_feat=None, text_input_feat=None):
    sim_matrix = tf.matmul(sentence, image, transpose_b=True)
    s_square = tf.reduce_sum(tf.square(sentence), axis=1)
    im_square = tf.reduce_sum(tf.square(image), axis=1)
    d = tf.reshape(s_square, [-1, 1]) - 2 * sim_matrix + tf.reshape(im_square, [1, -1])
    positive = tf.stack([tf.matrix_diag_part(d)] * K, axis=1)
    length = tf.shape(d)[-1]
    d = tf.matrix_set_diag(d, 8 * tf.ones([length]))
    if img_input_feat is not None:
        img_input_norm1 = img_input_feat / tf.norm(img_input_feat, axis=-1, keep_dims=True)
        S_input_img = tf.matmul(img_input_norm1, img_input_norm1, transpose_b=True)
        img_coeff = 8 - 7 * tf.sign(tf.nn.relu(0.99 - S_input_img))
        sen_loss_K, _ = tf.nn.top_k(-1.0 * d * img_coeff, K, sorted=False)  # note: this is negative value
        self.endpoint['debug/S_input_img'] = S_input_img
        self.endpoint['debug/img_coeff'] = img_coeff
    else:
        sen_loss_K, _ = tf.nn.top_k(-1.0 * d, K, sorted=False)  # note: this is negative value
    if text_input_feat is not None:
        text_input_norm1 = text_input_feat / (tf.norm(text_input_feat, axis=-1, keep_dims=True) + 1e-10)
        S_input_text = tf.matmul(text_input_norm1, text_input_norm1, transpose_b=True)
        text_coeff = 8 - 7 * tf.sign(tf.nn.relu(0.98 - S_input_text))
        im_loss_K, _ = tf.nn.top_k(tf.transpose(-1.0 * d * text_coeff), K, sorted=False)
        self.endpoint['debug/S_input_text'] = S_input_text
        self.endpoint['debug/text_coeff'] = text_coeff
    else:
        im_loss_K, _ = tf.nn.top_k(tf.transpose(-1.0 * d), K, sorted=False)  # note: this is negative value
    sentence_center_loss = tf.nn.relu(positive + sen_loss_K + margin)
    image_center_loss = tf.nn.relu(positive + im_loss_K + margin)
    self.d_neg = (sen_loss_K + im_loss_K) / -2.0
    self.d_pos = positive
    self.endpoint['debug/im_loss_topK'] = -1.0 * im_loss_K
    self.endpoint['debug/sen_loss_topK'] = -1.0 * sen_loss_K
    self.endpoint['debug/d_Matrix'] = d
    self.endpoint['debug/positive'] = positive
    self.endpoint['debug/s_center_loss'] = sentence_center_loss
    self.endpoint['debug/i_center_loss'] = image_center_loss
    self.endpoint['debug/S'] = sim_matrix
    self.endpoint['debug/sentence_square'] = s_square
    self.endpoint['debug/image_square'] = im_square
    return tf.reduce_sum(sentence_center_loss), tf.reduce_sum(image_center_loss)
def _build_likelihood(self):
    """
    q_alpha, q_lambda are variational parameters, size N x R.

    This method computes the variational lower bound on the likelihood,
    which is:

        E_{q(F)} [ \log p(Y|F) ] - KL[ q(F) || p(F) ]

    with

        q(f) = N(f | K alpha + mean, [K^-1 + diag(square(lambda))]^-1).
    """
    K = self.kern.K(self.X)
    K_alpha = tf.matmul(K, self.q_alpha)
    f_mean = K_alpha + self.mean_function(self.X)

    # compute the variance for each of the outputs
    I = tf.tile(tf.expand_dims(tf.eye(self.num_data, dtype=settings.float_type), 0),
                [self.num_latent, 1, 1])
    A = I + tf.expand_dims(tf.transpose(self.q_lambda), 1) * \
        tf.expand_dims(tf.transpose(self.q_lambda), 2) * K
    L = tf.cholesky(A)
    Li = tf.matrix_triangular_solve(L, I)
    tmp = Li / tf.expand_dims(tf.transpose(self.q_lambda), 1)
    f_var = 1. / tf.square(self.q_lambda) - tf.transpose(tf.reduce_sum(tf.square(tmp), 1))

    # some statistics about A are used in the KL
    A_logdet = 2.0 * tf.reduce_sum(tf.log(tf.matrix_diag_part(L)))
    trAi = tf.reduce_sum(tf.square(Li))

    KL = 0.5 * (A_logdet + trAi - self.num_data * self.num_latent +
                tf.reduce_sum(K_alpha * self.q_alpha))

    v_exp = self.likelihood.variational_expectations(f_mean, f_var, self.Y)
    return tf.reduce_sum(v_exp) - KL
def _build_likelihood(self):
    """
    Construct a tensorflow function to compute the bound on the marginal
    likelihood. For a derivation of the terms in here, see the associated
    SGPR notebook.
    """
    num_inducing = len(self.feature)
    num_data = tf.cast(tf.shape(self.Y)[0], settings.float_type)
    output_dim = tf.cast(tf.shape(self.Y)[1], settings.float_type)

    err = self.Y - self.mean_function(self.X)
    Kdiag = self.kern.Kdiag(self.X)
    Kuf = self.feature.Kuf(self.kern, self.X)
    Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
    L = tf.cholesky(Kuu)
    sigma = tf.sqrt(self.likelihood.variance)

    # Compute intermediate matrices
    A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
    AAT = tf.matmul(A, A, transpose_b=True)
    B = AAT + tf.eye(num_inducing, dtype=settings.float_type)
    LB = tf.cholesky(B)
    Aerr = tf.matmul(A, err)
    c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma

    # compute log marginal bound
    bound = -0.5 * num_data * output_dim * np.log(2 * np.pi)
    bound += tf.negative(output_dim) * tf.reduce_sum(tf.log(tf.matrix_diag_part(LB)))
    bound -= 0.5 * num_data * output_dim * tf.log(self.likelihood.variance)
    bound += -0.5 * tf.reduce_sum(tf.square(err)) / self.likelihood.variance
    bound += 0.5 * tf.reduce_sum(tf.square(c))
    bound += -0.5 * output_dim * tf.reduce_sum(Kdiag) / self.likelihood.variance
    bound += 0.5 * output_dim * tf.reduce_sum(tf.matrix_diag_part(AAT))

    return bound
def eKxz(self, Z, Xmu, Xcov):
    """
    Also known as phi_1: <K_{x, Z}>_{q(x)}.

    :param Z: MxD inducing inputs
    :param Xmu: X mean (NxD)
    :param Xcov: NxDxD
    :return: NxM
    """
    # use only active dimensions
    Xcov = self._slice_cov(Xcov)
    Z, Xmu = self._slice(Z, Xmu)
    D = tf.shape(Xmu)[1]
    if self.ARD:
        lengthscales = self.lengthscales
    else:
        lengthscales = tf.zeros((D,), dtype=settings.float_type) + self.lengthscales

    vec = tf.expand_dims(Xmu, 2) - tf.expand_dims(tf.transpose(Z), 0)  # NxDxM
    chols = tf.cholesky(tf.expand_dims(tf.matrix_diag(lengthscales ** 2), 0) + Xcov)
    Lvec = tf.matrix_triangular_solve(chols, vec)
    q = tf.reduce_sum(Lvec ** 2, [1])

    chol_diags = tf.matrix_diag_part(chols)  # N x D
    half_log_dets = tf.reduce_sum(tf.log(chol_diags), 1) - tf.reduce_sum(tf.log(lengthscales))  # N,

    return self.variance * tf.exp(-0.5 * q - tf.expand_dims(half_log_dets, 1))
def test_MatrixDiagPart(self):
    if td._tf_version[:2] >= (0, 12):
        t = tf.matrix_diag_part(self.random(3, 4, 4, 5))
        self.check(t)
def _build_likelihood(self):
    """
    Construct a tensorflow function to compute the bound on the marginal
    likelihood.
    """
    # FITC approximation to the log marginal likelihood is
    # log ( normal( y | mean, K_fitc ) )
    # where K_fitc = Qff + diag( \nu )
    # where Qff = Kfu Kuu^{-1} Kuf
    # with \nu_i = Kff_{i,i} - Qff_{i,i} + \sigma^2

    # We need to compute the Mahalanobis term -0.5* err^T K_fitc^{-1} err
    # (summed over functions).

    # We need to deal with the matrix inverse term.
    # K_fitc^{-1} = ( Qff + \diag( \nu ) )^{-1}
    #             = ( V^T V + \diag( \nu ) )^{-1}
    # Applying the Woodbury identity we obtain
    #             = \diag( \nu^{-1} ) - \diag( \nu^{-1} ) V^T ( I + V \diag( \nu^{-1} ) V^T )^{-1} V \diag( \nu^{-1} )
    # Let \beta = \diag( \nu^{-1} ) err
    # and let \alpha = V \beta
    # then Mahalanobis term = -0.5* ( \beta^T err - \alpha^T Solve( I + V \diag( \nu^{-1} ) V^T, alpha ) )
    err, nu, Luu, L, alpha, beta, gamma = self._build_common_terms()

    mahalanobisTerm = -0.5 * tf.reduce_sum(tf.square(err) / tf.expand_dims(nu, 1)) \
                      + 0.5 * tf.reduce_sum(tf.square(gamma))

    # We need to compute the log normalizing term -N/2 \log 2 pi - 0.5 \log \det( K_fitc )

    # We need to deal with the log determinant term.
    # \log \det( K_fitc ) = \log \det( Qff + \diag( \nu ) )
    #                     = \log \det( V^T V + \diag( \nu ) )
    # Applying the determinant lemma we obtain
    #                     = \log [ \det \diag( \nu ) \det( I + V \diag( \nu^{-1} ) V^T ) ]
    #                     = \log [ \det \diag( \nu ) ] + \log [ \det( I + V \diag( \nu^{-1} ) V^T ) ]
    constantTerm = -0.5 * self.num_data * tf.log(tf.constant(2. * np.pi, settings.float_type))
    logDeterminantTerm = -0.5 * tf.reduce_sum(tf.log(nu)) - tf.reduce_sum(tf.log(tf.matrix_diag_part(L)))
    logNormalizingTerm = constantTerm + logDeterminantTerm

    return mahalanobisTerm + logNormalizingTerm * self.num_latent
def _build_likelihood(self):
    """
    Construct a tensorflow function to compute the bound on the marginal
    likelihood.
    """
    num_inducing = tf.shape(self.Z)[0]
    psi0 = tf.reduce_sum(self.kern.eKdiag(self.X_mean, self.X_var), 0)
    psi1 = self.kern.eKxz(self.Z, self.X_mean, self.X_var)
    psi2 = tf.reduce_sum(self.kern.eKzxKxz(self.Z, self.X_mean, self.X_var), 0)
    Kuu = self.kern.K(self.Z) + tf.eye(num_inducing, dtype=settings.float_type) * settings.numerics.jitter_level
    L = tf.cholesky(Kuu)
    sigma2 = self.likelihood.variance
    sigma = tf.sqrt(sigma2)

    # Compute intermediate matrices
    A = tf.matrix_triangular_solve(L, tf.transpose(psi1), lower=True) / sigma
    tmp = tf.matrix_triangular_solve(L, psi2, lower=True)
    AAT = tf.matrix_triangular_solve(L, tf.transpose(tmp), lower=True) / sigma2
    B = AAT + tf.eye(num_inducing, dtype=settings.float_type)
    LB = tf.cholesky(B)
    log_det_B = 2. * tf.reduce_sum(tf.log(tf.matrix_diag_part(LB)))
    c = tf.matrix_triangular_solve(LB, tf.matmul(A, self.Y), lower=True) / sigma

    # KL[q(x) || p(x)]
    dX_var = self.X_var if len(self.X_var.get_shape()) == 2 else tf.matrix_diag_part(self.X_var)
    NQ = tf.cast(tf.size(self.X_mean), settings.float_type)
    D = tf.cast(tf.shape(self.Y)[1], settings.float_type)
    KL = -0.5 * tf.reduce_sum(tf.log(dX_var)) \
         + 0.5 * tf.reduce_sum(tf.log(self.X_prior_var)) \
         - 0.5 * NQ \
         + 0.5 * tf.reduce_sum((tf.square(self.X_mean - self.X_prior_mean) + dX_var) / self.X_prior_var)

    # compute log marginal bound
    ND = tf.cast(tf.size(self.Y), settings.float_type)
    bound = -0.5 * ND * tf.log(2 * np.pi * sigma2)
    bound += -0.5 * D * log_det_B
    bound += -0.5 * tf.reduce_sum(tf.square(self.Y)) / sigma2
    bound += 0.5 * tf.reduce_sum(tf.square(c))
    bound += -0.5 * D * (tf.reduce_sum(psi0) / sigma2 - tf.reduce_sum(tf.matrix_diag_part(AAT)))
    bound -= KL

    return bound
def _build_graph(self, raw_weights, raw_means, raw_covars, raw_inducing_inputs,
                 train_inputs, train_outputs, num_train, test_inputs):
    # First transform all raw variables into their internal form.
    # Use softmax(raw_weights) to keep all weights normalized.
    weights = tf.exp(raw_weights) / tf.reduce_sum(tf.exp(raw_weights))

    if self.diag_post:
        # Use exp(raw_covars) so as to guarantee the diagonal matrix remains positive definite.
        covars = tf.exp(raw_covars)
    else:
        # Use vec_to_tri(raw_covars) so as to only optimize over the lower triangular portion.
        # We note that we will always operate over the cholesky space internally.
        covars_list = [None] * self.num_components
        for i in xrange(self.num_components):
            mat = util.vec_to_tri(raw_covars[i, :, :])
            diag_mat = tf.matrix_diag(tf.matrix_diag_part(mat))
            exp_diag_mat = tf.matrix_diag(tf.exp(tf.matrix_diag_part(mat)))
            covars_list[i] = mat - diag_mat + exp_diag_mat
        covars = tf.stack(covars_list, 0)

    # Both inducing inputs and the posterior means can vary freely so don't change them.
    means = raw_means
    inducing_inputs = raw_inducing_inputs

    # Build the matrices of covariances between inducing inputs.
    kernel_mat = [self.kernels[i].kernel(inducing_inputs[i, :, :])
                  for i in xrange(self.num_latent)]
    kernel_chol = tf.stack([tf.cholesky(k) for k in kernel_mat], 0)

    # Now build the objective function.
    entropy = self._build_entropy(weights, means, covars)
    cross_ent = self._build_cross_ent(weights, means, covars, kernel_chol)
    ell = self._build_ell(weights, means, covars, inducing_inputs, kernel_chol,
                          train_inputs, train_outputs)
    batch_size = tf.to_float(tf.shape(train_inputs)[0])
    nelbo = -((batch_size / num_train) * (entropy + cross_ent) + ell)

    # Build the leave one out loss function.
    loo_loss = self._build_loo_loss(weights, means, covars, inducing_inputs,
                                    kernel_chol, train_inputs, train_outputs)

    # Finally, build the prediction function.
    predictions = self._build_predict(weights, means, covars, inducing_inputs,
                                      kernel_chol, test_inputs)

    return nelbo, loo_loss, predictions
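A side note on the weight normalization above: the exp/sum expression is exactly a softmax, so (assuming raw_weights is a 1-D vector, as the comment suggests) it could equivalently be written with the built-in op:

import tensorflow as tf

# Equivalent to tf.exp(raw_weights) / tf.reduce_sum(tf.exp(raw_weights))
# for a 1-D vector; tf.nn.softmax normalizes over the last axis by default.
raw_weights = tf.constant([0.1, -0.3, 0.7])
weights = tf.nn.softmax(raw_weights)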