The following 28 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.diag_part().
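Before the project examples, here is a minimal, self-contained sketch of what tf.diag_part() returns, assuming TensorFlow 1.x graph mode (the 3x3 matrix is made up for illustration; in TF 2.x the same op is available as tf.linalg.diag_part):

import numpy as np
import tensorflow as tf

# tf.diag_part(x) extracts the main diagonal of a square matrix.
x = tf.constant(np.arange(9.0).reshape(3, 3))   # [[0,1,2],[3,4,5],[6,7,8]]
diag = tf.diag_part(x)

with tf.Session() as sess:
    print(sess.run(diag))                       # -> [0. 4. 8.]
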
def decov_loss(xs, name='decov_loss'):
    """Decov loss as described in https://arxiv.org/pdf/1511.06068.pdf
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'

    Args:
        xs: 4-D `tensor` [batch_size, height, width, channels], input

    Returns:
        a `float` decov loss
    """
    with tf.name_scope(name):
        x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
        m = tf.reduce_mean(x, 0, True)
        z = tf.expand_dims(x - m, 2)
        corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0, 2, 1])), 0)
        corr_frob_sqr = tf.reduce_sum(tf.square(corr))
        corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
        loss = 0.5 * (corr_frob_sqr - corr_diag_sqr)
        return loss

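A hedged usage sketch for the decov_loss above, assuming the imports from the first snippet; the activation shape and weight mentioned below are made up for illustration:

# Hypothetical usage: decorrelate the channels of a conv feature map.
features = tf.random_normal([32, 7, 7, 256])  # [batch, height, width, channels], illustrative
reg = decov_loss(features)                    # scalar: 0.5 * (||C||_F^2 - ||diag(C)||^2)
# Typically added to the main objective with a small weight, e.g. total = task_loss + 0.1 * reg.
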
def _mix_rbf_kernel(X, Y, sigmas, wts=None):
    if wts is None:
        wts = [1] * len(sigmas)

    XX = tf.matmul(X, X, transpose_b=True)
    XY = tf.matmul(X, Y, transpose_b=True)
    YY = tf.matmul(Y, Y, transpose_b=True)

    X_sqnorms = tf.diag_part(XX)
    Y_sqnorms = tf.diag_part(YY)

    r = lambda x: tf.expand_dims(x, 0)
    c = lambda x: tf.expand_dims(x, 1)

    K_XX, K_XY, K_YY = 0, 0, 0
    for sigma, wt in zip(sigmas, wts):
        gamma = 1 / (2 * sigma**2)
        K_XX += wt * tf.exp(-gamma * (-2 * XX + c(X_sqnorms) + r(X_sqnorms)))
        K_XY += wt * tf.exp(-gamma * (-2 * XY + c(X_sqnorms) + r(Y_sqnorms)))
        K_YY += wt * tf.exp(-gamma * (-2 * YY + c(Y_sqnorms) + r(Y_sqnorms)))

    return K_XX, K_XY, K_YY, tf.reduce_sum(wts)

def decov_loss(xs):
    """Decov loss as described in https://arxiv.org/pdf/1511.06068.pdf
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'
    """
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    m = tf.reduce_mean(x, 0, True)
    z = tf.expand_dims(x - m, 2)
    corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0, 2, 1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    loss = 0.5 * (corr_frob_sqr - corr_diag_sqr)
    return loss

def build_graph(self, goal, critic):
    self.ph_stc_diff_st = \
        graph.Placeholder(np.float32, shape=(None, cfg.d), name="ph_stc_diff_st")
    s_diff_normalized = tf.nn.l2_normalize(self.ph_stc_diff_st.node, dim=1)

    cosine_similarity = tf.matmul(s_diff_normalized, goal.node, transpose_b=True)
    cosine_similarity = tf.diag_part(cosine_similarity)

    # manager's advantage (R-V): R = ri + cfg.wGAMMA * R; AdvM = R - ViM
    self.ph_discounted_reward = \
        graph.Placeholder(np.float32, shape=(None,), name="ph_m_discounted_reward")
    advantage = self.ph_discounted_reward.node - critic.node

    manager_loss = tf.reduce_sum(advantage * cosine_similarity)
    return manager_loss

def test(self):
    with self.test_context() as session:
        for k in self.kernels:
            k.initialize(session=session, force=True)
            X = tf.placeholder(tf.float64, [30, self.dim])
            rng = np.random.RandomState(1)
            X_data = rng.randn(30, self.dim)
            k1 = k.Kdiag(X)
            k2 = tf.diag_part(k.K(X))
            k1, k2 = session.run([k1, k2], feed_dict={X: X_data})
            self.assertTrue(np.allclose(k1, k2))

def compute_upper_bound(self):
    num_data = tf.cast(tf.shape(self.Y)[0], settings.float_type)

    Kdiag = self.kern.Kdiag(self.X)
    Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
    Kuf = self.feature.Kuf(self.kern, self.X)

    L = tf.cholesky(Kuu)
    LB = tf.cholesky(Kuu + self.likelihood.variance ** -1.0 * tf.matmul(Kuf, Kuf, transpose_b=True))

    LinvKuf = tf.matrix_triangular_solve(L, Kuf, lower=True)
    # Using the Trace bound, from Titsias' presentation
    c = tf.reduce_sum(Kdiag) - tf.reduce_sum(LinvKuf ** 2.0)

    # Kff = self.kern.K(self.X)
    # Qff = tf.matmul(Kuf, LinvKuf, transpose_a=True)

    # Alternative bound on max eigenval:
    # c = tf.reduce_max(tf.reduce_sum(tf.abs(Kff - Qff), 0))
    corrected_noise = self.likelihood.variance + c

    const = -0.5 * num_data * tf.log(2 * np.pi * self.likelihood.variance)
    logdet = tf.reduce_sum(tf.log(tf.diag_part(L))) - tf.reduce_sum(tf.log(tf.diag_part(LB)))

    LC = tf.cholesky(Kuu + corrected_noise ** -1.0 * tf.matmul(Kuf, Kuf, transpose_b=True))
    v = tf.matrix_triangular_solve(LC, corrected_noise ** -1.0 * tf.matmul(Kuf, self.Y), lower=True)
    quad = -0.5 * corrected_noise ** -1.0 * tf.reduce_sum(self.Y ** 2.0) + 0.5 * tf.reduce_sum(v ** 2.0)

    return const + logdet + quad

def _create_variables(self, data, initial_means=None):
    """Initializes GMM algorithm.

    Args:
        data: a list of Tensors with data, each row is a new example.
        initial_means: a Tensor with a matrix of means.
    """
    first_shard = data[0]
    # Initialize means: num_classes X 1 X dimensions.
    if initial_means is not None:
        self._means = tf.Variable(tf.expand_dims(initial_means, 1),
                                  name=self.CLUSTERS_VARIABLE,
                                  validate_shape=False, dtype=tf.float32)
    else:
        # Sample data randomly
        self._means = tf.Variable(tf.expand_dims(
            _init_clusters_random(data, self._num_classes, self._random_seed), 1),
            name=self.CLUSTERS_VARIABLE,
            validate_shape=False)
    # Initialize covariances.
    if self._covariance_type == FULL_COVARIANCE:
        cov = _covariance(first_shard, False) + self._min_var
        # A matrix per class, num_classes X dimensions X dimensions
        covs = tf.tile(
            tf.expand_dims(cov, 0), [self._num_classes, 1, 1])
    elif self._covariance_type == DIAG_COVARIANCE:
        cov = _covariance(first_shard, True) + self._min_var
        # A diagonal per row, num_classes X dimensions.
        covs = tf.tile(tf.expand_dims(tf.diag_part(cov), 0),
                       [self._num_classes, 1])
    self._covs = tf.Variable(covs, name='clusters_covs', validate_shape=False)
    # Mixture weights, representing the probability that a randomly
    # selected unobservable data (in EM terms) was generated by component k.
    self._alpha = tf.Variable(tf.tile([1.0 / self._num_classes],
                                      [self._num_classes]))

def dot(self, X, Y, name='dot_op'):
    with tf.name_scope(name) as scope:
        dot_op = tf.diag_part(tf.matmul(X, Y, transpose_b=True))
        return dot_op

def _assemble_graph(self):
    self._create_placeholders()

    tf.set_random_seed(self._random_seed + 1)

    A_var = tf.Variable(
        initial_value=tf.random_uniform(
            shape=[self._emb_dim, self._vocab_dim],
            minval=-1, maxval=1,
            seed=(self._random_seed + 2)
        )
    )
    B_var = tf.Variable(
        initial_value=tf.random_uniform(
            shape=[self._emb_dim, self._vocab_dim],
            minval=-1, maxval=1,
            seed=(self._random_seed + 3)
        )
    )

    self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')

    cont_mult = tf.transpose(tf.matmul(A_var, tf.transpose(self.context_batch)))
    resp_mult = tf.matmul(B_var, tf.transpose(self.response_batch))
    neg_resp_mult = tf.matmul(B_var, tf.transpose(self.neg_response_batch))

    pos_raw_f = tf.diag_part(tf.matmul(cont_mult, resp_mult))
    neg_raw_f = tf.diag_part(tf.matmul(cont_mult, neg_resp_mult))

    self.f_pos = pos_raw_f
    self.f_neg = neg_raw_f

    self.loss = tf.reduce_sum(tf.nn.relu(self.f_neg - self.f_pos + self._margin))

def decov_loss(xs):
    """Decov loss as described in https://arxiv.org/pdf/1511.06068.pdf
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'
    """
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    m = tf.reduce_mean(x, 0, True)
    z = tf.expand_dims(x - m, 2)
    # tf.batch_matmul is the pre-TF-1.0 name for batched matrix multiply;
    # on TF >= 1.0 it is replaced by tf.matmul (see the decov_loss variants above).
    corr = tf.reduce_mean(tf.batch_matmul(z, tf.transpose(z, perm=[0, 2, 1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    loss = 0.5 * (corr_frob_sqr - corr_diag_sqr)
    return loss

def decov_loss(xs):
    """Decov loss as described in https://arxiv.org/pdf/1511.06068.pdf
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'
    """
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    m = tf.reduce_mean(x, 0, True)
    z = tf.expand_dims(x - m, 2)
    corr = tf.reduce_mean(tf.batch_matmul(z, tf.transpose(z, perm=[0, 2, 1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    loss = 0.5 * (corr_frob_sqr - corr_diag_sqr)
    return loss

def decov_loss(xs):
    """Decov loss as described in https://arxiv.org/pdf/1511.06068.pdf
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'
    """
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    m = tf.reduce_mean(x, 0, True)
    z = tf.expand_dims(x - m, 2)
    corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0, 2, 1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    loss = 0.5 * (corr_frob_sqr - corr_diag_sqr)
    return loss

def log_cholesky_det(chol):
    return 2 * tf.reduce_sum(tf.log(tf.diag_part(chol)))

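The one-liner above relies on the identity log|K| = 2 * sum_i log(L_ii) when K = L L^T is a Cholesky factorization. A hedged verification sketch, assuming the imports from the first snippet (the 2x2 matrix is made up for illustration):

# Hypothetical check of log_cholesky_det against NumPy's slogdet.
K_np = np.array([[4.0, 1.0], [1.0, 3.0]])   # any symmetric positive-definite matrix
L_chol = tf.cholesky(tf.constant(K_np))     # lower-triangular L with K = L @ L.T
logdet = log_cholesky_det(L_chol)           # 2 * sum(log(diag(L)))

with tf.Session() as sess:
    print(sess.run(logdet), np.linalg.slogdet(K_np)[1])  # both ~ log(11) ~ 2.398
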
def test_DiagPart(self):
    t = tf.diag_part(self.random(3, 3))
    self.check(t)

def _mmd2_and_variance(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):
    m = tf.cast(K_XX.get_shape()[0], tf.float32)  # Assumes X, Y are same shape

    ### Get the various sums of kernels that we'll use
    # Kts drop the diagonal, but we don't need to compute them explicitly
    if const_diagonal is not False:
        const_diagonal = tf.cast(const_diagonal, tf.float32)
        diag_X = diag_Y = const_diagonal
        sum_diag_X = sum_diag_Y = m * const_diagonal
        sum_diag2_X = sum_diag2_Y = m * const_diagonal**2
    else:
        diag_X = tf.diag_part(K_XX)
        diag_Y = tf.diag_part(K_YY)

        sum_diag_X = tf.reduce_sum(diag_X)
        sum_diag_Y = tf.reduce_sum(diag_Y)

        sum_diag2_X = sq_sum(diag_X)
        sum_diag2_Y = sq_sum(diag_Y)

    Kt_XX_sums = tf.reduce_sum(K_XX, 1) - diag_X
    Kt_YY_sums = tf.reduce_sum(K_YY, 1) - diag_Y
    K_XY_sums_0 = tf.reduce_sum(K_XY, 0)
    K_XY_sums_1 = tf.reduce_sum(K_XY, 1)

    Kt_XX_sum = tf.reduce_sum(Kt_XX_sums)
    Kt_YY_sum = tf.reduce_sum(Kt_YY_sums)
    K_XY_sum = tf.reduce_sum(K_XY_sums_0)

    Kt_XX_2_sum = sq_sum(K_XX) - sum_diag2_X
    Kt_YY_2_sum = sq_sum(K_YY) - sum_diag2_Y
    K_XY_2_sum = sq_sum(K_XY)

    if biased:
        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * m)
                + (Kt_YY_sum + sum_diag_Y) / (m * m)
                - 2 * K_XY_sum / (m * m))
    else:
        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * (m - 1))
                + (Kt_YY_sum + sum_diag_Y) / (m * (m - 1))
                - 2 * K_XY_sum / (m * m))

    var_est = (
        2 / (m**2 * (m - 1)**2) * (
            2 * sq_sum(Kt_XX_sums) - Kt_XX_2_sum
            + 2 * sq_sum(Kt_YY_sums) - Kt_YY_2_sum)
        - (4 * m - 6) / (m**3 * (m - 1)**3) * (Kt_XX_sum**2 + Kt_YY_sum**2)
        + 4 * (m - 2) / (m**3 * (m - 1)**2) * (
            sq_sum(K_XY_sums_1) + sq_sum(K_XY_sums_0))
        - 4 * (m - 3) / (m**3 * (m - 1)**2) * K_XY_2_sum
        - (8 * m - 12) / (m**5 * (m - 1)) * K_XY_sum**2
        + 8 / (m**3 * (m - 1)) * (
            1 / m * (Kt_XX_sum + Kt_YY_sum) * K_XY_sum
            - dot(Kt_XX_sums, K_XY_sums_1)
            - dot(Kt_YY_sums, K_XY_sums_0))
    )

    return mmd2, var_est

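For context, a hedged sketch of how _mix_rbf_kernel and _mmd2_and_variance are typically chained, assuming the imports from the first snippet; the sample shapes, bandwidths, and the sq_sum/dot helper definitions below are assumptions chosen to match how the estimator uses them:

# Small reduction helpers the estimator refers to (assumed definitions).
sq_sum = lambda t: tf.reduce_sum(tf.square(t))
dot = lambda a, b: tf.reduce_sum(a * b)

# X, Y: [m, d] batches of samples from the two distributions being compared (illustrative sizes).
X = tf.random_normal([64, 16])
Y = tf.random_normal([64, 16])

K_XX, K_XY, K_YY, wts_sum = _mix_rbf_kernel(X, Y, sigmas=[1.0, 2.0, 4.0])
mmd2, var_est = _mmd2_and_variance(K_XX, K_XY, K_YY, biased=True)
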
def _define_maximization_operation(self, num_batches):
    """Maximization operations."""
    # TODO(xavigonzalvo): some of these operations could be moved to C++.
    # Compute the effective number of data points assigned to component k.
    with tf.control_dependencies(self._w):
        points_in_k = tf.squeeze(tf.add_n(self._points_in_k), squeeze_dims=[0])
        # Update alpha.
        if 'w' in self._params:
            final_points_in_k = points_in_k / num_batches
            num_examples = tf.to_float(tf.reduce_sum(final_points_in_k))
            self._alpha_op = self._alpha.assign(
                final_points_in_k / (num_examples + MEPS))
        else:
            self._alpha_op = tf.no_op()
        self._train_ops = [self._alpha_op]

        # Update means.
        points_in_k_expanded = tf.reshape(points_in_k,
                                          [self._num_classes, 1, 1])
        if 'm' in self._params:
            self._means_op = self._means.assign(
                tf.div(tf.add_n(self._w_mul_x),
                       points_in_k_expanded + MEPS))
        else:
            self._means_op = tf.no_op()
        # means are (num_classes x 1 x dims)

        # Update covariances.
        with tf.control_dependencies([self._means_op]):
            b = tf.add_n(self._w_mul_x2) / (points_in_k_expanded + MEPS)
            new_covs = []
            for k in range(self._num_classes):
                mean = self._means.ref()[k, :, :]
                square_mean = tf.matmul(mean, mean, transpose_a=True)
                new_cov = b[k, :, :] - square_mean + self._min_var
                if self._covariance_type == FULL_COVARIANCE:
                    new_covs.append(tf.expand_dims(new_cov, 0))
                elif self._covariance_type == DIAG_COVARIANCE:
                    new_covs.append(
                        tf.expand_dims(tf.diag_part(new_cov), 0))
            new_covs = tf.concat(0, new_covs)
            if 'c' in self._params:
                # Train operations don't need to take care of the means
                # because covariances already depend on it.
                with tf.control_dependencies([self._means_op, new_covs]):
                    self._train_ops.append(
                        tf.assign(self._covs, new_covs, validate_shape=False))

def _define_maximization_operation(self, num_batches):
    """Maximization operations."""
    # TODO(xavigonzalvo): some of these operations could be moved to C++.
    # Compute the effective number of data points assigned to component k.
    with tf.control_dependencies(self._w):
        points_in_k = tf.squeeze(tf.add_n(self._points_in_k), squeeze_dims=[0])
        # Update alpha.
        if 'w' in self._params:
            final_points_in_k = points_in_k / num_batches
            num_examples = tf.to_float(tf.reduce_sum(final_points_in_k))
            self._alpha_op = self._alpha.assign(
                final_points_in_k / (num_examples + MEPS))
        else:
            self._alpha_op = tf.no_op()
        self._train_ops = [self._alpha_op]

        # Update means.
        points_in_k_expanded = tf.reshape(points_in_k,
                                          [self._num_classes, 1, 1])
        if 'm' in self._params:
            self._means_op = self._means.assign(
                tf.div(tf.add_n(self._w_mul_x),
                       points_in_k_expanded + MEPS))
        else:
            self._means_op = tf.no_op()
        # means are (num_classes x 1 x dims)

        # Update covariances.
        with tf.control_dependencies([self._means_op]):
            b = tf.add_n(self._w_mul_x2) / (points_in_k_expanded + MEPS)
            new_covs = []
            for k in range(self._num_classes):
                mean = self._means.value()[k, :, :]
                square_mean = tf.matmul(mean, mean, transpose_a=True)
                new_cov = b[k, :, :] - square_mean + self._min_var
                if self._covariance_type == FULL_COVARIANCE:
                    new_covs.append(tf.expand_dims(new_cov, 0))
                elif self._covariance_type == DIAG_COVARIANCE:
                    new_covs.append(
                        tf.expand_dims(tf.diag_part(new_cov), 0))
            new_covs = tf.concat(0, new_covs)
            if 'c' in self._params:
                # Train operations don't need to take care of the means
                # because covariances already depend on it.
                with tf.control_dependencies([self._means_op, new_covs]):
                    self._train_ops.append(
                        tf.assign(self._covs, new_covs, validate_shape=False))

def likelihood(self, hyp, X_batch, y_batch, monitor=False):
    M = self.M
    Z = self.Z
    m = self.m
    S = self.S

    jitter = self.jitter
    jitter_cov = self.jitter_cov

    N = tf.shape(X_batch)[0]

    logsigma_n = hyp[-1]
    sigma_n = tf.exp(logsigma_n)

    # Compute K_u_inv
    K_u = kernel_tf(Z, Z, hyp[:-1])
    L = tf.cholesky(K_u + np.eye(M) * jitter_cov)
    K_u_inv = tf.matrix_triangular_solve(tf.transpose(L),
                                         tf.matrix_triangular_solve(L, np.eye(M), lower=True),
                                         lower=False)

    K_u_inv_op = self.K_u_inv.assign(K_u_inv)

    # Compute mu
    psi = kernel_tf(Z, X_batch, hyp[:-1])
    K_u_inv_m = tf.matmul(K_u_inv, m)
    MU = tf.matmul(tf.transpose(psi), K_u_inv_m)

    # Compute cov
    Alpha = tf.matmul(K_u_inv, psi)
    COV = kernel_tf(X_batch, X_batch, hyp[:-1]) - tf.matmul(tf.transpose(psi), tf.matmul(K_u_inv, psi)) + \
          tf.matmul(tf.transpose(Alpha), tf.matmul(S, Alpha))

    # Compute COV_inv
    LL = tf.cholesky(COV + tf.eye(N, dtype=tf.float64) * sigma_n + tf.eye(N, dtype=tf.float64) * jitter)
    COV_inv = tf.matrix_triangular_solve(tf.transpose(LL),
                                         tf.matrix_triangular_solve(LL, tf.eye(N, dtype=tf.float64), lower=True),
                                         lower=False)

    # Compute cov(Z, X)
    cov_ZX = tf.matmul(S, Alpha)

    # Update m and S
    alpha = tf.matmul(COV_inv, tf.transpose(cov_ZX))
    m_new = m + tf.matmul(cov_ZX, tf.matmul(COV_inv, y_batch - MU))
    S_new = S - tf.matmul(cov_ZX, alpha)

    if monitor == False:
        m_op = self.m.assign(m_new)
        S_op = self.S.assign(S_new)

    # Compute NLML
    K_u_inv_m = tf.matmul(K_u_inv, m_new)

    NLML = 0.5 * tf.matmul(tf.transpose(m_new), K_u_inv_m) + \
           tf.reduce_sum(tf.log(tf.diag_part(L))) + \
           0.5 * np.log(2. * np.pi) * tf.cast(M, tf.float64)

    train = self.optimizer.minimize(NLML)

    nlml_op = self.nlml.assign(NLML[0, 0])

    return tf.group(*[train, m_op, S_op, nlml_op, K_u_inv_op])

def block_Lanczos(Sigma_func, B_, n_mc_smps):
    """
    block Lanczos method to approx Sigma^1/2 * B, with B matrix of N(0,1)'s.
    Used to generate multiple approximate large normal draws.
    """
    n = tf.shape(B_)[0]      # dim of the multivariate normal
    s = n_mc_smps            # number of samples to draw
    k = tf.div(n, 500) + 3   # number of Lanczos iterations

    betas = tf.zeros([1, s])
    alphas = tf.zeros([0, s])
    D = tf.zeros([s, n, 1])
    B_norms = tf.norm(B_, axis=0)
    D = tf.concat([D, tf.expand_dims(tf.transpose(B_ / B_norms), 2)], 2)

    def cond(j, alphas, betas, D):
        return j < k + 1

    # TODO: use block-CG in place of Sigma
    def body(j, alphas, betas, D):
        d_j = tf.squeeze(tf.slice(D, [0, 0, j], [-1, -1, 1]))
        d = Sigma_func(tf.transpose(d_j)) - (tf.slice(betas, [j - 1, 0], [1, -1]) *
                                             tf.transpose(tf.squeeze(tf.slice(D, [0, 0, j - 1], [-1, -1, 1]))))
        alphas = tf.concat([alphas, [tf.diag_part(tf.matmul(d_j, d))]], 0)
        d = d - tf.slice(alphas, [j - 1, 0], [1, -1]) * tf.transpose(d_j)
        betas = tf.concat([betas, [tf.norm(d, axis=0)]], 0)
        D = tf.concat([D, tf.expand_dims(tf.transpose(d / tf.slice(betas, [j, 0], [1, -1])), 2)], 2)
        return j + 1, alphas, betas, D

    j = tf.constant(1)
    j, alphas, betas, D = tf.while_loop(
        cond, body, loop_vars=[j, alphas, betas, D],
        shape_invariants=[j.get_shape(), tf.TensorShape([None, None]),
                          tf.TensorShape([None, None]), tf.TensorShape([None, None, None])])

    D_ = tf.slice(D, [0, 0, 1], [-1, -1, k])

    ## TODO: replace loop
    H = tf.zeros([0, k, k])
    for ss in range(s):
        this_beta = tf.diag(tf.squeeze(tf.slice(betas, [1, ss], [k - 1, 1])))
        # build out tridiagonal H: alphas_1:k on main, betas_2:k on off
        this_H = (tf.diag(tf.squeeze(tf.slice(alphas, [0, ss], [-1, 1]))) +
                  tf.pad(this_beta, [[1, 0], [0, 1]]) +
                  tf.pad(this_beta, [[0, 1], [1, 0]]))
        H = tf.concat([H, tf.expand_dims(this_H, 0)], 0)

    E, V = tf.self_adjoint_eig(H)

    E_sqrt = tf.zeros([0, k, k])
    # TODO: replace loop
    for ss in range(s):
        # ensure positive definite
        E_sqrt = tf.concat([E_sqrt,
                            tf.expand_dims(tf.diag(tf.squeeze(tf.sqrt(
                                tf.maximum(tf.slice(E, [ss, 0], [1, -1]), 1e-6)))), 0)], 0)

    sq_H = tf.matmul(V, tf.matmul(E_sqrt, tf.transpose(V, perm=[0, 2, 1])))

    e1 = tf.expand_dims(tf.transpose(tf.tile(tf.slice(tf.eye(k), [0, 0], [-1, 1]), [1, s])), 2)
    out = B_norms * tf.transpose(tf.squeeze(tf.matmul(D_, tf.matmul(sq_H, e1))))
    return out

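Finally, a hedged usage sketch for block_Lanczos, assuming the imports from the first snippet; Sigma_func is expected to left-multiply an [n, s] matrix by the covariance, and the result approximates Sigma^(1/2) @ B, i.e. correlated draws when B is standard normal (the covariance and sizes below are made up for illustration):

# Hypothetical call: draw n_mc approximately-correlated Gaussian samples.
n, n_mc = 200, 10
A = tf.random_normal([n, n])
Sigma = tf.matmul(A, A, transpose_b=True) + n * tf.eye(n)  # an illustrative PSD covariance
B = tf.random_normal([n, n_mc])                            # standard normal probe vectors

draws = block_Lanczos(lambda V: tf.matmul(Sigma, V), B, n_mc)  # ~ Sigma^(1/2) @ B, shape [n, n_mc]
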