Python tensorflow 模块,diag_part() 实例源码


项目:tefla    作者:openAGI    | 项目源码 | 文件源码
def decov_loss(xs, name='decov_loss'):
    """Decov loss as described in
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'.

    Args:
        xs: 4-D `tensor` [batch_size, height, width, channels], input.
            The batch dimension must be statically known because it is
            read with `int(xs.get_shape()[0])`.
        name: name scope under which the ops are created.

    Returns:
        a `float` decov loss
    """
    with tf.name_scope(name):
        # Flatten every example into one feature vector: [batch, features].
        x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
        # Per-feature mean over the batch; keep_dims=True so it broadcasts.
        m = tf.reduce_mean(x, 0, True)
        # Centered features as column vectors: [batch, features, 1].
        z = tf.expand_dims(x - m, 2)
        # Batch mean of the per-example outer products: [features, features].
        corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0, 2, 1])), 0)
        corr_frob_sqr = tf.reduce_sum(tf.square(corr))
        corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
        # Subtracting the diagonal leaves only the cross-feature terms.
        loss = 0.5 * (corr_frob_sqr - corr_diag_sqr)
        return loss
项目:opt-mmd    作者:dougalsutherland    | 项目源码 | 文件源码
def _mix_rbf_kernel(X, Y, sigmas, wts=None):
    """Evaluate a weighted sum of RBF kernels between X and Y.

    Squared distances are expanded as ||a||^2 - 2 a.b + ||b||^2 using the
    Gram matrices X X^T, X Y^T and Y Y^T, whose diagonals give the squared
    norms.

    Args:
        X: 2-D tensor (presumably one sample per row -- confirm with caller).
        Y: 2-D tensor with the same number of columns as X.
        sigmas: iterable of kernel bandwidths.
        wts: optional per-bandwidth weights; defaults to 1 for each sigma.

    Returns:
        Tuple (K_XX, K_XY, K_YY, tf.reduce_sum(wts)).
    """
    if wts is None:
        wts = [1] * len(sigmas)

    gram_xx = tf.matmul(X, X, transpose_b=True)
    gram_xy = tf.matmul(X, Y, transpose_b=True)
    gram_yy = tf.matmul(Y, Y, transpose_b=True)

    sq_norm_x = tf.diag_part(gram_xx)
    sq_norm_y = tf.diag_part(gram_yy)

    def as_row(vec):
        return tf.expand_dims(vec, 0)

    def as_col(vec):
        return tf.expand_dims(vec, 1)

    K_XX = 0
    K_XY = 0
    K_YY = 0
    for bandwidth, weight in zip(sigmas, wts):
        gamma = 1 / (2 * bandwidth**2)
        K_XX += weight * tf.exp(
            -gamma * (-2 * gram_xx + as_col(sq_norm_x) + as_row(sq_norm_x)))
        K_XY += weight * tf.exp(
            -gamma * (-2 * gram_xy + as_col(sq_norm_x) + as_row(sq_norm_y)))
        K_YY += weight * tf.exp(
            -gamma * (-2 * gram_yy + as_col(sq_norm_y) + as_row(sq_norm_y)))

    return K_XX, K_XY, K_YY, tf.reduce_sum(wts)
项目:tensorflow-DDT    作者:wangchao66    | 项目源码 | 文件源码
def _mix_rbf_kernel(X, Y, sigmas, wts=None):
    """Build the mixture-of-RBF kernel matrices for the pair (X, Y).

    Each (sigma, weight) pair contributes
    weight * exp(-d2 / (2 * sigma**2)), where d2 is the matrix of squared
    distances obtained from the Gram matrices and their diagonals.

    Args:
        X: 2-D tensor (presumably samples in rows -- confirm with caller).
        Y: 2-D tensor with the same number of columns as X.
        sigmas: iterable of bandwidths.
        wts: optional weights, one per sigma; all ones when omitted.

    Returns:
        Tuple (K_XX, K_XY, K_YY, tf.reduce_sum(wts)).
    """
    if wts is None:
        wts = [1] * len(sigmas)

    xx_prod = tf.matmul(X, X, transpose_b=True)
    xy_prod = tf.matmul(X, Y, transpose_b=True)
    yy_prod = tf.matmul(Y, Y, transpose_b=True)

    # Diagonals of the Gram matrices are the squared norms of the rows.
    x_norms2 = tf.diag_part(xx_prod)
    y_norms2 = tf.diag_part(yy_prod)

    def row(v):
        return tf.expand_dims(v, 0)

    def col(v):
        return tf.expand_dims(v, 1)

    K_XX, K_XY, K_YY = 0, 0, 0
    for s, w in zip(sigmas, wts):
        gamma = 1 / (2 * s**2)
        K_XX += w * tf.exp(-gamma * (-2 * xx_prod + col(x_norms2) + row(x_norms2)))
        K_XY += w * tf.exp(-gamma * (-2 * xy_prod + col(x_norms2) + row(y_norms2)))
        K_YY += w * tf.exp(-gamma * (-2 * yy_prod + col(y_norms2) + row(y_norms2)))

    total_weight = tf.reduce_sum(wts)
    return K_XX, K_XY, K_YY, total_weight
项目:facerecognition    作者:guoxiaolu    | 项目源码 | 文件源码
def decov_loss(xs):
    """Decov loss as described in
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'.

    Args:
        xs: input tensor; its first (batch) dimension must be statically
            known because it is read with `int(xs.get_shape()[0])`.

    Returns:
        Scalar tensor: half the sum of squared off-diagonal entries of the
        batch-averaged outer product of the centered, flattened features.
    """
    # Flatten each example to a feature vector: [batch, features].
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    # Per-feature batch mean, kept 2-D so it broadcasts against x.
    m = tf.reduce_mean(x, 0, True)
    z = tf.expand_dims(x-m, 2)
    # Mean over the batch of z z^T: [features, features].
    corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0,2,1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    # Frobenius norm minus diagonal leaves only cross-feature terms.
    loss = 0.5*(corr_frob_sqr - corr_diag_sqr)
    return loss
项目:relaax    作者:deeplearninc    | 项目源码 | 文件源码
def build_graph(self, goal, critic):
    """Build the manager loss tensor and the placeholders feeding it.

    Creates `self.ph_stc_diff_st` and `self.ph_discounted_reward`, and
    returns the sum over rows of advantage times the row-wise dot product
    between the L2-normalized state difference and `goal.node`.
    """
    self.ph_stc_diff_st = graph.Placeholder(
        np.float32, shape=(None, cfg.d), name="ph_stc_diff_st")
    unit_state_diff = tf.nn.l2_normalize(self.ph_stc_diff_st.node, dim=1)

    # The diagonal of the pairwise product holds the row-by-row dot products.
    # NOTE(review): this is a true cosine only if goal.node is already
    # unit-length -- confirm with the caller.
    pairwise = tf.matmul(unit_state_diff, goal.node, transpose_b=True)
    row_similarity = tf.diag_part(pairwise)

    # manager's advantage (R-V): R = ri + cfg.wGAMMA * R; AdvM = R - ViM
    self.ph_discounted_reward = graph.Placeholder(
        np.float32, shape=(None,), name="ph_m_discounted_reward")
    manager_advantage = self.ph_discounted_reward.node - critic.node

    return tf.reduce_sum(manager_advantage * row_similarity)
项目:faceNet_RealTime    作者:jack55436001    | 项目源码 | 文件源码
def decov_loss(xs):
    """Decov loss as described in
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'.

    Args:
        xs: input tensor whose first (batch) dimension is statically known;
            it is read with `int(xs.get_shape()[0])`.

    Returns:
        Scalar tensor 0.5 * (sum(C**2) - sum(diag(C)**2)), where C is the
        batch mean of the outer products of the centered features.
    """
    # [batch, features]
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    # Feature-wise mean over the batch (keep_dims=True).
    m = tf.reduce_mean(x, 0, True)
    # [batch, features, 1]
    z = tf.expand_dims(x-m, 2)
    # [features, features]
    corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0,2,1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    loss = 0.5*(corr_frob_sqr - corr_diag_sqr)
    return loss
项目:icyface_api    作者:bupticybee    | 项目源码 | 文件源码
def decov_loss(xs):
    """Decov loss as described in
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'.

    Args:
        xs: input tensor; the batch size (dimension 0) must be static
            since it is converted with `int(xs.get_shape()[0])`.

    Returns:
        Scalar tensor penalizing the squared off-diagonal entries of the
        batch-averaged outer product of the centered features.
    """
    # Collapse all non-batch dimensions into one feature axis.
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    m = tf.reduce_mean(x, 0, True)
    # Column-vector view of the centered features for the batched matmul.
    z = tf.expand_dims(x-m, 2)
    corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0,2,1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    # The diagonal is removed so only cross-feature terms are penalized.
    loss = 0.5*(corr_frob_sqr - corr_diag_sqr)
    return loss
项目:GPflow    作者:GPflow    | 项目源码 | 文件源码
def test(self):
    """Check that `Kdiag(X)` equals the diagonal of `K(X)` for every kernel."""
    with self.test_context() as session:
        for k in self.kernels:
            k.initialize(session=session, force=True)
            X = tf.placeholder(tf.float64, [30, self.dim])
            # Fixed seed keeps the comparison deterministic.
            rng = np.random.RandomState(1)
            X_data = rng.randn(30, self.dim)
            k1 = k.Kdiag(X)
            k2 = tf.diag_part(k.K(X))
            # Evaluate both tensors on the same data in one run call.
            k1, k2 = session.run([k1, k2], feed_dict={X: X_data})
            self.assertTrue(np.allclose(k1, k2))
项目:GPflow    作者:GPflow    | 项目源码 | 文件源码
def compute_upper_bound(self):
    """Assemble and return the scalar tensor `const + logdet + quad`.

    The noise variance used in the quadratic term is inflated by a
    trace term, sum(Kdiag) - sum((L^-1 Kuf)**2), where L is the Cholesky
    factor of Kuu.
    NOTE(review): presumably an upper bound on the log marginal
    likelihood of a sparse GP -- confirm against the model class.
    """
    n_obs = tf.cast(tf.shape(self.Y)[0], settings.float_type)

    k_diag = self.kern.Kdiag(self.X)
    k_uu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
    k_uf = self.feature.Kuf(self.kern, self.X)

    chol_uu = tf.cholesky(k_uu)
    chol_b = tf.cholesky(
        k_uu + self.likelihood.variance ** -1.0 * tf.matmul(k_uf, k_uf, transpose_b=True))

    whitened_kuf = tf.matrix_triangular_solve(chol_uu, k_uf, lower=True)
    # Using the Trace bound, from Titsias' presentation
    trace_gap = tf.reduce_sum(k_diag) - tf.reduce_sum(whitened_kuf ** 2.0)
    # Kff = self.kern.K(self.X)
    # Qff = tf.matmul(Kuf, LinvKuf, transpose_a=True)

    # Alternative bound on max eigenval:
    # c = tf.reduce_max(tf.reduce_sum(tf.abs(Kff - Qff), 0))
    inflated_noise = self.likelihood.variance + trace_gap

    const_term = -0.5 * n_obs * tf.log(2 * np.pi * self.likelihood.variance)
    logdet_term = (tf.reduce_sum(tf.log(tf.diag_part(chol_uu)))
                   - tf.reduce_sum(tf.log(tf.diag_part(chol_b))))

    chol_c = tf.cholesky(
        k_uu + inflated_noise ** -1.0 * tf.matmul(k_uf, k_uf, transpose_b=True))
    scaled_rhs = inflated_noise ** -1.0 * tf.matmul(k_uf, self.Y)
    solved = tf.matrix_triangular_solve(chol_c, scaled_rhs, lower=True)
    quad_term = (-0.5 * inflated_noise ** -1.0 * tf.reduce_sum(self.Y ** 2.0)
                 + 0.5 * tf.reduce_sum(solved ** 2.0))

    return const_term + logdet_term + quad_term
项目:lsdc    作者:febert    | 项目源码 | 文件源码
def _create_variables(self, data, initial_means=None):
    """Initializes GMM algorithm.

    Creates the variables `self._means`, `self._covs` and `self._alpha`.

    Args:
        data: a list of Tensors with data, each row is a new example.
        initial_means: a Tensor with a matrix of means.
    """
    first_shard = data[0]
    # Initialize means: num_classes X 1 X dimensions.
    if initial_means is not None:
        self._means = tf.Variable(tf.expand_dims(initial_means, 1),
                                  validate_shape=False, dtype=tf.float32)
    else:
        # Sample data randomly
        # NOTE(review): the tail of this call was missing from the source;
        # validate_shape=False mirrors the branch above -- confirm.
        self._means = tf.Variable(
            tf.expand_dims(
                _init_clusters_random(data, self._num_classes,
                                      self._random_seed), 1),
            validate_shape=False)

    # Initialize covariances.
    if self._covariance_type == FULL_COVARIANCE:
        cov = _covariance(first_shard, False) + self._min_var
        # A matrix per class, num_classes X dimensions X dimensions
        covs = tf.tile(
            tf.expand_dims(cov, 0), [self._num_classes, 1, 1])
    elif self._covariance_type == DIAG_COVARIANCE:
        cov = _covariance(first_shard, True) + self._min_var
        # A diagonal per row, num_classes X dimensions.
        covs = tf.tile(tf.expand_dims(tf.diag_part(cov), 0),
                       [self._num_classes, 1])
    self._covs = tf.Variable(covs, name='clusters_covs', validate_shape=False)
    # Mixture weights, representing the probability that a randomly
    # selected unobservable data (in EM terms) was generated by component k.
    # Start uniform: 1 / num_classes repeated num_classes times.
    self._alpha = tf.Variable(tf.tile([1.0 / self._num_classes],
                                      [self._num_classes]))
项目:lsdc    作者:febert    | 项目源码 | 文件源码
def _create_variables(self, data, initial_means=None):
    """Initializes GMM algorithm.

    Sets up the mean, covariance and mixture-weight variables
    (`self._means`, `self._covs`, `self._alpha`).

    Args:
        data: a list of Tensors with data, each row is a new example.
        initial_means: a Tensor with a matrix of means.
    """
    first_shard = data[0]
    # Initialize means: num_classes X 1 X dimensions.
    if initial_means is not None:
        self._means = tf.Variable(tf.expand_dims(initial_means, 1),
                                  validate_shape=False, dtype=tf.float32)
    else:
        # Sample data randomly
        random_means = _init_clusters_random(
            data, self._num_classes, self._random_seed)
        # NOTE(review): the closing arguments of this call were lost in the
        # source; validate_shape=False matches the other branch -- confirm.
        self._means = tf.Variable(tf.expand_dims(random_means, 1),
                                  validate_shape=False)

    # Initialize covariances.
    if self._covariance_type == FULL_COVARIANCE:
        cov = _covariance(first_shard, False) + self._min_var
        # A matrix per class, num_classes X dimensions X dimensions
        covs = tf.tile(
            tf.expand_dims(cov, 0), [self._num_classes, 1, 1])
    elif self._covariance_type == DIAG_COVARIANCE:
        cov = _covariance(first_shard, True) + self._min_var
        # A diagonal per row, num_classes X dimensions.
        covs = tf.tile(tf.expand_dims(tf.diag_part(cov), 0),
                       [self._num_classes, 1])
    self._covs = tf.Variable(covs, name='clusters_covs', validate_shape=False)
    # Mixture weights, representing the probability that a randomly
    # selected unobservable data (in EM terms) was generated by component k.
    # Every component starts with the same weight, 1 / num_classes.
    self._alpha = tf.Variable(tf.tile([1.0 / self._num_classes],
                                      [self._num_classes]))
项目:hyperstar    作者:nlpub    | 项目源码 | 文件源码
def dot(self, X, Y, name='dot_op'):
    """Return the diagonal of X @ Y^T, i.e. the row-by-row dot products.

    The ops are created inside the name scope `name`.
    """
    with tf.name_scope(name):
        pairwise_products = tf.matmul(X, Y, transpose_b=True)
        return tf.diag_part(pairwise_products)
项目:facenet    作者:davidsandberg    | 项目源码 | 文件源码
def decov_loss(xs):
    """Decov loss as described in
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'.

    Args:
        xs: input tensor with a statically known first (batch) dimension,
            read via `int(xs.get_shape()[0])`.

    Returns:
        Scalar tensor: 0.5 * (squared Frobenius norm of C minus the squared
        norm of its diagonal), with C the batch mean of z z^T for the
        centered, flattened features z.
    """
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    # Mean over the batch axis, kept 2-D for broadcasting.
    m = tf.reduce_mean(x, 0, True)
    z = tf.expand_dims(x-m, 2)
    # Per-example outer products averaged over the batch.
    corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0,2,1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    loss = 0.5*(corr_frob_sqr - corr_diag_sqr)
    return loss
项目:supervised-embedding-model    作者:sld    | 项目源码 | 文件源码
def _assemble_graph(self):
    """Build the embedding variables, scores and margin loss.

    Sets `self.global_step`, `self.f_pos`, `self.f_neg` and `self.loss`.
    Reads `self.context_batch`, `self.response_batch` and
    `self.neg_response_batch`, which must already exist.
    """
    tf.set_random_seed(self._random_seed + 1)

    # NOTE(review): the initializer call was stripped from the source;
    # shape/minval/maxval/seed match tf.random_uniform -- confirm.
    A_var = tf.Variable(
        tf.random_uniform(
            shape=[self._emb_dim, self._vocab_dim],
            minval=-1, maxval=1, seed=(self._random_seed + 2)
        )
    )
    B_var = tf.Variable(
        tf.random_uniform(
            shape=[self._emb_dim, self._vocab_dim],
            minval=-1, maxval=1, seed=(self._random_seed + 3)
        )
    )
    self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')

    # cont_mult is the transpose of A @ context^T; the response products
    # stay as B @ response^T.
    cont_mult = tf.transpose(tf.matmul(A_var, tf.transpose(self.context_batch)))
    resp_mult = tf.matmul(B_var, tf.transpose(self.response_batch))
    neg_resp_mult = tf.matmul(B_var, tf.transpose(self.neg_response_batch))

    # The diagonal pairs the i-th context with the i-th response.
    pos_raw_f = tf.diag_part(tf.matmul(cont_mult, resp_mult))
    neg_raw_f = tf.diag_part(tf.matmul(cont_mult, neg_resp_mult))
    self.f_pos = pos_raw_f
    self.f_neg = neg_raw_f

    # Hinge loss: positive score should beat negative score by the margin.
    self.loss = tf.reduce_sum(tf.nn.relu(self.f_neg - self.f_pos + self._margin))
项目:DP_for_FaceNet    作者:guchinoma    | 项目源码 | 文件源码
def decov_loss(xs):
    """Decov loss as described in
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'.

    Args:
        xs: input tensor; dimension 0 (batch) must be statically known
            because it is read with `int(xs.get_shape()[0])`.

    Returns:
        Scalar tensor penalizing the squared off-diagonal entries of the
        batch-averaged outer product of the centered features.
    """
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    m = tf.reduce_mean(x, 0, True)
    z = tf.expand_dims(x-m, 2)
    # NOTE(review): tf.batch_matmul only exists in pre-1.0 TensorFlow;
    # later versions need tf.matmul here.
    corr = tf.reduce_mean(tf.batch_matmul(z, tf.transpose(z, perm=[0,2,1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    loss = 0.5*(corr_frob_sqr - corr_diag_sqr)
    return loss
项目:DP_for_FaceNet    作者:guchinoma    | 项目源码 | 文件源码
def decov_loss(xs):
    """Decov loss as described in
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'.

    Args:
        xs: input tensor with a static batch dimension (read through
            `int(xs.get_shape()[0])`).

    Returns:
        Scalar tensor 0.5 * (sum(C**2) - sum(diag(C)**2)), C being the
        batch mean of the outer products of the centered features.
    """
    # Flatten to [batch, features].
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    m = tf.reduce_mean(x, 0, True)
    z = tf.expand_dims(x-m, 2)
    # NOTE(review): tf.batch_matmul is a pre-1.0 TensorFlow op; on newer
    # versions tf.matmul is required instead.
    corr = tf.reduce_mean(tf.batch_matmul(z, tf.transpose(z, perm=[0,2,1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    loss = 0.5*(corr_frob_sqr - corr_diag_sqr)
    return loss
项目:Face-Recognition    作者:aswl01    | 项目源码 | 文件源码
def decov_loss(xs):
    """Decov loss as described in
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'.

    Args:
        xs: input tensor; its batch size must be known statically since
            it is read with `int(xs.get_shape()[0])`.

    Returns:
        Scalar tensor: half the sum of squared off-diagonal entries of the
        batch-averaged outer product of the centered, flattened features.
    """
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    # keep_dims=True so the mean broadcasts over the batch.
    m = tf.reduce_mean(x, 0, True)
    z = tf.expand_dims(x - m, 2)
    # NOTE(review): tf.batch_matmul was removed in TensorFlow 1.0; use
    # tf.matmul when running on newer versions.
    corr = tf.reduce_mean(tf.batch_matmul(z, tf.transpose(z, perm=[0, 2, 1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    loss = 0.5 * (corr_frob_sqr - corr_diag_sqr)
    return loss
项目:Face-Recognition    作者:aswl01    | 项目源码 | 文件源码
def decov_loss(xs):
    """Decov loss as described in
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'.

    Args:
        xs: input tensor whose batch dimension is statically known (it is
            converted with `int(xs.get_shape()[0])`).

    Returns:
        Scalar tensor penalizing the squared off-diagonal entries of the
        batch-averaged outer product of the centered features.
    """
    # [batch, features]
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    m = tf.reduce_mean(x, 0, True)
    # [batch, features, 1] so matmul forms one outer product per example.
    z = tf.expand_dims(x - m, 2)
    corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0, 2, 1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    loss = 0.5 * (corr_frob_sqr - corr_diag_sqr)
    return loss
项目:AutoGP    作者:ebonilla    | 项目源码 | 文件源码
def log_cholesky_det(chol):
    """Return twice the sum of the logs of the diagonal of `chol`.

    When `chol` is the Cholesky factor L of A = L L^T, this equals
    log(det(A)).
    """
    log_diagonal = tf.log(tf.diag_part(chol))
    return 2 * tf.reduce_sum(log_diagonal)
项目:real-time-face-recognition    作者:iwantooxxoox    | 项目源码 | 文件源码
def decov_loss(xs):
    """Decov loss as described in
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'.

    Args:
        xs: input tensor; the first (batch) dimension has to be static
            because it is read with `int(xs.get_shape()[0])`.

    Returns:
        Scalar tensor 0.5 * (sum(C**2) - sum(diag(C)**2)), where C is the
        batch mean of the outer products of the centered features.
    """
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    m = tf.reduce_mean(x, 0, True)
    z = tf.expand_dims(x-m, 2)
    # Average of z z^T over the batch: [features, features].
    corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0,2,1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    # Only the off-diagonal entries contribute to the loss.
    loss = 0.5*(corr_frob_sqr - corr_diag_sqr)
    return loss
项目:personalized-dialog    作者:chaitjo    | 项目源码 | 文件源码
def _assemble_graph(self):
    """Create the two embedding matrices, the pair scores and the loss.

    Sets `self.global_step`, `self.f_pos`, `self.f_neg` and `self.loss`
    from `self.context_batch`, `self.response_batch` and
    `self.neg_response_batch`.
    """
    tf.set_random_seed(self._random_seed + 1)

    # NOTE(review): the source lost the initializer call wrapping these
    # keyword arguments; they match tf.random_uniform -- confirm.
    A_var = tf.Variable(
        tf.random_uniform(
            shape=[self._emb_dim, self._vocab_dim],
            minval=-1, maxval=1, seed=(self._random_seed + 2)
        )
    )
    B_var = tf.Variable(
        tf.random_uniform(
            shape=[self._emb_dim, self._vocab_dim],
            minval=-1, maxval=1, seed=(self._random_seed + 3)
        )
    )
    self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')

    cont_mult = tf.transpose(tf.matmul(A_var, tf.transpose(self.context_batch)))
    resp_mult = tf.matmul(B_var, tf.transpose(self.response_batch))
    neg_resp_mult = tf.matmul(B_var, tf.transpose(self.neg_response_batch))

    # Diagonal entry i scores context i against response i.
    pos_raw_f = tf.diag_part(tf.matmul(cont_mult, resp_mult))
    neg_raw_f = tf.diag_part(tf.matmul(cont_mult, neg_resp_mult))
    self.f_pos = pos_raw_f
    self.f_neg = neg_raw_f

    # Margin ranking loss summed over the batch.
    self.loss = tf.reduce_sum(tf.nn.relu(self.f_neg - self.f_pos + self._margin))
项目:tfdeploy    作者:riga    | 项目源码 | 文件源码
def test_DiagPart(self):
    """Build a `tf.diag_part` op on the result of `self.random(3, 3)`."""
    square_input = self.random(3, 3)
    t = tf.diag_part(square_input)
项目:opt-mmd    作者:dougalsutherland    | 项目源码 | 文件源码
def _mmd2_and_variance(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):
    """Compute an MMD^2 estimate and an estimate of its variance.

    Relies on the helpers `sq_sum` and `dot`, which are defined outside
    this function.

    Args:
        K_XX: square kernel matrix with a statically known first dimension.
        K_XY: cross kernel matrix.
        K_YY: kernel matrix, assumed to have the same shape as K_XX.
        const_diagonal: if not False, the constant value of the diagonals
            of K_XX and K_YY, used instead of reading the diagonals.
        biased: selects which of the two `mmd2` formulas below is used.

    Returns:
        Tuple (mmd2, var_est).
    """
    m = tf.cast(K_XX.get_shape()[0], tf.float32)  # Assumes X, Y are same shape

    ### Get the various sums of kernels that we'll use
    # Kts drop the diagonal, but we don't need to compute them explicitly
    if const_diagonal is not False:
        const_diagonal = tf.cast(const_diagonal, tf.float32)
        diag_X = diag_Y = const_diagonal
        sum_diag_X = sum_diag_Y = m * const_diagonal
        sum_diag2_X = sum_diag2_Y = m * const_diagonal**2
    else:
        diag_X = tf.diag_part(K_XX)
        diag_Y = tf.diag_part(K_YY)

        sum_diag_X = tf.reduce_sum(diag_X)
        sum_diag_Y = tf.reduce_sum(diag_Y)

        sum_diag2_X = sq_sum(diag_X)
        sum_diag2_Y = sq_sum(diag_Y)

    # Row sums with the diagonal entry removed.
    Kt_XX_sums = tf.reduce_sum(K_XX, 1) - diag_X
    Kt_YY_sums = tf.reduce_sum(K_YY, 1) - diag_Y
    K_XY_sums_0 = tf.reduce_sum(K_XY, 0)
    K_XY_sums_1 = tf.reduce_sum(K_XY, 1)

    Kt_XX_sum = tf.reduce_sum(Kt_XX_sums)
    Kt_YY_sum = tf.reduce_sum(Kt_YY_sums)
    K_XY_sum = tf.reduce_sum(K_XY_sums_0)

    Kt_XX_2_sum = sq_sum(K_XX) - sum_diag2_X
    Kt_YY_2_sum = sq_sum(K_YY) - sum_diag2_Y
    K_XY_2_sum  = sq_sum(K_XY)

    if biased:
        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * m)
              + (Kt_YY_sum + sum_diag_Y) / (m * m)
              - 2 * K_XY_sum / (m * m))
    else:
        # NOTE(review): an unbiased U-statistic normally excludes the
        # diagonal (Kt_XX_sum / (m * (m-1))); this branch adds sum_diag
        # back in. Kept as in the source -- confirm whether intended.
        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * (m-1))
              + (Kt_YY_sum + sum_diag_Y) / (m * (m-1))
              - 2 * K_XY_sum / (m * m))

    var_est = (
          2 / (m**2 * (m-1)**2) * (
              2 * sq_sum(Kt_XX_sums) - Kt_XX_2_sum
            + 2 * sq_sum(Kt_YY_sums) - Kt_YY_2_sum)
        - (4*m-6) / (m**3 * (m-1)**3) * (Kt_XX_sum**2 + Kt_YY_sum**2)
        + 4*(m-2) / (m**3 * (m-1)**2) * (
              sq_sum(K_XY_sums_1) + sq_sum(K_XY_sums_0))
        - 4 * (m-3) / (m**3 * (m-1)**2) * K_XY_2_sum
        - (8*m - 12) / (m**5 * (m-1)) * K_XY_sum**2
        + 8 / (m**3 * (m-1)) * (
              1/m * (Kt_XX_sum + Kt_YY_sum) * K_XY_sum
            - dot(Kt_XX_sums, K_XY_sums_1)
            - dot(Kt_YY_sums, K_XY_sums_0))
    )

    return mmd2, var_est
项目:lsdc    作者:febert    | 项目源码 | 文件源码
def _define_maximization_operation(self, num_batches):
    """Maximization operations.

    Builds `self._alpha_op`, `self._means_op` and the list
    `self._train_ops`. Each parameter group is only updated when its key
    ('w', 'm' or 'c') is in `self._params`; otherwise a no-op is used.

    Args:
        num_batches: divisor applied to the accumulated per-component
            point counts before the mixture weights are normalized.
    """
    # TODO(xavigonzalvo): some of these operations could be moved to C++.
    # Compute the effective number of data points assigned to component k.
    with tf.control_dependencies(self._w):
        points_in_k = tf.squeeze(tf.add_n(self._points_in_k), squeeze_dims=[0])
        # Update alpha.
        if 'w' in self._params:
            final_points_in_k = points_in_k / num_batches
            num_examples = tf.to_float(tf.reduce_sum(final_points_in_k))
            self._alpha_op = self._alpha.assign(
                final_points_in_k / (num_examples + MEPS))
        else:
            self._alpha_op = tf.no_op()
        self._train_ops = [self._alpha_op]

        # Update means.
        points_in_k_expanded = tf.reshape(points_in_k,
                                          [self._num_classes, 1, 1])
        if 'm' in self._params:
            self._means_op = self._means.assign(
                tf.div(tf.add_n(self._w_mul_x), points_in_k_expanded + MEPS))
        else:
            self._means_op = tf.no_op()
        # means are (num_classes x 1 x dims)

        # Update covariances.
        with tf.control_dependencies([self._means_op]):
            b = tf.add_n(self._w_mul_x2) / (points_in_k_expanded + MEPS)
            new_covs = []
            for k in range(self._num_classes):
                mean = self._means.ref()[k, :, :]
                square_mean = tf.matmul(mean, mean, transpose_a=True)
                new_cov = b[k, :, :] - square_mean + self._min_var
                if self._covariance_type == FULL_COVARIANCE:
                    new_covs.append(tf.expand_dims(new_cov, 0))
                elif self._covariance_type == DIAG_COVARIANCE:
                    new_covs.append(tf.expand_dims(tf.diag_part(new_cov), 0))
            # Pre-1.0 tf.concat argument order: (axis, values).
            new_covs = tf.concat(0, new_covs)
            if 'c' in self._params:
                # Train operations don't need to take care of the means
                # because covariances already depend on it.
                with tf.control_dependencies([self._means_op, new_covs]):
                    # NOTE(review): the opening of this statement was lost
                    # in the source; appending to _train_ops matches the
                    # comment above -- confirm.
                    self._train_ops.append(
                        tf.assign(self._covs, new_covs, validate_shape=False))
项目:lsdc    作者:febert    | 项目源码 | 文件源码
def _define_maximization_operation(self, num_batches):
    """Maximization operations.

    Creates `self._alpha_op`, `self._means_op` and `self._train_ops`.
    Mixture weights, means and covariances are updated only when 'w', 'm'
    or 'c' respectively is in `self._params`; otherwise the corresponding
    op is a no-op.

    Args:
        num_batches: value the accumulated per-component point counts are
            divided by before normalizing the mixture weights.
    """
    # TODO(xavigonzalvo): some of these operations could be moved to C++.
    # Compute the effective number of data points assigned to component k.
    with tf.control_dependencies(self._w):
        points_in_k = tf.squeeze(tf.add_n(self._points_in_k), squeeze_dims=[0])
        # Update alpha.
        if 'w' in self._params:
            final_points_in_k = points_in_k / num_batches
            num_examples = tf.to_float(tf.reduce_sum(final_points_in_k))
            self._alpha_op = self._alpha.assign(
                final_points_in_k / (num_examples + MEPS))
        else:
            self._alpha_op = tf.no_op()
        self._train_ops = [self._alpha_op]

        # Update means.
        points_in_k_expanded = tf.reshape(points_in_k,
                                          [self._num_classes, 1, 1])
        if 'm' in self._params:
            self._means_op = self._means.assign(
                tf.div(tf.add_n(self._w_mul_x), points_in_k_expanded + MEPS))
        else:
            self._means_op = tf.no_op()
        # means are (num_classes x 1 x dims)

        # Update covariances.
        with tf.control_dependencies([self._means_op]):
            b = tf.add_n(self._w_mul_x2) / (points_in_k_expanded + MEPS)
            new_covs = []
            for k in range(self._num_classes):
                mean = self._means.value()[k, :, :]
                square_mean = tf.matmul(mean, mean, transpose_a=True)
                new_cov = b[k, :, :] - square_mean + self._min_var
                if self._covariance_type == FULL_COVARIANCE:
                    new_covs.append(tf.expand_dims(new_cov, 0))
                elif self._covariance_type == DIAG_COVARIANCE:
                    new_covs.append(tf.expand_dims(tf.diag_part(new_cov), 0))
            # Pre-1.0 tf.concat argument order: (axis, values).
            new_covs = tf.concat(0, new_covs)
            if 'c' in self._params:
                # Train operations don't need to take care of the means
                # because covariances already depend on it.
                with tf.control_dependencies([self._means_op, new_covs]):
                    # NOTE(review): the start of this statement is missing
                    # from the source; registering the assign in
                    # _train_ops is the presumed intent -- confirm.
                    self._train_ops.append(
                        tf.assign(self._covs, new_covs, validate_shape=False))
项目:ParametricGP-in-Python    作者:maziarraissi    | 项目源码 | 文件源码
def likelihood(self, hyp, X_batch, y_batch, monitor=False):
    """Build the op that updates the state and takes one optimizer step.

    Args:
        hyp: hyper-parameter tensor; the last entry is the log of the
            noise term, the rest is passed to `kernel_tf`.
        X_batch: batch inputs.
        y_batch: batch targets.
        monitor: when true, the assignments to `self.m` and `self.S` are
            left out of the returned op.

    Returns:
        A grouped op running the optimizer step on the NLML plus the
        assignments to `self.nlml`, `self.K_u_inv` and, unless `monitor`
        is set, `self.m` and `self.S`.
    """
    M = self.M

    Z = self.Z

    m = self.m
    S = self.S

    jitter = self.jitter
    jitter_cov = self.jitter_cov

    N = tf.shape(X_batch)[0]

    logsigma_n = hyp[-1]
    sigma_n = tf.exp(logsigma_n)

    # Compute K_u_inv
    K_u = kernel_tf(Z, Z, hyp[:-1])
    L = tf.cholesky(K_u + np.eye(M)*jitter_cov)
    # Inverse via two triangular solves against the Cholesky factor.
    K_u_inv = tf.matrix_triangular_solve(tf.transpose(L), tf.matrix_triangular_solve(L, np.eye(M), lower=True), lower=False)

    K_u_inv_op = self.K_u_inv.assign(K_u_inv)

    # Compute mu
    psi = kernel_tf(Z, X_batch, hyp[:-1])
    K_u_inv_m = tf.matmul(K_u_inv, m)
    MU = tf.matmul(tf.transpose(psi), K_u_inv_m)

    # Compute cov
    Alpha = tf.matmul(K_u_inv, psi)
    COV = kernel_tf(X_batch, X_batch, hyp[:-1]) - tf.matmul(tf.transpose(psi), tf.matmul(K_u_inv,psi)) + \
            tf.matmul(tf.transpose(Alpha), tf.matmul(S,Alpha))

    # Compute COV_inv
    LL = tf.cholesky(COV  + tf.eye(N, dtype=tf.float64)*sigma_n + tf.eye(N, dtype=tf.float64)*jitter)
    COV_inv = tf.matrix_triangular_solve(tf.transpose(LL), tf.matrix_triangular_solve(LL, tf.eye(N, dtype=tf.float64), lower=True), lower=False)

    # Compute cov(Z, X)
    cov_ZX = tf.matmul(S,Alpha)

    # Update m and S
    alpha = tf.matmul(COV_inv, tf.transpose(cov_ZX))
    m_new = m + tf.matmul(cov_ZX, tf.matmul(COV_inv, y_batch-MU))
    S_new = S - tf.matmul(cov_ZX, alpha)

    # The state assignments only exist when not monitoring; collecting
    # them in a list avoids referencing undefined names below.
    state_ops = []
    if not monitor:
        state_ops = [self.m.assign(m_new), self.S.assign(S_new)]

    # Compute NLML
    K_u_inv_m = tf.matmul(K_u_inv, m_new)

    NLML = 0.5*tf.matmul(tf.transpose(m_new), K_u_inv_m) + tf.reduce_sum(tf.log(tf.diag_part(L))) + 0.5*np.log(2.*np.pi)*tf.cast(M, tf.float64)

    train = self.optimizer.minimize(NLML)

    nlml_op = self.nlml.assign(NLML[0,0])

    # NOTE(review): the callee name was stripped from the source return
    # statement; tf.group is the presumed original -- confirm.
    return tf.group(*([train] + state_ops + [nlml_op, K_u_inv_op]))
项目:MGP-RNN    作者:jfutoma    | 项目源码 | 文件源码
def block_Lanczos(Sigma_func,B_,n_mc_smps):
    """
    block Lanczos method to approx Sigma^1/2 * B, with B matrix of N(0,1)'s.
    Used to generate multiple approximate large normal draws.

    Args:
        Sigma_func: callable applying Sigma to a matrix of column vectors.
        B_: matrix whose columns are the draws; its first dimension is n.
        n_mc_smps: number of samples (columns of B_); must be a Python
            int because it drives Python `range` loops.

    Returns:
        Tensor with the approximate Sigma^1/2 * B_.
    """
    n = tf.shape(B_)[0] #dim of the multivariate normal
    s = n_mc_smps #number of samples to draw
    k = tf.div(n,500) + 3 #number of Lanczos iterations

    betas = tf.zeros([1,s])
    alphas = tf.zeros([0,s])
    D = tf.zeros([s,n,1])
    B_norms = tf.norm(B_,axis=0)
    D = tf.concat([D,tf.expand_dims(tf.transpose(B_/B_norms),2)],2)

    def cond(j,alphas,betas,D):
        return j < k+1

    #TODO: use block-CG in place of Sigma
    def body(j,alphas,betas,D):
        d_j = tf.squeeze(tf.slice(D,[0,0,j],[-1,-1,1]))
        # NOTE(review): the second factor was truncated in the source;
        # reconstructed as the previous Lanczos vector D[:, :, j-1] per
        # the three-term recurrence -- confirm.
        d = Sigma_func(tf.transpose(d_j)) - (tf.slice(betas,[j-1,0],[1,-1])*
                tf.transpose(tf.squeeze(tf.slice(D,[0,0,j-1],[-1,-1,1]))))
        alphas = tf.concat([alphas,[tf.diag_part(tf.matmul(d_j,d))]],0)
        d = d - tf.slice(alphas,[j-1,0],[1,-1])*tf.transpose(d_j)
        betas = tf.concat([betas,[tf.norm(d,axis=0)]],0)
        D = tf.concat([D,tf.expand_dims(tf.transpose(d/tf.slice(betas,[j,0],[1,-1])),2)],2)
        return j+1,alphas,betas,D

    j = tf.constant(1)
    # alphas, betas and D grow every iteration, so their shapes must be
    # declared as variable.
    # NOTE(review): the shape_invariants argument was missing from the
    # source and is reconstructed here -- confirm.
    j,alphas,betas,D = tf.while_loop(cond,body,loop_vars=[j,alphas,betas,D],
        shape_invariants=[j.get_shape(),tf.TensorShape([None,None]),
                          tf.TensorShape([None,None]),tf.TensorShape([None,None,None])])

    D_ = tf.slice(D,[0,0,1],[-1,-1,k])

    ##TODO: replace loop
    H = tf.zeros([0,k,k])

    for ss in range(s):
        this_beta = tf.diag(tf.squeeze(tf.slice(betas,[1,ss],[k-1,1])))
        #build out tridiagonal H: alphas_1:k on main, betas_2:k on off
        # NOTE(review): the last term was truncated in the source; the
        # mirrored pad makes H symmetric -- confirm.
        this_H = (tf.diag(tf.squeeze(tf.slice(alphas,[0,ss],[-1,1]))) +
                  tf.pad(this_beta,[[1,0],[0,1]]) +
                  tf.pad(this_beta,[[0,1],[1,0]]))
        H = tf.concat([H,tf.expand_dims(this_H,0)],0)

    E,V = tf.self_adjoint_eig(H)
    E_sqrt = tf.zeros([0,k,k])
    #TODO: replace loop
    for ss in range(s):
        #ensure positive definite
        E_sqrt = tf.concat([E_sqrt,tf.expand_dims(tf.diag(tf.squeeze(tf.sqrt(tf.maximum(tf.slice(E,[ss,0],[1,-1]),1e-6)))),0)],0)
    sq_H = tf.matmul(V,tf.matmul(E_sqrt,tf.transpose(V,perm=[0,2,1])))

    e1 = tf.expand_dims(tf.transpose(tf.tile(tf.slice(tf.eye(k),[0,0],[-1,1]),[1,s])),2)
    out = B_norms*tf.transpose(tf.squeeze(tf.matmul(D_,tf.matmul(sq_H,e1))))
    return out
项目:tensorflow-DDT    作者:wangchao66    | 项目源码 | 文件源码
def _mmd2_and_variance(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):
    """Return an MMD^2 estimate together with an estimate of its variance.

    Uses the helpers `sq_sum` and `dot`, which are defined outside this
    function.

    Args:
        K_XX: square kernel matrix; its first dimension must be static.
        K_XY: cross kernel matrix.
        K_YY: kernel matrix, assumed to have the same shape as K_XX.
        const_diagonal: if not False, the constant diagonal value of K_XX
            and K_YY, used in place of the actual diagonals.
        biased: chooses between the two `mmd2` formulas below.

    Returns:
        Tuple (mmd2, var_est).
    """
    m = tf.cast(K_XX.get_shape()[0], tf.float32)  # Assumes X, Y are same shape

    ### Get the various sums of kernels that we'll use
    # Kts drop the diagonal, but we don't need to compute them explicitly
    if const_diagonal is not False:
        const_diagonal = tf.cast(const_diagonal, tf.float32)
        diag_X = diag_Y = const_diagonal
        sum_diag_X = sum_diag_Y = m * const_diagonal
        sum_diag2_X = sum_diag2_Y = m * const_diagonal**2
    else:
        diag_X = tf.diag_part(K_XX)
        diag_Y = tf.diag_part(K_YY)

        sum_diag_X = tf.reduce_sum(diag_X)
        sum_diag_Y = tf.reduce_sum(diag_Y)

        sum_diag2_X = sq_sum(diag_X)
        sum_diag2_Y = sq_sum(diag_Y)

    # Row sums of the kernel matrices without their diagonal entries.
    Kt_XX_sums = tf.reduce_sum(K_XX, 1) - diag_X
    Kt_YY_sums = tf.reduce_sum(K_YY, 1) - diag_Y
    K_XY_sums_0 = tf.reduce_sum(K_XY, 0)
    K_XY_sums_1 = tf.reduce_sum(K_XY, 1)

    Kt_XX_sum = tf.reduce_sum(Kt_XX_sums)
    Kt_YY_sum = tf.reduce_sum(Kt_YY_sums)
    K_XY_sum = tf.reduce_sum(K_XY_sums_0)

    Kt_XX_2_sum = sq_sum(K_XX) - sum_diag2_X
    Kt_YY_2_sum = sq_sum(K_YY) - sum_diag2_Y
    K_XY_2_sum  = sq_sum(K_XY)

    if biased:
        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * m)
              + (Kt_YY_sum + sum_diag_Y) / (m * m)
              - 2 * K_XY_sum / (m * m))
    else:
        # NOTE(review): the usual unbiased estimator drops the diagonal
        # (Kt_XX_sum / (m * (m-1))); here sum_diag is added back. Left as
        # in the source -- confirm whether this is intended.
        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * (m-1))
              + (Kt_YY_sum + sum_diag_Y) / (m * (m-1))
              - 2 * K_XY_sum / (m * m))

    var_est = (
          2 / (m**2 * (m-1)**2) * (
              2 * sq_sum(Kt_XX_sums) - Kt_XX_2_sum
            + 2 * sq_sum(Kt_YY_sums) - Kt_YY_2_sum)
        - (4*m-6) / (m**3 * (m-1)**3) * (Kt_XX_sum**2 + Kt_YY_sum**2)
        + 4*(m-2) / (m**3 * (m-1)**2) * (
              sq_sum(K_XY_sums_1) + sq_sum(K_XY_sums_0))
        - 4 * (m-3) / (m**3 * (m-1)**2) * K_XY_2_sum
        - (8*m - 12) / (m**5 * (m-1)) * K_XY_sum**2
        + 8 / (m**3 * (m-1)) * (
              1/m * (Kt_XX_sum + Kt_YY_sum) * K_XY_sum
            - dot(Kt_XX_sums, K_XY_sums_1)
            - dot(Kt_YY_sums, K_XY_sums_0))
    )

    return mmd2, var_est