def ae(x):
    """Forward pass of the 8-layer fully connected autoencoder.

    The activation is selected by the module-level ``nonlinearity_name`` and
    the layer parameters are read from the module-level dicts ``W`` and ``b``
    (string keys '1' .. '8').  Layers 1-7 apply the activation; layer 8 is
    linear.

    Args:
        x: input tensor, used as the left operand of ``tf.matmul(x, W['1'])``.

    Returns:
        The output tensor of the final linear layer.

    Raises:
        ValueError: if ``nonlinearity_name`` is not one of 'relu', 'elu',
            'gelu' or 'silu'.  (Previously this surfaced later as an
            UnboundLocalError on ``f``.)
    """
    if nonlinearity_name == 'relu':
        f = tf.nn.relu
    elif nonlinearity_name == 'elu':
        f = tf.nn.elu
    elif nonlinearity_name == 'gelu':
        # Exact form, kept for reference:
        # def gelu(x):
        #     return tf.mul(x, tf.erfc(-x / tf.sqrt(2.)) / 2.)
        # f = gelu
        def gelu_fast(_x):
            # tanh-based approximation of the exact form above
            return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (_x + 0.044715 * tf.pow(_x, 3))))
        f = gelu_fast
    elif nonlinearity_name == 'silu':
        def silu(_x):
            return _x * tf.sigmoid(_x)
        f = silu
    # elif nonlinearity_name == 'soi':
    #     def soi_map(x):
    #         u = tf.random_uniform(tf.shape(x))
    #         mask = tf.to_float(tf.less(u, (1 + tf.erf(x / tf.sqrt(2.))) / 2.))
    #         return tf.cond(is_training, lambda: tf.mul(mask, x),
    #                        lambda: tf.mul(x, tf.erfc(-x / tf.sqrt(2.)) / 2.))
    else:
        raise ValueError(
            "unsupported nonlinearity_name: %r "
            "(expected 'relu', 'elu', 'gelu' or 'silu')" % (nonlinearity_name,))

    # Seven hidden layers share the same affine-then-activation pattern.
    h = x
    for layer in '1234567':
        h = f(tf.matmul(h, W[layer]) + b[layer])
    # The output layer has no activation.
    return tf.matmul(h, W['8']) + b['8']
def _smooth_l1_loss(self, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights, sigma=1.0, dim=[1]):
    """Smooth-L1 box regression loss.

    With d = bbox_inside_weights * (bbox_pred - bbox_targets), each element
    contributes

        0.5 * sigma^2 * d^2      if |d| < 1 / sigma^2
        |d| - 0.5 / sigma^2      otherwise

    scaled by ``bbox_outside_weights``.  The element losses are summed over
    the axes in ``dim`` and the result is averaged.

    Args:
        bbox_pred: predicted box deltas.
        bbox_targets: regression targets, same shape as ``bbox_pred``.
        bbox_inside_weights: element-wise weights applied to the difference.
        bbox_outside_weights: element-wise weights applied to the loss.
        sigma: controls where the quadratic region ends (1 / sigma^2).
        dim: axes to sum over before averaging.  The default list is only
            read, never mutated.

    Returns:
        Scalar loss tensor.
    """
    sigma_2 = sigma ** 2
    box_diff = bbox_pred - bbox_targets
    in_box_diff = bbox_inside_weights * box_diff
    abs_in_box_diff = tf.abs(in_box_diff)
    # 1.0 inside the quadratic region, 0.0 in the linear region; treated as a
    # constant so no gradient flows through the branch selection.
    smoothL1_sign = tf.stop_gradient(tf.to_float(tf.less(abs_in_box_diff, 1. / sigma_2)))
    in_loss_box = tf.pow(in_box_diff, 2) * (sigma_2 / 2.) * smoothL1_sign \
                  + (abs_in_box_diff - (0.5 / sigma_2)) * (1. - smoothL1_sign)
    out_loss_box = bbox_outside_weights * in_loss_box
    # The arguments of this call were missing from the source; restored as the
    # weighted loss summed over `dim`, which is the only use of both names.
    loss_box = tf.reduce_mean(tf.reduce_sum(
        out_loss_box,
        axis=dim
    ))
    return loss_box
def _anneal_weight(init_val, final_val, anneal_type, global_step, anneal_steps, hold_for=0., steps_div=1.,
                   dtype=tf.float32):
    """Anneal a scalar weight from ``init_val`` towards ``final_val``.

    NOTE(review): the tail of the signature was missing from the source.  The
    body needs a ``dtype`` name, so it is restored as a trailing keyword
    argument; the default ``tf.float32`` is an assumption - confirm against
    the callers.

    Args:
        init_val: value at the start of annealing.
        final_val: value annealing converges to.
        anneal_type: 'exp' for exponential decay or 'linear'.
        global_step: current training step.
        anneal_steps: number of steps over which to anneal.
        hold_for: number of initial steps during which the value is held at
            ``init_val`` (the step counter is shifted by this amount).
        steps_div: ``decay_steps`` passed to ``tf.train.exponential_decay``.
        dtype: dtype every numeric argument is cast to.

    Returns:
        Tensor ``max(final_val, annealed_value)``.  Because of the maximum the
        result never drops below ``final_val``; presumably callers anneal
        downwards (init_val >= final_val) - TODO confirm.

    Raises:
        NotImplementedError: for any other ``anneal_type``.
    """
    val, final, step, hold_for, anneal_steps, steps_div = (
        tf.cast(i, dtype) for i in
        (init_val, final_val, global_step, hold_for, anneal_steps, steps_div))
    # Annealing only starts once `hold_for` steps have elapsed.
    step = tf.maximum(step - hold_for, 0.)

    if anneal_type == 'exp':
        # Chosen so that val * decay_rate ** (anneal_steps / steps_div) == final.
        decay_rate = tf.pow(final / val, steps_div / anneal_steps)
        val = tf.train.exponential_decay(val, step, steps_div, decay_rate)
    elif anneal_type == 'linear':
        val = final + (val - final) * (1. - step / anneal_steps)
    else:
        # The `else:` was missing, which made the 'linear' branch always raise.
        raise NotImplementedError

    anneal_weight = tf.maximum(final, val)
    return anneal_weight
def _embed_sentences(self):
    """Tensorflow implementation of Simple but Tough-to-Beat Baseline.

    Builds a weighted average of word vectors per sentence and removes the
    projection onto a common component.

    Side effect: sets ``self.tf_ccx`` to the first right singular vector of
    the centred batch of sentence embeddings (gradient stopped).

    Returns:
        A pair ``(embeddings, sv)`` where ``embeddings`` is the sentence
        matrix after common-component removal and ``sv`` is a dict with the
        keys 'embeddings', 'a', 'p' and 'ccx'.
    """
    # Get word features
    word_embeddings = self._get_embedding()
    word_feats = tf.nn.embedding_lookup(word_embeddings, self.input)
    # Get marginal estimates and scaling term
    batch_size = tf.shape(word_feats)[0]
    a = tf.pow(10.0, self._get_a_exp())
    p = tf.constant(self.marginals, dtype=tf.float32, name='marginals')
    # Per-token weight a / (a + p(w)); the closing parenthesis of this call
    # was missing in the source.
    q = tf.reshape(
        a / (a + tf.nn.embedding_lookup(p, self.input)),
        (batch_size, self.mx_len, 1)
    )
    # Compute initial sentence embedding
    z = tf.reshape(1.0 / tf.to_float(self.input_lengths), (batch_size, 1))
    S = z * tf.reduce_sum(q * word_feats, axis=1)
    # Compute common component
    S_centered = S - tf.reduce_mean(S, axis=0)
    _, _, V = tf.svd(S_centered, full_matrices=False, compute_uv=True)
    self.tf_ccx = tf.stop_gradient(tf.gather(tf.transpose(V), 0))
    # Common component removal: ccx is (1, d), so ccx * ccx^T broadcasts to
    # the (d, d) outer product used as the projection matrix.
    ccx = tf.reshape(self._get_common_component(), (1, self.d))
    sv = {'embeddings': word_embeddings, 'a': a, 'p': p, 'ccx': ccx}
    return S - tf.matmul(S, ccx * tf.transpose(ccx)), sv
def adam_updates(params, cost_or_grads, lr=0.001, mom1=0.9, mom2=0.999):
    """Build the Adam update op for ``params``.

    Args:
        params: list of variables to optimise.
        cost_or_grads: either a list of gradients (one per entry of
            ``params``) or a cost tensor, in which case the gradients are
            obtained with ``tf.gradients``.
        lr: learning rate.
        mom1: decay of the first-moment average; ``mom1 <= 0`` disables the
            first moment and uses the raw gradient.
        mom2: decay of the second-moment average.

    Returns:
        One grouped op that updates every moment accumulator, every parameter
        and the step counter.
    """
    if isinstance(cost_or_grads, list):
        grads = cost_or_grads
    else:
        grads = tf.gradients(cost_or_grads, params)

    updates = []
    # The name must be given by keyword: the second positional argument of
    # tf.Variable is `trainable`.  Optimizer state is not trainable.
    t = tf.Variable(1., trainable=False, name='adam_t')
    for p, g in zip(params, grads):
        # p.op.name, not p.name: the latter carries an output suffix (':0')
        # that is not allowed in a variable name.
        mg = tf.Variable(tf.zeros(p.get_shape()), trainable=False,
                         name=p.op.name + '_adam_mg')
        if mom1 > 0:
            v = tf.Variable(tf.zeros(p.get_shape()), trainable=False,
                            name=p.op.name + '_adam_v')
            v_t = mom1 * v + (1. - mom1) * g
            v_hat = v_t / (1. - tf.pow(mom1, t))  # bias correction
            updates.append(v.assign(v_t))
        else:
            v_hat = g
        mg_t = mom2 * mg + (1. - mom2) * tf.square(g)
        mg_hat = mg_t / (1. - tf.pow(mom2, t))  # bias correction
        g_t = v_hat / tf.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append(mg.assign(mg_t))
        updates.append(p.assign(p_t))
    updates.append(t.assign_add(1))
    return tf.group(*updates)
def tune(self, acceptance_rate, fresh_start):
    """Return the step size to use, adapting it by dual averaging if enabled.

    When ``self.adapt_step_size`` is true the running statistics
    (``self.step``, ``self.h_bar``, ``self.log_epsilon_bar``) are updated and
    exp(log_epsilon) of the current iteration is returned; otherwise the
    averaged step size exp(self.log_epsilon_bar) is returned unchanged.

    NOTE(review): the source had lost several tokens.  ``self.delta`` (target
    acceptance rate) and ``self.mu`` (shrinkage point of log epsilon) are
    reconstructed from the standard dual-averaging update; confirm that the
    class defines attributes with these names.

    Args:
        acceptance_rate: acceptance rate observed in the last transition.
        fresh_start: multiplies the previous statistics by (1 - fresh_start),
            so a value of 1 restarts the averages - presumably a 0/1 float
            tensor, TODO confirm.

    Returns:
        Scalar step-size tensor.
    """
    def adapt_stepsize():
        new_step = tf.assign(self.step, (1 - fresh_start) * self.step + 1)
        rate1 = tf.div(1.0, new_step + self.t0)
        # Running average of (target - observed) acceptance rate.
        new_h_bar = tf.assign(
            self.h_bar, (1 - fresh_start) * (1 - rate1) * self.h_bar +
            rate1 * (self.delta - acceptance_rate))
        log_epsilon = self.mu - tf.sqrt(new_step) / self.gamma * new_h_bar
        rate = tf.pow(new_step, -self.kappa)
        new_log_epsilon_bar = tf.assign(
            self.log_epsilon_bar,
            rate * log_epsilon + (1 - fresh_start) * (1 - rate) *
            self.log_epsilon_bar)
        # Force the averaged value to be written before the result is read.
        with tf.control_dependencies([new_log_epsilon_bar]):
            new_log_epsilon = tf.identity(log_epsilon)

        return tf.exp(new_log_epsilon)

    c = tf.cond(self.adapt_step_size,
                adapt_stepsize,
                lambda: tf.exp(self.log_epsilon_bar))

    return c
def update(self, x):
    """Update the exponentially weighted running mean and variance with ``x``.

    Args:
        x: list of tensors, one per entry of ``self.mean`` / ``self.var``,
            laid out as (chain_dims data_dims).

    Returns:
        List of assign ops, one per entry of ``self.var``.  Each depends on
        the corresponding mean update and on the step-counter increment.
    """
    # x: (chain_dims data_dims)
    new_t = tf.assign(self.t, self.t + 1)
    # Bias-corrected weight of the newest sample.
    weight = (1 - self.decay) / (1 - tf.pow(self.decay, new_t))
    # incr: (chain_dims data_dims)
    incr = [weight * (q - mean) for q, mean in zip(x, self.mean)]
    # mean: (1,...,1 data_dims)
    update_mean = [mean.assign_add(
        tf.reduce_mean(i, axis=self.chain_axes, keep_dims=True))
        for mean, i in zip(self.mean, incr)]
    # var: (1,...,1 data_dims)
    # keep_dims=True (lost in the source) mirrors the mean update above so the
    # reduced term keeps the (1,...,1 data_dims) layout of `var`.
    new_var = [
        (1 - weight) * var +
        tf.reduce_mean(i * (q - mean), axis=self.chain_axes,
                       keep_dims=True)
        for var, i, q, mean in zip(self.var, incr, x, update_mean)]

    update_var = [tf.assign(var, n_var)
                  for var, n_var in zip(self.var, new_var)]
    return update_var
def __init__(self, n_features, lenscale=1.0, p=1, variational=False,
             lenscale_posterior=None):
    """Create an instance of an arc cosine kernel layer.

    NOTE(review): the end of the signature and of the ``RBFVariational`` call
    were missing from the source.  ``lenscale_posterior`` is a reconstruction;
    confirm the parameter name against ``RBFVariational``.

    Args:
        n_features: number of random features, forwarded to the base class.
        lenscale: length scale of the RBF kernel supplying the random weights.
        p: non-negative integer order of the arc cosine kernel.
        variational: use ``RBFVariational`` instead of ``RBF``.
        lenscale_posterior: forwarded to ``RBFVariational`` when
            ``variational`` is true.
    """
    # Setup random weights
    if variational:
        kern = RBFVariational(lenscale=lenscale,
                              lenscale_posterior=lenscale_posterior)
    else:
        kern = RBF(lenscale=lenscale)
    super().__init__(n_features=n_features, kernel=kern)

    # Kernel order
    assert isinstance(p, int) and p >= 0
    if p == 0:
        self.pfunc = tf.sign
    elif p == 1:
        self.pfunc = lambda x: x
    else:
        # The missing `else:` made this assignment override both cases above.
        self.pfunc = lambda x: tf.pow(x, p)
# Project: probabilistic_line_search | Author: ProbabilisticNumerics | project source | file source
def _MatMulGradMom(op, W, out_grad, batch_size, mom=2):
  """Computes gradient moment for a weight matrix through a MatMul operation.

  Assumes ``Z=tf.matmul(A, W)``, where ``W`` is a d1xd2 weight matrix, ``A``
  are the nxd1 activations of the previous layer (n being the batch size).
  ``out_grad`` is the gradient w.r.t. ``Z``, as computed by ``tf.gradients()``.
  No transposes in the MatMul operation allowed.

  Inputs:
      :op: The MatMul operation
      :W: The weight matrix (the tensor, not the variable)
      :out_grad: The tensor of gradient w.r.t. to the output of the op
      :batch_size: Batch size n (constant integer or scalar int tf.Tensor)
      :mom: Integer moment desired (defaults to 2)

  Returns:
      ``batch_size * matmul(A**mom, out_grad**mom, transpose_a=True)``, a
      tensor with the shape of ``W``."""

  assert op.type == "MatMul"
  t_a, t_b = op.get_attr("transpose_a"), op.get_attr("transpose_b")
  assert W is op.inputs[1] and not t_a and not t_b

  A = op.inputs[0]
  out_grad_pow = tf.pow(out_grad, mom)
  A_pow = tf.pow(A, mom)
  moment = tf.matmul(A_pow, out_grad_pow, transpose_a=True)
  # tf.mul no longer exists in TensorFlow >= 1.0.  The cast also lets an
  # integer batch_size tensor be multiplied with the floating point moment.
  return tf.cast(batch_size, moment.dtype.base_dtype) * moment
def get_cubic_root(self):
    """Solve the cubic that determines x = sqrt(mu) and return x.

    The objective is x^2 D^2 + (1-x)^4 * C / h_min^2 with x = sqrt(mu).
    Substituting x = y + 1 gives y^3 + p*y = q with
    p = (D^2 h_min^2) / (2*C) and q = -p, which is solved with Vieta's
    substitution; there is a single real solution.

    Reads ``self._dist_to_opt_avg`` (D), ``self._h_min`` and
    ``self._grad_var`` (C).
    """
    # EPS in the numerator prevents the momentum from being exactly one when
    # the gradient is zero.
    dist_sq = (self._dist_to_opt_avg + EPS)**2
    h_min_sq = (self._h_min + EPS)**2
    p = dist_sq * h_min_sq / 2 / (self._grad_var + EPS)

    # Vieta's substitution: w^3 is a root of a quadratic in w^3.
    discriminant = p**2 + 4.0 / 27.0 * p**3
    w_cubed = (-tf.sqrt(discriminant) - p) / 2.0
    # Real cube root of a possibly negative number.
    w = tf.sign(w_cubed) * tf.pow(tf.abs(w_cubed), 1.0/3.0)

    y = w - p / 3.0 / (w + EPS)
    return y + 1
def lppool(inpOp, pnorm, kH, kW, dH, dW, padding, name):
    """Lp pooling: (sum over each window of x**pnorm) ** (1 / pnorm).

    Implemented as average pooling of the powered input, rescaled by the
    window area to turn the mean into a sum.

    Args:
        inpOp: input tensor, pooled with a [1, kH, kW, 1] window.
        pnorm: order of the norm; 2 uses the square / sqrt fast path.
        kH, kW: pooling window height and width.
        dH, dW: strides along height and width.
        padding: padding mode forwarded to ``tf.nn.avg_pool``.
        name: variable scope the ops are created in.

    Returns:
        The pooled tensor.
    """
    with tf.variable_scope(name):
        if pnorm == 2:
            pwr = tf.square(inpOp)
        else:
            pwr = tf.pow(inpOp, pnorm)

        subsamp = tf.nn.avg_pool(pwr,
                                 ksize=[1, kH, kW, 1],
                                 strides=[1, dH, dW, 1],
                                 padding=padding)
        # mean * window area == sum over the window
        subsamp_sum = subsamp * (kH * kW)

        if pnorm == 2:
            out = tf.sqrt(subsamp_sum)
        else:
            # 1.0 / pnorm: integer division would give exponent 0 on Python 2.
            out = tf.pow(subsamp_sum, 1.0 / pnorm)

    return out
def update_target_network(source_network, target_network, update_rate):
    """Build the op that moves the target network towards the source network.

    Variables are paired in the order returned by ``variables()`` on each
    network, and each target variable is updated as
    ``target -= update_rate * (target - source)``.

    Args:
        source_network: network whose variables are tracked.
        target_network: network whose variables are updated in place.
        update_rate: interpolation factor (alpha below).

    Returns:
        One grouped op performing every per-variable update.  The source never
        collected or returned the ops it built.
    """
    target_network_update = []
    for v_source, v_target in zip(source_network.variables(), target_network.variables()):
        # this is equivalent to target = (1-alpha) * target + alpha * source
        update_op = v_target.assign_sub(update_rate * (v_target - v_source))
        target_network_update.append(update_op)
    return tf.group(*target_network_update)

#    def concat_nn_input(self, input1, input2):
#        return tf.concat(1, [input1, input2])

#    def add_pow_values(self, values):
#        return self.concat_nn_input(values, 0.01 * tf.pow(values, [2 for i in range(self.action_size)]))
def loss_with_spring(self):
    """Contrastive ("spring") loss over the embedding pair self.o1 / self.o2.

    Per pair, with d2 = sum((o1 - o2)^2) over axis 1 and d = sqrt(d2 + 1e-6):

        y * d2 + (1 - y) * max(C - d, 0)^2,   C = 5.0

    where y is ``self.y_``.  Returns the mean over all pairs.
    """
    margin = 5.0
    similar = self.y_
    dissimilar = tf.subtract(1.0, self.y_, name="1-yi")  # logical "not similar"

    sq_dist = tf.reduce_sum(tf.pow(tf.subtract(self.o1, self.o2), 2), 1)
    # Small constant keeps the sqrt gradient finite at zero distance.
    dist = tf.sqrt(sq_dist+1e-6, name="eucd")
    C = tf.constant(margin, name="C")

    # Similar pairs are pulled together by the squared distance ...
    attract = tf.multiply(similar, sq_dist, name="yi_x_eucd2")
    # ... dissimilar pairs are pushed apart until they are `margin` away.
    hinge = tf.maximum(tf.subtract(C, dist), 0)
    repel = tf.multiply(dissimilar, tf.pow(hinge, 2), name="Nyi_x_C-eucd_xx_2")

    per_pair = tf.add(attract, repel, name="losses")
    return tf.reduce_mean(per_pair, name="loss")
def lppool(inpOp, pnorm, kH, kW, dH, dW, padding):
  """Lp pooling: (sum over each window of x**pnorm) ** (1 / pnorm).

  Implemented as average pooling of the powered input, rescaled by the window
  area.  Increments the module-level ``pool_counter`` on every call.

  Args:
    inpOp: input tensor, pooled with a [1, kH, kW, 1] window.
    pnorm: order of the norm; 2 uses the square / sqrt fast path.
    kH, kW: pooling window height and width.
    dH, dW: strides along height and width.
    padding: padding mode forwarded to ``tf.nn.avg_pool``.

  Returns:
    The pooled tensor.
  """
  global pool_counter
  global parameters
  # The per-call name 'pool<N>' was computed but never used; only the counter
  # side effect is kept.
  pool_counter += 1

  with tf.name_scope('lppool'):
    if pnorm == 2:
      pwr = tf.square(inpOp)
    else:
      pwr = tf.pow(inpOp, pnorm)

    subsamp = tf.nn.avg_pool(pwr,
                             ksize=[1, kH, kW, 1],
                             strides=[1, dH, dW, 1],
                             padding=padding)
    # mean * window area == sum over the window (tf.mul no longer exists in
    # TensorFlow >= 1.0, the operator works everywhere).
    subsamp_sum = subsamp * (kH * kW)

    if pnorm == 2:
      out = tf.sqrt(subsamp_sum)
    else:
      # 1.0 / pnorm: integer division would give exponent 0 on Python 2.
      out = tf.pow(subsamp_sum, 1.0 / pnorm)

  return out
def adam_updates(params, cost_or_grads, lr=0.001, mom1=0.9, mom2=0.999):
    """Build the Adam update op for ``params``.

    Args:
        params: list of variables to optimise.
        cost_or_grads: either a list of gradients (one per entry of
            ``params``) or a cost tensor, in which case the gradients are
            obtained with ``tf.gradients``.
        lr: learning rate.
        mom1: decay of the first-moment average; ``mom1 <= 0`` disables the
            first moment and uses the raw gradient.
        mom2: decay of the second-moment average.

    Returns:
        One grouped op that updates every moment accumulator, every parameter
        and the step counter.
    """
    if isinstance(cost_or_grads, list):
        grads = cost_or_grads
    else:
        grads = tf.gradients(cost_or_grads, params)

    updates = []
    # The name must be given by keyword: the second positional argument of
    # tf.Variable is `trainable`.  Optimizer state is not trainable.
    t = tf.Variable(1., trainable=False, name='adam_t')
    for p, g in zip(params, grads):
        # p.op.name, not p.name: the latter carries an output suffix (':0')
        # that is not allowed in a variable name.
        mg = tf.Variable(tf.zeros(p.get_shape()), trainable=False,
                         name=p.op.name + '_adam_mg')
        if mom1 > 0:
            v = tf.Variable(tf.zeros(p.get_shape()), trainable=False,
                            name=p.op.name + '_adam_v')
            v_t = mom1 * v + (1. - mom1) * g
            v_hat = v_t / (1. - tf.pow(mom1, t))  # bias correction
            updates.append(v.assign(v_t))
        else:
            v_hat = g
        mg_t = mom2 * mg + (1. - mom2) * tf.square(g)
        mg_hat = mg_t / (1. - tf.pow(mom2, t))  # bias correction
        g_t = v_hat / tf.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append(mg.assign(mg_t))
        updates.append(p.assign(p_t))
    updates.append(t.assign_add(1))
    return tf.group(*updates)
def weighted_loss(y_true, y_softmax_conv, weight):
    """Compute the weighted squared-error loss per pixel and average it.

    With p = channel 1 of ``y_softmax_conv``:

        loss = 0.5 * (1 - p)^2 * target * weight
             + 0.5 * p^2 * (1 - target) * weight

    Args:
        y_true: [batch_size, depth, height, width, 1]
        weight: [batch_size, depth, height, width, 1]
        y_softmax_conv: [batch_size, depth, height, width, 2]

    Returns:
        Scalar tensor, the mean loss over all pixels.
    """
    y_true = tf.to_float(tf.reshape(y_true[..., 0], [-1]))
    weight = tf.to_float(tf.reshape(weight[..., 0], [-1]))
    y_conv = tf.to_float(tf.reshape(y_softmax_conv[..., 1], [-1]))

    # 0.5 rather than `1 / 2`: the latter is integer division (== 0) on
    # Python 2 and would zero the whole loss.
    loss_pos = 0.5 * tf.pow((1 - y_conv), 2) * y_true * weight
    loss_neg = 0.5 * tf.pow(y_conv, 2) * (1 - y_true) * weight

    return tf.reduce_mean(loss_pos + loss_neg)
def apply_update(self, optimizer, grads_and_vars):
    """Apply gradients, with optional global-norm clipping and additive noise.

    Both features are driven by ``self.train_hypers``:
      * ``CustomTrainer.GRADIENT_CLIP``: clip norm for global-norm clipping.
      * ``CustomTrainer.GRADIENT_NOISE``: variance of zero-mean Gaussian noise
        added to every gradient that is not None.
      * ``CustomTrainer.GRADIENT_NOISE_DECAY``: if present, the variance is
        divided by ``(1 + global_step) ** decay``.

    Args:
        optimizer: object providing ``apply_gradients``.
        grads_and_vars: iterable of (gradient, variable) pairs.

    Returns:
        The training op; it also increments ``self.global_step``.
    """
    grads, variables = zip(*grads_and_vars)

    # Gradient clipping
    if CustomTrainer.GRADIENT_CLIP in self.train_hypers:
        grads, global_norm = clip_ops.clip_by_global_norm(
            grads, self.train_hypers[CustomTrainer.GRADIENT_CLIP])
    # Gradient noise
    if CustomTrainer.GRADIENT_NOISE in self.train_hypers:
        sigma_sqr = self.train_hypers[CustomTrainer.GRADIENT_NOISE]
        if CustomTrainer.GRADIENT_NOISE_DECAY in self.train_hypers:
            sigma_sqr = sigma_sqr / tf.pow(
                1.0 + tf.to_float(self.global_step),
                self.train_hypers[CustomTrainer.GRADIENT_NOISE_DECAY])
        grads_tmp = []
        for g in grads:
            if g is not None:
                noisy_grad = g + tf.sqrt(sigma_sqr)*tf.random_normal(tf.shape(g))
                grads_tmp.append(noisy_grad)
            else:
                # Keep the placeholder so gradients stay aligned with variables.
                grads_tmp.append(g)
        grads = grads_tmp

    train_op = optimizer.apply_gradients(zip(grads, variables), global_step=self.global_step)
    return train_op
def __init__(self, lin, lout, iniRange, graph=None):
    """Create a weight-normalised [lin, lout] weight matrix.

    The effective weight is ``w = g / ||v|| * v`` where the norm is taken
    over axis 0 of ``v``.  Sets ``self.v``, ``self.g``, ``self.pow2``,
    ``self.v_norm``, ``self.tile_div`` and ``self.w``.

    Args:
        lin: number of input units.
        lout: number of output units.
        iniRange: (low, high) bounds for the uniform initialisation of ``v``.
            NOTE(review): only used when ``graph`` is given; without a graph
            the source initialises with +/- 1/sqrt(lin).  Kept as found -
            confirm whether that asymmetry is intended.
        graph: optional graph in which the variables are created.
    """
    def build(low, high):
        # Shared construction for both branches; only the init range differs.
        self.v = tf.Variable(tf.random_uniform([lin, lout], low, high))
        self.g = tf.Variable(tf.random_uniform([lout], -1.0, 1.0))
        self.pow2 = tf.fill([lin, lout], 2.0)
        self.v_norm = tf.sqrt(tf.reduce_sum(tf.pow(self.v, self.pow2), 0))
        self.tile_div = tf.tile(tf.expand_dims(tf.div(self.g, self.v_norm), 0), [lin, 1])
        # tf.mul no longer exists in TensorFlow >= 1.0.
        self.w = self.tile_div * self.v

    if graph is not None:
        with graph.as_default():
            build(iniRange[0], iniRange[1])
    else:
        # The missing `else:` made this branch run unconditionally and
        # overwrite the variables created inside `graph`.
        bound = 1 / math.sqrt(lin)
        build(-bound, bound)
def lppool(inpOp, pnorm, kH, kW, dH, dW, padding, name):
    """Lp pooling: (sum over each window of x**pnorm) ** (1 / pnorm).

    Implemented as average pooling of the powered input, rescaled by the
    window area to turn the mean into a sum.

    Args:
        inpOp: input tensor, pooled with a [1, kH, kW, 1] window.
        pnorm: order of the norm; 2 uses the square / sqrt fast path.
        kH, kW: pooling window height and width.
        dH, dW: strides along height and width.
        padding: padding mode forwarded to ``tf.nn.avg_pool``.
        name: variable scope the ops are created in.

    Returns:
        The pooled tensor.
    """
    with tf.variable_scope(name):
        if pnorm == 2:
            pwr = tf.square(inpOp)
        else:
            pwr = tf.pow(inpOp, pnorm)

        subsamp = tf.nn.avg_pool(pwr,
                                 ksize=[1, kH, kW, 1],
                                 strides=[1, dH, dW, 1],
                                 padding=padding)
        # mean * window area == sum over the window
        subsamp_sum = subsamp * (kH * kW)

        if pnorm == 2:
            out = tf.sqrt(subsamp_sum)
        else:
            # 1.0 / pnorm: integer division would give exponent 0 on Python 2.
            out = tf.pow(subsamp_sum, 1.0 / pnorm)

    return out
def gauss(mean, stddev, ksize):
    """Use Tensorflow to compute a Gaussian Kernel.

    The density is sampled at ``ksize`` evenly spaced points on [-3, 3].

    Parameters
    ----------
    mean : float
        Mean of the Gaussian (e.g. 0.0).
    stddev : float
        Standard Deviation of the Gaussian (e.g. 1.0).
    ksize : int
        Size of kernel (e.g. 16).

    Returns
    -------
    kernel : np.ndarray
        Computed Gaussian Kernel using Tensorflow.
    """
    import math  # local import: only needed for an accurate pi

    g = tf.Graph()
    with tf.Session(graph=g):
        x = tf.linspace(-3.0, 3.0, ksize)
        # Unary minus instead of tf.neg, which TensorFlow 1.0 removed.
        exponent = -(tf.pow(x - mean, 2.0) /
                     (2.0 * tf.pow(stddev, 2.0)))
        # math.pi instead of the truncated literal 3.1415.
        z = (tf.exp(exponent) *
             (1.0 / (stddev * tf.sqrt(2.0 * math.pi))))
        return z.eval()
def gauss(mean, stddev, ksize):
    """Use Tensorflow to compute a Gaussian Kernel.

    The density is sampled at ``ksize`` evenly spaced points on [-3, 3].

    Parameters
    ----------
    mean : float
        Mean of the Gaussian (e.g. 0.0).
    stddev : float
        Standard Deviation of the Gaussian (e.g. 1.0).
    ksize : int
        Size of kernel (e.g. 16).

    Returns
    -------
    kernel : np.ndarray
        Computed Gaussian Kernel using Tensorflow.
    """
    import math  # local import: only needed for an accurate pi

    g = tf.Graph()
    with tf.Session(graph=g):
        x = tf.linspace(-3.0, 3.0, ksize)
        # Unary minus instead of tf.neg, which TensorFlow 1.0 removed.
        exponent = -(tf.pow(x - mean, 2.0) /
                     (2.0 * tf.pow(stddev, 2.0)))
        # math.pi instead of the truncated literal 3.1415.
        z = (tf.exp(exponent) *
             (1.0 / (stddev * tf.sqrt(2.0 * math.pi))))
        return z.eval()
def adam_updates(params, cost_or_grads, lr=0.001, B1=0.9, B2=0.999):
    """Build the Adam update op for ``params``.

    Args:
        params: list of variables to optimise.
        cost_or_grads: either a list of gradients (one per entry of
            ``params``) or a cost tensor, in which case the gradients are
            obtained with ``tf.gradients``.
        lr: learning rate.
        B1: decay of the first-moment average; ``B1 <= 0`` disables the first
            moment and uses the raw gradient.
        B2: decay of the second-moment average.

    Returns:
        One grouped op that updates every moment accumulator, every parameter
        and the step counter.
    """
    if isinstance(cost_or_grads, list):
        grads = cost_or_grads
    else:
        grads = tf.gradients(cost_or_grads, params)

    updates = []
    # The name must be given by keyword: the second positional argument of
    # tf.Variable is `trainable`.  Optimizer state is not trainable.
    t = tf.Variable(1., trainable=False, name='adam_t')
    for p, g in zip(params, grads):
        # p.op.name, not p.name: the latter carries an output suffix (':0')
        # that is not allowed in a variable name.
        v = tf.Variable(tf.zeros(p.get_shape()), trainable=False,
                        name=p.op.name + '_adam_v')
        if B1 > 0:
            m = tf.Variable(tf.zeros(p.get_shape()), trainable=False,
                            name=p.op.name + '_adam_m')
            m_t = B1 * m + (1. - B1) * g
            m_hat = m_t / (1. - tf.pow(B1, t))  # bias correction
            updates.append(m.assign(m_t))
        else:
            m_hat = g
        v_t = B2 * v + (1. - B2) * tf.square(g)
        v_hat = v_t / (1. - tf.pow(B2, t))  # bias correction
        g_t = m_hat / tf.sqrt(v_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append(v.assign(v_t))
        updates.append(p.assign(p_t))
    updates.append(t.assign_add(1))
    return tf.group(*updates)
def address(M0, w0, head):
    """Compute the new addressing weights of one head.

    Steps: content focusing (cosine similarity between the key and each
    memory row, scaled by the key strength and soft-maxed), interpolation
    with the previous weights, shifting, and sharpening.

    Args:
        M0: memory tensor of rank 3 (it is transposed with perm [0, 2, 1]).
        w0: previous weights, interpolated with the content weights.
        head: dict with the entries "key", "key_str", "interp", "shift" and
            "sharp".

    Returns:
        The sharpened weights, normalised to sum to one along axis 1.
    """
    # Content focusing
    # Compute cosine similarity
    key = tf.expand_dims(head["key"], 1)
    key_matches = tf.batch_matmul(key, tf.transpose(M0, [0, 2, 1]))
    # Drop only the singleton axis introduced by expand_dims above.  A bare
    # tf.squeeze would also remove the batch axis when the batch size is 1.
    key_matches = tf.squeeze(key_matches, [1])
    key_mag = tf.expand_dims(NTMCell.magnitude(head["key"], 1), 1)
    M_col_mag = NTMCell.magnitude(M0, 2)
    cosine_sim = key_matches / (key_mag * M_col_mag)
    # Compute content weights
    wc = tf.nn.softmax(head["key_str"] * cosine_sim)

    # Location focusing
    wg = head["interp"] * wc + (1 - head["interp"]) * w0
    ws = rotate.ntm_rotate(wg, head["shift"])
    ws_pow = tf.pow(ws, head["sharp"])

    w1 = ws_pow / tf.reduce_sum(ws_pow, 1, keep_dims=True)

    return w1
def build_model(self):
        """Build the loss, the clipped-gradient Adam training op and the init op.

        Reads ``self.num_class``, ``self.p_xi_h``, ``self.x_id``,
        ``self.hlogvar``, ``self.hmean`` and ``self.learning_rate``; stores
        every tensor / op it creates as an attribute on ``self``.
        """

        # Label placeholder of shape [None, num_class].
        self.input_y = tf.placeholder(tf.float32, [None,self.num_class], name="input_y") # 1*1, 1doc
        # Depth-2 one-hot of the integer-cast labels, flattened to [-1, 2].
        # NOTE(review): on_value=0 and off_value=1 are passed positionally, so
        # the encoding is inverted relative to a usual one-hot - confirm intent.
        self.one_hot = tf.reshape(tf.cast(tf.one_hot(tf.cast(self.input_y, tf.int32), 2,0,1), tf.float32), [-1,2])

        # Negative log of the entries of p_xi_h selected by x_id; the 0.0001
        # keeps the log finite.
        self.recon_loss = -tf.reduce_sum(tf.log(0.0001 + tf.gather(self.p_xi_h, self.x_id)))
        # -0.5 * sum(1 + logvar - mean^2 - exp(logvar)) over axis 1.
        self.KL = -0.5 * tf.reduce_sum(1.0 + self.hlogvar - tf.pow(self.hmean, 2)\
                  - tf.exp(self.hlogvar), reduction_indices = 1)
        # The KL term is weighted by 0.0001 relative to the reconstruction term.
        self.loss = tf.reduce_mean(0.0001 * self.KL + self.recon_loss)

        # Adam with beta1 = 0.9 (second positional argument).
        self.optimizer = tf.train.AdamOptimizer(self.learning_rate,0.9)
        self.grads_and_vars = self.optimizer.compute_gradients(self.loss)
        # Element-wise clipping of every gradient to [-5, 5].
        # NOTE(review): presumably every variable receives a gradient; a None
        # gradient would fail inside clip_by_value - verify.
        self.capped_gvs = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in self.grads_and_vars] 
        self.train_op = self.optimizer.apply_gradients(self.capped_gvs)
        #self.optimizer = tf.train.AdamOptimizer(self.learning_rate,beta1=0.9).minimize(self.loss)

        self.init = tf.initialize_all_variables()
# Project: Texture-Synthesis | Author: mohamedkeid | project source | file source
def get_total_variation(x, shape):
    """Return the smoothed total variation of the image tensor ``x``.

    For every pixel the squared differences to its neighbour along axis 1 and
    along axis 2 are added, raised to ``TOTAL_VARIATION_SMOOTHING / 2``,
    summed, and divided by the squared number of elements.

    Args:
        x: rank-4 image tensor (it is indexed as x[:, h, w, :]).
        shape: sequence with the dimensions of ``x``; ``shape[1]`` is used as
            the height and ``shape[2]`` as the width.

    Returns:
        Scalar tensor.
    """
    # `reduce` is a builtin only on Python 2; functools has it on both.
    from functools import reduce

    with tf.name_scope('get_total_variation'):
        # Get the dimensions of the variable image
        height = shape[1]
        width = shape[2]
        size = reduce(lambda a, b: a * b, shape) ** 2

        # Disjoin the variable image and evaluate the total variation
        x_cropped = x[:, :height - 1, :width - 1, :]
        left_term = tf.square(x[:, 1:, :width - 1, :] - x_cropped)
        right_term = tf.square(x[:, :height - 1, 1:, :] - x_cropped)
        smoothed_terms = tf.pow(left_term + right_term, TOTAL_VARIATION_SMOOTHING / 2.)
        return tf.reduce_sum(smoothed_terms) / size
# Project: DP_for_FaceNet | Author: guchinoma | project source | file source
    with tf.variable_scope(name):
        if pnorm == 2:
            pwr = tf.square(inpOp)
            pwr = tf.pow(inpOp, pnorm)

        subsamp = tf.nn.avg_pool(pwr,
                              ksize=[1, kH, kW, 1],
                              strides=[1, dH, dW, 1],
        subsamp_sum = tf.mul(subsamp, kH*kW)

        if pnorm == 2:
            out = tf.sqrt(subsamp_sum)
            out = tf.pow(subsamp_sum, 1/pnorm)

    return out
def chi2(exp, obs):
    """Compute CHI^2 statistics of non-zero expected elements.

    Returns sum((obs - exp)^2 / exp) over the positions where ``exp`` is not
    zero.

    NOTE(review): the source had lost the docstring delimiters and the
    wrappers around the subtraction; they are restored as the standard
    chi-square form.  An op name may also have been dropped - confirm if
    anything looks the op up by name.

    Args:
        exp: float32 tensor of expected values.
        obs: tensor of observed values, same shape as ``exp``.

    Returns:
        Scalar tensor.
    """
    zero = tf.constant(0, dtype=tf.float32)
    mask = tf.not_equal(exp, zero)

    def masking(tensor, mask):
        return tf.boolean_mask(tensor, mask)

    expected = masking(exp, mask)
    residual = tf.subtract(masking(obs, mask), expected)
    stat = tf.reduce_sum(tf.pow(residual, 2) / expected)

    return stat
# Project: ndm | Author: jurcicek | project source | file source
        lr = (self._lr_t *
              math_ops.sqrt(1 - self._beta2_power)
              / (1 - self._beta1_power))
        # m_t = beta1 * m + (1 - beta1) * g_t
        m = self.get_slot(var, "m")
        m_scaled_g_values = grad * (1 - self._beta1_t)
        m_t = m * self._beta1_t
        m_t = m_t + m_scaled_g_values
        # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
        v = self.get_slot(var, "v")
        v_scaled_g_values = tf.pow(grad, 2) * (1 - self._beta2_t)
        v_t = v * self._beta2_t
        v_t = v_t + v_scaled_g_values
        v_sqrt = tf.pow(v_t, self._pow_t)
        var_update = state_ops.assign_sub(var,
                                          lr * m_t / (v_sqrt + self._epsilon_t),
        # regularization
        var_update = state_ops.assign_sub(var_update,
                                          self._dense_regularization * var,

        return*[var_update, m_t, v_t])
def scaled_dot_product_attention_simple(q, k, v, bias, name=None):
  """scaled dot-product attention.  One head.  One spatial dimension.

  Args:
    q: a Tensor with shape [batch, length_q, depth_k]
    k: a Tensor with shape [batch, length_kv, depth_k]
    v: a Tensor with shape [batch, length_kv, depth_v]
    bias: optional Tensor broadcastable to [batch, length_q, length_kv]
    name: an optional string

  Returns:
    A Tensor.
  """
  with tf.variable_scope(
      name, default_name="scaled_dot_product_attention_simple"):
    # Scale the queries by 1 / sqrt(depth_k) before the dot product.
    scalar = tf.rsqrt(tf.to_float(common_layers.shape_list(q)[2]))
    logits = tf.matmul(q * scalar, k, transpose_b=True)
    if bias is not None:
      logits += bias
    weights = tf.nn.softmax(logits, name="attention_weights")
    # The head of this call was missing in the source.  The 0.2 power
    # brightens small weights in the rendered image.
    tf.summary.image(
        "attention", tf.expand_dims(tf.pow(weights, 0.2), 3), max_outputs=1)
    return tf.matmul(weights, v)
# Project: vae-style-transfer | Author: sunsided | project source | file source
    """Use Tensorflow to compute a Gaussian Kernel.

    mean : float
        Mean of the Gaussian (e.g. 0.0).
    stddev : float
        Standard Deviation of the Gaussian (e.g. 1.0).
    ksize : int
        Size of kernel (e.g. 16).

    kernel : np.ndarray
        Computed Gaussian Kernel using Tensorflow.
    g = tf.Graph()
    with tf.Session(graph=g):
        x = tf.linspace(-3.0, 3.0, ksize)
        z = (tf.exp(tf.neg(tf.pow(x - mean, 2.0) /
                           (2.0 * tf.pow(stddev, 2.0)))) *
             (1.0 / (stddev * tf.sqrt(2.0 * 3.1415))))
        return z.eval()
# Project: Phy-Net | Author: loliverhennigh | project source | file source
    ''' Adam optimizer '''
    updates = []
    if type(cost_or_grads) is not list:
        grads = tf.gradients(cost_or_grads, params)
        grads = cost_or_grads
    t = tf.Variable(1., 'adam_t')
    for p, g in zip(params, grads):
        mg = tf.Variable(tf.zeros(p.get_shape()), + '_adam_mg')
        if mom1>0:
            v = tf.Variable(tf.zeros(p.get_shape()), + '_adam_v')
            v_t = mom1*v + (1. - mom1)*g
            v_hat = v_t / (1. - tf.pow(mom1,t))
            v_hat = g
        mg_t = mom2*mg + (1. - mom2)*tf.square(g)
        mg_hat = mg_t / (1. - tf.pow(mom2,t))
        g_t = v_hat / tf.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
# Project: cabs | Author: ProbabilisticNumerics | project source | file source
  """Computes gradient moment for a weight matrix through a MatMul operation.

  Assumes ``Z=tf.matmul(A, W)``, where ``W`` is a d1xd2 weight matrix, ``A``
  are the nxd1 activations of the previous layer (n being the batch size).
  ``out_grad`` is the gradient w.r.t. ``Z``, as computed by ``tf.gradients()``.
  No transposes in the MatMul operation allowed.

      :op: The MatMul operation
      :W: The weight matrix (the tensor, not the variable)
      :out_grad: The tensor of gradient w.r.t. to the output of the op
      :batch_size: Batch size n (constant integer or scalar int tf.Tensor)
      :mom: Integer moment desired (defaults to 2)"""

  assert op.type == "MatMul"
  t_a, t_b = op.get_attr("transpose_a"), op.get_attr("transpose_b")
  assert W is op.inputs[1] and not t_a and not t_b

  A = op.inputs[0]
  out_grad_pow = tf.pow(out_grad, mom)
  A_pow = tf.pow(A, mom)
  return tf.multiply(batch_size, tf.matmul(A_pow, out_grad_pow, transpose_a=True))
# Project: Style-Transfer-Algorithm | Author: mohamedkeid | project source | file source
def get_total_variation(x, shape):
    """Return the smoothed total variation of the image tensor ``x``.

    For every pixel the squared differences to its neighbour along axis 1 and
    along axis 2 are added, raised to ``TOTAL_VARIATION_SMOOTHING / 2``,
    summed, and divided by the squared number of elements.

    Args:
        x: rank-4 image tensor (it is indexed as x[:, h, w, :]).
        shape: sequence with the dimensions of ``x``; ``shape[1]`` is used as
            the height and ``shape[2]`` as the width.

    Returns:
        Scalar tensor.
    """
    # `reduce` is a builtin only on Python 2; functools has it on both.
    from functools import reduce

    with tf.name_scope('get_total_variation'):
        # Get the dimensions of the variable image
        height = shape[1]
        width = shape[2]
        size = reduce(lambda a, b: a * b, shape) ** 2

        # Disjoin the variable image and evaluate the total variation
        x_cropped = x[:, :height - 1, :width - 1, :]
        left_term = tf.square(x[:, 1:, :width - 1, :] - x_cropped)
        right_term = tf.square(x[:, :height - 1, 1:, :] - x_cropped)
        smoothed_terms = tf.pow(left_term + right_term, TOTAL_VARIATION_SMOOTHING / 2.)
        return tf.reduce_sum(smoothed_terms) / size
    """Connect two cores given the inputs, synaptic weights, and output dimension.
    Inputs can be output from a previous core or spike inputs"""
    input_dim = int(input.get_shape()[1])

    s, axon_types, axon_weights = synapse_weight((input_dim, output_dim), name)
    b = leak_bias([output_dim], name)
    c = synapse_connection([input_dim, output_dim], name)

    xc = tf.reshape(input, (-1, input_dim, 1)) * c
    mu = b + tf.reduce_sum(xc * s, 1)
    sigma2 = tf.reduce_sum(xc * (1. - xc) * tf.pow(s, 2), 1)

    # Output is proba that each neuron fires
    x0 = tf.zeros_like(mu)
    output = normal_ccdf(x0, mu, sigma2)

    return output, b, c, axon_types, axon_weights, s
# Project: Neural-Art | Author: msokoloff1 | project source | file source
    adjustedImage = model.bgr

    yPlusOne = tf.slice(adjustedImage, [0,0,1,0], [1,imageShape[0],(imageShape[1]-1),imageShape[2]])
    xPlusOne = tf.slice(adjustedImage, [0,1,0,0], [1,(imageShape[0]-1),imageShape[1],imageShape[2]])

    inputNoiseYadj = tf.slice(adjustedImage,[0,0,0,0],[1,imageShape[0],(imageShape[1]-1),imageShape[2]])
    inputNoiseXadj = tf.slice(adjustedImage, [0,0,0,0], [1,(imageShape[0]-1),imageShape[1],imageShape[2]])

    lambdaBeta = (sigma**beta) / (imageShape[0]*imageShape[1]*((a*B)**beta))
    error1 = tf.slice(tf.square(yPlusOne-inputNoiseYadj), [0,0,0,0], [1,(imageShape[0]-1),(imageShape[1]-1), imageShape[2]])
    error2 = tf.slice(tf.square(xPlusOne-inputNoiseXadj), [0,0,0,0], [1,(imageShape[0]-1),(imageShape[1]-1), imageShape[2]])

    return lambdaBeta*tf.reduce_sum( tf.pow((error1+error2),(beta/2) ))
# Project: real-nvp | Author: taesung89 | project source | file source
    ''' Adam optimizer '''
    updates = []
    if type(cost_or_grads) is not list:
        grads = tf.gradients(cost_or_grads, params)
        grads = cost_or_grads
    t = tf.Variable(1., 'adam_t')
    for p, g in zip(params, grads):
        mg = tf.Variable(tf.zeros(p.get_shape()), + '_adam_mg')
        if mom1>0:
            v = tf.Variable(tf.zeros(p.get_shape()), + '_adam_v')
            v_t = mom1*v + (1. - mom1)*g
            v_hat = v_t / (1. - tf.pow(mom1,t))
            v_hat = g
        mg_t = mom2*mg + (1. - mom2)*tf.square(g)
        mg_hat = mg_t / (1. - tf.pow(mom2,t))
        g_t = v_hat / tf.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
# Project: Neural-Turing-Machine | Author: camigord | project source | file source
        Sharpens the final weights

        after_conv_shift: Tensor (batch_size, memory_locations, number_of_keys)
            weights after circular Convolution
        sharp_gamma: Tensor (batch_size, number_of_keys)
            scalar to sharpen the final weights

        Returns: Tensor (batch_size, memory_locations, number_of_keys)
            final weights
        sharp_gamma = tf.expand_dims(sharp_gamma,1)
        powed_conv_w = tf.pow(after_conv_shift, sharp_gamma)
        return powed_conv_w / tf.expand_dims(tf.reduce_sum(powed_conv_w,1),1)
# Project: odin_old | Author: trungnt13 | project source | file source
                prior_mean=0., prior_logsigma=0.,
    ''' KL-divergence between two gaussians.
    Useful for Variational AutoEncoders. Use this as an activation regularizer
    mean, logsigma: parameters of the input distributions
    prior_mean, prior_logsigma: paramaters of the desired distribution (note the
        log on logsigma)
    regularizer_scale: Rescales the regularization cost. Keep this 1 for most cases.

    origin implementation from seya:
    Copyright (c) Philip Bachman
    gauss_klds = 0.5 * (prior_logsigma - logsigma +
            ((tf.exp(logsigma) + pow((mean_ - prior_mean), 2.0)) / tf.exp(prior_logsigma)) - 1.0)
    return mean(gauss_klds)
# Project: DP-VAE | Author: thudzj | project source | file source
    yt_expand = tf.expand_dims(yt, 0)
    mean_yt = tf.reshape(mean_yt, [s, FLAGS.batch_size, 784])
    xt = tf.reshape(xt, [1, s, FLAGS.batch_size, FLAGS.hidden_size])
    # p_ygivenx = tf.reduce_prod(tf.pow(mean_yt, yt_expand) * tf.pow(1 - mean_yt, 1 - yt_expand), axis=2)
    v = alpha / (alpha + beta)
    pi = tf.concat(0, [v, [1.0]]) * tf.concat(0, [[1.0], tf.cumprod(1 - v)])
    p_x = gaussian_mixture_pdf(eta_mu, tf.square(eta_sigma) + tf.square(sigma_px), xt, pi)
    log_p_y_s = tf.reduce_sum(yt_expand * tf.log(mean_yt + epsilon) \
        + (1.0 - yt_expand) * tf.log(1.0 - mean_yt + epsilon), 2) \
        + tf.log(p_x) \
        + 0.5 * tf.reduce_sum(tf.square(eps), 2)
    log_p_y_s_max = tf.reduce_max(log_p_y_s, reduction_indices=0)
    log_p_y = tf.log(tf.reduce_mean(tf.exp(log_p_y_s - log_p_y_s_max), 0)) + log_p_y_s_max
    return tf.reduce_mean(log_p_y)

    def f(e_list):
        """Element-wise signed square root: sign(e) * |e| ** 0.5."""
        signs = tf.sign(e_list)
        root_magnitude = tf.pow(tf.abs(e_list), 0.5)
        return tf.multiply(signs, root_magnitude)
    # ??tf.layers?????flatten
    # dense1 = tf.layers.dense(tf.contrib.layers.flatten(relu5), activation=tf.nn.relu, units=50)
    if not isinstance(inputs, ops.Tensor):
        inputs = ops.convert_to_tensor(inputs, dtype='float')
        # dim_list = inputs.get_shape().as_list()
        # flatten_shape = dim_list[1] if len(dim_list) <= 2 else reduce(lambda x, y: x * y, dim_list[1:])
        # reshaped = tf.reshape(inputs, [dim_list[0], flatten_shape])
    if len(inputs.shape) > 2:
        inputs = tf.contrib.layers.flatten(inputs)
    flatten_shape = inputs.shape[1]
    weights = tf.get_variable('weights', shape=[flatten_shape, units], initializer=w_i)
    w_noise = tf.get_variable('w_noise', [flatten_shape, units], initializer=w_i, collections=c_names)
    if noisy_distribution == 'independent':
        weights += tf.multiply(tf.random_normal(shape=w_noise.shape), w_noise)
    elif noisy_distribution == 'factorised':
        noise_1 = f(tf.random_normal(tf.TensorShape([flatten_shape, 1]), dtype=tf.float32))  # ???????????????
        noise_2 = f(tf.random_normal(tf.TensorShape([1, units]), dtype=tf.float32))
        weights += tf.multiply(noise_1 * noise_2, w_noise)
    dense = tf.matmul(inputs, weights)
    if bias_shape is not None:
        assert bias_shape[0] == units
        biases = tf.get_variable('biases', shape=bias_shape, initializer=b_i)
        b_noise = tf.get_variable('b_noise', [1, units], initializer=b_i, collections=c_names)
        if noisy_distribution == 'independent':
            biases += tf.multiply(tf.random_normal(shape=b_noise.shape), b_noise)
        elif noisy_distribution == 'factorised':
            biases += tf.multiply(noise_2, b_noise)
        return activation(dense + biases) if activation is not None else dense + biases
    return activation(dense) if activation is not None else dense

            return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (_x + 0.044715 * tf.pow(_x, 3))))
# Project: GELUs | Author: hendrycks | project source | file source
            return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (_x + 0.044715 * tf.pow(_x, 3))))
# Project: tensorbuilder | Author: cgarciae | project source | file source
    size = int(tensor.get_shape()[1])
    pows = [ tf.pow(tensor[:, n], n + 1) for n in range(size) ]
    return tf.transpose(tf.pack(pows))
# Project: vae-npvc | Author: JeremyCCHsu | project source | file source
    # EPS = tf.constant(1e-10)
    k = tf.shape(y)[-1]
    k = tf.cast(k, tf.float32)
    # y = y + EPS
    # y = tf.divide(y, tf.reduce_sum(y, -1, keep_dims=True))
    y = normalize_to_unit_sum(y)
    sum_p_over_y = tf.reduce_sum(tf.divide(p, tf.pow(y, tau)), -1)
    logp = tf.lgamma(k)
    logp = logp + (k - 1) * tf.log(tau)
    logp = logp - k * tf.log(sum_p_over_y)
    logp = logp + sum_p_over_y
    return logp
def _smooth_l1_loss(self, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights, sigma=1.0, dim=[1]):
    """Return the smooth-L1 loss between predicted and target values.

    The element-wise difference is scaled by ``bbox_inside_weights`` and
    passed through a piecewise penalty: quadratic where the absolute
    difference is below ``1 / sigma**2``, linear elsewhere.  The result is
    scaled by ``bbox_outside_weights``, summed over the axes in ``dim`` and
    averaged over whatever remains.

    Args:
        bbox_pred: Predicted values (tensor).
        bbox_targets: Target values; subtracted from ``bbox_pred``.
        bbox_inside_weights: Multiplier applied to the raw difference.
        bbox_outside_weights: Multiplier applied to the per-element loss.
        sigma: Sets the quadratic/linear switch point ``1 / sigma**2``.
        dim: Axes to sum over before averaging.  The default list is only
            read here, never mutated.

    Returns:
        Scalar tensor: the mean of the summed, weighted loss.
    """
    sigma_2 = sigma ** 2
    box_diff = bbox_pred - bbox_targets
    in_box_diff = bbox_inside_weights * box_diff
    abs_in_box_diff = tf.abs(in_box_diff)
    # 1.0 where the quadratic branch applies, 0.0 where the linear one does.
    # stop_gradient keeps the branch selector out of back-propagation.
    smoothL1_sign = tf.stop_gradient(tf.to_float(tf.less(abs_in_box_diff, 1. / sigma_2)))
    in_loss_box = (tf.pow(in_box_diff, 2) * (sigma_2 / 2.) * smoothL1_sign
                   + (abs_in_box_diff - (0.5 / sigma_2)) * (1. - smoothL1_sign))
    out_loss_box = bbox_outside_weights * in_loss_box
    # NOTE(review): the arguments of this reduction were missing (the call was
    # left unclosed).  Reconstructed from `out_loss_box` and `dim`, which were
    # otherwise unused -- confirm against the upstream implementation.
    loss_box = tf.reduce_mean(tf.reduce_sum(out_loss_box, axis=dim))
    return loss_box
项目:TFCommon    作者:MU94W    | 项目源码 | 文件源码
    words = tf.cast(tf.size(label), tf.float32)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=logit)
    cross_entropy = tf.divide(tf.reduce_sum(cross_entropy), words)
    perplex = tf.pow(2.0, cross_entropy)
    return perplex
项目:KATE    作者:hugochan    | 项目源码 | 文件源码
    def loss(y_true, y_pred):
        """Return the feature-weighted binary cross-entropy.

        The element-wise binary cross-entropy of the predictions is combined
        with ``feature_weights`` from the enclosing scope (cast to float32)
        through a dot product.

        Args:
            y_true: Target tensor.
            y_pred: Prediction tensor.
        """
        # NOTE(review): the head of the original return expression was lost
        # (it read ``return, y_true), K.variable(...)``).  ``K.dot`` and the
        # ``(y_pred, y_true)`` argument order are reconstructed from an earlier
        # draft of this function (``x = K.binary_crossentropy(y_pred, y_true)``
        # followed by a product with the weight vector).  Confirm against
        # upstream and against the argument order of the Keras version in use.
        per_feature = K.binary_crossentropy(y_pred, y_true)
        weights = K.variable(feature_weights.astype('float32'))
        return K.dot(per_feature, weights)
    return loss
项目:section-detection    作者:gulfaraz    | 项目源码 | 文件源码
    X1 = tf.expand_dims(tf.transpose(input_X), 0)
    X2 = tf.expand_dims(input_X, 0)
    C = init_C

    sbs_C = tf.TensorArray(dtype=tf.float32, size=10000, infer_shape=False)
    sbs_C = sbs_C.write(0, init_C)

    def _mean_shift_step(C):
        """Apply one Gaussian-weighted mean-shift update to ``C``.

        Reads ``X1``, ``X2`` and ``window_radius`` from the enclosing scope.
        ``C`` presumably holds the current centre estimates, one per row --
        TODO confirm against the caller.

        Returns:
            The kernel-weighted average of ``X2`` for each row of ``C``.
        """
        expanded = tf.expand_dims(C, 2)
        scaled_offset = (expanded - X1) / window_radius
        sq_dist = tf.reduce_sum(tf.pow(scaled_offset, 2), axis=1)
        # Kernel weight decays exponentially with the scaled squared distance.
        kernel = tf.exp(-sq_dist)
        weighted_points = tf.expand_dims(kernel, 2) * X2
        numerator = tf.reduce_sum(weighted_points, axis=1)
        normaliser = tf.reduce_sum(kernel, axis=1, keep_dims=True)
        return numerator / normaliser

    if n_updates > 0:
        for i in range(n_updates):
            C = _mean_shift_step(C)
            sbs_C = sbs_C.write(i + 1, C)
        def _mean_shift(i, C, sbs_C, max_diff):
            """Run one mean-shift step, record it, and measure how far ``C`` moved.

            The incoming ``max_diff`` is ignored and recomputed as the largest
            Euclidean norm (taken over axis 1) of the change in ``C``.

            Returns:
                Tuple ``(i + 1, new_C, sbs_C, max_diff)`` where ``sbs_C`` has
                the new value written at index ``i + 1``.
            """
            updated = _mean_shift_step(C)
            sq_shift = tf.reduce_sum(tf.pow(updated - C, 2), axis=1)
            largest_shift = tf.reduce_max(tf.sqrt(sq_shift))
            # Reshape to rank 0 so the value can be compared as a scalar.
            max_diff = tf.reshape(largest_shift, [])
            sbs_C = sbs_C.write(i + 1, updated)
            return i + 1, updated, sbs_C, max_diff

        def _cond(i, C, sbs_C, max_diff):
            """Return whether ``max_diff`` still exceeds the 1e-5 tolerance.

            ``i``, ``C`` and ``sbs_C`` are accepted but not used.
            """
            still_moving = max_diff > 1e-5
            return still_moving

        n_updates, C, sbs_C, _ = tf.while_loop(cond=_cond,
                                       loop_vars=(tf.constant(0), C, sbs_C, tf.constant(1e10)))

        n_updates = tf.Print(n_updates, [n_updates])

    return C, sbs_C.gather(tf.range(n_updates + 1))
项目:main_loop_tf    作者:fvisin    | 项目源码 | 文件源码
        if self.cfg.grad_noise_decay is None:
            grad_noise_scale = self.cfg.grad_noise_scale
        elif self.cfg.grad_noise_decay == 'annealing':
            Adds annealed gaussian noise to the gradients at
            every time step, by decaying the variance at each
            time step
            g_t <- g_t + N(0, sigma_t^2)
            sigma_t^2 = eta / (1 + t)^gamma

            with eta selected from {0.01, 0.3, 1.0} and
            gamma = 0.55
            See: "Adding gradient noise improves learning
            for very deep networks",
            eta = self.cfg.grad_noise_scale ** 0.5
            gamma = 0.55 / 2
            grad_noise_scale = eta * tf.pow(tf.cast(
                self.global_step + 1, self.cfg._FLOATX), -gamma)
        elif self.cfg.grad_noise_decay == 'neural_gpu':
            if self.prev_err is None:
                grad_noise_scale = self.cfg.grad_noise_scale
                eta = self.cfg.grad_noise_scale
                gamma = 0.55
                grad_noise_scale = eta * tf.sqrt(
                    self.prev_err * tf.pow(tf.cast(
                        self.global_step + 1, self.cfg._FLOATX), -gamma))
            # Raise ValueError
            raise NotImplementedError('Unknown value of '
                                      'cfg.grad_noise_decay: %s' %

        return grad_noise_scale
项目:keras    作者:GeekLiB    | 项目源码 | 文件源码
    '''Element-wise exponentiation.
    return tf.pow(x, a)