The following 23 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.assign_sub().
def _apply_dense(self, grad, weight):
    """Apply a dense momentum-style update to `weight`.

    Maintains two optimizer slots per variable: "a" (momentum) and "n" (a
    running norm term).  The gradient is scaled by the norm term, blended
    with a (possibly row-averaged) momentum, and subtracted from the weight.

    Args:
      grad: Dense gradient tensor for `weight`.
      weight: The variable being updated.

    Returns:
      A `tf.group` of the weight, norm, and momentum update ops.
    """
    learning_rate_t = tf.cast(self._lr_t, weight.dtype.base_dtype)
    mu_t = tf.cast(self._mu_t, weight.dtype.base_dtype)
    norm_t = tf.cast(self._norm_t, weight.dtype.base_dtype)
    momentum = self.get_slot(weight, "a")
    norm = self.get_slot(weight, "n")
    # For matrix-shaped momentum, average over columns so each row contributes
    # a single momentum value; vectors (and anything else) are used as-is.
    # NOTE(review): the `elif` and `else` branches are identical, so the
    # ndims == 1 case is redundant — possibly a leftover from an edit.
    if momentum.get_shape().ndims == 2:
        momentum_mean = tf.reduce_mean(momentum, axis=1, keep_dims=True)
    elif momentum.get_shape().ndims == 1:
        momentum_mean = momentum
    else:
        momentum_mean = momentum
    norm_update = learning_rate_t / norm + norm
    # NOTE(review): this assigns into `norm_t`, which at this point is the
    # *cast of self._norm_t*, not the per-variable "n" slot fetched above.
    # It looks like `tf.assign(norm, norm_update)` may have been intended —
    # confirm against the optimizer's slot-creation code.
    norm_t = tf.assign(norm_t, norm_update)
    momentum_update = (grad / norm_t) + (mu_t * momentum_mean)
    momentum_t = tf.assign(momentum, momentum_update,
                           use_locking=self._use_locking)
    weight_update = learning_rate_t * momentum_t
    weight_t = tf.assign_sub(
        weight, weight_update, use_locking=self._use_locking)
    return tf.group(*[weight_t, norm_t, momentum_t])
def _apply_dense(self, grad, var):
    """Dense update: EMA of gradients divided by a running max of |grad|.

    Slot "v" holds an exponential moving average of the gradient; slot "m"
    holds a decayed running maximum of the gradient magnitude.  The step is
    lr * v / m, subtracted from `var`.

    Args:
      grad: Dense gradient for `var`.
      var: Variable to update.

    Returns:
      A `tf.group` of the variable and slot update ops.
    """
    dtype = var.dtype.base_dtype
    step_size = tf.cast(self._lr_t, dtype)
    decay1 = tf.cast(self._beta1_t, dtype)
    decay2 = tf.cast(self._beta2_t, dtype)
    # float16 needs a larger epsilon: 1e-8 underflows in half precision.
    eps = 1e-7 if dtype == tf.float16 else 1e-8
    grad_avg_slot = self.get_slot(var, "v")
    grad_avg = grad_avg_slot.assign(
        decay1 * grad_avg_slot + (1. - decay1) * grad)
    max_slot = self.get_slot(var, "m")
    running_max = max_slot.assign(
        tf.maximum(decay2 * max_slot + eps, tf.abs(grad)))
    step = step_size * (grad_avg / running_max)
    apply_op = tf.assign_sub(var, step)
    return tf.group(*[apply_op, running_max, grad_avg])
def _apply_dense(self, grad, var):
    """Dense parameter update.

    Keeps an exponential moving average of the gradient in slot "v" and a
    decayed running maximum of |grad| in slot "m"; applies the step
    var -= lr * v / m.

    Args:
      grad: Dense gradient tensor.
      var: The variable being optimized.

    Returns:
      Grouped update op covering the variable and both slots.
    """
    base = var.dtype.base_dtype
    lr = tf.cast(self._lr_t, base)
    b1 = tf.cast(self._beta1_t, base)
    b2 = tf.cast(self._beta2_t, base)
    if base == tf.float16:
        # 1e-8 underflows in half precision.
        epsilon = 1e-7
    else:
        epsilon = 1e-8
    ema = self.get_slot(var, "v")
    ema_new = ema.assign(b1 * ema + (1. - b1) * grad)
    mag = self.get_slot(var, "m")
    mag_new = mag.assign(tf.maximum(b2 * mag + epsilon, tf.abs(grad)))
    normalized = ema_new / mag_new
    applied = tf.assign_sub(var, lr * normalized)
    return tf.group(*[applied, mag_new, ema_new])
def apply_updates(self, model, grads):
    """Updates the model parameters based on the given gradients, using momentum.

    Each momentum vector is updated as mv = momentum * mv + grad * lr and
    then subtracted from its parameter.  `self._learning_rate` may be a
    single value (shared by all parameters) or a list with one entry per
    parameter.

    Args:
      model: Object exposing `model_params`, the list of variables to update.
      grads: Gradients aligned with `model.model_params`.

    Returns:
      A pair `(update_ops, mom_ops)`; `mom_ops` is kept for interface
      compatibility and is always empty here.
    """
    update_ops = []
    mom_ops = []
    if isinstance(self._learning_rate, list):
        # Per-parameter learning rates were supplied explicitly.
        lrs = self._learning_rate
    else:
        lrs = [self._learning_rate for _ in model.model_params]
    with tf.name_scope('CDLearning/updates'):
        for param, grad, mv, lr in zip(model.model_params, grads,
                                       self._momentum_vector, lrs):
            mv = tf.assign(mv, self._momentum * mv + grad * lr)
            update_ops.append(tf.assign_sub(param, mv))
    return update_ops, mom_ops
def _apply_dense(self, grad, var):
    """Dense update that normalizes the gradient by a running max of |grad|.

    Slot "m" tracks a decayed running maximum of the gradient magnitude; the
    applied step is lr * grad / m.

    Args:
      grad: Dense gradient for `var`.
      var: Variable to update.

    Returns:
      Grouped op containing the variable update and the slot update.
    """
    dtype = var.dtype.base_dtype
    rate = tf.cast(self._lr_t, dtype)
    decay = tf.cast(self._beta2_t, dtype)
    # Half precision cannot represent 1e-8; use a larger floor there.
    floor = 1e-7 if dtype == tf.float16 else 1e-8
    max_slot = self.get_slot(var, "m")
    running_max = max_slot.assign(
        tf.maximum(decay * max_slot + floor, tf.abs(grad)))
    scaled_grad = grad / running_max
    applied = tf.assign_sub(var, rate * scaled_grad)
    return tf.group(*[applied, running_max])
def update_sub(x, decrement):
    """Decrement the variable `x` in place by `decrement`.

    Args:
      x: A TensorFlow variable.
      decrement: Value to subtract from `x`.

    Returns:
      The op that performs the in-place subtraction.
    """
    sub_op = tf.assign_sub(x, decrement)
    return sub_op
def _initAssignSubFetch(self, x, y, use_gpu=False):
    """Initialize a variable to `x`, run `var -= y`, and fetch both values.

    Args:
      x: Initial value for the variable.
      y: Amount to subtract.
      use_gpu: If True, force the session onto GPU.

    Returns:
      Tuple of (final variable value, value returned by the assign_sub op).
    """
    with self.test_session(force_gpu=use_gpu):
        var = tf.Variable(x)
        sub_op = tf.assign_sub(var, y)
        var.initializer.run()
        fetched = sub_op.eval()
        return var.eval(), fetched
def testAssignUpdate(self):
    """assign_add/assign_sub on a shaped complex64 var keep the static shape."""
    v = state_ops.variable_op([1, 2], tf.complex64)
    plus = tf.assign_add(v, [[2.0+2.0j, 3.0+3.0j]])
    self.assertEqual([1, 2], plus.get_shape())
    minus = tf.assign_sub(v, [[12.0+12.0j, 13.0+13.0j]])
    self.assertEqual([1, 2], minus.get_shape())
def testAssignUpdateNoVarShape(self):
    """With an unset var shape, the update value's shape propagates."""
    v = state_ops.variable_op([1, 2], tf.complex64, set_shape=False)
    plus = tf.assign_add(v, [[2.0+2.0j, 3.0+3.0j]])
    self.assertEqual([1, 2], plus.get_shape())
    minus = tf.assign_sub(v, [[12.0+12.0j, 13.0+13.0j]])
    self.assertEqual([1, 2], minus.get_shape())
def testAssignUpdateNoValueShape(self):
    """With a shapeless update value, the variable's shape propagates."""
    v = state_ops.variable_op([1, 2], tf.complex64)
    plus = tf.assign_add(v, self._NewShapelessTensor())
    self.assertEqual([1, 2], plus.get_shape())
    minus = tf.assign_sub(v, self._NewShapelessTensor())
    self.assertEqual([1, 2], minus.get_shape())
def testAssignUpdateNoShape(self):
    """With neither side shaped, the result shape stays unknown."""
    v = state_ops.variable_op([1, 2], tf.complex64, set_shape=False)
    plus = tf.assign_add(v, self._NewShapelessTensor())
    self.assertEqual(tensor_shape.unknown_shape(), plus.get_shape())
    minus = tf.assign_sub(v, self._NewShapelessTensor())
    self.assertEqual(tensor_shape.unknown_shape(), minus.get_shape())
def _clip_dense(self, var):
    """Project `var` back inside its max-norm ball in place.

    Computes the norm-clipped version of the variable and subtracts the
    difference, so that after the returned op runs, `var` equals its
    clipped value.

    Args:
      var: The dense variable to clip.

    Returns:
      The `assign_sub` op that applies the clipping delta.
    """
    with self._maybe_colocate_with(var):
        # `_ref()` reads the variable's mutable reference so the clip sees
        # the value as updated by any preceding ops on this reference.
        updated_var_value = var._ref()
        normalized_var = tf.clip_by_norm(
            updated_var_value, self._max_norm, self._vars_to_clip_dims[var])
        delta = updated_var_value - normalized_var
    with tf.colocate_with(var):
        return var.assign_sub(delta, use_locking=self._use_locking)
def _apply_sparse_shared(self, grad, var, indices, scatter_add):
    """Shared sparse Adam-family update used by `_apply_sparse` variants.

    Decays the "m" and "v" slots, scatter-adds the new gradient
    contributions at `indices`, and applies a bias-corrected step to `var`.
    The `m_bar` term blends the fresh gradient with the updated momentum
    (looks like a Nadam-style look-ahead — confirm against the paper /
    surrounding optimizer class).

    Args:
      grad: Gradient values at `indices`.
      var: Variable being updated.
      indices: Indices into `var` where `grad` applies.
      scatter_add: Callable (ref, indices, updates) -> op, supplied by the
        caller so this code works for both Variable and resource variants.

    Returns:
      A `tf.group` of the variable update, `m_bar`, and `v_t`.
    """
    beta1_power = tf.cast(self._beta1_power, var.dtype.base_dtype)
    beta2_power = tf.cast(self._beta2_power, var.dtype.base_dtype)
    lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
    beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = tf.cast(self._epsilon_t, var.dtype.base_dtype)
    # Bias-corrected learning rate.
    lr = (lr_t * tf.sqrt(1 - beta2_power) / (1 - beta1_power))
    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = tf.assign(m, m * beta1_t, use_locking=self._use_locking)
    # The decay must complete before the scatter so the two writes compose
    # into a single consistent m_t — hence the control dependency.
    with tf.control_dependencies([m_t]):
        m_t = scatter_add(m, indices, m_scaled_g_values)
    # m_bar = (1 - beta1) * g_t + beta1 * m_t
    m_bar = m_scaled_g_values + beta1_t * m_t
    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = tf.assign(v, v * beta2_t, use_locking=self._use_locking)
    with tf.control_dependencies([v_t]):
        v_t = scatter_add(v, indices, v_scaled_g_values)
    v_sqrt = tf.sqrt(v_t)
    var_update = tf.assign_sub(
        var, lr * m_bar / (v_sqrt + epsilon_t),
        use_locking=self._use_locking)
    return tf.group(*[var_update, m_bar, v_t])
def _finish(self, caches):
    """Apply the per-variable steps accumulated in `caches`.

    Optionally clips all steps by global norm, then for each cache applies
    its step `s_t` to `x_tm1` (scattered when 'idxs' is present, dense
    otherwise) and, when `self.chi > 0`, updates a moving average of the
    parameter values.

    Args:
      caches: List of dicts with keys 'x_tm1' (variable), 's_t' (step),
        'updates' (list of ops to group), and optionally 'idxs' (sparse
        update indices).

    Returns:
      A single grouped op named 'update' covering all caches.
    """
    if self.clip > 0:
        # Clip all steps jointly by their global norm before applying.
        S_t = [cache['s_t'] for cache in caches]
        S_t, _ = tf.clip_by_global_norm(S_t, self.clip)
        for cache, s_t in zip(caches, S_t):
            cache['s_t'] = s_t
    for cache in caches:
        x_tm1 = cache['x_tm1']
        s_t = cache['s_t']
        updates = cache['updates']
        with tf.name_scope('update_' + x_tm1.op.name), tf.device(x_tm1.device):
            if 'idxs' in cache:
                # Sparse path: subtract the step only at the given indices.
                idxs = cache['idxs']
                x_t = tf.scatter_sub(x_tm1, idxs, s_t)
                if self.chi > 0:
                    # Moving average of the touched rows only.
                    x_t_ = tf.gather(x_t, idxs)
                    x_bar_t, t_x_bar = self._sparse_moving_average(
                        x_tm1, idxs, x_t_, 'x', beta=self.chi)
            else:
                # Dense path: subtract the full step.
                x_t = tf.assign_sub(x_tm1, s_t)
                if self.chi > 0:
                    x_bar_t, t_x_bar = self._dense_moving_average(
                        x_tm1, x_t, 'x', beta=self.chi)
            updates.append(x_t)
            if self.chi > 0:
                updates.extend([x_bar_t, t_x_bar])
    update_ops = [tf.group(*cache['updates']) for cache in caches]
    return tf.group(*update_ops, name='update')

#==============================================================
def _op_apply_delta(self):
    """Build an op that subtracts each unpacked delta from its variable.

    Returns:
      A grouped op performing `var -= delta` for every variable.
    """
    sub_ops = [
        tf.assign_sub(var, delta)
        for var, delta in zip(self.variables, self._unpack(self._op_delta))
    ]
    return tf.group(*sub_ops)
def batch_norm(x, is_training, gamma=None, beta=None, axes=[0, 1, 2],
               eps=1e-10, name="bn_out", decay=0.99, dtype=tf.float32):
    """Applies batch normalization.

    Collects mean and variance of `x` over `axes` and normalizes:
        x_ = gamma * (x - mean) / sqrt(var + eps) + beta
    In training mode the batch statistics are used directly and ops updating
    the stored exponential moving averages are returned; in inference mode
    the stored averages are used.

    Args:
      x: Input tensor, [B, ...].
      is_training: Python bool selecting training vs. inference behavior.
      gamma: Scaling parameter (may be None).
      beta: Bias parameter (may be None).
      axes: Axes over which to collect statistics (read-only; never mutated).
      eps: Denominator bias.
      name: Name for the normalized output op.
      decay: EMA decay rate for the stored mean/variance.
      dtype: dtype of the EMA variables.

    Returns:
      (normed, update_ops): the batch-normalized tensor and, when training,
      the list [ema_mean_op, ema_var_op] the caller must run; None otherwise.
    """
    n_out = x.get_shape()[-1]
    try:
        n_out = int(n_out)
        shape = [n_out]
    except (TypeError, ValueError):
        # Last dimension is statically unknown; let get_variable infer it.
        # (Was a bare `except:`, which also hid unrelated errors.)
        shape = None
    emean = tf.get_variable(
        "ema_mean",
        shape=shape,
        trainable=False,
        dtype=dtype,
        initializer=tf.constant_initializer(
            0.0, dtype=dtype))
    evar = tf.get_variable(
        "ema_var",
        shape=shape,
        trainable=False,
        dtype=dtype,
        initializer=tf.constant_initializer(
            1.0, dtype=dtype))
    if is_training:
        mean, var = tf.nn.moments(x, axes, name="moments")
        # ema -= (ema - batch_stat) * (1 - decay)  ==  ema = decay*ema + (1-decay)*stat
        ema_mean_op = tf.assign_sub(emean, (emean - mean) * (1 - decay))
        ema_var_op = tf.assign_sub(evar, (evar - var) * (1 - decay))
        normed = tf.nn.batch_normalization(
            x, mean, var, beta, gamma, eps, name=name)
        return normed, [ema_mean_op, ema_var_op]
    else:
        normed = tf.nn.batch_normalization(
            x, emean, evar, beta, gamma, eps, name=name)
        return normed, None