The following 48 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.control_dependencies().
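Before the project examples, here is a minimal standalone sketch (assuming the TF 1.x graph-mode API used throughout this page; the `counter`/`increment` names are illustrative only, not taken from any project below) of what tf.control_dependencies() guarantees: ops created inside the block only run after the listed ops have run.

import tensorflow as tf  # TF 1.x graph-mode API assumed

counter = tf.Variable(0, name='counter')
increment = tf.assign_add(counter, 1)  # the op we want to force to run first

# Any op created inside this block waits for `increment` before executing.
with tf.control_dependencies([increment]):
    # tf.identity creates a new op here, so the dependency attaches to it;
    # merely referencing an existing tensor would not.
    counter_after_increment = tf.identity(counter)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(counter_after_increment))  # prints 1: the increment ran before the read

The pattern of creating a fresh op inside the block, typically tf.identity() or tf.no_op(), recurs in most of the examples below, because the dependency only attaches to ops constructed within the block.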
def omniglot():
    sess = tf.InteractiveSession()

    """
    def wrapper(v):
        return tf.Print(v, [v], message="Printing v")

    v = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='Matrix')

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    temp = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='temp')
    temp = wrapper(v)
    #with tf.control_dependencies([temp]):
    temp.eval()
    print 'Hello'
    """

    def update_tensor(V, dim2, val):  # Update tensor V, with index (:, dim2[:]) by val[:]
        val = tf.cast(val, V.dtype)

        def body(_, (v, d2, chg)):
            d2_int = tf.cast(d2, tf.int32)
            return tf.slice(tf.concat_v2([v[:d2_int], [chg], v[d2_int + 1:]], axis=0),
                            [0], [v.get_shape().as_list()[0]])

        Z = tf.scan(body, elems=(V, dim2, val),
                    initializer=tf.constant(1, shape=V.get_shape().as_list()[1:], dtype=tf.float32),
                    name="Scan_Update")
        return Z
def value_transition(self, curr_state, next_symbols, batch_size):
    first_value_token = self.num_functions + self.num_begin_tokens + self.num_control_tokens
    num_value_tokens = self.output_size - first_value_token
    with tf.name_scope('grammar_transition'):
        adjusted_next_symbols = tf.where(next_symbols >= self.num_control_tokens,
                                         next_symbols + (first_value_token - self.num_control_tokens),
                                         next_symbols)

        assert1 = tf.Assert(
            tf.reduce_all(tf.logical_and(next_symbols < num_value_tokens, next_symbols >= 0)),
            [curr_state, next_symbols])
        with tf.control_dependencies([assert1]):
            transitions = tf.gather(tf.constant(self.transition_matrix), curr_state)
        assert transitions.get_shape()[1:] == (self.output_size,)

        indices = tf.stack((tf.range(0, batch_size), adjusted_next_symbols), axis=1)
        next_state = tf.gather_nd(transitions, indices)

        assert2 = tf.Assert(tf.reduce_all(next_state >= 0),
                            [curr_state, adjusted_next_symbols, next_state])
        with tf.control_dependencies([assert2]):
            return tf.identity(next_state)
def append_apply_gradients_ops(self, gradient_state, opt, grads, training_ops,
                               loss_scale_params):
    device_grads = gradient_state  # From 2nd result of preprocess_device_grads.

    def get_apply_gradients_ops_func():
        """Returns a list of ops for updating gradients."""
        apply_gradients_ops = []
        # For each variable, apply the combined gradients for this server on
        # the parameter server, and then wait for all other servers to do this.
        for i, (g, v) in enumerate(grads):
            apply_gradient_op = opt.apply_gradients([(g, v)])
            barrier = self.benchmark_cnn.add_sync_queues_and_barrier(
                'replicate_variable_%s' % i, [apply_gradient_op])
            with tf.control_dependencies([barrier]):
                with tf.device(self.benchmark_cnn.cpu_device):
                    updated_value = v.read_value()
                    for my_d in range(len(self.benchmark_cnn.devices)):
                        apply_gradients_ops.append(
                            device_grads[my_d][i][1].assign(updated_value))
        return apply_gradients_ops

    variable_mgr_util.append_gradients_with_loss_scale(
        training_ops, get_apply_gradients_ops_func, loss_scale_params,
        self.grad_has_inf_nan)
def batch_norm_layer(self, to_be_normalized, is_training):
    if is_training:
        train_phase = tf.constant(1)
    else:
        train_phase = tf.constant(-1)
    beta = tf.Variable(tf.constant(0.0, shape=[to_be_normalized.shape[-1]]), name='beta', trainable=True)
    gamma = tf.Variable(tf.constant(1.0, shape=[to_be_normalized.shape[-1]]), name='gamma', trainable=True)
    # axises = np.arange(len(to_be_normalized.shape) - 1)  # change to apply tensorflow 1.3
    axises = [0, 1, 2]

    print("start nn.moments")
    print("axises : " + str(axises))
    batch_mean, batch_var = tf.nn.moments(to_be_normalized, axises, name='moments')
    print("nn.moments successful")

    ema = tf.train.ExponentialMovingAverage(decay=0.5)

    def mean_var_with_update():
        ema_apply_op = ema.apply([batch_mean, batch_var])
        with tf.control_dependencies([ema_apply_op]):
            return tf.identity(batch_mean), tf.identity(batch_var)

    mean, var = tf.cond(train_phase > 0, mean_var_with_update,
                        lambda: (ema.average(batch_mean), ema.average(batch_var)))  # if is training --> update
    normed = tf.nn.batch_normalization(to_be_normalized, mean, var, beta, gamma, 1e-3)
    return normed
def input_norm(xs):
    fc_mean, fc_var = tf.nn.moments(
        xs,
        axes=[0],
    )
    scale = tf.Variable(tf.ones([1]))
    shift = tf.Variable(tf.zeros([1]))
    epsilon = 0.001

    # apply moving average for mean and var when train on batch
    ema = tf.train.ExponentialMovingAverage(decay=0.5)

    def mean_var_with_update():
        ema_apply_op = ema.apply([fc_mean, fc_var])
        with tf.control_dependencies([ema_apply_op]):
            return tf.identity(fc_mean), tf.identity(fc_var)

    mean, var = mean_var_with_update()
    xs = tf.nn.batch_normalization(xs, mean, var, shift, scale, epsilon)
    return xs
def batch_norm(Wx_plus_b, out_size):
    fc_mean, fc_var = tf.nn.moments(
        Wx_plus_b,
        axes=[0],  # the dimension you wanna normalize, here [0] for batch
                   # for image, you wanna do [0, 1, 2] for [batch, height, width] but not channel
    )
    scale = tf.Variable(tf.ones([out_size]))
    shift = tf.Variable(tf.zeros([out_size]))
    epsilon = 0.001

    # apply moving average for mean and var when train on batch
    ema = tf.train.ExponentialMovingAverage(decay=0.5)

    def mean_var_with_update():
        ema_apply_op = ema.apply([fc_mean, fc_var])
        with tf.control_dependencies([ema_apply_op]):
            return tf.identity(fc_mean), tf.identity(fc_var)

    mean, var = mean_var_with_update()
    Wx_plus_b = tf.nn.batch_normalization(Wx_plus_b, mean, var, shift, scale, epsilon)
    return Wx_plus_b
def build_model(self):
    Gen = GeneratorTypes[self.gan_type]
    config = self.config
    self.gen = Gen(config.batch_size, config.gen_hidden_size, config.gen_z_dim)

    with tf.variable_scope('Disc') as scope:
        self.D1 = Discriminator(self.data.X, config.disc_hidden_size)
        scope.reuse_variables()
        self.D2 = Discriminator(self.gen.X, config.disc_hidden_size)
        d_var = tf.contrib.framework.get_variables(scope)

    d_loss_real = tf.reduce_mean(sxe(self.D1, 1))
    d_loss_fake = tf.reduce_mean(sxe(self.D2, 0))
    self.loss_d = d_loss_real + d_loss_fake
    self.loss_g = tf.reduce_mean(sxe(self.D2, 1))

    optimizer = tf.train.AdamOptimizer
    g_optimizer = optimizer(self.config.lr_gen)
    d_optimizer = optimizer(self.config.lr_disc)
    self.opt_d = d_optimizer.minimize(self.loss_d, var_list=d_var)
    self.opt_g = g_optimizer.minimize(self.loss_g, var_list=self.gen.tr_var,
                                      global_step=self.gen.step)

    with tf.control_dependencies([self.inc_step]):
        self.train_op = tf.group(self.opt_d, self.opt_g)
def __call__(self, x, train=True):
    shape = x.get_shape().as_list()

    if train:
        with tf.variable_scope(self.name) as scope:
            self.beta = tf.get_variable("beta", [shape[-1]],
                                        initializer=tf.constant_initializer(0.))
            self.gamma = tf.get_variable("gamma", [shape[-1]],
                                         initializer=tf.random_normal_initializer(1., 0.02))

            batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
            ema_apply_op = self.ema.apply([batch_mean, batch_var])
            self.ema_mean, self.ema_var = self.ema.average(batch_mean), self.ema.average(batch_var)

            with tf.control_dependencies([ema_apply_op]):
                mean, var = tf.identity(batch_mean), tf.identity(batch_var)
    else:
        mean, var = self.ema_mean, self.ema_var

    normed = tf.nn.batch_norm_with_global_normalization(
        x, mean, var, self.beta, self.gamma, self.epsilon, scale_after_normalization=True)
    return normed

# standard convolution layer
def apply_gradients(self, grads_and_vars, global_step=None):
    """Apply gradients to model variables specified in `grads_and_vars`.

    `apply_gradients` returns an op that calls
    `tf.train.Optimizer.apply_gradients` and then zeros the gradient
    variables stored in `self.grads_and_vars`.

    Args:
        grads_and_vars (list): Description.
        global_step (None, optional): tensorflow global_step variable.

    Returns:
        (tf.Operation): Applies gradient update to model followed by an
            internal gradient zeroing operation to `self.grads_and_vars`.
    """
    self.mini_flag = tf.assign(self.mini_flag, tf.constant([0], dtype=tf.float32))
    # grads_and_vars = self.aggregate_gradients(grads_and_vars, method='average')
    with tf.control_dependencies([self.mini_flag]):
        optimize = self._optimizer.apply_gradients(grads_and_vars,
                                                   global_step=global_step)
    # return [optimize, self.zero_grad()]
    return optimize
def build_all(self, param_avg=False):
    """Build all nodes."""
    if self._has_built_all:
        raise Exception('Only call build_all or build_eval once.')
    self._has_built_all = True
    with tf.device(self.get_device_fn()):
        with tf.variable_scope(self.name):
            inp_var = self.build_input()
            output_var = self.build(inp_var)
            loss_var = self.build_loss(inp_var, output_var)
            train_step = self.build_optim(loss_var)
            if param_avg:
                ema_op, avg_var = self.get_average_var()
                self._avg_var = avg_var
                with tf.control_dependencies([train_step, ema_op]):
                    train_step = tf.no_op(name='train_step')
            self.register_var('train_step', train_step)
    return self
def build_model(self):
    self.build_memory()

    self.W = tf.Variable(tf.random_normal([self.edim, self.nwords], stddev=self.init_std))
    z = tf.matmul(self.hid[-1], self.W)

    self.loss = tf.nn.softmax_cross_entropy_with_logits(z, self.target)

    self.lr = tf.Variable(self.current_lr)
    self.opt = tf.train.GradientDescentOptimizer(self.lr)

    params = [self.A, self.B, self.C, self.T_A, self.T_B, self.W]
    grads_and_vars = self.opt.compute_gradients(self.loss, params)
    clipped_grads_and_vars = [(tf.clip_by_norm(gv[0], self.max_grad_norm), gv[1])
                              for gv in grads_and_vars]

    inc = self.global_step.assign_add(1)
    with tf.control_dependencies([inc]):
        self.optim = self.opt.apply_gradients(clipped_grads_and_vars)

    tf.initialize_all_variables().run()
    self.saver = tf.train.Saver()
def normalize(self, x, train=True):
    """Returns a batch-normalized version of x."""
    if train is not None:
        mean, variance = tf.nn.moments(x, [0, 1, 2])
        assign_mean = self.mean.assign(mean)
        assign_variance = self.variance.assign(variance)
        with tf.control_dependencies([assign_mean, assign_variance]):
            return tf.nn.batch_norm_with_global_normalization(
                x, mean, variance, self.beta, self.gamma,
                self.epsilon, self.scale_after_norm)
    else:
        mean = self.ewma_trainer.average(self.mean)
        variance = self.ewma_trainer.average(self.variance)
        local_beta = tf.identity(self.beta)
        local_gamma = tf.identity(self.gamma)
        return tf.nn.batch_norm_with_global_normalization(
            x, mean, variance, local_beta, local_gamma,
            self.epsilon, self.scale_after_norm)
def train(self, loss, global_step):
    """
    Return a training step for the tensorflow graph

    Args:
        loss : loss to do sgd on
        global_step : which step are we at
    """
    opt = tf.train.AdamOptimizer(self.learning_rate)
    grads = opt.compute_gradients(loss)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
    variable_averages = tf.train.ExponentialMovingAverage(self.moving_avg_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')
    return train_op
def training_step(loss, optimizer_handle, learning_rate, **kwargs):
    '''
    Creates the optimisation operation which is executed in each training iteration of the network
    :param loss: The loss to be minimised
    :param optimizer_handle: A handle to one of the tf optimisers
    :param learning_rate: Learning rate
    :param momentum: Optionally, you can also pass a momentum term to the optimiser.
    :return: The training operation
    '''
    if 'momentum' in kwargs:
        momentum = kwargs.get('momentum')
        optimizer = optimizer_handle(learning_rate=learning_rate, momentum=momentum)
    else:
        optimizer = optimizer_handle(learning_rate=learning_rate)

    # The with statement is needed to make sure the tf contrib version of batch norm properly performs its updates
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss)

    return train_op
def image_reading(path: str, resized_size: Tuple[int, int]=None,
                  data_augmentation: bool=False, padding: bool=False) -> Tuple[tf.Tensor, tf.Tensor]:
    # Read image
    image_content = tf.read_file(path, name='image_reader')
    image = tf.cond(tf.equal(tf.string_split([path], '.').values[1], tf.constant('jpg', dtype=tf.string)),
                    true_fn=lambda: tf.image.decode_jpeg(image_content, channels=1,
                                                         try_recover_truncated=True),  # TODO channels = 3 ?
                    false_fn=lambda: tf.image.decode_png(image_content, channels=1),
                    name='image_decoding')

    # Data augmentation
    if data_augmentation:
        image = augment_data(image)

    # Padding
    if padding:
        with tf.name_scope('padding'):
            image, img_width = padding_inputs_width(image, resized_size,
                                                    increment=CONST.DIMENSION_REDUCTION_W_POOLING)
    # Resize
    else:
        image = tf.image.resize_images(image, size=resized_size)
        img_width = tf.shape(image)[1]

    with tf.control_dependencies([tf.assert_equal(image.shape[:2], resized_size)]):
        return image, img_width
def batch_norm(self, X):
    train_phase = self.train_phase
    with tf.name_scope('bn'):
        n_out = X.get_shape()[-1:]
        beta = tf.Variable(tf.constant(0.0, shape=n_out), name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=n_out), name='gamma', trainable=True)
        # batch_mean, batch_var = tf.nn.moments(X, [0, 1, 2], name='moments')
        batch_mean, batch_var = tf.nn.moments(X, [0, 1, 2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=0.5)

        def mean_var_with_update():
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = tf.cond(train_phase, mean_var_with_update,
                            lambda: (ema.average(batch_mean), ema.average(batch_var)))
        normed = tf.nn.batch_normalization(X, mean, var, beta, gamma, 1e-3)
        return normed
def batch_norm(x, n_out, phase_train, scope='bn', decay=0.9, eps=1e-5, stddev=0.02):
    """
    Code taken from http://stackoverflow.com/a/34634291/2267819
    """
    with tf.variable_scope(scope):
        beta = tf.get_variable(name='beta', shape=[n_out],
                               initializer=tf.constant_initializer(0.0), trainable=True)
        gamma = tf.get_variable(name='gamma', shape=[n_out],
                                initializer=tf.random_normal_initializer(1.0, stddev), trainable=True)
        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=decay)

        def mean_var_with_update():
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = tf.cond(phase_train,
                            mean_var_with_update,
                            lambda: (ema.average(batch_mean), ema.average(batch_var)))
        normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, eps)
    return normed
def train_simple(total_loss, global_step):
    with tf.variable_scope('train_op'):
        # Variables that affect learning rate.
        num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
        decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                        global_step,
                                        decay_steps,
                                        LEARNING_RATE_DECAY_FACTOR,
                                        staircase=True)
        tf.summary.scalar('learning_rate', lr)

        # update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # with tf.control_dependencies(update_ops):
        #     opt = tf.train.MomentumOptimizer(lr, 0.9).minimize(total_loss, global_step=global_step)
        opt = tf.train.AdamOptimizer(lr).minimize(total_loss, global_step=global_step)
        tf.summary.scalar(total_loss.op.name + ' (raw)', total_loss)
    return opt, lr
def __init__(self, inputs, outputs, updates=[]):
    assert type(inputs) in {list, tuple}, 'Input to a TensorFlow backend function should be a list or tuple.'
    assert type(outputs) in {list, tuple}, 'Output to a TensorFlow backend function should be a list or tuple.'
    assert type(updates) in {list, tuple}, 'Updates in a TensorFlow backend function should be a list or tuple.'
    self.inputs = list(inputs)
    self.outputs = list(outputs)
    with tf.control_dependencies(self.outputs):
        updates_ops = []
        for update in updates:
            if type(update) is tuple:
                p, new_p = update
                updates_ops.append(tf.assign(p, new_p))
            else:
                # assumed already an op
                updates_ops.append(update)
        self.updates_op = tf.group(*updates_ops)
def get_output_for(self, input, phase='train', **kwargs):
    if phase == 'train':
        # Calculate the moments based on the individual batch.
        mean, variance = tf.nn.moments(input, self.axis, shift=self.moving_mean)
        # Update the moving_mean and moving_variance moments.
        update_moving_mean = moving_averages.assign_moving_average(
            self.moving_mean, mean, self.decay)
        update_moving_variance = moving_averages.assign_moving_average(
            self.moving_variance, variance, self.decay)
        # Make sure the updates are computed here.
        with tf.control_dependencies([update_moving_mean, update_moving_variance]):
            output = tf.nn.batch_normalization(
                input, mean, variance, self.beta, self.gamma, self.epsilon)
    else:
        output = tf.nn.batch_normalization(
            input, self.moving_mean, self.moving_variance, self.beta, self.gamma, self.epsilon)
    output.set_shape(self.input_shape)
    return output
def batch_norm_wrapper(inputs, is_training, decay=0.999):
    scale = tf.Variable(tf.ones([inputs.get_shape()[-1]]))
    beta = tf.Variable(tf.zeros([inputs.get_shape()[-1]]))
    pop_mean = tf.Variable(tf.zeros([inputs.get_shape()[-1]]), trainable=False)
    pop_var = tf.Variable(tf.ones([inputs.get_shape()[-1]]), trainable=False)

    if is_training is not None:
        batch_mean, batch_var = tf.nn.moments(inputs, [0])
        train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
        train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))
        with tf.control_dependencies([train_mean, train_var]):
            return tf.nn.batch_normalization(inputs, train_mean, train_var, beta, scale, epsilon)
    else:
        return tf.nn.batch_normalization(inputs, pop_mean, pop_var, beta, scale, epsilon)

## regularization parameter
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    with tf.name_scope(name, self._name) as name:
        update_op = self._opt.apply_gradients(
            grads_and_vars, global_step=global_step)
        add_noise_ops = []
        with tf.control_dependencies([update_op]):
            for grad, var in grads_and_vars:
                if grad is None:
                    continue
                with tf.name_scope("sgld_noise_" + var.op.name):
                    if isinstance(grad, tf.Tensor):
                        add_noise_ops.append(self._noise_dense(var))
                    else:
                        add_noise_ops.append(self._noise_sparse(grad, var))
        ## running combined op
        return tf.group(*([update_op] + add_noise_ops), name=name)
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    with tf.name_scope(name, self._name) as name:
        update_op = self._opt.apply_gradients(
            grads_and_vars, global_step=global_step)
        add_noise_ops = []
        with tf.control_dependencies([update_op]):
            for grad, var in grads_and_vars:
                if grad is None:
                    continue
                with tf.name_scope("psgld_noise_" + var.op.name):
                    if isinstance(grad, tf.Tensor):
                        add_noise_ops.append(self._noise_dense(var))
                    else:
                        add_noise_ops.append(self._noise_sparse(grad, var))
        ## running combined op
        return tf.group(*([update_op] + add_noise_ops), name=name)
def __call__(self, x, train=True):
    shape = x.get_shape().as_list()

    if train:
        with tf.variable_scope(self.name) as scope:
            self.beta = tf.get_variable("beta", [shape[-1]],
                                        initializer=tf.constant_initializer(0.))
            self.gamma = tf.get_variable("gamma", [shape[-1]],
                                         initializer=tf.random_normal_initializer(1., 0.02))

            batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
            ema_apply_op = self.ema.apply([batch_mean, batch_var])
            self.ema_mean, self.ema_var = self.ema.average(batch_mean), self.ema.average(batch_var)

            with tf.control_dependencies([ema_apply_op]):
                mean, var = tf.identity(batch_mean), tf.identity(batch_var)
    else:
        mean, var = self.ema_mean, self.ema_var

    normed = tf.nn.batch_norm_with_global_normalization(
        x, mean, var, self.beta, self.gamma, self.epsilon, scale_after_normalization=True)
    return normed
def gauss_KL(mu1, logstd1, mu2, logstd2):
    """ Returns KL divergence among two multivariate Gaussians, component-wise.

    It assumes the covariance matrix is diagonal. All inputs have shape (n,a).
    It is not necessary to know the number of actions because reduce_sum will
    sum over this to get the `d` constant offset. The part consisting of the
    trace in the formula is blended with the mean difference squared due to the
    common "denominator" of var2_na. This formula generalizes for an arbitrary
    number of actions. I think mu2 and logstd2 should represent the policy
    before the update.

    Returns the KL divergence for each of the n components in the minibatch,
    then we do a reduce_mean outside this.
    """
    var1_na = tf.exp(2. * logstd1)
    var2_na = tf.exp(2. * logstd2)
    tmp_matrix = 2. * (logstd2 - logstd1) + (var1_na + tf.square(mu1 - mu2)) / var2_na - 1
    kl_n = tf.reduce_sum(0.5 * tmp_matrix, axis=[1])  # Don't forget the 1/2 !!
    assert_op = tf.Assert(tf.reduce_all(kl_n >= -0.0000001), [kl_n])
    with tf.control_dependencies([assert_op]):
        kl_n = tf.identity(kl_n)
    return kl_n
def tune(self, acceptance_rate, fresh_start):
    def adapt_stepsize():
        new_step = tf.assign(self.step, (1 - fresh_start) * self.step + 1)
        rate1 = tf.div(1.0, new_step + self.t0)
        new_h_bar = tf.assign(
            self.h_bar, (1 - fresh_start) * (1 - rate1) * self.h_bar +
            rate1 * (self.delta - acceptance_rate))
        log_epsilon = self.mu - tf.sqrt(new_step) / self.gamma * new_h_bar
        rate = tf.pow(new_step, -self.kappa)
        new_log_epsilon_bar = tf.assign(
            self.log_epsilon_bar,
            rate * log_epsilon + (1 - fresh_start) * (1 - rate) * self.log_epsilon_bar)
        with tf.control_dependencies([new_log_epsilon_bar]):
            new_log_epsilon = tf.identity(log_epsilon)

        return tf.exp(new_log_epsilon)

    c = tf.cond(self.adapt_step_size,
                adapt_stepsize,
                lambda: tf.exp(self.log_epsilon_bar))
    return c
def _adapt_mass(self, t, num_chain_dims):
    ewmv = ExponentialWeightedMovingVariance(
        self.mass_decay, self.data_shapes, num_chain_dims)
    new_mass = tf.cond(self.adapt_mass,
                       lambda: ewmv.get_updated_precision(self.q),
                       lambda: ewmv.precision())
    if not isinstance(new_mass, list):
        new_mass = [new_mass]

    # print('New mass is = {}'.format(new_mass))
    # TODO incorrect shape?
    # print('New mass={}'.format(new_mass))
    # print('q={}, NMS={}'.format(self.q[0].get_shape(),
    #                             new_mass[0].get_shape()))
    with tf.control_dependencies(new_mass):
        current_mass = tf.cond(
            tf.less(tf.to_int32(t), self.mass_collect_iters),
            lambda: [tf.ones(shape) for shape in self.data_shapes],
            lambda: new_mass)
    if not isinstance(current_mass, list):
        current_mass = [current_mass]
    return current_mass
def assert_rank_at_least(tensor, k, name):
    """
    Whether the rank of `tensor` is at least k.

    :param tensor: A tensor to be checked.
    :param k: The least rank allowed.
    :param name: The name of `tensor` for error message.

    :return: The checked tensor.
    """
    static_shape = tensor.get_shape()
    shape_err_msg = '{} should have rank >= {}.'.format(name, k)
    if static_shape and (static_shape.ndims < k):
        raise ValueError(shape_err_msg)
    if not static_shape:
        _assert_shape_op = tf.assert_rank_at_least(
            tensor, k, message=shape_err_msg)
        with tf.control_dependencies([_assert_shape_op]):
            tensor = tf.identity(tensor)
    return tensor
def assert_scalar(tensor, name):
    """
    Whether the `tensor` is a scalar (0-D tensor).

    :param tensor: A tensor to be checked.
    :param name: The name of `tensor` for error message.

    :return: The checked tensor.
    """
    static_shape = tensor.get_shape()
    shape_err_msg = name + " should be a scalar (0-D tensor)."
    if static_shape and (static_shape.ndims >= 1):
        raise ValueError(shape_err_msg)
    else:
        _assert_shape_op = tf.assert_rank(tensor, 0, message=shape_err_msg)
        with tf.control_dependencies([_assert_shape_op]):
            tensor = tf.identity(tensor)
        return tensor
def batch_norm(x, n_out, phase_train, scope='bn', decay=0.9, eps=1e-5):
    """
    Code taken from http://stackoverflow.com/a/34634291/2267819
    """
    with tf.variable_scope(scope):
        beta = tf.get_variable(name='beta', shape=[n_out],
                               initializer=tf.constant_initializer(0.0), trainable=True)
        gamma = tf.get_variable(name='gamma', shape=[n_out],
                                initializer=tf.random_normal_initializer(1.0, 0.02), trainable=True)
        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=decay)

        def mean_var_with_update():
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = tf.cond(phase_train,
                            mean_var_with_update,
                            lambda: (ema.average(batch_mean), ema.average(batch_var)))
        normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, eps)
    return normed
def batch_normalization(x, scope, decay=0.999, eps=1e-6, training=True):
    ndim = len(x.get_shape().as_list())
    fdim = x.get_shape().as_list()[-1]
    with tf.variable_scope(scope):
        gamma = tf.get_variable("scale", [fdim], tf.float32, tf.constant_initializer(1.0))
        beta = tf.get_variable("offset", [fdim], tf.float32, tf.constant_initializer(0.0))
        mean = tf.get_variable("mean", [fdim], tf.float32, tf.constant_initializer(0.0), trainable=False)
        var = tf.get_variable("variance", [fdim], tf.float32, tf.constant_initializer(1.0), trainable=False)
        if training:
            x_mean, x_var = tf.nn.moments(x, range(ndim - 1))
            avg_mean = tf.assign(mean, mean * decay + x_mean * (1.0 - decay))
            avg_var = tf.assign(var, var * decay + x_var * (1.0 - decay))
            with tf.control_dependencies([avg_mean, avg_var]):
                return tf.nn.batch_normalization(x, x_mean, x_var, beta, gamma, eps)
        else:
            return tf.nn.batch_normalization(x, mean, var, beta, gamma, eps)
def batch_normalization_with_mask(x, mask, scope, decay=0.999, eps=1e-6, training=True):
    ndim = len(x.get_shape().as_list())
    fdim = x.get_shape().as_list()[-1]
    with tf.variable_scope(scope):
        gamma = tf.get_variable("scale", [fdim], tf.float32, tf.constant_initializer(1.0))
        beta = tf.get_variable("offset", [fdim], tf.float32, tf.constant_initializer(0.0))
        mean = tf.get_variable("mean", [fdim], tf.float32, tf.constant_initializer(0.0), trainable=False)
        var = tf.get_variable("variance", [fdim], tf.float32, tf.constant_initializer(1.0), trainable=False)
        if training:
            x_mean, x_var = tf.nn.weighted_moments(x, range(ndim - 1), mask)
            avg_mean = tf.assign(mean, mean * decay + x_mean * (1.0 - decay))
            avg_var = tf.assign(var, var * decay + x_var * (1.0 - decay))
            with tf.control_dependencies([avg_mean, avg_var]):
                return tf.nn.batch_normalization(x, x_mean, x_var, beta, gamma, eps)
        else:
            return tf.nn.batch_normalization(x, mean, var, beta, gamma, eps)
def model(self, input_text_begin, input_text_end, gene, variation, batch_size,
          vocabulary_size=VOCABULARY_SIZE, embeddings_size=EMBEDDINGS_SIZE, output_classes=9):
    # embeddings
    embeddings = _load_embeddings(vocabulary_size, embeddings_size)
    # global step
    self.global_step = training_util.get_or_create_global_step()
    self.global_step = tf.assign_add(self.global_step, 1)
    # model
    with tf.control_dependencies([self.global_step]):
        with slim.arg_scope(self.text_classification_model.model_arg_scope()):
            self.outputs = self.text_classification_model.model(input_text_begin, input_text_end,
                                                                gene, variation, output_classes,
                                                                embeddings=embeddings,
                                                                batch_size=batch_size,
                                                                training=False)
    # restore only the trainable variables
    self.saver = tf.train.Saver(var_list=tf_variables.trainable_variables())
    return self.outputs
def __call__(self, x):
    shape = x.get_shape()
    shp = self.in_dim or shape[-1]
    with tf.variable_scope(self.name) as scope:
        self.gamma = tf.get_variable("gamma", [shp],
                                     initializer=tf.random_normal_initializer(1., 0.02))
        self.beta = tf.get_variable("beta", [shp],
                                    initializer=tf.constant_initializer(0.))

        self.mean, self.variance = tf.nn.moments(x, [0, 1, 2])
        self.mean.set_shape((shp,))
        self.variance.set_shape((shp,))
        self.ema_apply_op = self.ema.apply([self.mean, self.variance])

        if self.train:
            # with tf.control_dependencies([self.ema_apply_op]):
            normalized_x = tf.nn.batch_norm_with_global_normalization(
                x, self.mean, self.variance, self.beta, self.gamma, self.epsilon,
                scale_after_normalization=True)
        else:
            normalized_x = tf.nn.batch_norm_with_global_normalization(
                x, self.ema.average(self.mean), self.ema.average(self.variance),
                self.beta, self.gamma, self.epsilon,
                scale_after_normalization=True)
        return normalized_x
def __init__(self, config, model):
    assert isinstance(model, Model)
    self.config = config
    self.model = model
    self.opt = tf.train.AdagradOptimizer(config.init_lr)
    self.loss = model.get_loss()
    self.var_list = model.get_var_list()
    self.global_step = model.get_global_step()
    self.ema_op = model.ema_op
    self.summary = model.summary
    self.grads = self.opt.compute_gradients(self.loss, var_list=self.var_list)
    opt_op = self.opt.apply_gradients(self.grads, global_step=self.global_step)

    # Define train op
    with tf.control_dependencies([opt_op]):
        self.train_op = tf.group(self.ema_op)
def grad_variance(self):
    grad_var_ops = []
    tensor_to_avg = []
    for t, g in zip(self._tvars, self._grads):
        if isinstance(g, ops.IndexedSlices):
            tensor_to_avg.append(
                tf.reshape(tf.unsorted_segment_sum(
                    g.values, g.indices, g.dense_shape[0]),
                    shape=t.get_shape()))
        else:
            tensor_to_avg.append(g)
    avg_op = self._moving_averager.apply(tensor_to_avg)
    grad_var_ops.append(avg_op)
    with tf.control_dependencies([avg_op]):
        self._grad_avg = [
            self._moving_averager.average(val) for val in tensor_to_avg]
        self._grad_avg_squared = [tf.square(val) for val in self._grad_avg]
    self._grad_var = tf.maximum(
        tf.constant(EPS, dtype=self._grad_norm_squared_avg.dtype),
        self._grad_norm_squared_avg
        - tf.add_n([tf.reduce_sum(val) for val in self._grad_avg_squared]))
    if self._sparsity_debias:
        self._grad_var *= self._sparsity_avg
    return grad_var_ops
def dist_to_opt(self):
    dist_to_opt_ops = []
    # running average of the norm of gradient
    self._grad_norm = tf.sqrt(self._grad_norm_squared)
    avg_op = self._moving_averager.apply([self._grad_norm, ])
    dist_to_opt_ops.append(avg_op)
    with tf.control_dependencies([avg_op]):
        self._grad_norm_avg = self._moving_averager.average(
            self._grad_norm)
        # single iteration distance estimation
        # note that self._grad_norm_avg is per variable
        self._dist_to_opt = (self._grad_norm_avg
                             / (self._grad_norm_squared_avg + EPS))
    # running average of distance
    avg_op = self._moving_averager.apply([self._dist_to_opt])
    dist_to_opt_ops.append(avg_op)
    with tf.control_dependencies([avg_op]):
        self._dist_to_opt_avg = tf.identity(
            self._moving_averager.average(self._dist_to_opt))
        if self._sparsity_debias:
            self._dist_to_opt_avg /= (tf.sqrt(self._sparsity_avg) + EPS)
    return dist_to_opt_ops
def get_cubic_root(self):
    # We have the equation x^2 D^2 + (1-x)^4 * C / h_min^2
    # where x = sqrt(mu).
    # We substitute x, which is sqrt(mu), with x = y + 1.
    # It gives y^3 + py = q
    # where p = (D^2 h_min^2)/(2*C) and q = -p.
    # We use Vieta's substitution to compute the root.
    # There is only one real solution y (which is in [0, 1]).
    # http://mathworld.wolfram.com/VietasSubstitution.html
    # assert_array = \
    #   [tf.Assert(tf.logical_not(tf.is_nan(self._dist_to_opt_avg)), [self._dist_to_opt_avg, ]),
    #    tf.Assert(tf.logical_not(tf.is_nan(self._h_min)), [self._h_min, ]),
    #    tf.Assert(tf.logical_not(tf.is_nan(self._grad_var)), [self._grad_var, ]),
    #    tf.Assert(tf.logical_not(tf.is_inf(self._dist_to_opt_avg)), [self._dist_to_opt_avg, ]),
    #    tf.Assert(tf.logical_not(tf.is_inf(self._h_min)), [self._h_min, ]),
    #    tf.Assert(tf.logical_not(tf.is_inf(self._grad_var)), [self._grad_var, ])]
    # with tf.control_dependencies(assert_array):
    # EPS in the numerator to prevent momentum being exactly one in case of 0 gradient
    p = (self._dist_to_opt_avg + EPS)**2 * (self._h_min + EPS)**2 / 2 / (self._grad_var + EPS)
    w3 = (-tf.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
    w = tf.sign(w3) * tf.pow(tf.abs(w3), 1.0 / 3.0)
    y = w - p / 3.0 / (w + EPS)
    x = y + 1
    return x
def update_hyper_param(self):
    assign_hyper_ops = []
    self._mu = tf.identity(tf.cond(
        self._do_tune, lambda: self.get_mu_tensor(),
        lambda: self._mu_var))
    with tf.control_dependencies([self._mu]):
        self._lr = tf.identity(tf.cond(
            self._do_tune, lambda: self.get_lr_tensor(),
            lambda: self._lr_var))

    with tf.control_dependencies([self._mu, self._lr]):
        if self._use_unsmoothed_lr_mu:
            assign_hyper_ops.append(tf.assign(self._mu_var, self._mu))
            assign_hyper_ops.append(tf.assign(self._lr_var, self._lr))
        else:
            self._mu = self._beta * self._mu_var + (1 - self._beta) * self._mu
            self._lr = self._beta * self._lr_var + (1 - self._beta) * self._lr
            with tf.control_dependencies([self._mu, self._lr]):
                assign_hyper_ops.append(tf.assign(self._mu_var, self._mu))
                assign_hyper_ops.append(tf.assign(self._lr_var, self._lr))
    assign_hyper_op = tf.group(*assign_hyper_ops)
    return assign_hyper_op
def get_function_init_state(self, function_tokens):
    next_state = tf.gather(self.function_states,
                           function_tokens - (self.num_begin_tokens + self.num_control_tokens))
    assert2 = tf.Assert(tf.reduce_all(next_state >= 0), [function_tokens])
    with tf.control_dependencies([assert2]):
        return tf.identity(next_state)