The following 50 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.python.training.moving_averages.assign_moving_average().
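Before the project examples, here is a minimal sketch of the call itself (the tensor and variable names are illustrative, not taken from any of the projects below): assign_moving_average() returns an op that nudges a non-trainable variable toward a new value, and the op is typically added to the update ops that run alongside the training step.

import tensorflow as tf
from tensorflow.python.training import moving_averages

# Hypothetical batch of activations, shape [batch, channels].
activations = tf.random_normal([32, 64])
batch_mean = tf.reduce_mean(activations, axis=0)

# Non-trainable accumulator that tracks a running mean of the batch statistic.
running_mean = tf.get_variable('running_mean', shape=[64],
                               initializer=tf.zeros_initializer(),
                               trainable=False)

# assign_moving_average returns an op that performs
#   running_mean -= (1 - decay) * (running_mean - batch_mean)
# i.e. running_mean = decay * running_mean + (1 - decay) * batch_mean.
update_op = moving_averages.assign_moving_average(
    running_mean, batch_mean, decay=0.99, zero_debias=False)
tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_op)
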
def batch_norm(data, name):
    shape_param = data.get_shape()[-1]
    beta = tf.get_variable(name=name+'_beta', shape=shape_param, dtype=tf.float32,
                           initializer=tf.constant_initializer(0.0, tf.float32))
    gamma = tf.get_variable(name=name+'_gamma', shape=shape_param, dtype=tf.float32,
                            initializer=tf.constant_initializer(1.0, tf.float32))

    if FLAGS.train_mode:
        mean_param, variance_param = tf.nn.moments(x=data, axes=[0, 1, 2], name=name+'_moments')
        moving_mean = tf.get_variable(name=name+'_moving_mean', shape=shape_param, dtype=tf.float32,
                                      initializer=tf.constant_initializer(0.0, tf.float32),
                                      trainable=False)
        moving_variance = tf.get_variable(name=name+'_moving_variance', shape=shape_param, dtype=tf.float32,
                                          initializer=tf.constant_initializer(1.0, tf.float32),
                                          trainable=False)
        mean = moving_averages.assign_moving_average(variable=moving_mean, value=mean_param, decay=0.9)
        variance = moving_averages.assign_moving_average(variable=moving_variance, value=variance_param, decay=0.9)
    else:
        mean = tf.get_variable(name=name+'_moving_mean', shape=shape_param, dtype=tf.float32,
                               initializer=tf.constant_initializer(0.0, tf.float32), trainable=False)
        variance = tf.get_variable(name=name+'_moving_variance', shape=shape_param, dtype=tf.float32,
                                   initializer=tf.constant_initializer(1.0, tf.float32), trainable=False)
        tf.summary.scalar(mean.op.name, mean)
        tf.summary.scalar(variance.op.name, variance)

    b_norm = tf.nn.batch_normalization(x=data, mean=mean, variance=variance, offset=beta,
                                       scale=gamma, variance_epsilon=0.001, name=name)
    return b_norm

def make_moving_average(name, value, init, decay, log=True):
    """Creates an exp-moving average of `value` and an update op, which is added to UPDATE_OPS collection.

    :param name: string, name of the created moving average tf.Variable
    :param value: tf.Tensor, the value to be averaged
    :param init: float, an initial value for the moving average
    :param decay: float between 0 and 1, exponential decay of the moving average
    :param log: bool, add a summary op if True

    :return: tf.Tensor, the moving average
    """
    var = tf.get_variable(name, shape=value.get_shape(),
                          initializer=tf.constant_initializer(init), trainable=False)

    update = moving_averages.assign_moving_average(var, value, decay, zero_debias=False)
    tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update)
    if log:
        tf.summary.scalar(name, var)

    return var

def get_output_for(self, input, phase='train', **kwargs):
    if phase == 'train':
        # Calculate the moments based on the individual batch.
        mean, variance = tf.nn.moments(input, self.axis, shift=self.moving_mean)
        # Update the moving_mean and moving_variance moments.
        update_moving_mean = moving_averages.assign_moving_average(
            self.moving_mean, mean, self.decay)
        update_moving_variance = moving_averages.assign_moving_average(
            self.moving_variance, variance, self.decay)
        # Make sure the updates are computed here.
        with tf.control_dependencies([update_moving_mean, update_moving_variance]):
            output = tf.nn.batch_normalization(
                input, mean, variance, self.beta, self.gamma, self.epsilon)
    else:
        output = tf.nn.batch_normalization(
            input, self.moving_mean, self.moving_variance,
            self.beta, self.gamma, self.epsilon)
    output.set_shape(self.input_shape)
    return output

def moving_average_update(x, value, momentum):
    """Compute the moving average of a variable.

    Arguments:
        x: A Variable.
        value: A tensor with the same shape as `variable`.
        momentum: The moving average momentum.

    Returns:
        An Operation to update the variable.
    """
    return moving_averages.assign_moving_average(
        x, value, momentum, zero_debias=False)


# LINEAR ALGEBRA

def _adaptive_max_norm(norm, std_factor, decay, global_step, epsilon, name):
    """Find max_norm given norm and previous average."""
    with vs.variable_scope(name, "AdaptiveMaxNorm", [norm]):
        log_norm = math_ops.log(norm + epsilon)

        def moving_average(name, value, decay):
            moving_average_variable = vs.get_variable(
                name, shape=value.get_shape(), dtype=value.dtype,
                initializer=init_ops.zeros_initializer, trainable=False)
            return moving_averages.assign_moving_average(
                moving_average_variable, value, decay, zero_debias=False)

        # quicker adaptation at the beginning
        if global_step is not None:
            n = math_ops.to_float(global_step)
            decay = math_ops.minimum(decay, n / (n + 1.))

        # update averages
        mean = moving_average("mean", log_norm, decay)
        sq_mean = moving_average("sq_mean", math_ops.square(log_norm), decay)

        variance = sq_mean - math_ops.square(mean)
        std = math_ops.sqrt(math_ops.maximum(epsilon, variance))
        max_norms = math_ops.exp(mean + std_factor * std)
        return max_norms, mean

def get_output_for(self, input, phase='train', **kwargs):
    if phase == 'train':
        # Calculate the moments based on the individual batch.
        mean, variance = tf.nn.moments(input, self.axis, shift=self.moving_mean)
        # Update the moving_mean and moving_variance moments.
        update_moving_mean = moving_averages.assign_moving_average(
            self.moving_mean, mean, self.decay)
        update_moving_variance = moving_averages.assign_moving_average(
            self.moving_variance, variance, self.decay)
        # Make sure the updates are computed here.
        with tf.control_dependencies([update_moving_mean, update_moving_variance]):
            output = tf.nn.batch_normalization(
                input, mean, variance, self.beta, self.gamma, self.epsilon)
    else:
        output = tf.nn.batch_normalization(
            input, self.moving_mean, self.moving_variance,
            self.beta, self.gamma, self.epsilon)
    output.set_shape(self.input_shape)
    return output

def bn(x, is_training):
    x_shape = x.get_shape()
    params_shape = x_shape[-1:]
    axis = list(range(len(x_shape) - 1))

    beta = _get_variable('beta', params_shape, initializer=tf.zeros_initializer())
    gamma = _get_variable('gamma', params_shape, initializer=tf.ones_initializer())

    moving_mean = _get_variable('moving_mean', params_shape,
                                initializer=tf.zeros_initializer(), trainable=False)
    moving_variance = _get_variable('moving_variance', params_shape,
                                    initializer=tf.ones_initializer(), trainable=False)

    # These ops will only be performed when training.
    mean, variance = tf.nn.moments(x, axis)
    update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, BN_DECAY)
    update_moving_variance = moving_averages.assign_moving_average(moving_variance, variance, BN_DECAY)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)

    mean, variance = control_flow_ops.cond(
        is_training, lambda: (mean, variance),
        lambda: (moving_mean, moving_variance))

    return tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)

def _batch_norm_without_layers(self, input_layer, decay, use_scale, epsilon):
    """Batch normalization on `input_layer` without tf.layers."""
    # We make this function as similar as possible to the
    # tf.contrib.layers.batch_norm, to minimize the differences between using
    # layers and not using layers.
    shape = input_layer.shape
    num_channels = shape[3] if self.data_format == 'NHWC' else shape[1]
    beta = self.get_variable('beta', [num_channels], tf.float32, tf.float32,
                             initializer=tf.zeros_initializer())
    if use_scale:
        gamma = self.get_variable('gamma', [num_channels], tf.float32, tf.float32,
                                  initializer=tf.ones_initializer())
    else:
        gamma = tf.constant(1.0, tf.float32, [num_channels])
    # For moving variables, we use tf.get_variable instead of self.get_variable,
    # since self.get_variable returns the result of tf.cast which we cannot
    # assign to.
    moving_mean = tf.get_variable('moving_mean', [num_channels], tf.float32,
                                  initializer=tf.zeros_initializer(),
                                  trainable=False)
    moving_variance = tf.get_variable('moving_variance', [num_channels], tf.float32,
                                      initializer=tf.ones_initializer(),
                                      trainable=False)
    if self.phase_train:
        bn, batch_mean, batch_variance = tf.nn.fused_batch_norm(
            input_layer, gamma, beta, epsilon=epsilon,
            data_format=self.data_format, is_training=True)
        mean_update = moving_averages.assign_moving_average(
            moving_mean, batch_mean, decay=decay, zero_debias=False)
        variance_update = moving_averages.assign_moving_average(
            moving_variance, batch_variance, decay=decay, zero_debias=False)
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, mean_update)
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, variance_update)
    else:
        bn, _, _ = tf.nn.fused_batch_norm(
            input_layer, gamma, beta, mean=moving_mean,
            variance=moving_variance, epsilon=epsilon,
            data_format=self.data_format, is_training=False)
    return bn

def __call__(self, input_layer, epsilon=1e-5, decay=0.9, name="batch_norm",
             in_dim=None, phase=Phase.train):
    shape = input_layer.shape
    shp = in_dim or shape[-1]
    with tf.variable_scope(name) as scope:
        self.mean = self.variable('mean', [shp], init=tf.constant_initializer(0.), train=False)
        self.variance = self.variable('variance', [shp], init=tf.constant_initializer(1.0), train=False)

        self.gamma = self.variable("gamma", [shp], init=tf.random_normal_initializer(1., 0.02))
        self.beta = self.variable("beta", [shp], init=tf.constant_initializer(0.))

        if phase == Phase.train:
            mean, variance = tf.nn.moments(input_layer.tensor, [0, 1, 2])
            mean.set_shape((shp,))
            variance.set_shape((shp,))

            update_moving_mean = moving_averages.assign_moving_average(self.mean, mean, decay)
            update_moving_variance = moving_averages.assign_moving_average(self.variance, variance, decay)

            with tf.control_dependencies([update_moving_mean, update_moving_variance]):
                normalized_x = tf.nn.batch_norm_with_global_normalization(
                    input_layer.tensor, mean, variance, self.beta, self.gamma, epsilon,
                    scale_after_normalization=True)
        else:
            normalized_x = tf.nn.batch_norm_with_global_normalization(
                input_layer.tensor, self.mean, self.variance,
                self.beta, self.gamma, epsilon,
                scale_after_normalization=True)
    return input_layer.with_tensor(normalized_x, parameters=self.vars)

def moving_average_update(variable, value, momentum):
    return moving_averages.assign_moving_average(
        variable, value, momentum)


# LINEAR ALGEBRA

def bn(x, c):
    x_shape = x.get_shape()
    params_shape = x_shape[-1:]
    axis = list(range(len(x_shape) - 1))

    beta = _get_variable('beta', params_shape,
                         initializer=tf.zeros_initializer())  # tf.constant_initializer(0.00, dtype='float')
    gamma = _get_variable('gamma', params_shape, initializer=tf.ones_initializer())

    moving_mean = _get_variable('moving_mean', params_shape,
                                initializer=tf.zeros_initializer(), trainable=False)
    moving_variance = _get_variable('moving_variance', params_shape,
                                    initializer=tf.ones_initializer(), trainable=False)

    # These ops will only be performed when training.
    mean, variance = tf.nn.moments(x, axis)
    update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, BN_DECAY)
    update_moving_variance = moving_averages.assign_moving_average(
        moving_variance, variance, BN_DECAY)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)

    mean, variance = control_flow_ops.cond(
        c['is_training'], lambda: (mean, variance),
        lambda: (moving_mean, moving_variance))

    x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)
    return x


# wrapper for get_variable op

def _bn(self, x, params_init, is_training):
    x_shape = x.get_shape()
    axis = list(range(len(x_shape) - 1))

    beta = self._get_variable_const('beta', initializer=tf.constant(params_init['bias']))
    gamma = self._get_variable_const('gamma', initializer=tf.constant(params_init['weight']))

    moving_mean = self._get_variable_const('moving_mean',
                                           initializer=tf.constant(params_init['running_mean']),
                                           trainable=False)
    moving_variance = self._get_variable_const('moving_variance',
                                               initializer=tf.constant(params_init['running_var']),
                                               trainable=False)

    # mean, variance = tf.nn.moments(x, axis)
    # update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, BN_DECAY)
    # update_moving_variance = moving_averages.assign_moving_average(moving_variance, variance, BN_DECAY)
    # tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
    # tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)
    #
    # if ~is_training:
    #     mean = moving_mean
    #     variance = moving_variance
    # else:
    #     ema = tf.train.ExponentialMovingAverage(decay=BN_DECAY)
    #
    #     def mean_var_with_update():
    #         ema_apply_op = ema.apply([mean, variance])
    #         with tf.control_dependencies([ema_apply_op]):
    #             return tf.identity(mean), tf.identity(variance)
    #     mean, variance = mean_var_with_update()

    # mean, variance = control_flow_ops.cond(is_training, lambda: (mean, variance),
    #                                        lambda: (moving_mean, moving_variance))
    # x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)

    x = tf.layers.batch_normalization(x, momentum=BN_DECAY, epsilon=BN_EPSILON,
                                      beta_initializer=tf.constant_initializer(params_init['bias']),
                                      gamma_initializer=tf.constant_initializer(params_init['weight']),
                                      moving_mean_initializer=tf.constant_initializer(params_init['running_mean']),
                                      moving_variance_initializer=tf.constant_initializer(params_init['running_var']),
                                      training=is_training)
    return x

def moving_average_update(variable, value, momentum):
    try:
        return moving_averages.assign_moving_average(
            variable, value, momentum, zero_debias=False)
    except TypeError:
        return moving_averages.assign_moving_average(
            variable, value, momentum)


# LINEAR ALGEBRA

def _adaptive_max_norm(self, norm, std_factor, decay, global_step, epsilon, name):
    """Find max_norm given norm and previous average."""
    with tf.variable_scope(name, "AdaptiveMaxNorm", [norm]):
        log_norm = tf.log(norm + epsilon)

        def moving_average(name, value, decay):
            moving_average_variable = tf.get_variable(name,
                                                      shape=value.get_shape(),
                                                      dtype=value.dtype,
                                                      initializer=tf.zeros_initializer(),
                                                      trainable=False)
            return moving_averages.assign_moving_average(moving_average_variable,
                                                         value,
                                                         decay,
                                                         zero_debias=False)

        # quicker adaptation at the beginning
        if global_step is not None:
            n = tf.to_float(global_step)
            decay = tf.minimum(decay, n / (n + 1.))

        # update averages
        mean = moving_average("mean", log_norm, decay)
        sq_mean = moving_average("sq_mean", tf.square(log_norm), decay)

        variance = sq_mean - tf.square(mean)
        std = tf.sqrt(tf.maximum(epsilon, variance))
        max_norms = tf.exp(mean + std_factor * std)
        return max_norms, mean

def batchNorm(self, x, isTraining):
    shape = x.get_shape()
    paramsShape = shape[-1:]
    axis = list(range(len(shape) - 1))

    with tf.variable_scope('bn'):
        beta = self.getVariable('beta', paramsShape,
                                initializer=tf.constant_initializer(value=0, dtype=tf.float32))
        self.learningRates[beta.name] = 1.0
        gamma = self.getVariable('gamma', paramsShape,
                                 initializer=tf.constant_initializer(value=1, dtype=tf.float32))
        self.learningRates[gamma.name] = 2.0

        movingMean = self.getVariable('moving_mean', paramsShape,
                                      initializer=tf.constant_initializer(value=0, dtype=tf.float32),
                                      trainable=False)
        movingVariance = self.getVariable('moving_variance', paramsShape,
                                          initializer=tf.constant_initializer(value=1, dtype=tf.float32),
                                          trainable=False)

        mean, variance = tf.nn.moments(x, axis)
        updateMovingMean = moving_averages.assign_moving_average(movingMean, mean, MOVING_AVERAGE_DECAY)
        updateMovingVariance = moving_averages.assign_moving_average(movingVariance, variance, MOVING_AVERAGE_DECAY)
        tf.add_to_collection(UPDATE_OPS_COLLECTION, updateMovingMean)
        tf.add_to_collection(UPDATE_OPS_COLLECTION, updateMovingVariance)

        mean, variance = control_flow_ops.cond(isTraining,
                                               lambda: (mean, variance),
                                               lambda: (movingMean, movingVariance))

        x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, variance_epsilon=0.001)
    return x


# def batchNormalization(self, inputs, isTraining, name):
#     with tf.variable_scope('bn'):
#         output = tf.contrib.layers.batch_norm(inputs, center=True, scale=True,
#                                               is_training=isTraining, decay=0.997, epsilon=0.0001)
#         self.learningRates[name+'/bn/BatchNorm/gamma:0'] = 2.0
#         self.learningRates[name+'/bn/BatchNorm/beta:0'] = 1.0
#
#     return output

def _batch_norm(x, name, is_train):
    """ Apply a batch normalization layer. """
    with tf.variable_scope(name):
        inputs_shape = x.get_shape()
        axis = list(range(len(inputs_shape) - 1))
        param_shape = int(inputs_shape[-1])

        moving_mean = tf.get_variable('mean', [param_shape],
                                      initializer=tf.constant_initializer(0.0),
                                      trainable=False)
        moving_var = tf.get_variable('variance', [param_shape],
                                     initializer=tf.constant_initializer(1.0),
                                     trainable=False)
        beta = tf.get_variable('offset', [param_shape],
                               initializer=tf.constant_initializer(0.0))
        gamma = tf.get_variable('scale', [param_shape],
                                initializer=tf.constant_initializer(1.0))

        def mean_var_with_update():
            mean, var = tf.nn.moments(x, axis)
            update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, 0.99)
            update_moving_var = moving_averages.assign_moving_average(moving_var, var, 0.99)
            # Force the moving-average updates to run whenever the batch statistics are used.
            with tf.control_dependencies([update_moving_mean, update_moving_var]):
                return tf.identity(mean), tf.identity(var)

        def mean_var():
            return tf.identity(moving_mean), tf.identity(moving_var)

        mean, var = tf.cond(is_train, mean_var_with_update, mean_var)
        normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-3)

    return normed

def moving_average_update(x, value, momentum):
    return moving_averages.assign_moving_average(
        x, value, momentum, zero_debias=False)


# LINEAR ALGEBRA

def batchNormalization(x, is_training=True, decay=0.9, epsilon=0.001):
    x_shape = x.get_shape()
    params_shape = x_shape[-1:]
    axis = list(range(len(x_shape) - 1))

    beta = _get_variable('beta', params_shape, initializer=tf.zeros_initializer)
    gamma = _get_variable('gamma', params_shape, initializer=tf.ones_initializer)
    moving_mean = _get_variable('moving_mean', params_shape,
                                initializer=tf.zeros_initializer, trainable=False)
    moving_variance = _get_variable('moving_variance', params_shape,
                                    initializer=tf.ones_initializer, trainable=False)

    # These ops will only be performed when training.
    if is_training:
        mean, variance = tf.nn.moments(x, axis)
        update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, decay)
        update_moving_variance = moving_averages.assign_moving_average(
            moving_variance, variance, decay)
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_moving_mean)
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_moving_variance)
        return tf.nn.batch_normalization(x, mean, variance, beta, gamma, epsilon)
    else:
        return tf.nn.batch_normalization(x, moving_mean, moving_variance, beta, gamma, epsilon)

def _adaptive_max_norm(norm, std_factor, decay, global_step, epsilon, name):
    """Find max_norm given norm and previous average."""
    with vs.variable_scope(name, "AdaptiveMaxNorm", [norm]):
        log_norm = math_ops.log(norm + epsilon)

        def moving_average(name, value, decay):
            moving_average_variable = vs.get_variable(
                name, shape=value.get_shape(), dtype=value.dtype,
                initializer=init_ops.zeros_initializer(), trainable=False)
            return moving_averages.assign_moving_average(
                moving_average_variable, value, decay, zero_debias=False)

        # quicker adaptation at the beginning
        if global_step is not None:
            n = math_ops.to_float(global_step)
            decay = math_ops.minimum(decay, n / (n + 1.))

        # update averages
        mean = moving_average("mean", log_norm, decay)
        sq_mean = moving_average("sq_mean", math_ops.square(log_norm), decay)

        variance = sq_mean - math_ops.square(mean)
        std = math_ops.sqrt(math_ops.maximum(epsilon, variance))
        max_norms = math_ops.exp(mean + std_factor * std)
        return max_norms, mean

def bn(x, c):
    x_shape = x.get_shape()
    params_shape = x_shape[-1:]

    if c['use_bias']:
        bias = _get_variable('bias', params_shape, initializer=tf.zeros_initializer)
        return x + bias

    axis = list(range(len(x_shape) - 1))

    beta = _get_variable('beta', params_shape, initializer=tf.zeros_initializer)
    gamma = _get_variable('gamma', params_shape, initializer=tf.ones_initializer)

    moving_mean = _get_variable('moving_mean', params_shape,
                                initializer=tf.zeros_initializer, trainable=False)
    moving_variance = _get_variable('moving_variance', params_shape,
                                    initializer=tf.ones_initializer, trainable=False)

    # These ops will only be performed when training.
    mean, variance = tf.nn.moments(x, axis)
    update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, BN_DECAY)
    update_moving_variance = moving_averages.assign_moving_average(
        moving_variance, variance, BN_DECAY)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)

    mean, variance = control_flow_ops.cond(
        c['is_training'], lambda: (mean, variance),
        lambda: (moving_mean, moving_variance))

    x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)
    # x.set_shape(inputs.get_shape()) ??
    return x

def _batch_norm(self, name, x):
    """Batch normalization."""
    with tf.variable_scope(name):
        params_shape = [x.get_shape()[-1]]
        # print x.get_shape(), params_shape

        beta = tf.get_variable(
            'beta', params_shape, tf.float32,
            initializer=tf.constant_initializer(0.0, tf.float32))
        gamma = tf.get_variable(
            'gamma', params_shape, tf.float32,
            initializer=tf.constant_initializer(1.0, tf.float32))

        if self.mode == 'train':
            mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')

            moving_mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            moving_variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)

            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_mean, mean, 0.9))
            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_variance, variance, 0.9))
        else:
            mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            tf.histogram_summary(mean.op.name, mean)
            tf.histogram_summary(variance.op.name, variance)

        # epsilon used to be 1e-5. Maybe 0.001 solves NaN problem in deeper net.
        y = tf.nn.batch_normalization(x, mean, variance, beta, gamma, 0.001)
        y.set_shape(x.get_shape())
        return y

def _batch_norm(self, name, x):
    """Batch normalization."""
    with tf.variable_scope(name):
        params_shape = [x.get_shape()[-1]]

        beta = tf.get_variable(
            'beta', params_shape, tf.float32,
            initializer=tf.constant_initializer(0.0, tf.float32))
        gamma = tf.get_variable(
            'gamma', params_shape, tf.float32,
            initializer=tf.constant_initializer(1.0, tf.float32))

        if self.mode == 'train':
            mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')

            moving_mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            moving_variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)

            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_mean, mean, 0.9))
            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_variance, variance, 0.9))
        else:
            mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            tf.summary.histogram(mean.op.name, mean)
            tf.summary.histogram(variance.op.name, variance)

        # epsilon used to be 1e-5. Maybe 0.001 solves NaN problem in deeper net.
        x_bn = tf.nn.batch_normalization(x, mean, variance, beta, gamma, 0.001)
        x_bn.set_shape(x.get_shape())
        return x_bn

def _batch_norm(self, name, x):
    """Batch normalization."""
    with tf.variable_scope(name):
        params_shape = [x.get_shape()[-1]]

        beta = tf.get_variable(
            'beta', params_shape, tf.float32,
            initializer=tf.constant_initializer(0.0, tf.float32))
        gamma = tf.get_variable(
            'gamma', params_shape, tf.float32,
            initializer=tf.constant_initializer(1.0, tf.float32))

        if self.mode == 'train':
            mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')

            moving_mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            moving_variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)

            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_mean, mean, 0.9))
            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_variance, variance, 0.9))
        else:
            mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            tf.summary.histogram(mean.op.name, mean)
            tf.summary.histogram(variance.op.name, variance)

        # epsilon used to be 1e-5. Maybe 0.001 solves NaN problem in deeper net.
        y = tf.nn.batch_normalization(x, mean, variance, beta, gamma, 0.001)
        y.set_shape(x.get_shape())
        return y

def _build_update_ops(self, mean, variance, is_training):
    """Builds the moving average update ops when using moving variance.

    Args:
      mean: The mean value to update with.
      variance: The variance value to update with.
      is_training: Boolean Tensor to indicate if we're currently in
        training mode.

    Returns:
      Tuple of `(update_mean_op, update_variance_op)` when `is_training` is or
      could be `True`. Returns `None` when `is_training=False`.
    """

    def build_update_ops():
        """Builds the exponential moving average update ops."""

        update_mean_op = moving_averages.assign_moving_average(
            variable=self._moving_mean,
            value=mean,
            decay=self._decay_rate,
            zero_debias=False,
            name="update_moving_mean").op

        update_variance_op = moving_averages.assign_moving_average(
            variable=self._moving_variance,
            value=variance,
            decay=self._decay_rate,
            zero_debias=False,
            name="update_moving_variance").op

        return update_mean_op, update_variance_op

    def build_no_ops():
        return (tf.no_op(), tf.no_op())

    # Only make the ops if we know that `is_training=True`, or the value of
    # `is_training` is unknown.
    is_training_const = utils.constant_value(is_training)
    if is_training_const is None or is_training_const:
        update_mean_op, update_variance_op = utils.smart_cond(
            is_training,
            build_update_ops,
            build_no_ops,
        )
        return (update_mean_op, update_variance_op)
    else:
        return None

def bn(x, c):
    x_shape = x.get_shape()
    params_shape = x_shape[-1:]

    if c['use_bias']:
        bias = _get_variable('bias', params_shape, initializer=tf.zeros_initializer())
        return x + bias

    axis = list(range(len(x_shape) - 1))

    beta = _get_variable('beta', params_shape, initializer=tf.zeros_initializer())
    gamma = _get_variable('gamma', params_shape, initializer=tf.ones_initializer())

    moving_mean = _get_variable('moving_mean', params_shape,
                                initializer=tf.zeros_initializer(), trainable=False)
    moving_variance = _get_variable('moving_variance', params_shape,
                                    initializer=tf.ones_initializer(), trainable=False)

    # These ops will only be performed when training.
    mean, variance = tf.nn.moments(x, axis)
    update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, BN_DECAY)
    update_moving_variance = moving_averages.assign_moving_average(
        moving_variance, variance, BN_DECAY)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)

    mean, variance = control_flow_ops.cond(
        c['is_training'], lambda: (mean, variance),
        lambda: (moving_mean, moving_variance))

    x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)
    # x.set_shape(inputs.get_shape()) ??
    return x

def bn(x, c):
    x_shape = x.get_shape()
    params_shape = x_shape[-1:]
    axis = list(range(len(x_shape) - 1))

    beta = tf.get_variable('beta', shape=params_shape, initializer=tf.zeros_initializer(),
                           dtype='float32',
                           collections=[tf.GraphKeys.GLOBAL_VARIABLES, GC_VARIABLES],
                           trainable=True)
    gamma = tf.get_variable('gamma', shape=params_shape, initializer=tf.ones_initializer(),
                            dtype='float32',
                            collections=[tf.GraphKeys.GLOBAL_VARIABLES, GC_VARIABLES],
                            trainable=True)

    moving_mean = tf.get_variable('moving_mean', shape=params_shape,
                                  initializer=tf.zeros_initializer(), dtype='float32',
                                  collections=[tf.GraphKeys.GLOBAL_VARIABLES, GC_VARIABLES],
                                  trainable=False)
    moving_variance = tf.get_variable('moving_variance', shape=params_shape,
                                      initializer=tf.ones_initializer(), dtype='float32',
                                      collections=[tf.GraphKeys.GLOBAL_VARIABLES, GC_VARIABLES],
                                      trainable=False)

    # These ops will only be performed when training.
    mean, variance = tf.nn.moments(x, axis)
    update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, BN_DECAY)
    update_moving_variance = moving_averages.assign_moving_average(
        moving_variance, variance, BN_DECAY)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)

    mean, variance = control_flow_ops.cond(
        c['is_training'], lambda: (mean, variance),
        lambda: (moving_mean, moving_variance))

    x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)
    return x


# resnet block

def _batch_norm(self, name, x):
    """Batch normalization."""
    with tf.variable_scope(name):
        params_shape = [x.get_shape()[-1]]

        beta = tf.get_variable(
            'beta', params_shape, tf.float32,
            initializer=tf.constant_initializer(0.0, tf.float32))
        gamma = tf.get_variable(
            'gamma', params_shape, tf.float32,
            initializer=tf.constant_initializer(1.0, tf.float32))

        if self.mode == 'train':
            mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')

            moving_mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            moving_variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)

            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_mean, mean, 0.9))
            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_variance, variance, 0.9))
        else:
            mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            tf.summary.histogram(mean.op.name, mean)
            tf.summary.histogram(variance.op.name, variance)

        # epsilon used to be 1e-5. Maybe 0.001 solves NaN problem in deeper net.
        y = tf.nn.batch_normalization(x, mean, variance, beta, gamma, 0.001)
        y.set_shape(x.get_shape())
        return y

def _bn(x, is_training, hypes):
    x_shape = x.get_shape()
    params_shape = x_shape[-1:]
    axis = list(range(len(x_shape) - 1))

    beta = _get_variable('beta', params_shape, initializer=tf.zeros_initializer())
    gamma = _get_variable('gamma', params_shape, initializer=tf.ones_initializer())

    moving_mean = _get_variable('moving_mean', params_shape,
                                initializer=tf.zeros_initializer(), trainable=False)
    moving_variance = _get_variable('moving_variance', params_shape,
                                    initializer=tf.ones_initializer(), trainable=False)

    # These ops will only be performed when training.
    mean, variance = tf.nn.moments(x, axis)
    update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, BN_DECAY)
    update_moving_variance = moving_averages.assign_moving_average(
        moving_variance, variance, BN_DECAY)

    if hypes['use_moving_average_bn']:
        tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
        tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)

        mean, variance = control_flow_ops.cond(
            is_training, lambda: (mean, variance),
            lambda: (moving_mean, moving_variance))
    else:
        mean, variance = mean, variance

    x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)
    # x.set_shape(inputs.get_shape()) ??
    return x

def bn(x, c):
    x_shape = x.get_shape()
    params_shape = x_shape[-1:]

    if c['use_bias']:
        bias = _get_variable('bias', params_shape, initializer=tf.zeros_initializer)
        return x + bias

    axis = list(range(len(x_shape) - 1))

    beta = _get_variable('beta', params_shape, initializer=tf.zeros_initializer)
    gamma = _get_variable('gamma', params_shape, initializer=tf.ones_initializer)

    moving_mean = _get_variable('moving_mean', params_shape,
                                initializer=tf.zeros_initializer, trainable=False)
    moving_variance = _get_variable('moving_variance', params_shape,
                                    initializer=tf.ones_initializer, trainable=False)

    # These ops will only be performed when training.
    mean, variance = tf.nn.moments(x, axis)
    update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, BN_DECAY)
    update_moving_variance = moving_averages.assign_moving_average(
        moving_variance, variance, BN_DECAY)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)

    mean, variance = control_flow_ops.cond(
        c['is_training'], lambda: (mean, variance),
        lambda: (moving_mean, moving_variance))

    x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)
    return x

def _batch_norm(self, name, x):
    """Batch normalization."""
    with tf.variable_scope(name):
        params_shape = [x.get_shape()[-1]]

        beta = tf.get_variable(
            'beta', params_shape, tf.float32,
            initializer=tf.constant_initializer(0.0, tf.float32), trainable=False)
        gamma = tf.get_variable(
            'gamma', params_shape, tf.float32,
            initializer=tf.constant_initializer(1.0, tf.float32), trainable=False)
        factor = tf.get_variable(
            'factor', 1, tf.float32,
            initializer=tf.constant_initializer(1.0, tf.float32), trainable=False)

        if self.bn:
            mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')

            moving_mean = tf.get_variable(
                'mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            moving_variance = tf.get_variable(
                'variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)

            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_mean, mean, 0.9))
            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_variance, variance, 0.9))
        else:
            mean = tf.get_variable(
                'mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            variance = tf.get_variable(
                'variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            # inv_factor = tf.reciprocal(factor)
            inv_factor = tf.div(1., factor)
            mean = tf.multiply(inv_factor, mean)
            variance = tf.multiply(inv_factor, variance)

            # tf.summary.histogram(mean.op.name, mean)
            # tf.summary.histogram(variance.op.name, variance)

        # epsilon used to be 1e-5. Maybe 0.001 solves NaN problem in deeper net.
        y = tf.nn.batch_normalization(x, mean, variance, beta, gamma, 0.001)
        y.set_shape(x.get_shape())
        return y

def weighted_resample(inputs, weights, overall_rate, scope=None,
                      mean_decay=0.999, warmup=10, seed=None):
    """Performs an approximate weighted resampling of `inputs`.

    This method chooses elements from `inputs` where each item's rate of
    selection is proportional to its value in `weights`, and the average
    rate of selection across all inputs (and many invocations!) is
    `overall_rate`.

    Args:
      inputs: A list of tensors whose first dimension is `batch_size`.
      weights: A `[batch_size]`-shaped tensor with each batch member's weight.
      overall_rate: Desired overall rate of resampling.
      scope: Scope to use for the op.
      mean_decay: How quickly to decay the running estimate of the mean weight.
      warmup: Until the resulting tensor has been evaluated `warmup` times, the
        resampling method uses the true mean over all calls as its weight
        estimate, rather than a decayed mean.
      seed: Random seed.

    Returns:
      A list of tensors exactly like `inputs`, but with an unknown (and
        possibly zero) first dimension.
      A tensor containing the effective resampling rate used for each output.
    """
    # Algorithm: Just compute rates as weights/mean_weight *
    # overall_rate. This way the average weight corresponds to the
    # overall rate, and a weight twice the average has twice the rate,
    # etc.
    with ops.name_scope(scope, 'weighted_resample', inputs) as opscope:
        # First: Maintain a running estimated mean weight, with decay
        # adjusted (by also maintaining an invocation count) during the
        # warmup period so that at the beginning, there aren't too many
        # zeros mixed in, throwing the average off.

        with variable_scope.variable_scope(scope, 'estimate_mean', inputs):
            count_so_far = variable_scope.get_local_variable(
                'resample_count', initializer=0)

            estimated_mean = variable_scope.get_local_variable(
                'estimated_mean', initializer=0.0)

            count = count_so_far.assign_add(1)
            real_decay = math_ops.minimum(
                math_ops.truediv((count - 1), math_ops.minimum(count, warmup)),
                mean_decay)

            batch_mean = math_ops.reduce_mean(weights)
            mean = moving_averages.assign_moving_average(
                estimated_mean, batch_mean, real_decay, zero_debias=False)

        # Then, normalize the weights into rates using the mean weight and
        # overall target rate:
        rates = weights * overall_rate / mean

        results = resample_at_rate([rates] + inputs, rates,
                                   scope=opscope, seed=seed, back_prop=False)

        return (results[1:], results[0])

def bn(x, c):
    x_shape = x.get_shape()
    params_shape = x_shape[-1:]

    if c['use_bias']:
        bias = _get_variable('bias', params_shape, initializer=tf.zeros_initializer)
        return x + bias

    axis = list(range(len(x_shape) - 1))

    beta = _get_variable('beta', params_shape, initializer=tf.zeros_initializer)
    gamma = _get_variable('gamma', params_shape, initializer=tf.ones_initializer)

    moving_mean = _get_variable('moving_mean', params_shape,
                                initializer=tf.zeros_initializer, trainable=False)
    moving_variance = _get_variable('moving_variance', params_shape,
                                    initializer=tf.ones_initializer, trainable=False)

    # These ops will only be performed when training.
    mean, variance = tf.nn.moments(x, axis)
    update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, BN_DECAY)
    update_moving_variance = moving_averages.assign_moving_average(
        moving_variance, variance, BN_DECAY)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)

    mean, variance = control_flow_ops.cond(
        c['is_training'], lambda: (mean, variance),
        lambda: (moving_mean, moving_variance))

    x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)
    # x.set_shape(inputs.get_shape()) ??
    return x

def batch_normalization(self, input, name, scale_offset=True, relu=False,
                        decay=0.999, moving_vars='moving_vars'):
    # NOTE: Currently, only inference is supported
    with tf.variable_scope(name):
        axis = list(range(len(input.get_shape()) - 1))
        shape = [input.get_shape()[-1]]
        if scale_offset:
            scale = self.make_var('scale', shape=shape,
                                  initializer=tf.ones_initializer(),
                                  trainable=self.trainable)
            offset = self.make_var('offset', shape=shape,
                                   initializer=tf.zeros_initializer(),
                                   trainable=self.trainable)
        else:
            scale, offset = (None, None)

        # Create moving_mean and moving_variance and add them to the
        # GraphKeys.MOVING_AVERAGE_VARIABLES collection.
        moving_collections = [moving_vars, tf.GraphKeys.MOVING_AVERAGE_VARIABLES]
        moving_mean = self.make_var('mean', shape,
                                    initializer=tf.zeros_initializer(),
                                    trainable=False,
                                    collections=moving_collections)
        moving_variance = self.make_var('variance', shape,
                                        initializer=tf.ones_initializer(),
                                        trainable=False,
                                        collections=moving_collections)
        if self.trainable:
            # Calculate the moments based on the individual batch.
            mean, variance = tf.nn.moments(input, axis)

            update_moving_mean = moving_averages.assign_moving_average(
                moving_mean, mean, decay)
            tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
            update_moving_variance = moving_averages.assign_moving_average(
                moving_variance, variance, decay)
            tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)
        else:
            # Just use the moving_mean and moving_variance.
            mean = moving_mean
            variance = moving_variance

        output = tf.nn.batch_normalization(
            input,
            mean=mean,
            variance=variance,
            offset=offset,
            scale=scale,
            # TODO: This is the default Caffe batch norm eps
            # Get the actual eps from parameters
            variance_epsilon=1e-5,
            name=name)
        if relu:
            output = tf.nn.relu(output)

        return output

def _batch_norm(self, name, x):
    """Batch normalization."""
    with tf.variable_scope(name):
        params_shape = [x.get_shape()[-1]]

        beta = tf.get_variable(
            'beta', params_shape, tf.float32,
            initializer=tf.constant_initializer(0.0, tf.float32))
        gamma = tf.get_variable(
            'gamma', params_shape, tf.float32,
            initializer=tf.constant_initializer(1.0, tf.float32))
        # beta = Qmf_quan(beta, 4, 7)
        # gamma = Qmf_quan(gamma, 4, 7)

        if self.mode == 'train':
            mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')

            moving_mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            moving_variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            # moving_mean = Qmf_quan(moving_mean, 4, 7)
            # moving_variance = Qmf_quan(moving_variance, 4, 7)

            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_mean, mean, 0.9))
            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_variance, variance, 0.9))
        else:
            mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            # mean = Qmf_quan(mean, 4, 7)
            # variance = Qmf_quan(variance, 4, 7)
            tf.summary.histogram(mean.op.name, mean)
            tf.summary.histogram(variance.op.name, variance)

        # epsilon used to be 1e-5. Maybe 0.001 solves NaN problem in deeper net.
        y = tf.nn.batch_normalization(x, mean, variance, beta, gamma, 0.001)
        y.set_shape(x.get_shape())
        return y

def _batch_norm_vec(self, name, x):
    """Batch normalization."""
    with tf.variable_scope(name):
        params_shape = [x.get_shape()[-1]]

        beta = tf.get_variable(
            'beta', params_shape, tf.float32,
            initializer=tf.constant_initializer(0.0, tf.float32))
        gamma = tf.get_variable(
            'gamma', params_shape, tf.float32,
            initializer=tf.constant_initializer(1.0, tf.float32))
        # beta = Qmf_quan(beta, 4, 7)
        # gamma = Qmf_quan(gamma, 4, 7)

        if self.mode == 'train':
            mean, variance = tf.nn.moments(x, [0], name='moments')

            moving_mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            moving_variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            # moving_mean = Qmf_quan(moving_mean, 4, 7)
            # moving_variance = Qmf_quan(moving_variance, 4, 7)

            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_mean, mean, 0.9))
            self._extra_train_ops.append(moving_averages.assign_moving_average(
                moving_variance, variance, 0.9))
        else:
            mean = tf.get_variable(
                'moving_mean', params_shape, tf.float32,
                initializer=tf.constant_initializer(0.0, tf.float32),
                trainable=False)
            variance = tf.get_variable(
                'moving_variance', params_shape, tf.float32,
                initializer=tf.constant_initializer(1.0, tf.float32),
                trainable=False)
            # mean = Qmf_quan(mean, 4, 7)
            # variance = Qmf_quan(variance, 4, 7)
            tf.summary.histogram(mean.op.name, mean)
            tf.summary.histogram(variance.op.name, variance)

        # epsilon used to be 1e-5. Maybe 0.001 solves NaN problem in deeper net.
        y = tf.nn.batch_normalization(x, mean, variance, beta, gamma, 0.001)
        y.set_shape(x.get_shape())
        return y

def batch_norm_new(name, input_var, is_train, decay=0.999, epsilon=1e-5):
    """Batch normalization modified from BatchNormLayer in Tensorlayer.
    Source: <https://github.com/zsdonghao/tensorlayer/blob/master/tensorlayer/layers.py#L2190>
    """
    inputs_shape = input_var.get_shape()
    axis = list(range(len(inputs_shape) - 1))
    params_shape = inputs_shape[-1:]

    with tf.variable_scope(name) as scope:
        # Trainable beta and gamma variables
        beta = tf.get_variable('beta', shape=params_shape,
                               initializer=tf.zeros_initializer)
        gamma = tf.get_variable('gamma', shape=params_shape,
                                initializer=tf.random_normal_initializer(mean=1.0, stddev=0.002))

        # Moving mean and variance updated during training
        moving_mean = tf.get_variable('moving_mean', params_shape,
                                      initializer=tf.zeros_initializer,
                                      trainable=False)
        moving_variance = tf.get_variable('moving_variance', params_shape,
                                          initializer=tf.constant_initializer(1.),
                                          trainable=False)

        # Compute mean and variance along axis
        batch_mean, batch_variance = tf.nn.moments(input_var, axis, name='moments')

        # Define ops to update moving_mean and moving_variance
        update_moving_mean = moving_averages.assign_moving_average(
            moving_mean, batch_mean, decay, zero_debias=False)
        update_moving_variance = moving_averages.assign_moving_average(
            moving_variance, batch_variance, decay, zero_debias=False)

        # Define a function that:
        # 1. Update moving_mean & moving_variance with batch_mean & batch_variance
        # 2. Then return the batch_mean & batch_variance
        def mean_var_with_update():
            with tf.control_dependencies([update_moving_mean, update_moving_variance]):
                return tf.identity(batch_mean), tf.identity(batch_variance)

        # Perform different ops for training and testing
        if is_train:
            mean, variance = mean_var_with_update()
            normed = tf.nn.batch_normalization(input_var, mean, variance, beta, gamma, epsilon)
        else:
            normed = tf.nn.batch_normalization(input_var, moving_mean, moving_variance, beta, gamma, epsilon)

        # mean, variance = tf.cond(
        #     is_train,
        #     mean_var_with_update,                   # Training
        #     lambda: (moving_mean, moving_variance)  # Testing - it will use the moving_mean and
        #                                             # moving_variance (fixed during test) that are
        #                                             # computed during training
        # )
        # normed = tf.nn.batch_normalization(input_var, mean, variance, beta, gamma, epsilon)
    return normed