The following code examples, extracted from open-source Python projects, illustrate how to use tensorflow.rsqrt().
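As a quick orientation before the project examples, here is a minimal sketch (not taken from any of the listed projects) of what tf.rsqrt() computes: the element-wise reciprocal square root, i.e. 1 / sqrt(x). It assumes the TensorFlow 1.x session API that the examples below use.

import tensorflow as tf

# tf.rsqrt(x) computes 1 / sqrt(x) element-wise.
x = tf.constant([1.0, 4.0, 16.0])
y = tf.rsqrt(x)  # expected values: [1.0, 0.5, 0.25]

with tf.Session() as sess:
    print(sess.run(y))  # -> [1.   0.5  0.25]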
def layer_normalization(self, x):
    """
    x should be: [batch_size, sequence_length, d_model]
    :return: [batch_size, sequence_length, d_model]
    """
    filter = x.get_shape()[-1]  # last dimension of x. e.g. 512
    print("layer_normalization:==================>variable_scope:",
          "layer_normalization" + str(self.layer_index) + self.type)
    with tf.variable_scope("layer_normalization" + str(self.layer_index) + self.type):
        # 1. normalize input by using mean and variance according to last dimension
        mean = tf.reduce_mean(x, axis=-1, keep_dims=True)  # [batch_size, sequence_length, 1]
        variance = tf.reduce_mean(tf.square(x - mean), axis=-1, keep_dims=True)  # [batch_size, sequence_length, 1]
        norm_x = (x - mean) * tf.rsqrt(variance + 1e-6)  # [batch_size, sequence_length, d_model]
        # 2. re-scale normalized input back
        scale = tf.get_variable("layer_norm_scale", [filter], initializer=tf.ones_initializer)  # [filter]
        bias = tf.get_variable("layer_norm_bias", [filter], initializer=tf.zeros_initializer)  # [filter]
        output = norm_x * scale + bias  # [batch_size, sequence_length, d_model]
        return output  # [batch_size, sequence_length, d_model]
def layer_normalization(self, x, scope):
    """
    x should be: [batch_size, sequence_length, d_model]
    :return: [batch_size, sequence_length, d_model]
    """
    filter = x.get_shape()[-1]  # last dimension of x. e.g. 512
    with tf.variable_scope("layer_normalization" + scope):
        # 1. normalize input by using mean and variance according to last dimension
        mean = tf.reduce_mean(x, axis=-1, keep_dims=True)  # [batch_size, sequence_length, 1]
        variance = tf.reduce_mean(tf.square(x - mean), axis=-1, keep_dims=True)  # [batch_size, sequence_length, 1]
        norm_x = (x - mean) * tf.rsqrt(variance + 1e-6)  # [batch_size, sequence_length, d_model]
        # 2. re-scale normalized input back
        scale = tf.get_variable("layer_norm_scale", [filter], initializer=tf.ones_initializer)  # [filter]
        bias = tf.get_variable("layer_norm_bias", [filter], initializer=tf.zeros_initializer)  # [filter]
        output = norm_x * scale + bias  # [batch_size, sequence_length, d_model]
        return output  # [batch_size, sequence_length, d_model]
def layer_norm_all(h, base, num_units, scope):
    # Layer Norm (faster version)
    #
    # Performs layer norm on multiple bases at once (i.e., i, g, j, o for LSTM)
    #
    # Reshapes h to perform layer norm in parallel
    with tf.variable_scope(scope):
        h_reshape = tf.reshape(h, [-1, base, num_units])
        mean = tf.reduce_mean(h_reshape, [2], keep_dims=True)
        var = tf.reduce_mean(tf.square(h_reshape - mean), [2], keep_dims=True)
        epsilon = tf.constant(1e-3)
        rstd = tf.rsqrt(var + epsilon)
        h_reshape = (h_reshape - mean) * rstd
        # reshape back to original
        h = tf.reshape(h_reshape, [-1, base * num_units])
        alpha = tf.get_variable('layer_norm_alpha', [4 * num_units],
                                initializer=tf.constant_initializer(1.0), dtype=tf.float32)
        bias = tf.get_variable('layer_norm_bias', [4 * num_units],
                               initializer=tf.constant_initializer(0.0), dtype=tf.float32)
        return (h * alpha) + bias
def diet_expert(x, hidden_size, params):
    """A two-layer feed-forward network with relu activation on hidden layer.

    Uses diet variables.
    Recomputes hidden layer on backprop to save activation memory.

    Args:
      x: a Tensor with shape [batch, io_size]
      hidden_size: an integer
      params: a diet variable HParams object.

    Returns:
      a Tensor with shape [batch, io_size]
    """

    @fn_with_diet_vars(params)
    def diet_expert_internal(x):
        dim = x.get_shape().as_list()[-1]
        h = tf.layers.dense(x, hidden_size, activation=tf.nn.relu, use_bias=False)
        y = tf.layers.dense(h, dim, use_bias=False)
        y *= tf.rsqrt(tf.to_float(dim * hidden_size))
        return y

    return diet_expert_internal(x)
def __call__(self, support_set, input_image, name, training=False):
    """
    This module calculates the cosine distance between each of the support set embeddings
    and the target image embeddings.
    :param support_set: The embeddings of the support set images, tensor of shape
        [sequence_length, batch_size, 64]
    :param input_image: The embedding of the target image, tensor of shape [batch_size, 64]
    :param name: Name of the op to appear on the graph
    :param training: Flag indicating training or evaluation (True/False)
    :return: A tensor with cosine similarities of shape [batch_size, sequence_length, 1]
    """
    with tf.name_scope('distance-module' + name), \
            tf.variable_scope('distance-module', reuse=self.reuse):
        eps = 1e-10
        similarities = []
        for support_image in tf.unstack(support_set, axis=0):
            sum_support = tf.reduce_sum(tf.square(support_image), 1, keep_dims=True)
            support_magnitude = tf.rsqrt(tf.clip_by_value(sum_support, eps, float("inf")))
            dot_product = tf.matmul(tf.expand_dims(input_image, 1),
                                    tf.expand_dims(support_image, 2))
            dot_product = tf.squeeze(dot_product, [1])
            cosine_similarity = dot_product * support_magnitude
            similarities.append(cosine_similarity)
        similarities = tf.concat(axis=1, values=similarities)

    self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       scope='distance-module')
    return similarities
def instance_norm(x, shift=True, scale=True, eps=1e-3, scope=None, reuse=None):
    # Expect a 4-D Tensor
    C = x._shape_as_list()[-1]

    with tf.variable_scope(scope, 'instance_norm', reuse=reuse):
        # Get mean and variance, normalize input
        m, v = tf.nn.moments(x, [1, 2], keep_dims=True)
        output = (x - m) * tf.rsqrt(v + eps)

        if scale:
            output *= tf.get_variable('gamma', C, initializer=tf.ones_initializer)

        if shift:
            output += tf.get_variable('beta', C, initializer=tf.zeros_initializer)

    return output
def scaled_dot_product_attention_simple(q, k, v, bias, name=None):
    """Scaled dot-product attention. One head. One spatial dimension.

    Args:
      q: a Tensor with shape [batch, length_q, depth_k]
      k: a Tensor with shape [batch, length_kv, depth_k]
      v: a Tensor with shape [batch, length_kv, depth_v]
      bias: optional Tensor broadcastable to [batch, length_q, length_kv]
      name: an optional string

    Returns:
      A Tensor.
    """
    with tf.variable_scope(
            name, default_name="scaled_dot_product_attention_simple"):
        scalar = tf.rsqrt(tf.to_float(common_layers.shape_list(q)[2]))
        logits = tf.matmul(q * scalar, k, transpose_b=True)
        if bias is not None:
            logits += bias
        weights = tf.nn.softmax(logits, name="attention_weights")
        tf.summary.image(
            "attention", tf.expand_dims(tf.pow(weights, 0.2), 3), max_outputs=1)
        return tf.matmul(weights, v)
def norm(x):
    return x * tf.rsqrt(tf.reduce_mean(tf.square(x), keep_dims=True))
def call(self, inputs):
    """
    inputs is an array which contains the support set embeddings, the target embedding as
    the second last value in the array, and the true class of the target embedding as the
    last value in the array
    """
    similarities = []

    targetembedding = inputs[-2]  # embedding of the query image
    numsupportset = len(inputs) - 2
    for ii in range(numsupportset):
        supportembedding = inputs[ii]  # embedding for the i-th member in the support set

        sum_support = tf.reduce_sum(tf.square(supportembedding), 1, keep_dims=True)
        # reciprocal of the magnitude of the member of the support set
        supportmagnitude = tf.rsqrt(tf.clip_by_value(sum_support, self.eps, float("inf")))

        sum_query = tf.reduce_sum(tf.square(targetembedding), 1, keep_dims=True)
        # reciprocal of the magnitude of the query image
        querymagnitude = tf.rsqrt(tf.clip_by_value(sum_query, self.eps, float("inf")))

        dot_product = tf.matmul(tf.expand_dims(targetembedding, 1),
                                tf.expand_dims(supportembedding, 2))
        dot_product = tf.squeeze(dot_product, [1])

        cosine_similarity = dot_product * supportmagnitude * querymagnitude
        similarities.append(cosine_similarity)

    similarities = tf.concat(axis=1, values=similarities)
    softmax_similarities = tf.nn.softmax(similarities)
    preds = tf.squeeze(tf.matmul(tf.expand_dims(softmax_similarities, 1), inputs[-1]))

    preds.set_shape((inputs[0].shape[0], self.nway))

    return preds
def l2_batch_normalize(x, epsilon=1e-12, scope=None):
    """
    Helper function to normalize a batch of vectors.
    :param x: the input placeholder
    :param epsilon: stabilizes division
    :return: the batch of l2 normalized vectors
    """
    with tf.name_scope(scope, "l2_batch_normalize") as scope:
        x_shape = tf.shape(x)
        x = tf.contrib.layers.flatten(x)
        x /= (epsilon + tf.reduce_max(tf.abs(x), 1, keep_dims=True))
        square_sum = tf.reduce_sum(tf.square(x), 1, keep_dims=True)
        x_inv_norm = tf.rsqrt(np.sqrt(epsilon) + square_sum)
        x_norm = tf.multiply(x, x_inv_norm)
        return tf.reshape(x_norm, x_shape, scope)
def layer_norm_all(h, batch_size, base, num_units, scope='layer_norm', reuse=False,
                   gamma_start=1.0, epsilon=1e-3, use_bias=True):
    """Layer Norm (faster version, but not using defun)."""
    # Performs layer norm on multiple bases at once (i.e., i, g, j, o for LSTM)
    # Reshapes h to perform layer norm in parallel
    h_reshape = tf.reshape(h, [batch_size, base, num_units])
    mean = tf.reduce_mean(h_reshape, [2], keep_dims=True)
    var = tf.reduce_mean(tf.square(h_reshape - mean), [2], keep_dims=True)
    epsilon = tf.constant(epsilon)
    rstd = tf.rsqrt(var + epsilon)
    h_reshape = (h_reshape - mean) * rstd
    # reshape back to original
    h = tf.reshape(h_reshape, [batch_size, base * num_units])
    with tf.variable_scope(scope):
        if reuse:
            tf.get_variable_scope().reuse_variables()
        gamma = tf.get_variable(
            'ln_gamma', [4 * num_units], initializer=tf.constant_initializer(gamma_start))
        if use_bias:
            beta = tf.get_variable(
                'ln_beta', [4 * num_units], initializer=tf.constant_initializer(0.0))
    if use_bias:
        return gamma * h + beta
    return gamma * h
def layer_norm(x, num_units, scope='layer_norm', reuse=False, gamma_start=1.0,
               epsilon=1e-3, use_bias=True):
    """Calculate layer norm."""
    axes = [1]
    mean = tf.reduce_mean(x, axes, keep_dims=True)
    x_shifted = x - mean
    var = tf.reduce_mean(tf.square(x_shifted), axes, keep_dims=True)
    inv_std = tf.rsqrt(var + epsilon)
    with tf.variable_scope(scope):
        if reuse:
            tf.get_variable_scope().reuse_variables()
        gamma = tf.get_variable(
            'ln_gamma', [num_units], initializer=tf.constant_initializer(gamma_start))
        if use_bias:
            beta = tf.get_variable(
                'ln_beta', [num_units], initializer=tf.constant_initializer(0.0))
    output = gamma * x_shifted * inv_std
    if use_bias:
        output += beta
    return output
def l2_normalize(incoming, dim, epsilon=1e-12, name="l2_normalize"):
    """ L2 Normalization.

    Normalizes along dimension `dim` using an L2 norm.

    For a 1-D tensor with `dim = 0`, computes

        output = x / sqrt(max(sum(x**2), epsilon))

    For `x` with more dimensions, independently normalizes each 1-D slice along
    dimension `dim`.

    Arguments:
        incoming: `Tensor`. Incoming Tensor.
        dim: `int`. Dimension along which to normalize.
        epsilon: `float`. A lower bound value for the norm. Will use
            `sqrt(epsilon)` as the divisor if `norm < sqrt(epsilon)`.
        name: `str`. A name for this layer (optional).

    Returns:
        A `Tensor` with the same shape as `x`.
    """
    with tf.name_scope(name) as name:
        x = tf.convert_to_tensor(incoming, name="x")
        square_sum = tf.reduce_sum(tf.square(x), [dim], keep_dims=True)
        x_inv_norm = tf.rsqrt(tf.maximum(square_sum, epsilon))
        return tf.multiply(x, x_inv_norm, name=name)
def batch_norm(x, name="batch_norm"): eps = 1e-6 with tf.variable_scope(name): nchannels = x.get_shape()[3] scale = tf.get_variable("scale", [nchannels], initializer=tf.random_normal_initializer(1.0, 0.02, dtype=tf.float32)) center = tf.get_variable("center", [nchannels], initializer=tf.constant_initializer(0.0, dtype = tf.float32)) ave, dev = tf.nn.moments(x, axes=[1,2], keep_dims=True) inv_dev = tf.rsqrt(dev + eps) normalized = (x-ave)*inv_dev * scale + center return normalized
def _instance_norm(input):
    """ Instance Normalization """
    with tf.variable_scope("instance_norm"):
        depth = input.get_shape()[3]
        scale = _weights("scale", [depth], mean=1.0)
        offset = _biases("offset", [depth])
        mean, variance = tf.nn.moments(input, axes=[1, 2], keep_dims=True)
        epsilon = 1e-5
        inv = tf.rsqrt(variance + epsilon)
        normalized = (input - mean) * inv
        return scale * normalized + offset
def layer_norm(inputs, epsilon=1e-6, dtype=None, scope=None):
    """
    Layer Normalization

    Args:
        inputs: A Tensor of shape [..., channel_size]
        epsilon: A floating number
        dtype: An optional instance of tf.DType
        scope: An optional string

    Returns:
        A Tensor with the same shape as inputs
    """
    with tf.variable_scope(scope, default_name="layer_norm", values=[inputs],
                           dtype=dtype):
        channel_size = inputs.get_shape().as_list()[-1]

        scale = tf.get_variable("scale", shape=[channel_size],
                                initializer=tf.ones_initializer())
        offset = tf.get_variable("offset", shape=[channel_size],
                                 initializer=tf.zeros_initializer())

        mean = tf.reduce_mean(inputs, axis=-1, keep_dims=True)
        variance = tf.reduce_mean(tf.square(inputs - mean), axis=-1, keep_dims=True)

        norm_inputs = (inputs - mean) * tf.rsqrt(variance + epsilon)

        return norm_inputs * scale + offset
def spatial_batch_norm(input_layer, name='spatial_batch_norm'):
    """
    Batch-normalizes the layer as in http://arxiv.org/abs/1502.03167
    This is important since it allows the different scales to talk to each other
    when they get joined.
    """
    mean, variance = tf.nn.moments(input_layer, [0, 1, 2])
    variance_epsilon = 0.01  # TODO: Check what this value should be
    inv = tf.rsqrt(variance + variance_epsilon)
    num_channels = input_layer.get_shape().as_list()[3]  # TODO: Clean this up
    scale = tf.Variable(tf.random_uniform([num_channels]), name='scale')  # TODO: How should these initialize?
    offset = tf.Variable(tf.random_uniform([num_channels]), name='offset')
    return_val = tf.subtract(tf.multiply(tf.multiply(scale, inv),
                                         tf.subtract(input_layer, mean)),
                             offset, name=name)
    return return_val
def _layer_norm_compute_python(x, epsilon, scale, bias):
    """Layer norm raw computation."""
    mean = tf.reduce_mean(x, axis=[-1], keep_dims=True)
    variance = tf.reduce_mean(tf.square(x - mean), axis=[-1], keep_dims=True)
    norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
    return norm_x * scale + bias
def BatchClipByL2norm(t, upper_bound, name=None):
    """Clip an array of tensors by L2 norm.

    Shrink each dimension-0 slice of tensor (for matrix it is each row) such
    that the l2 norm is at most upper_bound. Here we clip each row as it
    corresponds to each example in the batch.

    Args:
      t: the input tensor.
      upper_bound: the upperbound of the L2 norm.
      name: optional name.
    Returns:
      the clipped tensor.
    """
    assert upper_bound > 0
    with tf.name_scope(values=[t, upper_bound], name=name,
                       default_name="batch_clip_by_l2norm") as name:
        saved_shape = tf.shape(t)
        batch_size = tf.slice(saved_shape, [0], [1])
        t2 = tf.reshape(t, tf.concat(axis=0, values=[batch_size, [-1]]))
        upper_bound_inv = tf.fill(tf.slice(saved_shape, [0], [1]),
                                  tf.constant(1.0 / upper_bound))
        # Add a small number to avoid divide by 0
        l2norm_inv = tf.rsqrt(tf.reduce_sum(t2 * t2, [1]) + 0.000001)
        scale = tf.minimum(l2norm_inv, upper_bound_inv) * upper_bound
        clipped_t = tf.matmul(tf.diag(scale), t2)
        clipped_t = tf.reshape(clipped_t, saved_shape, name=name)
    return clipped_t
def setUp(self):
    super(CoreUnaryOpsTest, self).setUp()

    self.ops = [
        ('abs', operator.abs, tf.abs, core.abs_function),
        ('neg', operator.neg, tf.neg, core.neg),
        # TODO(shoyer): add unary + to core TensorFlow
        ('pos', None, None, None),
        ('sign', None, tf.sign, core.sign),
        ('reciprocal', None, tf.reciprocal, core.reciprocal),
        ('square', None, tf.square, core.square),
        ('round', None, tf.round, core.round_function),
        ('sqrt', None, tf.sqrt, core.sqrt),
        ('rsqrt', None, tf.rsqrt, core.rsqrt),
        ('log', None, tf.log, core.log),
        ('exp', None, tf.exp, core.exp),
        ('log', None, tf.log, core.log),
        ('ceil', None, tf.ceil, core.ceil),
        ('floor', None, tf.floor, core.floor),
        ('cos', None, tf.cos, core.cos),
        ('sin', None, tf.sin, core.sin),
        ('tan', None, tf.tan, core.tan),
        ('acos', None, tf.acos, core.acos),
        ('asin', None, tf.asin, core.asin),
        ('atan', None, tf.atan, core.atan),
        ('lgamma', None, tf.lgamma, core.lgamma),
        ('digamma', None, tf.digamma, core.digamma),
        ('erf', None, tf.erf, core.erf),
        ('erfc', None, tf.erfc, core.erfc),
        ('lgamma', None, tf.lgamma, core.lgamma),
    ]
    total_size = np.prod([v.size for v in self.original_lt.axes.values()])
    self.test_lt = core.LabeledTensor(
        tf.cast(self.original_lt, tf.float32) / total_size, self.original_lt.axes)
def __call__(self, query, previous_alignments):
    '''Score the query based on the keys and values.

    Args:
        query: Tensor of dtype matching `self.values` and shape
            `[batch_size, query_depth]`.
        previous_alignments: Tensor of dtype matching `self.values` and shape
            `[batch_size, alignments_size]` (`alignments_size` is memory's `max_time`).

    Returns:
        alignments: Tensor of dtype matching `self.values` and shape
            `[batch_size, alignments_size]` (`alignments_size` is memory's `max_time`).
    '''
    with tf.variable_scope(None, 'bahdanau_attention', [query]):
        processed_query = self.query_layer(query) if self.query_layer else query
        dtype = processed_query.dtype
        # Reshape from [batch_size, ...] to [batch_size, 1, ...] for broadcasting.
        processed_query = tf.expand_dims(processed_query, 1)
        if FLAGS.use_conv_feat_att:
            conv_feat = tf.nn.conv1d(
                tf.expand_dims(previous_alignments, 2), self.conv_filt, 1, 'SAME')
        keys = self._keys
        if self._normalize:
            # normed_v = g * v / ||v||
            normed_v = self.g * self.v * tf.rsqrt(
                tf.reduce_sum(tf.square(self.v)))
            score = tf.reduce_sum(
                normed_v * tf.tanh(keys + processed_query + self.b), [2])
        else:
            if FLAGS.use_conv_feat_att:
                score = tf.reduce_sum(
                    self.v * tf.tanh(keys + processed_query + conv_feat), [2])
            else:
                score = tf.reduce_sum(self.v * tf.tanh(keys + processed_query), [2])
    alignments = self._probability_fn(score, previous_alignments)
    return alignments
def layer_norm(inputs, epsilon=1e-6, dtype=None, scope=None):
    """
    Layer Normalization

    :param inputs: A Tensor of shape [..., channel_size]
    :param epsilon: A floating number
    :param dtype: An optional instance of tf.DType
    :param scope: An optional string
    :returns: A Tensor with the same shape as inputs
    """
    with tf.variable_scope(scope, default_name="layer_norm", values=[inputs],
                           dtype=dtype):
        channel_size = inputs.get_shape().as_list()[-1]

        scale = tf.get_variable("scale", shape=[channel_size],
                                initializer=tf.ones_initializer())
        offset = tf.get_variable("offset", shape=[channel_size],
                                 initializer=tf.zeros_initializer())

        mean = tf.reduce_mean(inputs, axis=-1, keep_dims=True)
        variance = tf.reduce_mean(tf.square(inputs - mean), axis=-1, keep_dims=True)

        norm_inputs = (inputs - mean) * tf.rsqrt(variance + epsilon)

        return norm_inputs * scale + offset
def _opsBatchNorm(self, x, m, v, beta, gamma, epsilon,
                  scale_after_normalization, shift_after_normalization):
    y = (x - m) * tf.rsqrt(v + epsilon)
    if scale_after_normalization:
        y = gamma * y
    return y + beta if shift_after_normalization else y
def instance_norm(x, name='instance_norm', reuse=False):
    with tf.variable_scope(name, reuse=reuse):
        depth = x.get_shape()[3]
        scale = tf.get_variable('scale', [depth],
                                initializer=tf.random_normal_initializer(1.0, 0.02))
        offset = tf.get_variable('offset', [depth],
                                 initializer=tf.constant_initializer(0.0))
        mean, variance = tf.nn.moments(x, axes=[1, 2], keep_dims=True)
        inv = tf.rsqrt(variance + 1e-5)
        normalized = (x - mean) * inv
        return scale * normalized + offset
def standardize_images(x):
    """Image standardization on batches (tf.image.per_image_standardization)."""
    with tf.name_scope("standardize_images", [x]):
        x = tf.to_float(x)
        x_mean = tf.reduce_mean(x, axis=[1, 2, 3], keep_dims=True)
        x_variance = tf.reduce_mean(
            tf.square(x - x_mean), axis=[1, 2, 3], keep_dims=True)
        x_shape = shape_list(x)
        num_pixels = tf.to_float(x_shape[1] * x_shape[2] * 3)
        x = (x - x_mean) / tf.maximum(tf.sqrt(x_variance), tf.rsqrt(num_pixels))
        # TODO(lukaszkaiser): remove hack below, needed for greedy decoding for now.
        if x.shape and len(x.shape) == 4 and x.shape[3] == 1:
            x = tf.concat([x, x, x], axis=3)  # Not used, just a dead tf.cond branch.
        x.set_shape([None, None, None, 3])
        return x
def layer_norm_compute_python(x, epsilon, scale, bias):
    """Layer norm raw computation."""
    mean = tf.reduce_mean(x, axis=[-1], keep_dims=True)
    variance = tf.reduce_mean(tf.square(x - mean), axis=[-1], keep_dims=True)
    norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
    return norm_x * scale + bias
def simple_attention(target, source, bias=None):
    """A simple attention function.

    Args:
      target: a `Tensor` with shape `[batch, target_timesteps, depth]` or
        `[batch, target_timesteps_1, target_timesteps_2, depth]`
      source: a `Tensor` with shape `[batch, source_timesteps, depth]` or
        `[batch, source_timesteps_1, source_timesteps_2, depth]`
      bias: an optional `Tensor` with shape `[batch, timesteps, 1, 1]` used
        to mask the attention to not attend to padding of input.

    Returns:
      a `Tensor` with same shape as `target`
    """
    with tf.name_scope("simple_attention", [target, source]):
        target_shape = shape_list(target)
        source_shape = shape_list(source)
        target = tf.reshape(
            target,
            [target_shape[0], target_shape[1] * target_shape[2], target_shape[3]])
        source = tf.reshape(
            source,
            [source_shape[0], source_shape[1] * source_shape[2], source_shape[3]])
        attention = tf.matmul(target, source, transpose_b=True)
        attention *= tf.rsqrt(tf.to_float(shape_list(target)[2]))
        if bias is not None:
            attention += tf.expand_dims(tf.squeeze(bias, axis=[2, 3]), axis=1)
        attention = tf.nn.softmax(attention)
        if not tf.get_variable_scope().reuse:
            tf.summary.image("attention", tf.expand_dims(attention, 3), max_outputs=5)
        attended = tf.matmul(attention, source)
        return tf.reshape(attended, target_shape)
def test_Rsqrt(self):
    t = tf.rsqrt(self.random(4, 3))
    self.check(t)
def corr(a, b):
    return cov(a, b) * tf.rsqrt(cov(a, a)) * tf.rsqrt(cov(b, b))
def instance_normalization(x, index):
    with tf.variable_scope("instance_norm"):
        depth = x.get_shape()[3]
        scale = tf.get_variable("scale" + str(index), [depth],
                                initializer=tf.random_normal_initializer(mean=1.0, stddev=0.02,
                                                                         dtype=tf.float32))
        offset = tf.get_variable("offset" + str(index), [depth],
                                 initializer=tf.random_normal_initializer(mean=0.0, stddev=0.02,
                                                                          dtype=tf.float32))
        mean, variance = tf.nn.moments(x, axes=[1, 2], keep_dims=True)
        epsilon = 1e-5
        inv = tf.rsqrt(variance + epsilon)
        normalized = (x - mean) * inv
        return scale * normalized + offset
def __l1_normalize(x, dim, epsilon=1e-12, name=None):
    square_sum = tf.reduce_sum(tf.abs(x), [dim], keep_dims=True)
    x_inv_norm = tf.rsqrt(tf.maximum(square_sum, epsilon))
    return tf.multiply(x, x_inv_norm, name=name)
def _apply(self, X, noise=0):
    ndim = X.get_shape().ndims
    # if training, normalize input by its own mean and std
    mean, var = tf.nn.moments(X, axes=self.axes)
    # prepare dimshuffle pattern inserting broadcastable axes as needed
    param_axes = iter(range(ndim - len(self.axes)))
    pattern = ['x' if input_axis in self.axes else next(param_axes)
               for input_axis in range(ndim)]
    # apply dimshuffle pattern to all parameters
    beta = 0 if self.beta_init is None else \
        K.dimshuffle(self.get('beta'), pattern)
    gamma = 1 if self.gamma_init is None else \
        K.dimshuffle(self.get('gamma'), pattern)

    # ====== if training: use local mean and var ====== #
    def training_fn():
        running_mean = ((1 - self.alpha) * self.get('mean') +
                        self.alpha * mean)
        running_var = ((1 - self.alpha) * self.get('var') +
                       self.alpha * var)
        with tf.control_dependencies([
                tf.assign(self.get('mean'), running_mean),
                tf.assign(self.get('var'), running_var)]):
            return tf.identity(mean), tf.identity(var)

    # ====== if inference: use global mean and var ====== #
    def infer_fn():
        return self.get('mean'), self.get('var')

    mean, var = tf.cond(K.is_training(), training_fn, infer_fn)
    inv_std = tf.rsqrt(var + self.epsilon)
    normalized = (X - K.dimshuffle(mean, pattern)) * \
        (gamma * K.dimshuffle(inv_std, pattern))
    # ====== applying noise if required ====== #
    if self.noise_level is not None:
        normalized = K.rand.apply_noise(normalized, level=self.noise_level,
                                        noise_dims=self.noise_dims,
                                        noise_type='gaussian')
    # add beta
    normalized = normalized + beta
    # activated output
    return self.activation(normalized)
def batch_norm(x, phase, shift=True, scale=True, momentum=0.99, eps=1e-3,
               internal_update=False, scope=None, reuse=None):
    C = x._shape_as_list()[-1]
    ndim = len(x.shape)
    var_shape = [1] * (ndim - 1) + [C]

    with tf.variable_scope(scope, 'batch_norm', reuse=reuse):
        def training():
            m, v = tf.nn.moments(x, range(ndim - 1), keep_dims=True)
            update_m = _assign_moving_average(moving_m, m, momentum, 'update_mean')
            update_v = _assign_moving_average(moving_v, v, momentum, 'update_var')
            tf.add_to_collection('update_ops', update_m)
            tf.add_to_collection('update_ops', update_v)

            if internal_update:
                with tf.control_dependencies([update_m, update_v]):
                    output = (x - m) * tf.rsqrt(v + eps)
            else:
                output = (x - m) * tf.rsqrt(v + eps)
            return output

        def testing():
            m, v = moving_m, moving_v
            output = (x - m) * tf.rsqrt(v + eps)
            return output

        # Get mean and variance, normalize input
        moving_m = tf.get_variable('mean', var_shape,
                                   initializer=tf.zeros_initializer, trainable=False)
        moving_v = tf.get_variable('var', var_shape,
                                   initializer=tf.ones_initializer, trainable=False)

        if isinstance(phase, bool):
            output = training() if phase else testing()
        else:
            output = tf.cond(phase, training, testing)

        if scale:
            output *= tf.get_variable('gamma', var_shape, initializer=tf.ones_initializer)

        if shift:
            output += tf.get_variable('beta', var_shape, initializer=tf.zeros_initializer)

    return output