The following 50 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.python.ops.math_ops.sqrt().
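Before the project examples, here is a minimal standalone sketch, not taken from any of the projects below, showing the basic call pattern. The tensor values and the name l2_norm are purely illustrative, and a TF 1.x session-based setup is assumed.

# Minimal usage sketch (illustrative only, assuming TF 1.x):
# math_ops.sqrt is applied element-wise; here it computes an L2 norm.
import tensorflow as tf
from tensorflow.python.ops import math_ops

x = tf.constant([[3.0, 4.0]])
l2_norm = math_ops.sqrt(math_ops.reduce_sum(math_ops.square(x)))

with tf.Session() as sess:
  print(sess.run(l2_norm))  # 5.0
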
def batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
  """Applies batch normalization on x given mean, var, beta and gamma.

  I.e. returns:
  `output = (x - mean) / (sqrt(var) + epsilon) * gamma + beta`

  Arguments:
      x: Input tensor or variable.
      mean: Mean of batch.
      var: Variance of batch.
      beta: Tensor with which to center the input.
      gamma: Tensor by which to scale the input.
      epsilon: Fuzz factor.

  Returns:
      A tensor.
  """
  return nn.batch_normalization(x, mean, var, beta, gamma, epsilon)


# SHAPE OPERATIONS

def lecun_uniform(seed=None):
  """LeCun uniform initializer.

  It draws samples from a uniform distribution within [-limit, limit]
  where `limit` is `sqrt(3 / fan_in)`
  where `fan_in` is the number of input units in the weight tensor.

  Arguments:
      seed: A Python integer. Used to seed the random generator.

  Returns:
      An initializer.

  References:
      LeCun 98, Efficient Backprop,
      http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf
  """
  return VarianceScaling(
      scale=1., mode='fan_in', distribution='uniform', seed=seed)

def glorot_normal(seed=None):
  """Glorot normal initializer, also called Xavier normal initializer.

  It draws samples from a truncated normal distribution centered on 0
  with `stddev = sqrt(2 / (fan_in + fan_out))`
  where `fan_in` is the number of input units in the weight tensor
  and `fan_out` is the number of output units in the weight tensor.

  Arguments:
      seed: A Python integer. Used to seed the random generator.

  Returns:
      An initializer.

  References:
      Glorot & Bengio, AISTATS 2010
      http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
  """
  return VarianceScaling(
      scale=1., mode='fan_avg', distribution='normal', seed=seed)

def glorot_uniform(seed=None):
  """Glorot uniform initializer, also called Xavier uniform initializer.

  It draws samples from a uniform distribution within [-limit, limit]
  where `limit` is `sqrt(6 / (fan_in + fan_out))`
  where `fan_in` is the number of input units in the weight tensor
  and `fan_out` is the number of output units in the weight tensor.

  Arguments:
      seed: A Python integer. Used to seed the random generator.

  Returns:
      An initializer.

  References:
      Glorot & Bengio, AISTATS 2010
      http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
  """
  return VarianceScaling(
      scale=1., mode='fan_avg', distribution='uniform', seed=seed)

def he_normal(seed=None):
  """He normal initializer.

  It draws samples from a truncated normal distribution centered on 0
  with `stddev = sqrt(2 / fan_in)`
  where `fan_in` is the number of input units in the weight tensor.

  Arguments:
      seed: A Python integer. Used to seed the random generator.

  Returns:
      An initializer.

  References:
      He et al., http://arxiv.org/abs/1502.01852
  """
  return VarianceScaling(
      scale=2., mode='fan_in', distribution='normal', seed=seed)

def he_uniform(seed=None):
  """He uniform variance scaling initializer.

  It draws samples from a uniform distribution within [-limit, limit]
  where `limit` is `sqrt(6 / fan_in)`
  where `fan_in` is the number of input units in the weight tensor.

  Arguments:
      seed: A Python integer. Used to seed the random generator.

  Returns:
      An initializer.

  References:
      He et al., http://arxiv.org/abs/1502.01852
  """
  return VarianceScaling(
      scale=2., mode='fan_in', distribution='uniform', seed=seed)


# Compatibility aliases

# pylint: disable=invalid-name

def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    shapes = [K.int_shape(p) for p in params]
    accumulators = [K.zeros(shape) for shape in shapes]
    self.weights = accumulators
    self.updates = []

    lr = self.lr
    if self.initial_decay > 0:
      lr *= (1. / (1. + self.decay * self.iterations))
      self.updates.append(K.update_add(self.iterations, 1))

    for p, g, a in zip(params, grads, accumulators):
      # update accumulator
      new_a = self.rho * a + (1. - self.rho) * K.square(g)
      self.updates.append(K.update(a, new_a))
      new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon)

      # apply constraints
      if p in constraints:
        c = constraints[p]
        new_p = c(new_p)
      self.updates.append(K.update(p, new_p))
    return self.updates

def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    shapes = [K.int_shape(p) for p in params]
    accumulators = [K.zeros(shape) for shape in shapes]
    self.weights = accumulators
    self.updates = []

    lr = self.lr
    if self.initial_decay > 0:
      lr *= (1. / (1. + self.decay * self.iterations))
      self.updates.append(K.update_add(self.iterations, 1))

    for p, g, a in zip(params, grads, accumulators):
      new_a = a + K.square(g)  # update accumulator
      self.updates.append(K.update(a, new_a))
      new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon)

      # apply constraints
      if p in constraints:
        c = constraints[p]
        new_p = c(new_p)
      self.updates.append(K.update(p, new_p))
    return self.updates

def loss(self, data, labels):
    """The loss to minimize while training."""
    if self.is_regression:
      diff = self.training_inference_graph(data) - math_ops.to_float(labels)
      mean_squared_error = math_ops.reduce_mean(diff * diff)
      root_mean_squared_error = math_ops.sqrt(mean_squared_error, name="loss")
      loss = root_mean_squared_error
    else:
      loss = math_ops.reduce_mean(
          nn_ops.sparse_softmax_cross_entropy_with_logits(
              self.training_inference_graph(data),
              array_ops.squeeze(math_ops.to_int32(labels))),
          name="loss")
    if self.regularizer:
      loss += layers.apply_regularization(self.regularizer,
                                          variables.trainable_variables())
    return loss

def _sample_n(self, n, seed=None):
    # We use 2 uniform random floats to generate polar random variates.
    # http://dl.acm.org/citation.cfm?id=179631
    # Theorem 2. Let G, H be iid variates, uniformly distributed on [0,1].
    # Let theta = 2*pi*H, let R = sqrt(df*(G^(-2/df) - 1)) for df > 0.
    # Let X = R*cos(theta), and let Y = R*sin(theta).
    # Then X ~ t_df and Y ~ t_df.
    # The variates X and Y are not independent.
    shape = array_ops.concat(0, ([2, n], self.batch_shape()))
    uniform = random_ops.random_uniform(shape=shape,
                                        dtype=self.dtype,
                                        seed=seed)
    samples_g, samples_h = array_ops.unpack(uniform, num=2)
    theta = (2. * math.pi) * samples_h
    r = math_ops.sqrt(self.df *
                      (math_ops.pow(samples_g, -2 / self.df) - 1))
    samples = r * math_ops.cos(theta)
    return samples * self.sigma + self.mu

def _adaptive_max_norm(norm, std_factor, decay, global_step, epsilon, name):
  """Find max_norm given norm and previous average."""
  with vs.variable_scope(name, "AdaptiveMaxNorm", [norm]):
    log_norm = math_ops.log(norm + epsilon)

    def moving_average(name, value, decay):
      moving_average_variable = vs.get_variable(
          name,
          shape=value.get_shape(),
          dtype=value.dtype,
          initializer=init_ops.zeros_initializer,
          trainable=False)
      return moving_averages.assign_moving_average(
          moving_average_variable, value, decay, zero_debias=False)

    # quicker adaptation at the beginning
    if global_step is not None:
      n = math_ops.to_float(global_step)
      decay = math_ops.minimum(decay, n / (n + 1.))

    # update averages
    mean = moving_average("mean", log_norm, decay)
    sq_mean = moving_average("sq_mean", math_ops.square(log_norm), decay)

    variance = sq_mean - math_ops.square(mean)
    std = math_ops.sqrt(math_ops.maximum(epsilon, variance))
    max_norms = math_ops.exp(mean + std_factor * std)
    return max_norms, mean

def _apply_dense(self, grad, var):
    lr = (self._lr_t * math_ops.sqrt(1 - self._beta2_power) /
          (1 - self._beta1_power))
    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - self._beta1_t)
    m_t = m * self._beta1_t
    m_t = m_t + m_scaled_g_values
    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = tf.pow(grad, 2) * (1 - self._beta2_t)
    v_t = v * self._beta2_t
    v_t = v_t + v_scaled_g_values
    v_sqrt = tf.pow(v_t, self._pow_t)
    var_update = state_ops.assign_sub(var,
                                      lr * m_t / (v_sqrt + self._epsilon_t),
                                      use_locking=self._use_locking)
    # regularization
    var_update = state_ops.assign_sub(var_update,
                                      self._dense_regularization * var,
                                      use_locking=self._use_locking)
    return control_flow_ops.group(*[var_update, m_t, v_t])

def _apply_dense(self, g_t, x_tm1, prepare):
    """"""

    updates = []

    if self._mu > 0:
      m_and_t = self._dense_moving_average(x_tm1, g_t, 'm', self._mu)
      m_bar_t = m_and_t[0]
      updates.extend(m_and_t)
    else:
      m_bar_t = g_t

    if self._ups > 0:
      v_and_t = self._dense_moving_average(x_tm1, g_t**2, 'v', self._ups)
      eps_t = ops.convert_to_tensor(self._eps)
      v_bar_t = math_ops.sqrt(v_and_t[0] + eps_t)
      updates.extend(v_and_t)
    else:
      v_bar_t = 1.

    s_t = self._lr * m_bar_t / v_bar_t
    return [[s_t, x_tm1, g_t]] + updates

#===============================================================

def testUnitNormWithRandomMatrix(self):
    height, width = 2, 3
    for dim in range(3):
      random_seed.set_random_seed(0)
      image = random_ops.random_uniform((height, width, 3))
      output = _layers.unit_norm(image, dim=dim, epsilon=1e-6)
      norms = math_ops.sqrt(
          math_ops.reduce_sum(
              math_ops.square(output), reduction_indices=dim))

      shape = [height, width, 3]
      del shape[dim]
      expected = np.ones(shape)

      with self.test_session():
        actual = norms.eval()
        self.assertAllClose(expected, actual, 1e-4, 1e-4)

def testKnownRankUnknownDimsSucceeds(self):
    height, width = 2, 3
    for dim in range(3):
      placeholder_value = np.ones((height, width, 3))
      shape = [height, width, 3]
      del shape[dim]
      expected = np.ones(shape)

      image = array_ops.placeholder(dtypes.float32, (None, None, 3))
      output = _layers.unit_norm(image, dim=dim, epsilon=1e-6)
      norms = math_ops.sqrt(
          math_ops.reduce_sum(
              math_ops.square(output), reduction_indices=dim))

      with self.test_session():
        actual = norms.eval({image: placeholder_value})
        self.assertAllClose(expected, actual, 1e-4, 1e-4)


# TODO(b/28426988): Add separate tests for non-legacy versions.

def sqrt_matmul(self, x):
    """Computes `matmul(self, x)`.

    Doesn't actually do the sqrt! Named as such to agree with API.

    Args:
      x: `Tensor`

    Returns:
      self_times_x: `Tensor`
    """
    m_x = math_ops.matmul(self._m, x)
    vt_x = math_ops.matmul(self._v, x, adjoint_a=True)
    d_vt_x = self._d.matmul(vt_x)
    v_d_vt_x = math_ops.matmul(self._v, d_vt_x)
    return m_x + v_d_vt_x

def sqrt_log_abs_det(self):
    """Computes (log o abs o det)(X) for matrix X.

    Doesn't actually do the sqrt! Named as such to agree with API.

    To compute det(M + V D V.T), we use the matrix determinant lemma:
      det(Tril + V D V.T) = det(C) det(D) det(M)
    where C is defined as in `_inverse`, ie,
      C = inv(D) + V.T inv(M) V.

    See: https://en.wikipedia.org/wiki/Matrix_determinant_lemma

    Returns:
      log_abs_det: `Tensor`.
    """
    log_det_c = math_ops.log(math_ops.abs(
        linalg_ops.matrix_determinant(self._woodbury_sandwiched_term())))
    # Reduction is ok because we always prepad inputs to this class.
    log_det_m = math_ops.reduce_sum(math_ops.log(math_ops.abs(
        array_ops.matrix_diag_part(self._m))), reduction_indices=[-1])
    return log_det_c + 2. * self._d.sqrt_log_abs_det() + log_det_m

def selu(x):
    with ops.name_scope('elu') as scope:
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        return scale * tf.where(x >= 0.0, x, alpha * tf.nn.elu(x))

# (3) initialize weights with stddev sqrt(1/n)
# e.g. use:
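The comment above breaks off after "e.g. use:", and the original continuation is not included here. Purely as a hedged illustration of drawing weights with stddev sqrt(1/n), where n is the layer's fan-in, one could write something like the following; the names n_in, n_out and W are hypothetical and not from the source project.

# Hypothetical sketch only (not the source project's code): weights drawn with
# stddev sqrt(1/n), n being the fan-in of the layer.
import numpy as np
import tensorflow as tf

n_in, n_out = 784, 256
W = tf.Variable(tf.random_normal([n_in, n_out], stddev=np.sqrt(1.0 / n_in)))
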
def dropout_selu(x, rate, alpha=-1.7580993408473766, fixedPointMean=0.0, fixedPointVar=1.0,
                 noise_shape=None, seed=None, name=None, training=False):
    """Dropout to a value with rescaling."""

    def dropout_selu_impl(x, rate, alpha, noise_shape, seed, name):
        keep_prob = 1.0 - rate
        x = ops.convert_to_tensor(x, name="x")
        if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1:
            raise ValueError("keep_prob must be a scalar tensor or a float in the "
                             "range (0, 1], got %g" % keep_prob)
        keep_prob = ops.convert_to_tensor(keep_prob, dtype=x.dtype, name="keep_prob")
        keep_prob.get_shape().assert_is_compatible_with(tensor_shape.scalar())

        alpha = ops.convert_to_tensor(alpha, dtype=x.dtype, name="alpha")
        alpha.get_shape().assert_is_compatible_with(tensor_shape.scalar())

        if tensor_util.constant_value(keep_prob) == 1:
            return x

        noise_shape = noise_shape if noise_shape is not None else array_ops.shape(x)
        random_tensor = keep_prob
        random_tensor += random_ops.random_uniform(noise_shape, seed=seed, dtype=x.dtype)
        binary_tensor = math_ops.floor(random_tensor)
        ret = x * binary_tensor + alpha * (1 - binary_tensor)

        a = math_ops.sqrt(fixedPointVar / (keep_prob * ((1 - keep_prob) *
            math_ops.pow(alpha - fixedPointMean, 2) + fixedPointVar)))
        b = fixedPointMean - a * (keep_prob * fixedPointMean + (1 - keep_prob) * alpha)
        ret = a * ret + b
        ret.set_shape(x.get_shape())
        return ret

    with ops.name_scope(name, "dropout", [x]) as name:
        return utils.smart_cond(training,
            lambda: dropout_selu_impl(x, rate, alpha, noise_shape, seed, name),
            lambda: array_ops.identity(x))

def std(x, axis=None, keepdims=False):
  """Standard deviation of a tensor, alongside the specified axis.

  Arguments:
      x: A tensor or variable.
      axis: An integer, the axis to compute the standard deviation.
      keepdims: A boolean, whether to keep the dimensions or not.
          If `keepdims` is `False`, the rank of the tensor is reduced by 1.
          If `keepdims` is `True`, the reduced dimension is retained with
          length 1.

  Returns:
      A tensor with the standard deviation of elements of `x`.
  """
  return math_ops.sqrt(var(x, axis=axis, keepdims=keepdims))

def sqrt(x):
  """Element-wise square root.

  Arguments:
      x: Tensor or variable.

  Returns:
      A tensor.
  """
  zero = _to_tensor(0., x.dtype.base_dtype)
  inf = _to_tensor(np.inf, x.dtype.base_dtype)
  x = clip_ops.clip_by_value(x, zero, inf)
  return math_ops.sqrt(x)
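A side note on the clipping above: math_ops.sqrt alone returns NaN for negative inputs, so this backend wrapper clips to [0, inf) first. A small illustrative check of that behaviour, assuming a TF 1.x session (values and names are made up, not from the source project):

# Illustrative check: clipping before sqrt turns negative inputs into 0
# instead of NaN.
import numpy as np
import tensorflow as tf

x = tf.constant([-4.0, 4.0])
clipped = tf.clip_by_value(x, 0.0, np.inf)
with tf.Session() as sess:
  print(sess.run(tf.sqrt(x)))        # [nan  2.]
  print(sess.run(tf.sqrt(clipped)))  # [0.  2.]
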
def __call__(self, w):
    norms = K.sqrt(K.sum(K.square(w), axis=self.axis, keepdims=True))
    desired = K.clip(norms, 0, self.max_value)
    w *= (desired / (K.epsilon() + norms))
    return w

def __call__(self, w):
    return w / (
        K.epsilon() + K.sqrt(K.sum(K.square(w), axis=self.axis, keepdims=True)))

def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    shapes = [K.int_shape(p) for p in params]
    accumulators = [K.zeros(shape) for shape in shapes]
    delta_accumulators = [K.zeros(shape) for shape in shapes]
    self.weights = accumulators + delta_accumulators
    self.updates = []

    lr = self.lr
    if self.initial_decay > 0:
      lr *= (1. / (1. + self.decay * self.iterations))
      self.updates.append(K.update_add(self.iterations, 1))

    for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
      # update accumulator
      new_a = self.rho * a + (1. - self.rho) * K.square(g)
      self.updates.append(K.update(a, new_a))

      # use the new accumulator and the *old* delta_accumulator
      update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon)

      new_p = p - lr * update
      # apply constraints
      if p in constraints:
        c = constraints[p]
        new_p = c(new_p)
      self.updates.append(K.update(p, new_p))

      # update delta_accumulator
      new_d_a = self.rho * d_a + (1 - self.rho) * K.square(update)
      self.updates.append(K.update(d_a, new_d_a))
    return self.updates

def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
      lr *= (1. / (1. + self.decay * self.iterations))

    t = self.iterations + 1
    lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                 (1. - K.pow(self.beta_1, t)))

    shapes = [K.int_shape(p) for p in params]
    ms = [K.zeros(shape) for shape in shapes]
    vs = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + ms + vs

    for p, g, m, v in zip(params, grads, ms, vs):
      m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
      v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
      p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

      self.updates.append(K.update(m, m_t))
      self.updates.append(K.update(v, v_t))

      new_p = p_t
      # apply constraints
      if p in constraints:
        c = constraints[p]
        new_p = c(new_p)
      self.updates.append(K.update(p, new_p))
    return self.updates

def call(self, inputs, training=None):
    if 0 < self.rate < 1:

      def noised():
        stddev = np.sqrt(self.rate / (1.0 - self.rate))
        return inputs * K.random_normal(
            shape=K.shape(inputs), mean=1.0, stddev=stddev)

      return K.in_train_phase(noised, inputs, training=training)
    return inputs

def unit_norm(inputs, dim, epsilon=1e-7, scope=None):
  """Normalizes the given input across the specified dimension to unit length.

  Note that the rank of `input` must be known.

  Args:
    inputs: A `Tensor` of arbitrary size.
    dim: The dimension along which the input is normalized.
    epsilon: A small value to add to the inputs to avoid dividing by zero.
    scope: Optional scope for variable_scope.

  Returns:
    The normalized `Tensor`.

  Raises:
    ValueError: If dim is smaller than the number of dimensions in 'inputs'.
  """
  with variable_scope.variable_scope(scope, 'UnitNorm', [inputs]):
    if not inputs.get_shape():
      raise ValueError('The input rank must be known.')
    input_rank = len(inputs.get_shape().as_list())
    if dim < 0 or dim >= input_rank:
      raise ValueError('dim must be positive but smaller than the input rank.')

    lengths = math_ops.sqrt(epsilon + math_ops.reduce_sum(
        math_ops.square(inputs), dim, True))
    multiples = []
    if dim > 0:
      multiples.append(array_ops.ones([dim], dtypes.int32))
    multiples.append(array_ops.slice(array_ops.shape(inputs), [dim], [1]))
    if dim < (input_rank - 1):
      multiples.append(array_ops.ones([input_rank - 1 - dim], dtypes.int32))
    multiples = array_ops.concat(0, multiples)
    return math_ops.div(inputs, array_ops.tile(lengths, multiples))

def _mean(self):
    if self.cholesky_input_output_matrices:
      return math_ops.sqrt(self.df) * self.scale_operator_pd.sqrt_to_dense()
    return self.df * self.scale_operator_pd.to_dense()

def _variance(self):
    x = math_ops.sqrt(self.df) * self.scale_operator_pd.to_dense()
    d = array_ops.expand_dims(array_ops.matrix_diag_part(x), -1)
    v = math_ops.square(x) + math_ops.batch_matmul(d, d, adj_y=True)
    if self.cholesky_input_output_matrices:
      return linalg_ops.cholesky(v)
    return v

def _std(self):
    return math_ops.sqrt(self._variance())

def _std(self):
    return math_ops.sqrt(self.variance())

def _batch_sqrt_matmul(self, x, transpose_x=False):
    if transpose_x:
      x = array_ops.matrix_transpose(x)
    diag_mat = array_ops.expand_dims(self._diag, -1)
    return math_ops.sqrt(diag_mat) * x

def _batch_sqrt_solve(self, rhs):
    diag_mat = array_ops.expand_dims(self._diag, -1)
    return rhs / math_ops.sqrt(diag_mat)

def _sqrt_to_dense(self):
    return array_ops.matrix_diag(math_ops.sqrt(self._diag))

def _std(self):
    return math_ops.sqrt(self.alpha) / self.beta

def _variance(self):
    scale = self.alpha_sum * math_ops.sqrt(1. + self.alpha_sum)
    alpha = self.alpha / scale

    outer_prod = -math_ops.batch_matmul(
        array_ops.expand_dims(alpha, dim=-1),  # column
        array_ops.expand_dims(alpha, dim=-2))  # row

    return array_ops.matrix_set_diag(outer_prod,
                                     alpha * (self.alpha_sum / scale - alpha))

def _variance(self):
    x = math_ops.sqrt(self.df) * self.scale_operator_pd.to_dense()
    d = array_ops.expand_dims(array_ops.matrix_diag_part(x), -1)
    v = math_ops.square(x) + math_ops.matmul(d, d, adjoint_b=True)
    if self.cholesky_input_output_matrices:
      return linalg_ops.cholesky(v)
    return v

def _mode(self):
    s = self.df - self.dimension - 1.
    s = math_ops.select(
        math_ops.less(s, 0.),
        constant_op.constant(float("NaN"), dtype=self.dtype, name="nan"),
        s)
    if self.cholesky_input_output_matrices:
      return math_ops.sqrt(s) * self.scale_operator_pd.sqrt_to_dense()
    return s * self.scale_operator_pd.to_dense()

def _sample_n(self, n, seed=None):
    # The sampling method comes from the well known fact that if X ~ Normal(0,
    # 1), and Z ~ Chi2(df), then X / sqrt(Z / df) ~ StudentT(df).
    shape = array_ops.concat(0, ([n], self.batch_shape()))
    normal_sample = random_ops.random_normal(
        shape, dtype=self.dtype, seed=seed)
    half = constant_op.constant(0.5, self.dtype)
    df = self.df * array_ops.ones(self.batch_shape(), dtype=self.dtype)
    gamma_sample = random_ops.random_gamma(
        [n,], half * df, beta=half, dtype=self.dtype,
        seed=distribution_util.gen_new_seed(seed, salt="student_t"))
    samples = normal_sample / math_ops.sqrt(gamma_sample / df)
    return samples * self.sigma + self.mu

def _prob(self, x):
    y = (x - self.mu) / self.sigma
    half_df = 0.5 * self.df
    return (math_ops.exp(math_ops.lgamma(0.5 + half_df) -
                         math_ops.lgamma(half_df)) /
            (math_ops.sqrt(self.df) * math.sqrt(math.pi) * self.sigma) *
            math_ops.pow(1. + math_ops.square(y) / self.df,
                         -(0.5 + half_df)))