We extracted the following 50 code examples from open-source Python projects to illustrate how to use tensorflow.python.ops.math_ops.square().
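Before the extracted examples, here is a minimal sketch of the call itself. It is not taken from any of the projects below; it assumes a TF 1.x graph/session workflow, which is the style the examples use.

# Minimal sketch (assumption: TF 1.x graph/session API is available).
import tensorflow as tf
from tensorflow.python.ops import math_ops

x = tf.constant([1.0, -2.0, 3.0])
y = math_ops.square(x)  # element-wise x * x

with tf.Session() as sess:
  print(sess.run(y))  # [1. 4. 9.]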
def _gini(self, class_counts):
  """Calculate the Gini impurity.

  If c(i) denotes the i-th class count and c = sum_i c(i) then
    score = 1 - sum_i ( c(i) / c )^2

  Args:
    class_counts: A 2-D tensor of per-class counts, usually a slice or
      gather from variables.node_sums.

  Returns:
    A 1-D tensor of the Gini impurities for each row in the input.
  """
  smoothed = 1.0 + array_ops.slice(class_counts, [0, 1], [-1, -1])
  sums = math_ops.reduce_sum(smoothed, 1)
  sum_squares = math_ops.reduce_sum(math_ops.square(smoothed), 1)
  return 1.0 - sum_squares / (sums * sums)
def _weighted_gini(self, class_counts):
  """Our split score is the Gini impurity times the number of examples.

  If c(i) denotes the i-th class count and c = sum_i c(i) then
    score = c * (1 - sum_i ( c(i) / c )^2 )
          = c - sum_i c(i)^2 / c

  Args:
    class_counts: A 2-D tensor of per-class counts, usually a slice or
      gather from variables.node_sums.

  Returns:
    A 1-D tensor of the Gini impurities for each row in the input.
  """
  smoothed = 1.0 + array_ops.slice(class_counts, [0, 1], [-1, -1])
  sums = math_ops.reduce_sum(smoothed, 1)
  sum_squares = math_ops.reduce_sum(math_ops.square(smoothed), 1)
  return sums - sum_squares / sums
def _variance(self, sums, squares):
  """Calculate the variance for each row of the input tensors.

  Variance is V = E[x^2] - (E[x])^2.

  Args:
    sums: A tensor containing output sums, usually a slice from
      variables.node_sums.  Should contain the number of examples seen
      in index 0 so we can calculate expected value.
    squares: Same as sums, but sums of squares.

  Returns:
    A 1-D tensor of the variances for each row in the input.
  """
  total_count = array_ops.slice(sums, [0, 0], [-1, 1])
  e_x = sums / total_count
  e_x2 = squares / total_count
  return math_ops.reduce_sum(e_x2 - math_ops.square(e_x), 1)
def var(x, axis=None, keepdims=False):
  """Variance of a tensor, alongside the specified axis.

  Arguments:
      x: A tensor or variable.
      axis: An integer, the axis to compute the variance.
      keepdims: A boolean, whether to keep the dimensions or not.
          If `keepdims` is `False`, the rank of the tensor is reduced by 1.
          If `keepdims` is `True`, the reduced dimension is retained with
          length 1.

  Returns:
      A tensor with the variance of elements of `x`.
  """
  axis = _normalize_axis(axis, ndim(x))
  if x.dtype.base_dtype == dtypes_module.bool:
    x = math_ops.cast(x, floatx())
  m = math_ops.reduce_mean(x, reduction_indices=axis, keep_dims=True)
  devs_squared = math_ops.square(x - m)
  return math_ops.reduce_mean(
      devs_squared, reduction_indices=axis, keep_dims=keepdims)
def get_updates(self, params, constraints, loss):
  grads = self.get_gradients(loss, params)
  shapes = [K.int_shape(p) for p in params]
  accumulators = [K.zeros(shape) for shape in shapes]
  self.weights = accumulators
  self.updates = []

  lr = self.lr
  if self.initial_decay > 0:
    lr *= (1. / (1. + self.decay * self.iterations))
    self.updates.append(K.update_add(self.iterations, 1))

  for p, g, a in zip(params, grads, accumulators):
    new_a = a + K.square(g)  # update accumulator
    self.updates.append(K.update(a, new_a))
    new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon)
    # apply constraints
    if p in constraints:
      c = constraints[p]
      new_p = c(new_p)
    self.updates.append(K.update(p, new_p))
  return self.updates
def _log_ndtr_lower(x, series_order):
  """Asymptotic expansion version of `Log[cdf(x)]`, appropriate for `x<<-1`."""
  x_2 = math_ops.square(x)
  # Log of the term multiplying (1 + sum)
  log_scale = -0.5 * x_2 - math_ops.log(-x) - 0.5 * math.log(2. * math.pi)
  # Compute the summation.
  even_sum = 0.
  odd_sum = 0.
  x_2n = x_2  # Start with x^{2*1} = x^{2*n} with n = 1.
  for n in range(1, series_order + 1):
    if n % 2:
      odd_sum -= _double_factorial(2 * n - 1) / x_2n
    else:
      even_sum += _double_factorial(2 * n - 1) / x_2n
    x_2n *= x_2
  return log_scale + math_ops.log(1. + even_sum + odd_sum)
def _kl_normal_normal(n_a, n_b, name=None): """Calculate the batched KL divergence KL(n_a || n_b) with n_a and n_b Normal. Args: n_a: instance of a Normal distribution object. n_b: instance of a Normal distribution object. name: (optional) Name to use for created operations. default is "kl_normal_normal". Returns: Batchwise KL(n_a || n_b) """ with ops.name_scope(name, "kl_normal_normal", [n_a.mu, n_b.mu]): one = constant_op.constant(1, dtype=n_a.dtype) two = constant_op.constant(2, dtype=n_a.dtype) half = constant_op.constant(0.5, dtype=n_a.dtype) s_a_squared = math_ops.square(n_a.sigma) s_b_squared = math_ops.square(n_b.sigma) ratio = s_a_squared / s_b_squared return (math_ops.square(n_a.mu - n_b.mu) / (two * s_b_squared) + half * (ratio - one - math_ops.log(ratio)))
def _iqfov_via_sqrt_solve(self, x):
  """Get the inverse quadratic form on vectors via a sqrt_solve."""
  # x^T A^{-1} x = || S^{-1}x ||^2,
  # where S is a square root of A (A = SS^T).
  # Steps:
  # 1. Convert x to a matrix, flipping all extra dimensions in `x` to the
  #    final dimension of x_matrix.
  x_matrix = flip_vector_to_matrix(
      x, self.batch_shape(), self.get_batch_shape())
  # 2. Get soln_matrix = S^{-1} x_matrix
  soln_matrix = self.sqrt_solve(x_matrix)
  # 3. Reshape back to a vector.
  soln = flip_matrix_to_vector(
      soln_matrix, extract_batch_shape(x, 1), x.get_shape()[:-1])
  # 4. L2 (batch) vector norm squared.
  result = math_ops.reduce_sum(
      math_ops.square(soln), reduction_indices=[-1])
  result.set_shape(x.get_shape()[:-1])
  return result
def _variance(self):
  var = (self._ones() *
         math_ops.square(self.sigma) * self.df / (self.df - 2))
  # When 1 < df <= 2, variance is infinite.
  inf = np.array(np.inf, dtype=self.dtype.as_numpy_dtype())
  result_where_defined = math_ops.select(
      math_ops.greater(self.df, array_ops.fill(self.batch_shape(), 2.)),
      var,
      array_ops.fill(self.batch_shape(), inf, name="inf"))

  if self.allow_nan_stats:
    nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
    return math_ops.select(
        math_ops.greater(self.df, self._ones()),
        result_where_defined,
        array_ops.fill(self.batch_shape(), nan, name="nan"))
  else:
    return control_flow_ops.with_dependencies([
        check_ops.assert_less(
            array_ops.ones((), dtype=self.dtype), self.df,
            message="variance not defined for components of df <= 1"),
    ], result_where_defined)
def _adaptive_max_norm(norm, std_factor, decay, global_step, epsilon, name):
  """Find max_norm given norm and previous average."""
  with vs.variable_scope(name, "AdaptiveMaxNorm", [norm]):
    log_norm = math_ops.log(norm + epsilon)

    def moving_average(name, value, decay):
      moving_average_variable = vs.get_variable(
          name, shape=value.get_shape(), dtype=value.dtype,
          initializer=init_ops.zeros_initializer, trainable=False)
      return moving_averages.assign_moving_average(
          moving_average_variable, value, decay, zero_debias=False)

    # quicker adaptation at the beginning
    if global_step is not None:
      n = math_ops.to_float(global_step)
      decay = math_ops.minimum(decay, n / (n + 1.))

    # update averages
    mean = moving_average("mean", log_norm, decay)
    sq_mean = moving_average("sq_mean", math_ops.square(log_norm), decay)

    variance = sq_mean - math_ops.square(mean)
    std = math_ops.sqrt(math_ops.maximum(epsilon, variance))
    max_norms = math_ops.exp(mean + std_factor * std)
    return max_norms, mean
def mean_squared_error(weights=1.0, name='MeanSquaredError', scope=None,
                       collect=True):
  """Computes Mean Square Loss.

  Args:
    weights: Coefficients for the loss, a `scalar`.
    scope: scope to add the op to.
    name: name of the op.
    collect: add to losses collection.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If `predictions` shape doesn't match `labels` shape,
      or `weights` is `None`.
  """
  def inner_loss(y_true, y_pred):
    losses = math_ops.square(math_ops.subtract(y_pred, y_true))
    return losses

  return built_loss(inner_loss, weights, name, scope, collect)
def testExplicitStochasticTensors(self):
  with self.test_session() as sess:
    mu = constant_op.constant([0.0, 0.1, 0.2])
    sigma = constant_op.constant([1.1, 1.2, 1.3])
    with st.value_type(st.SampleValue()):
      dt1 = st.StochasticTensor(NormalNotParam(loc=mu, scale=sigma))
      dt2 = st.StochasticTensor(NormalNotParam(loc=mu, scale=sigma))
      loss = math_ops.square(array_ops.identity(dt1)) + 10. + dt2

      sl_all = sg.surrogate_loss([loss])
      sl_dt1 = sg.surrogate_loss([loss], stochastic_tensors=[dt1])
      sl_dt2 = sg.surrogate_loss([loss], stochastic_tensors=[dt2])

      dt1_term = dt1.distribution.log_prob(dt1) * loss
      dt2_term = dt2.distribution.log_prob(dt2) * loss

      self.assertAllClose(*sess.run(
          [sl_all, sum([loss, dt1_term, dt2_term])]))
      self.assertAllClose(*sess.run([sl_dt1, sum([loss, dt1_term])]))
      self.assertAllClose(*sess.run([sl_dt2, sum([loss, dt2_term])]))
def testTraversesControlInputs(self):
  dt1 = st.StochasticTensor(distributions.Normal(loc=0., scale=1.))
  logits = dt1.value() * 3.
  dt2 = st.StochasticTensor(distributions.Bernoulli(logits=logits))
  dt3 = st.StochasticTensor(distributions.Normal(loc=0., scale=1.))
  x = dt3.value()
  y = array_ops.ones((2, 2)) * 4.
  z = array_ops.ones((2, 2)) * 3.
  out = control_flow_ops.cond(
      math_ops.cast(dt2, dtypes.bool), lambda: math_ops.add(x, y),
      lambda: math_ops.square(z))
  out += 5.
  dep_map = sg._stochastic_dependencies_map([out])
  self.assertEqual(dep_map[dt1], set([out]))
  self.assertEqual(dep_map[dt2], set([out]))
  self.assertEqual(dep_map[dt3], set([out]))
def test_normal_distribution_second_moment_estimated_correctly(self):
  # Test the importance sampled estimate against an analytical result.
  n = int(1e6)
  with self.test_session():
    mu_p = constant_op.constant([0.0, 0.0], dtype=dtypes.float64)
    mu_q = constant_op.constant([-1.0, 1.0], dtype=dtypes.float64)
    sigma_p = constant_op.constant([1.0, 2 / 3.], dtype=dtypes.float64)
    sigma_q = constant_op.constant([1.0, 1.0], dtype=dtypes.float64)
    p = distributions.Normal(loc=mu_p, scale=sigma_p)
    q = distributions.Normal(loc=mu_q, scale=sigma_q)

    # Compute E_p[X^2].
    # Should equal [1, (2/3)^2]
    log_e_x2 = monte_carlo.expectation_importance_sampler_logspace(
        log_f=lambda x: math_ops.log(math_ops.square(x)),
        log_p=p.log_prob,
        sampling_dist_q=q,
        n=n,
        seed=42)
    e_x2 = math_ops.exp(log_e_x2)

    # Relative tolerance (rtol) chosen 2 times as large as minimum needed to
    # pass.
    self.assertEqual(p.get_batch_shape(), e_x2.get_shape())
    self.assertAllClose([1., (2 / 3.)**2], e_x2.eval(), rtol=0.02)
def testKnownRankUnknownDimsSucceeds(self):
  height, width = 2, 3
  for dim in range(3):
    placeholder_value = np.ones((height, width, 3))
    shape = [height, width, 3]
    del shape[dim]
    expected = np.ones(shape)

    image = array_ops.placeholder(dtypes.float32, (None, None, 3))
    output = _layers.unit_norm(image, dim=dim, epsilon=1e-6)
    norms = math_ops.sqrt(
        math_ops.reduce_sum(
            math_ops.square(output), reduction_indices=dim))

    with self.test_session():
      actual = norms.eval({image: placeholder_value})
      self.assertAllClose(expected, actual, 1e-4, 1e-4)


# TODO(b/28426988): Add separate tests for non-legacy versions.
def _covariance(x, diag):
  """Defines the covariance operation of a matrix.

  Args:
    x: a matrix Tensor. Dimension 0 should contain the number of examples.
    diag: if True, it computes the diagonal covariance.

  Returns:
    A Tensor representing the covariance of x. In the case of
    diagonal matrix just the diagonal is returned.
  """
  num_points = math_ops.to_float(array_ops.shape(x)[0])
  x -= math_ops.reduce_mean(x, 0, keep_dims=True)
  if diag:
    cov = math_ops.reduce_sum(
        math_ops.square(x), 0, keep_dims=True) / (num_points - 1)
  else:
    cov = math_ops.matmul(x, x, transpose_a=True) / (num_points - 1)
  return cov
def _define_full_covariance_probs(self, shard_id, shard):
  """Defines the full covariance probabilities per example in a class.

  Updates a matrix with dimension num_examples X num_classes.

  Args:
    shard_id: id of the current shard.
    shard: current data shard, 1 X num_examples X dimensions.
  """
  diff = shard - self._means
  cholesky = linalg_ops.cholesky(self._covs + self._min_var)
  log_det_covs = 2.0 * math_ops.reduce_sum(
      math_ops.log(array_ops.matrix_diag_part(cholesky)), 1)
  x_mu_cov = math_ops.square(
      linalg_ops.matrix_triangular_solve(
          cholesky, array_ops.transpose(
              diff, perm=[0, 2, 1]), lower=True))
  diag_m = array_ops.transpose(math_ops.reduce_sum(x_mu_cov, 1))
  self._probs[shard_id] = -0.5 * (
      diag_m + math_ops.to_float(self._dimensions) * math_ops.log(2 * np.pi) +
      log_det_covs)
def _define_diag_covariance_probs(self, shard_id, shard):
  """Defines the diagonal covariance probabilities per example in a class.

  Args:
    shard_id: id of the current shard.
    shard: current data shard, 1 X num_examples X dimensions.

  Returns a matrix num_examples * num_classes.
  """
  # num_classes X 1
  # TODO(xavigonzalvo): look into alternatives to log for
  # reparametrization of variance parameters.
  det_expanded = math_ops.reduce_sum(
      math_ops.log(self._covs + 1e-3), 1, keep_dims=True)
  diff = shard - self._means
  x2 = math_ops.square(diff)
  cov_expanded = array_ops.expand_dims(1.0 / (self._covs + 1e-3), 2)
  # num_classes X num_examples
  x2_cov = math_ops.matmul(x2, cov_expanded)
  x2_cov = array_ops.transpose(array_ops.squeeze(x2_cov, [2]))
  self._probs[shard_id] = -0.5 * (
      math_ops.to_float(self._dimensions) * math_ops.log(2.0 * np.pi) +
      array_ops.transpose(det_expanded) + x2_cov)
def _kl_normal_normal(n_a, n_b, name=None): """Calculate the batched KL divergence KL(n_a || n_b) with n_a and n_b Normal. Args: n_a: instance of a Normal distribution object. n_b: instance of a Normal distribution object. name: (optional) Name to use for created operations. default is "kl_normal_normal". Returns: Batchwise KL(n_a || n_b) """ with ops.name_scope(name, "kl_normal_normal", [n_a.loc, n_b.loc]): one = constant_op.constant(1, dtype=n_a.dtype) two = constant_op.constant(2, dtype=n_a.dtype) half = constant_op.constant(0.5, dtype=n_a.dtype) s_a_squared = math_ops.square(n_a.scale) s_b_squared = math_ops.square(n_b.scale) ratio = s_a_squared / s_b_squared return (math_ops.square(n_a.loc - n_b.loc) / (two * s_b_squared) + half * (ratio - one - math_ops.log(ratio)))
def sqrt_log_abs_det(self, name="sqrt_log_det"):
  """Log absolute value determinant of the sqrt `S` for every batch member.

  In most cases, this will be the same as `sqrt_log_det`, but for certain
  operators defined by a square root, this might be implemented slightly
  differently.

  Args:
    name: A name scope to use for ops added by this method.

  Returns:
    Logarithm of absolute value determinant of the square root `S` for
    every batch member.
  """
  with ops.name_scope(self.name):
    with ops.name_scope(name, values=self.inputs):
      return self._dispatch_based_on_batch(
          self._batch_sqrt_log_abs_det, self._sqrt_log_abs_det)
def test_optimize(self):
  scalar = variables.Variable(random_ops.random_normal([]), 'scalar')
  vector = variables.Variable(random_ops.random_normal([2]), 'vector')
  matrix = variables.Variable(random_ops.random_normal([2, 3]), 'matrix')

  minimum_location = constant_op.constant(np.arange(9), dtype=dtypes.float32)

  loss = math_ops.reduce_sum(
      math_ops.square(vector - minimum_location[:2])) / 2.
  loss += math_ops.reduce_sum(
      math_ops.square(scalar - minimum_location[2])) / 2.
  loss += math_ops.reduce_sum(
      math_ops.square(
          matrix - array_ops.reshape(minimum_location[3:], [2, 3]))) / 2.

  optimizer = MockOptimizerInterface(loss)

  with self.test_session() as sess:
    sess.run(variables.global_variables_initializer())

    optimizer.minimize(sess)

    self.assertAllClose(np.arange(2), sess.run(vector))
    self.assertAllClose(np.arange(1) + 2, sess.run(scalar))
    self.assertAllClose(np.arange(6).reshape(2, 3) + 3, sess.run(matrix))
def test_nonlinear_programming(self):
  vector_initial_value = [7., 7.]
  vector = variables.Variable(vector_initial_value, 'vector')

  # Make norm as small as possible.
  loss = math_ops.reduce_sum(math_ops.square(vector))
  # Ensure y = 1.
  equalities = [vector[1] - 1.]
  # Ensure x >= 1. Thus optimum should be at (1, 1).
  inequalities = [vector[0] - 1.]

  optimizer = external_optimizer.ScipyOptimizerInterface(
      loss, equalities=equalities, inequalities=inequalities, method='SLSQP')

  with self.test_session() as sess:
    sess.run(variables.global_variables_initializer())

    optimizer.minimize(sess)

    self.assertAllClose(np.ones(2), sess.run(vector))
def testNumericOps(self):
  for dtype in self.numeric_types:
    self._testUnary(
        math_ops.abs,
        np.array([[2, -1]], dtype=dtype),
        expected=np.array([[2, 1]], dtype=dtype))

    self._testUnary(
        math_ops.negative,
        np.array([[-1, 1]], dtype=dtype),
        expected=np.array([[1, -1]], dtype=dtype))

    self._testUnary(
        math_ops.square,
        np.array([[-2, 3]], dtype=dtype),
        expected=np.array([[4, 9]], dtype=dtype))

    self._testUnary(
        array_ops.zeros_like,
        np.array([[4, 3], [2, 1]], dtype=dtype),
        expected=np.array([[0, 0], [0, 0]], dtype=dtype))
def square(x):
  """Element-wise square.

  Arguments:
      x: Tensor or variable.

  Returns:
      A tensor.
  """
  return math_ops.square(x)
def sqrt(x):
  """Element-wise square root.

  Arguments:
      x: Tensor or variable.

  Returns:
      A tensor.
  """
  zero = _to_tensor(0., x.dtype.base_dtype)
  inf = _to_tensor(np.inf, x.dtype.base_dtype)
  x = clip_ops.clip_by_value(x, zero, inf)
  return math_ops.sqrt(x)
def __call__(self, w):
  norms = K.sqrt(K.sum(K.square(w), axis=self.axis, keepdims=True))
  desired = K.clip(norms, 0, self.max_value)
  w *= (desired / (K.epsilon() + norms))
  return w
def __call__(self, w):
  return w / (
      K.epsilon() + K.sqrt(K.sum(K.square(w), axis=self.axis, keepdims=True)))
def __call__(self, shape, dtype=None):
  if len(shape) != 2 or shape[0] != shape[1]:
    raise ValueError('Identity matrix initializer can only be used '
                     'for 2D square matrices.')
  else:
    return self.gain * np.identity(shape[0])
def mean_squared_error(y_true, y_pred):
  return K.mean(K.square(y_pred - y_true), axis=-1)
def mean_squared_logarithmic_error(y_true, y_pred):
  first_log = K.log(K.clip(y_pred, K.epsilon(), None) + 1.)
  second_log = K.log(K.clip(y_true, K.epsilon(), None) + 1.)
  return K.mean(K.square(first_log - second_log), axis=-1)
def squared_hinge(y_true, y_pred):
  return K.mean(K.square(K.maximum(1. - y_true * y_pred, 0.)), axis=-1)
def get_gradients(self, loss, params):
  grads = K.gradients(loss, params)
  if hasattr(self, 'clipnorm') and self.clipnorm > 0:
    norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads]))
    grads = [clip_norm(g, self.clipnorm, norm) for g in grads]
  if hasattr(self, 'clipvalue') and self.clipvalue > 0:
    grads = [K.clip(g, -self.clipvalue, self.clipvalue) for g in grads]
  return grads
def get_updates(self, params, constraints, loss):
  grads = self.get_gradients(loss, params)
  shapes = [K.int_shape(p) for p in params]
  accumulators = [K.zeros(shape) for shape in shapes]
  delta_accumulators = [K.zeros(shape) for shape in shapes]
  self.weights = accumulators + delta_accumulators
  self.updates = []

  lr = self.lr
  if self.initial_decay > 0:
    lr *= (1. / (1. + self.decay * self.iterations))
    self.updates.append(K.update_add(self.iterations, 1))

  for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
    # update accumulator
    new_a = self.rho * a + (1. - self.rho) * K.square(g)
    self.updates.append(K.update(a, new_a))

    # use the new accumulator and the *old* delta_accumulator
    update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon)

    new_p = p - lr * update
    # apply constraints
    if p in constraints:
      c = constraints[p]
      new_p = c(new_p)
    self.updates.append(K.update(p, new_p))

    # update delta_accumulator
    new_d_a = self.rho * d_a + (1 - self.rho) * K.square(update)
    self.updates.append(K.update(d_a, new_d_a))
  return self.updates
def get_updates(self, params, constraints, loss):
  grads = self.get_gradients(loss, params)
  self.updates = [K.update_add(self.iterations, 1)]

  lr = self.lr
  if self.initial_decay > 0:
    lr *= (1. / (1. + self.decay * self.iterations))

  t = self.iterations + 1
  lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
               (1. - K.pow(self.beta_1, t)))

  shapes = [K.int_shape(p) for p in params]
  ms = [K.zeros(shape) for shape in shapes]
  vs = [K.zeros(shape) for shape in shapes]
  self.weights = [self.iterations] + ms + vs

  for p, g, m, v in zip(params, grads, ms, vs):
    m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
    v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
    p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

    self.updates.append(K.update(m, m_t))
    self.updates.append(K.update(v, v_t))

    new_p = p_t
    # apply constraints
    if p in constraints:
      c = constraints[p]
      new_p = c(new_p)
    self.updates.append(K.update(p, new_p))
  return self.updates
def __call__(self, x):
  regularization = 0.
  if self.l1:
    regularization += K.sum(self.l1 * K.abs(x))
  if self.l2:
    regularization += K.sum(self.l2 * K.square(x))
  return regularization
def abs_smooth_2(x):
  """Smoothed absolute function. Useful to compute an L1 smooth error.

  Defined as:
      x^2 / 2        if abs(x) < 1
      abs(x) - 0.5   otherwise

  An implementation that sticks strictly to the formula.
  """
  absx = tf.abs(x)
  r = array_ops.where(absx < 1, math_ops.square(x) / 2.0, absx - 0.5)
  return r
def sum_of_squares(predictions, targets, weight=1.0, scope=None):
  """Adds a Sum-of-Squares loss to the training procedure.

  `weight` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weight` is a tensor of
  size [batch_size], then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weight` vector. If the shape
  of `weight` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weight`.

  Args:
    predictions: The predicted outputs.
    targets: The ground truth output tensor, same dimensions as 'predictions'.
    weight: Coefficients for the loss: a scalar, a tensor of shape
      [batch_size] or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `targets`
      or if the shape of `weight` is invalid.
  """
  with ops.name_scope(scope, "sum_of_squares_loss",
                      [predictions, targets]) as scope:
    predictions.get_shape().assert_is_compatible_with(targets.get_shape())
    if weight is None:
      raise ValueError("`weight` cannot be None")
    predictions = math_ops.to_float(predictions)
    targets = math_ops.to_float(targets)
    losses = math_ops.square(math_ops.sub(predictions, targets))
    return compute_weighted_loss(losses, weight)
def _l2_loss(self, l2):
  """Computes the (un-normalized) l2 loss of the model."""
  with name_scope('sdca/l2_loss'):
    sums = []
    for name in ['sparse_features_weights', 'dense_features_weights']:
      for weights in self._convert_n_to_tensor(self._variables[name]):
        with ops.device(weights.device):
          sums.append(
              math_ops.reduce_sum(
                  math_ops.square(math_ops.cast(weights, dtypes.float64))))
    sum = math_ops.add_n(sums)
    # SDCA L2 regularization cost is: l2 * sum(weights^2) / 2
    return l2 * sum / 2.0
def unit_norm(inputs, dim, epsilon=1e-7, scope=None):
  """Normalizes the given input across the specified dimension to unit length.

  Note that the rank of `input` must be known.

  Args:
    inputs: A `Tensor` of arbitrary size.
    dim: The dimension along which the input is normalized.
    epsilon: A small value to add to the inputs to avoid dividing by zero.
    scope: Optional scope for variable_scope.

  Returns:
    The normalized `Tensor`.

  Raises:
    ValueError: If dim is smaller than the number of dimensions in 'inputs'.
  """
  with variable_scope.variable_scope(scope, 'UnitNorm', [inputs]):
    if not inputs.get_shape():
      raise ValueError('The input rank must be known.')
    input_rank = len(inputs.get_shape().as_list())
    if dim < 0 or dim >= input_rank:
      raise ValueError('dim must be positive but smaller than the input rank.')

    lengths = math_ops.sqrt(epsilon + math_ops.reduce_sum(
        math_ops.square(inputs), dim, True))
    multiples = []
    if dim > 0:
      multiples.append(array_ops.ones([dim], dtypes.int32))
    multiples.append(array_ops.slice(array_ops.shape(inputs), [dim], [1]))
    if dim < (input_rank - 1):
      multiples.append(array_ops.ones([input_rank - 1 - dim], dtypes.int32))
    multiples = array_ops.concat(0, multiples)
    return math_ops.div(inputs, array_ops.tile(lengths, multiples))
def _variance(self):
  var = (math_ops.square(self.beta) /
         (math_ops.square(self.alpha - 1.) * (self.alpha - 2.)))
  if self.allow_nan_stats:
    nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
    return math_ops.select(
        self.alpha > 2., var,
        array_ops.fill(self.batch_shape(), nan, name="nan"))
  else:
    return control_flow_ops.with_dependencies([
        check_ops.assert_less(
            constant_op.constant(2., dtype=self.dtype), self.alpha,
            message="variance not defined for components of alpha <= 2"),
    ], var)
def _log_prob(self, x):
  return (-0.5 * math.log(2. * math.pi) - math_ops.log(self.sigma)
          - 0.5 * math_ops.square(self._z(x)))
def _variance(self):
  return math_ops.square(self.std())