我们从 Python 开源项目中提取了以下 50 个代码示例，用于说明如何使用 tensorflow.python.ops.math_ops.reduce_mean()。
def var(x, axis=None, keepdims=False):
    """Compute the variance of a tensor along the given axis.

    Arguments:
        x: A tensor or variable.
        axis: An integer, the axis to compute the variance over.
        keepdims: A boolean, whether to keep the reduced dimension.
            If `keepdims` is `False`, the rank of the tensor is reduced
            by 1. If `keepdims` is `True`, the reduced dimension is
            retained with length 1.

    Returns:
        A tensor with the variance of elements of `x`.
    """
    axis = _normalize_axis(axis, ndim(x))
    is_boolean = x.dtype.base_dtype == dtypes_module.bool
    if is_boolean:
        # Boolean tensors are cast to the backend float type first.
        x = math_ops.cast(x, floatx())
    # The mean keeps its reduced dimension so that it broadcasts against `x`.
    centre = math_ops.reduce_mean(x, reduction_indices=axis, keep_dims=True)
    squared_deviation = math_ops.square(x - centre)
    return math_ops.reduce_mean(
        squared_deviation, reduction_indices=axis, keep_dims=keepdims)
def mean(x, axis=None, keepdims=False):
    """Compute the mean of a tensor along the given axes.

    Arguments:
        x: A tensor or variable.
        axis: A list of integers, the axes to compute the mean over.
        keepdims: A boolean, whether to keep the reduced dimensions.
            If `keepdims` is `False`, the rank of the tensor is reduced
            by 1 for each entry in `axis`. If `keepdims` is `True`, the
            reduced dimensions are retained with length 1.

    Returns:
        A tensor with the mean of elements of `x`.
    """
    reduce_over = _normalize_axis(axis, ndim(x))
    if x.dtype.base_dtype == dtypes_module.bool:
        # Boolean tensors are cast to the backend float type first.
        x = math_ops.cast(x, floatx())
    return math_ops.reduce_mean(
        x, reduction_indices=reduce_over, keep_dims=keepdims)
def _rescale_eval_loss(loss, weights):
    """Rescale the evaluation loss according to the given weights.

    The training loss does not account for the weights in its denominator,
    whereas the evaluation loss should be divided by the sum of weights.
    The rescaling factor is:

        R = sum_{i} 1 / sum_{i} w_{i}

    Multiplying by R is the same as dividing by the mean weight, which is
    how it is computed here.

    Args:
        loss: the scalar weighted loss.
        weights: weight coefficients. Either a scalar, or a `Tensor` of
            shape [batch_size].

    Returns:
        The given loss divided by the mean of `weights`.
    """
    mean_weight = math_ops.reduce_mean(weights)
    return math_ops.div(loss, mean_weight)
def loss(self, data, labels):
    """Build the loss to minimize while training.

    For regression this is the root mean squared error between the
    training inference output and `labels`; otherwise it is the mean
    sparse softmax cross-entropy. A regularization term is added when
    `self.regularizer` is set.
    """
    if not self.is_regression:
        cross_entropy = nn_ops.sparse_softmax_cross_entropy_with_logits(
            self.training_inference_graph(data),
            array_ops.squeeze(math_ops.to_int32(labels)))
        loss = math_ops.reduce_mean(cross_entropy, name="loss")
    else:
        residual = (
            self.training_inference_graph(data) - math_ops.to_float(labels))
        mse = math_ops.reduce_mean(residual * residual)
        loss = math_ops.sqrt(mse, name="loss")

    if self.regularizer:
        loss += layers.apply_regularization(
            self.regularizer, variables.trainable_variables())
    return loss
def _centered_bias_step(centered_bias, logits_dimension, labels, loss_fn):
    """Create and return the training op for the centered bias.

    Args:
        centered_bias: the bias variable to train.
        logits_dimension: number of logits per example; must be >= 1.
        labels: labels tensor; its first dimension is used as batch size.
        loss_fn: callable taking `(logits, labels)` and returning a loss.

    Returns:
        The op produced by `AdagradOptimizer.minimize` for `centered_bias`.

    Raises:
        ValueError: if `logits_dimension` is `None` or smaller than 1.
    """
    dimension_is_invalid = (
        (logits_dimension is None) or (logits_dimension < 1))
    if dimension_is_invalid:
        raise ValueError("Invalid logits_dimension %s." % logits_dimension)

    with ops.name_scope(None, "centered_bias_step", (labels,)) as name:
        batch_size = array_ops.shape(labels)[0]
        # Repeat the bias once per example so it can be used as logits.
        tiled_bias = array_ops.tile(centered_bias, (batch_size,))
        logits = array_ops.reshape(
            tiled_bias, (batch_size, logits_dimension))
        with ops.name_scope(None, "centered_bias", (labels, logits)):
            centered_bias_loss = math_ops.reduce_mean(
                loss_fn(logits, labels), name="training_loss")
    # Learn central bias by an optimizer. 0.1 is a conservative lr for a
    # single variable.
    optimizer = training.AdagradOptimizer(0.1)
    return optimizer.minimize(
        centered_bias_loss, var_list=(centered_bias,), name=name)
def _covariance(x, diag):
    """Define the covariance operation of a matrix.

    Args:
        x: a matrix Tensor. Dimension 0 should contain the number of
            examples.
        diag: if True, only the diagonal covariance is computed.

    Returns:
        A Tensor representing the covariance of `x`. In the diagonal case
        just the diagonal is returned.
    """
    num_points = math_ops.to_float(array_ops.shape(x)[0])
    # Centre the data on its mean along dimension 0.
    centered = x - math_ops.reduce_mean(x, 0, keep_dims=True)
    if diag:
        sum_of_squares = math_ops.reduce_sum(
            math_ops.square(centered), 0, keep_dims=True)
        return sum_of_squares / (num_points - 1)
    scatter = math_ops.matmul(centered, centered, transpose_a=True)
    return scatter / (num_points - 1)
def average_size(self):
    """Construct a TF graph for evaluating the average size of a forest.

    Each tree's size op is created on the device returned by
    `self.device_assigner` for that tree index.

    Returns:
        The average number of nodes over the trees, as a float tensor.
    """
    sizes = []
    for i in range(self.params.num_trees):
        with ops.device(self.device_assigner.get_device(i)):
            sizes.append(self.trees[i].size())
    # Cast before averaging: `reduce_mean` keeps the input dtype, so an
    # integer size tensor (presumably what `size()` yields -- confirm) would
    # produce a truncated average.
    return math_ops.reduce_mean(math_ops.to_float(array_ops.pack(sizes)))
# pylint: disable=unused-argument
def average_impurity(self):
    """Construct a TF graph for evaluating the leaf impurity of a forest.

    Each tree's impurity op is created on the device returned by
    `self.device_assigner` for that tree index.

    Returns:
        The last op in the graph.
    """
    per_tree = []
    for tree_index in range(self.params.num_trees):
        device = self.device_assigner.get_device(tree_index)
        with ops.device(device):
            per_tree.append(self.trees[tree_index].average_impurity())
    stacked = array_ops.pack(per_tree)
    return math_ops.reduce_mean(stacked)
def _sample_mean(values):
    """Mean over sample indices. In this module this is always [0]."""
    sample_axes = [0]
    return math_ops.reduce_mean(values, reduction_indices=sample_axes)
def training_loss(self, logits, target, features, name="training_loss"):
    """Return the training loss tensor for this head.

    Training loss is different from the loss reported on the tensorboard as
    we should respect the example weights when computing the gradient.

        L = sum_{i} w_{i} * l_{i} / B

    where B is the number of examples in the batch, and l_{i}, w_{i} are
    the individual losses and example weights.

    Args:
        logits: logits, a float tensor.
        target: either a tensor for labels or, in the multihead case, a
            dict of string to target tensor.
        features: features dict.
        name: Op name.

    Returns:
        Loss tensor.
    """
    if isinstance(target, dict):
        target = target[self.name]
    per_example_loss = self._loss_fn(logits, target)
    weight_tensor = self.get_weight_tensor(features)
    if weight_tensor is not None:
        per_example_loss = self._weighted_loss(
            per_example_loss, weight_tensor)
    return math_ops.reduce_mean(per_example_loss, name=name)
def average_size(self):
    """Construct a TF graph for evaluating the average size of a forest.

    Each tree's size op is created on the device returned by
    `self.device_assigner` for that tree index.

    Returns:
        The average number of nodes over the trees.
    """
    per_tree_sizes = []
    for tree_index in range(self.params.num_trees):
        device = self.device_assigner.get_device(tree_index)
        with ops.device(device):
            per_tree_sizes.append(self.trees[tree_index].size())
    # The packed sizes are converted to float before taking the mean.
    stacked = math_ops.to_float(array_ops.pack(per_tree_sizes))
    return math_ops.reduce_mean(stacked)
# pylint: disable=unused-argument
def _r2(probabilities, targets):
    """Build a streaming metric from an R^2-style score.

    The score is `1 - sum(residual_sum_of_squares / total_sum_of_squares)`,
    where both sums of squares are taken over dimension 0.

    Args:
        probabilities: predictions tensor, subtracted from `targets`.
        targets: ground-truth tensor; a rank-1 tensor gets a trailing
            dimension of size 1 added.

    Returns:
        The result of `metric_ops.streaming_mean` applied to the score.
    """
    if targets.get_shape().ndims == 1:
        targets = array_ops.expand_dims(targets, -1)
    # Cast to float so that integer targets do not yield a truncated mean
    # or a dtype mismatch when subtracted from `probabilities` (assumed to
    # be float32 -- confirm against callers).
    targets = math_ops.to_float(targets)
    y_mean = math_ops.reduce_mean(targets, 0)
    squares_total = math_ops.reduce_sum(math_ops.square(targets - y_mean), 0)
    squares_residuals = math_ops.reduce_sum(
        math_ops.square(targets - probabilities), 0)
    score = 1 - math_ops.reduce_sum(squares_residuals / squares_total)
    return metric_ops.streaming_mean(score)
def _do_layer_inference(self, layer, data):
    """Run inference for one layer or for a collection of layers.

    Args:
        layer: either a single object exposing `inference_graph(data)`, or
            an iterable of such objects.
        data: input passed to each `inference_graph` call.

    Returns:
        The single layer's inference result, or, for an iterable, the mean
        over axis 0 of the packed inference results of all its layers.
    """
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in
    # `collections.abc`. Fall back to the old location on Python 2.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    # If this is a collection of layers, return the mean of their inference
    # results.
    if isinstance(layer, Iterable):
        results = [sub_layer.inference_graph(data) for sub_layer in layer]
        return math_ops.reduce_mean(array_ops.pack(results), 0)
    # If this is a single layer, return its inference result.
    return layer.inference_graph(data)
def get_mean_baseline(ema_decay=0.99, name=None):
    """ExponentialMovingAverage baseline.

    Args:
        ema_decay: decay rate for the ExponentialMovingAverage.
        name: name for variable scope of the ExponentialMovingAverage.

    Returns:
        Callable baseline function that takes the `StochasticTensor`
        (unused) and the downstream `loss`, and returns an EMA of the loss.
    """

    def mean_baseline(_, loss):
        with vs.variable_scope(name, default_name="MeanBaseline"):
            mean_loss = math_ops.reduce_mean(loss)
            averager = training.ExponentialMovingAverage(decay=ema_decay)
            apply_update = averager.apply([mean_loss])
            with ops.control_dependencies([apply_update]):
                # Using `identity` causes an op to be added in this context,
                # which triggers the update. Removing the `identity` means
                # nothing is updated.
                return array_ops.identity(averager.average(mean_loss))

    return mean_baseline
def _loss(loss_unweighted, weight, name):
    """Return the training loss and the weighted-average loss.

    Args:
        loss_unweighted: per-example loss tensor.
        weight: example weights, or `None` for an unweighted loss.
        name: op name for the mean loss.

    Returns:
        A pair `(loss, weighted_average_loss)`. Without weights both entries
        are the same mean of `loss_unweighted`. With weights, `loss` is the
        mean of the weighted losses and `weighted_average_loss` is their sum
        divided by the sum of the weights.
    """
    if weight is not None:
        loss_weighted = _weighted_loss(loss_unweighted, weight)
        total_weighted = math_ops.reduce_sum(loss_weighted)
        total_weight = math_ops.to_float(math_ops.reduce_sum(weight))
        weighted_average_loss = math_ops.div(
            total_weighted, total_weight, name="weighted_average_loss")
        mean_weighted = math_ops.reduce_mean(loss_weighted, name=name)
        return mean_weighted, weighted_average_loss

    mean_unweighted = math_ops.reduce_mean(loss_unweighted, name=name)
    return mean_unweighted, mean_unweighted
def _centered_bias_step(logits_dimension, weight_collection, labels,
                        train_loss_fn):
    """Create and return the training op for the centered bias.

    Args:
        logits_dimension: number of logits per example.
        weight_collection: name of the graph collection holding the
            centered bias; its first entry is used to build the logits.
        labels: labels tensor; its first dimension is used as batch size.
        train_loss_fn: callable taking `(logits, labels)` and returning a
            loss.

    Returns:
        The op produced by `AdagradOptimizer.minimize` over the collection.
    """
    bias_vars = ops.get_collection(weight_collection)
    batch_size = array_ops.shape(labels)[0]
    # Repeat the bias once per example so it can be used as logits.
    tiled_bias = array_ops.tile(bias_vars[0], [batch_size])
    logits = array_ops.reshape(tiled_bias, [batch_size, logits_dimension])
    with ops.name_scope(None, "centered_bias", (labels, logits)):
        bias_loss = math_ops.reduce_mean(
            train_loss_fn(logits, labels), name="training_loss")
    # Learn central bias by an optimizer. 0.1 is a conservative lr for a
    # single variable.
    optimizer = training.AdagradOptimizer(0.1)
    return optimizer.minimize(bias_loss, var_list=bias_vars)
def loss(self, logits, target, features):
    """Return the loss tensor for this head.

    The loss returned is the weighted average:

        L = sum_{i} w_{i} * l_{i} / sum_{i} w_{i}

    Args:
        logits: logits, a float tensor.
        target: either a tensor for labels or, in the multihead case, a
            dict of string to target tensor.
        features: features dict.

    Returns:
        Loss tensor.
    """
    if isinstance(target, dict):
        target = target[self.name]
    per_example_loss = self._loss_fn(logits, target)
    weight_tensor = self.get_weight_tensor(features)

    # Without weights the loss is a plain mean.
    if weight_tensor is None:
        return math_ops.reduce_mean(per_example_loss, name="loss")

    weighted = self._weighted_loss(per_example_loss, weight_tensor)
    numerator = math_ops.reduce_sum(weighted)
    denominator = math_ops.to_float(math_ops.reduce_sum(weight_tensor))
    return math_ops.div(numerator, denominator, name="loss")
def _r2(probabilities, targets, weights=None):
    """Build a streaming metric from an R^2-style score.

    The score is `1 - sum(residual_sum_of_squares / total_sum_of_squares)`,
    where both sums of squares are taken over dimension 0.

    Args:
        probabilities: predictions tensor, subtracted from `targets`.
        targets: ground-truth tensor; a rank-1 tensor gets a trailing
            dimension of size 1 added, and the result is cast to float.
        weights: optional weights forwarded to `streaming_mean`.

    Returns:
        The result of `metric_ops.streaming_mean` applied to the score.
    """
    targets_are_vector = targets.get_shape().ndims == 1
    if targets_are_vector:
        targets = array_ops.expand_dims(targets, -1)
    targets = math_ops.to_float(targets)

    target_mean = math_ops.reduce_mean(targets, 0)
    total_ss = math_ops.reduce_sum(math_ops.square(targets - target_mean), 0)
    residual_ss = math_ops.reduce_sum(
        math_ops.square(targets - probabilities), 0)
    score = 1 - math_ops.reduce_sum(residual_ss / total_ss)
    return metric_ops.streaming_mean(score, weights=weights)
def accuracy(predictions, labels, weights=None):
    """Compute the percentage of times that predictions matches labels.

    Args:
        predictions: the predicted values, a `Tensor` whose dtype and shape
            matches 'labels'.
        labels: the ground truth values, a `Tensor` of any shape and bool,
            integer, or string dtype.
        weights: None or `Tensor` of float values to reweight the accuracy.

    Returns:
        Accuracy `Tensor`.

    Raises:
        ValueError: if dtypes don't match or if dtype is not bool, integer,
            or string.
    """
    label_dtype_supported = (
        labels.dtype.is_integer or
        labels.dtype in (dtypes.bool, dtypes.string))
    if not label_dtype_supported:
        raise ValueError(
            'Labels should have bool, integer, or string dtype, not %r' %
            labels.dtype)
    if not labels.dtype.is_compatible_with(predictions.dtype):
        raise ValueError('Dtypes of predictions and labels should match. '
                         'Given: predictions (%r) and labels (%r)' %
                         (predictions.dtype, labels.dtype))

    with ops.name_scope('accuracy', values=[predictions, labels]):
        matches = math_ops.equal(predictions, labels)
        is_correct = math_ops.cast(matches, dtypes.float32)
        if weights is None:
            return math_ops.reduce_mean(is_correct)
        # Weighted case: weighted hits divided by the total weight.
        is_correct = math_ops.mul(is_correct, weights)
        num_values = math_ops.mul(weights, array_ops.ones_like(is_correct))
        return math_ops.div(math_ops.reduce_sum(is_correct),
                            math_ops.reduce_sum(num_values))
def get_mean_baseline(ema_decay=0.99, name=None):
    """ExponentialMovingAverage baseline.

    The moving average is created with `zero_debias=True`.

    Args:
        ema_decay: decay rate for the ExponentialMovingAverage.
        name: name for variable scope of the ExponentialMovingAverage.

    Returns:
        Callable baseline function that takes the `StochasticTensor`
        (unused) and the downstream `loss`, and returns an EMA of the loss.
    """

    def mean_baseline(_, loss):
        with vs.variable_scope(name, default_name="MeanBaseline"):
            mean_loss = math_ops.reduce_mean(loss)
            averager = training.ExponentialMovingAverage(
                decay=ema_decay, zero_debias=True)
            apply_update = averager.apply([mean_loss])
            with ops.control_dependencies([apply_update]):
                # Using `identity` causes an op to be added in this context,
                # which triggers the update. Removing the `identity` means
                # nothing is updated.
                return array_ops.identity(averager.average(mean_loss))

    return mean_baseline
def test_fitting_two_dimensional_normal_n_equals_1000(self):
    """Fit one 2-D normal to another by optimizing the Renyi ratio."""
    # Minimizing Renyi divergence should allow us to make one normal match
    # another one exactly.
    n = 1000
    mu_true = np.array([1.0, -1.0], dtype=np.float64)
    chol_true = np.array([[2.0, 0.0], [0.5, 1.0]], dtype=np.float64)
    with self.test_session() as sess:
        target = distributions.MultivariateNormalCholesky(mu_true, chol_true)

        # Set up q distribution by defining mean/covariance as Variables.
        mu = variables.Variable(
            np.zeros(mu_true.shape), dtype=mu_true.dtype, name='mu')
        mat = variables.Variable(
            np.zeros(chol_true.shape), dtype=chol_true.dtype, name='mat')
        chol = distributions.matrix_diag_transform(
            mat, transform=nn_ops.softplus)
        q = distributions.MultivariateNormalCholesky(mu, chol)

        for alpha in [0.25, 0.75]:
            negative_renyi_divergence = entropy.renyi_ratio(
                log_p=target.log_prob, q=q, n=n, alpha=alpha, seed=0)
            objective = math_ops.reduce_mean(-negative_renyi_divergence)
            train_op = get_train_op(
                objective, optimizer='SGD', learning_rate=0.5, decay=0.1)

            variables.global_variables_initializer().run()
            recorded = []
            for step in range(1000):
                sess.run(train_op)
                if step in (1, 5, 100):
                    recorded.append(negative_renyi_divergence.eval())

            # This optimization should maximize the renyi divergence.
            _assert_monotonic_increasing(recorded, atol=0)

            # Relative tolerance (rtol) chosen 2 times as large as minimum
            # needed to pass.
            self.assertAllClose(target.mu.eval(), q.mu.eval(), rtol=0.06)
            self.assertAllClose(
                target.sigma.eval(), q.sigma.eval(), rtol=0.02)
def testAddExternalLoss(self):
    """A loss registered via `add_loss` shows up in the total loss."""
    logits = constant_op.constant([[1.2, 0.4, -1.0, -1.1]])
    labels = constant_op.constant([[1.0, 0.0, 0.0, 1.0]])
    losses = loss_ops.hinge_loss(logits, labels)

    # Nothing is collected until the loss is added explicitly.
    self.assertFalse(loss_ops.get_losses())
    mean_loss = math_ops.reduce_mean(losses)
    loss_ops.add_loss(mean_loss)
    self.assertTrue(loss_ops.get_losses())

    total_loss = loss_ops.get_total_loss()
    expected_losses = [[0.0, 1.4, 0.0, 2.1]]
    expected_total = 3.5 / 4.0
    with self.test_session():
        self.assertAllClose(losses.eval(), expected_losses, atol=1e-3)
        self.assertAllClose(total_loss.eval(), expected_total, atol=1e-3)
def testNoneLossCollection(self):
    """Adding a loss with `loss_collection=None` collects nothing."""
    logits = constant_op.constant([[1.2, 0.4, -1.0, -1.1]])
    labels = constant_op.constant([[1.0, 0.0, 0.0, 1.0]])
    losses = loss_ops.hinge_loss(logits, labels)

    self.assertFalse(loss_ops.get_losses())
    mean_loss = math_ops.reduce_mean(losses)
    loss_ops.add_loss(mean_loss, loss_collection=None)
    # The collection must still be empty afterwards.
    self.assertFalse(loss_ops.get_losses())

    expected_losses = [[0.0, 1.4, 0.0, 2.1]]
    with self.test_session():
        self.assertAllClose(losses.eval(), expected_losses, atol=1e-3)
def testDropout(self):
    """Dropout with default arguments keeps roughly half the activations."""
    height, width = 10, 10
    with self.test_session() as sess:
        images = random_ops.random_uniform(
            (5, height, width, 3), seed=1, name='images')
        # Fraction of strictly positive entries before and after dropout.
        frac_before = math_ops.reduce_mean(math_ops.to_float(images > 0))
        output = _layers.dropout(images)
        frac_after = math_ops.reduce_mean(math_ops.to_float(output > 0))

        sess.run(variables_lib.global_variables_initializer())
        frac_after_val, frac_before_val = sess.run([frac_after, frac_before])

        self.assertLess(frac_after_val, frac_before_val / 2 + 0.1)
        self.assertGreater(frac_after_val, frac_before_val / 2 - 0.1)
def testCreateDropoutNoTraining(self):
    """With `is_training=False` dropout passes its input through unchanged."""
    height, width = 3, 3
    with self.test_session() as sess:
        images = random_ops.random_uniform(
            (5, height, width, 3), seed=1, name='images')
        # Fraction of strictly positive entries before and after dropout.
        frac_before = math_ops.reduce_mean(math_ops.to_float(images > 0))
        output = _layers.dropout(images, is_training=False)
        frac_after = math_ops.reduce_mean(math_ops.to_float(output > 0))

        sess.run(variables_lib.global_variables_initializer())
        frac_after_val, frac_before_val = sess.run([frac_after, frac_before])
        self.assertEqual(frac_after_val, frac_before_val)

        # Output and input are fetched in the same run so they refer to the
        # same random draw.
        outputs, inputs = sess.run([output, images])
        self.assertAllClose(outputs, inputs)
def testCreateFCFollowByDropout(self):
    """Dropout after a fully connected layer keeps about half its outputs."""
    height, width = 3, 3
    with self.test_session() as sess:
        images = random_ops.random_uniform(
            (5, height, width, 3), seed=1, name='images')
        fc_output = _layers.fully_connected(images, 50)
        # Fraction of strictly positive entries before and after dropout.
        frac_before = math_ops.reduce_mean(math_ops.to_float(fc_output > 0))
        dropped = _layers.dropout(fc_output)
        frac_after = math_ops.reduce_mean(math_ops.to_float(dropped > 0))

        sess.run(variables_lib.global_variables_initializer())
        frac_after_val, frac_before_val = sess.run([frac_after, frac_before])

        self.assertLess(frac_after_val, frac_before_val / 2 + 0.1)
        self.assertGreater(frac_after_val, frac_before_val / 2 - 0.1)
def loss(self, logits, target, features):
    """Return the loss tensor for this head.

    The loss returned is the weighted average:

        L = sum_{i} w_{i} * l_{i} / sum_{i} w_{i}

    Args:
        logits: logits, a float tensor.
        target: either a tensor for labels or, in the multihead case, a
            dict of string to target tensor.
        features: features dict.

    Returns:
        Loss tensor.
    """
    # In the multihead case pick this head's entry out of the dict.
    head_target = target[self.name] if isinstance(target, dict) else target
    unweighted = self._loss_fn(logits, head_target)
    weights = self.get_weight_tensor(features)
    if weights is not None:
        weighted = self._weighted_loss(unweighted, weights)
        return math_ops.div(
            math_ops.reduce_sum(weighted),
            math_ops.to_float(math_ops.reduce_sum(weights)),
            name="loss")
    return math_ops.reduce_mean(unweighted, name="loss")
def accuracy(predictions, labels, weights=None):
    """Compute the percentage of times that predictions matches labels.

    Args:
        predictions: the predicted values, a `Tensor` whose dtype and shape
            matches 'labels'.
        labels: the ground truth values, a `Tensor` of any shape and bool,
            integer, or string dtype.
        weights: None or `Tensor` of float values to reweight the accuracy.

    Returns:
        Accuracy `Tensor`.

    Raises:
        ValueError: if dtypes don't match or if dtype is not bool, integer,
            or string.
    """
    label_dtype_supported = (
        labels.dtype.is_integer or
        labels.dtype in (dtypes.bool, dtypes.string))
    if not label_dtype_supported:
        raise ValueError(
            'Labels should have bool, integer, or string dtype, not %r' %
            labels.dtype)
    if not labels.dtype.is_compatible_with(predictions.dtype):
        raise ValueError('Dtypes of predictions and labels should match. '
                         'Given: predictions (%r) and labels (%r)' %
                         (predictions.dtype, labels.dtype))

    with ops.name_scope('accuracy', values=[predictions, labels]):
        matches = math_ops.equal(predictions, labels)
        is_correct = math_ops.cast(matches, dtypes.float32)
        if weights is None:
            return math_ops.reduce_mean(is_correct)
        # Weighted case: weighted hits divided by the total weight.
        is_correct = math_ops.multiply(is_correct, weights)
        num_values = math_ops.multiply(
            weights, array_ops.ones_like(is_correct))
        return math_ops.div(math_ops.reduce_sum(is_correct),
                            math_ops.reduce_sum(num_values))
def test_name(self):
    """The reduced tensor's name contains 'lt_reduce_mean'."""
    reduced_lt = ops.reduce_mean(self.original_lt, {'channel'})
    self.assertIn('lt_reduce_mean', reduced_lt.name)
def testGammaGammaKL(self):
    """Check the Gamma-Gamma KL against a closed form and a sample estimate."""
    alpha0 = np.array([3.])
    beta0 = np.array([1., 2., 3., 1.5, 2.5, 3.5])
    alpha1 = np.array([0.4])
    beta1 = np.array([0.5, 1., 1.5, 2., 2.5, 3.])

    # Build graph.
    with self.test_session() as sess:
        g0 = gamma_lib.Gamma(alpha=alpha0, beta=beta0)
        g1 = gamma_lib.Gamma(alpha=alpha1, beta=beta1)
        x = g0.sample(int(1e4), seed=0)
        log_ratio = g0.log_prob(x) - g1.log_prob(x)
        kl_sample = math_ops.reduce_mean(log_ratio, 0)
        kl_actual = kullback_leibler.kl(g0, g1)

        # Execute graph.
        kl_sample_, kl_actual_ = sess.run([kl_sample, kl_actual])

    # Closed-form value computed with numpy/scipy; the terms are combined in
    # the same left-to-right order as a single chained expression.
    digamma_term = (alpha0 - alpha1) * special.digamma(alpha0)
    rate_ratio_term = alpha0 * (beta1 / beta0 - 1.)
    kl_expected = (digamma_term
                   + special.gammaln(alpha1)
                   - special.gammaln(alpha0)
                   + alpha1 * np.log(beta0)
                   - alpha1 * np.log(beta1)
                   + rate_ratio_term)

    self.assertEqual(beta0.shape, kl_actual.get_shape())
    self.assertAllClose(kl_expected, kl_actual_, atol=0., rtol=1e-6)
    self.assertAllClose(kl_sample_, kl_actual_, atol=0., rtol=1e-2)
def testCovarianceFromSampling(self):
    """Sample moments of a Dirichlet agree with its analytic moments."""
    alpha = np.array([[1., 2, 3], [2.5, 4, 0.01]], dtype=np.float32)
    with self.test_session() as sess:
        dist = dirichlet_lib.Dirichlet(alpha)  # batch_shape=[2], event_shape=[3]
        x = dist.sample(int(250e3), seed=1)
        sample_mean = math_ops.reduce_mean(x, 0)
        x_centered = x - sample_mean[None, ...]
        # Per-sample outer products of the centred draws, averaged over axis 0.
        outer_products = math_ops.matmul(
            x_centered[..., None], x_centered[..., None, :])
        sample_cov = math_ops.reduce_mean(outer_products, 0)
        sample_var = array_ops.matrix_diag_part(sample_cov)
        sample_stddev = math_ops.sqrt(sample_var)

        sampled = [sample_mean, sample_cov, sample_var, sample_stddev]
        analytic = [
            dist.mean(), dist.covariance(), dist.variance(), dist.stddev()]
        values = sess.run(sampled + analytic)

        # Order: mean, covariance, variance, stddev.
        tolerances = [0.04, 0.06, 0.03, 0.02]
        for sample_value, analytic_value, rtol in zip(
                values[:4], values[4:], tolerances):
            self.assertAllClose(
                sample_value, analytic_value, atol=0., rtol=rtol)
def testCovarianceFromSampling(self):
    """Sample moments of a DirichletMultinomial agree with analytic ones."""
    # We will test mean, cov, var, stddev on a DirichletMultinomial
    # constructed via broadcast between alpha, n.
    alpha = np.array([[1., 2, 3], [2.5, 4, 0.01]], dtype=np.float32)
    # Ideally we'd be able to test broadcasting but, the multinomial sampler
    # doesn't support different total counts.
    n = np.float32(5)
    with self.test_session() as sess:
        # batch_shape=[2], event_shape=[3]
        dist = ds.DirichletMultinomial(n, alpha)
        x = dist.sample(int(250e3), seed=1)
        sample_mean = math_ops.reduce_mean(x, 0)
        x_centered = x - sample_mean[None, ...]
        # Per-sample outer products of the centred draws, averaged over axis 0.
        outer_products = math_ops.matmul(
            x_centered[..., None], x_centered[..., None, :])
        sample_cov = math_ops.reduce_mean(outer_products, 0)
        sample_var = array_ops.matrix_diag_part(sample_cov)
        sample_stddev = math_ops.sqrt(sample_var)

        sampled = [sample_mean, sample_cov, sample_var, sample_stddev]
        analytic = [
            dist.mean(), dist.covariance(), dist.variance(), dist.stddev()]
        values = sess.run(sampled + analytic)

        # Order: mean, covariance, variance, stddev.
        tolerances = [0.04, 0.05, 0.03, 0.02]
        for sample_value, analytic_value, rtol in zip(
                values[:4], values[4:], tolerances):
            self.assertAllClose(
                sample_value, analytic_value, atol=0., rtol=rtol)
def testSampleUnbiasedScalarBatch(self):
    """Sample mean/covariance of a DirichletMultinomial match analytic ones."""
    with self.test_session() as sess:
        concentration = 2. * self._rng.rand(4).astype(np.float32)
        dist = ds.DirichletMultinomial(n=5., alpha=concentration)
        n = int(5e3)
        x = dist.sample(n, seed=0)
        sample_mean = math_ops.reduce_mean(x, 0)
        # The sample dimension comes first, so `adjoint_a=True` contracts
        # over the samples without an explicit transpose.
        x_centered = x - sample_mean
        sample_covariance = math_ops.matmul(
            x_centered, x_centered, adjoint_a=True) / n

        fetches = [
            sample_mean, sample_covariance, dist.mean(), dist.covariance()]
        (sample_mean_, sample_covariance_,
         actual_mean_, actual_covariance_) = sess.run(fetches)

        self.assertAllEqual([4], sample_mean.get_shape())
        self.assertAllClose(actual_mean_, sample_mean_, atol=0., rtol=0.05)
        self.assertAllEqual([4, 4], sample_covariance.get_shape())
        self.assertAllClose(
            actual_covariance_, sample_covariance_, atol=0., rtol=0.15)
def testCovarianceFromSampling(self):
    """Sample moments of a Multinomial agree with its analytic moments."""
    # We will test mean, cov, var, stddev on a Multinomial constructed via
    # broadcast between theta, n.
    theta = np.array([[1., 2, 3], [2.5, 4, 0.01]], dtype=np.float32)
    # Normalize each row so it sums to one.
    theta /= np.sum(theta, 1)[..., None]
    # Ideally we'd be able to test broadcasting but, the multinomial sampler
    # doesn't support different total counts.
    n = np.float32(5)
    with self.test_session() as sess:
        dist = ds.Multinomial(n, theta)  # batch_shape=[2], event_shape=[3]
        x = dist.sample(int(250e3), seed=1)
        sample_mean = math_ops.reduce_mean(x, 0)
        x_centered = x - sample_mean[None, ...]
        # Per-sample outer products of the centred draws, averaged over axis 0.
        outer_products = math_ops.matmul(
            x_centered[..., None], x_centered[..., None, :])
        sample_cov = math_ops.reduce_mean(outer_products, 0)
        sample_var = array_ops.matrix_diag_part(sample_cov)
        sample_stddev = math_ops.sqrt(sample_var)

        sampled = [sample_mean, sample_cov, sample_var, sample_stddev]
        analytic = [
            dist.mean(), dist.covariance(), dist.variance(), dist.stddev()]
        values = sess.run(sampled + analytic)

        # Every moment uses the same relative tolerance.
        for sample_value, analytic_value in zip(values[:4], values[4:]):
            self.assertAllClose(
                sample_value, analytic_value, atol=0., rtol=0.01)
def testSampleUnbiasedNonScalarBatch(self):
    """Sample mean/covariance of a batched Multinomial match analytic ones."""
    with self.test_session() as sess:
        probs = 2. * self._rng.rand(4, 3, 2).astype(np.float32)
        dist = ds.Multinomial(total_count=5., logits=math_ops.log(probs))
        n = int(3e3)
        x = dist.sample(n, seed=0)
        sample_mean = math_ops.reduce_mean(x, 0)
        # Cyclically rotate event dims left.
        x_centered = array_ops.transpose(x - sample_mean, [1, 2, 3, 0])
        sample_covariance = math_ops.matmul(
            x_centered, x_centered, adjoint_b=True) / n

        fetches = [
            sample_mean, sample_covariance, dist.mean(), dist.covariance()]
        (sample_mean_, sample_covariance_,
         actual_mean_, actual_covariance_) = sess.run(fetches)

        self.assertAllEqual([4, 3, 2], sample_mean.get_shape())
        self.assertAllClose(actual_mean_, sample_mean_, atol=0., rtol=0.07)
        self.assertAllEqual([4, 3, 2, 2], sample_covariance.get_shape())
        self.assertAllClose(
            actual_covariance_, sample_covariance_, atol=0., rtol=0.10)
def testSampleUnbiasedScalarBatch(self):
    """Sample mean/covariance of a Multinomial match its analytic moments."""
    with self.test_session() as sess:
        probs = 2. * self._rng.rand(4).astype(np.float32)
        dist = ds.Multinomial(total_count=5., logits=math_ops.log(probs))
        n = int(5e3)
        x = dist.sample(n, seed=0)
        sample_mean = math_ops.reduce_mean(x, 0)
        # The sample dimension comes first, so `adjoint_a=True` contracts
        # over the samples without an explicit transpose.
        x_centered = x - sample_mean
        sample_covariance = math_ops.matmul(
            x_centered, x_centered, adjoint_a=True) / n

        fetches = [
            sample_mean, sample_covariance, dist.mean(), dist.covariance()]
        (sample_mean_, sample_covariance_,
         actual_mean_, actual_covariance_) = sess.run(fetches)

        self.assertAllEqual([4], sample_mean.get_shape())
        self.assertAllClose(actual_mean_, sample_mean_, atol=0., rtol=0.07)
        self.assertAllEqual([4, 4], sample_covariance.get_shape())
        self.assertAllClose(
            actual_covariance_, sample_covariance_, atol=0., rtol=0.10)
def pool_as_vector(images, scope=None):
    """Reduce images to vectors by averaging all pixels.

    The mean is taken over axes 1 and 2 of `images`, inside a
    "PoolAsVector" name scope.
    """
    with ops.name_scope(scope, "PoolAsVector", [images]):
        pooled = math_ops.reduce_mean(images, [1, 2])
    return pooled
def average_gradients(tower_grads):
    """Calculate the mean gradient for each shared variable across all towers.

    Note
    ----
    This function provides a synchronization point across all towers.

    Parameters
    ----------
    tower_grads:
        List of lists of (gradient, variable) tuples. The lists are
        transposed with ``zip(*tower_grads)`` so that each group holds the
        entries for one variable from every tower:
        ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN)).

    Return
    ------
    List of pairs of (gradient, variable) where the gradient has been
    averaged across all towers.
    """
    averaged = []
    for per_variable in zip(*tower_grads):
        # Give every gradient a leading 'tower' dimension, join them along
        # it, then average that dimension away.
        expanded = [tf.expand_dims(g, 0) for g, _ in per_variable]
        stacked = tf.concat(axis=0, values=expanded)
        mean_grad = tf.reduce_mean(stacked, 0)

        # Keep in mind that the Variables are redundant because they are
        # shared across towers, so only the first tower's pointer to the
        # Variable is returned.
        shared_var = per_variable[0][1]
        averaged.append((mean_grad, shared_var))
    return averaged
def get_mean_baseline(ema_decay=0.99, name=None):
    """ExponentialMovingAverage baseline.

    EMA initializes to 0, which introduces a bias. This baseline implements
    the bias correction term from Adam (section 3 of
    https://arxiv.org/pdf/1412.6980v8.pdf), dividing by `1 - ema_decay^t`,
    where `t` is the step count.

    Args:
        ema_decay: decay rate for the ExponentialMovingAverage.
        name: name for variable scope of the ExponentialMovingAverage.

    Returns:
        Callable baseline function that takes the `DistributionTensor`
        (unused) and the downstream `loss`, and returns an EMA of the loss.
    """

    def mean_baseline(_, loss):
        with vs.variable_scope(name, default_name="MeanBaseline"):
            mean_loss = math_ops.reduce_mean(loss)

            averager = training.ExponentialMovingAverage(decay=ema_decay)
            apply_update = averager.apply([mean_loss])

            # The bias correction term requires keeping track of how many
            # times the EMA has been updated. Creating a variable here to do
            # so. The global step is not used because it may or may not track
            # exactly the number of times the EMA is updated.
            ema_var = averager.average(mean_loss)
            assert ema_var is not None
            with ops.colocate_with(ema_var):
                step_counter = vs.get_variable(
                    "local_ema_step", initializer=0, trainable=False)
            step_count = step_counter.assign_add(1)
            decay_power = math_ops.pow(
                ema_decay, math_ops.cast(step_count, mean_loss.dtype))
            bias_correction = 1. - decay_power

            with ops.control_dependencies([apply_update]):
                return averager.average(mean_loss) / bias_correction

    return mean_baseline
def sdca_classifier_model_fn(features, targets, mode, params):
    """Estimator's linear model_fn.

    Args:
        features: feature tensors, passed as `columns_to_tensors`.
        targets: target tensor.
        mode: one of the `estimator.ModeKeys` values.
        params: dict with the keys "feature_columns", "optimizer",
            "weight_column_name" and "loss_type".

    Returns:
        A tuple `(predictions, loss, train_op)`. `loss` is `None` in INFER
        mode and `train_op` is `None` outside TRAIN mode.

    Raises:
        ValueError: if the optimizer is not an `SDCAOptimizer`.
        KeyError: if `loss_type` is neither "logistic_loss" nor
            "hinge_loss".
    """
    feature_columns = params["feature_columns"]
    optimizer = params["optimizer"]
    weight_column_name = params["weight_column_name"]
    loss_type = params["loss_type"]

    if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
        raise ValueError("Optimizer must be of type SDCAOptimizer")

    supported_losses = {
        "logistic_loss": _log_loss_with_two_classes,
        "hinge_loss": _hinge_loss,
    }
    loss_fn = supported_losses[loss_type]

    weighted_sum = layers.weighted_sum_from_feature_columns(
        columns_to_tensors=features,
        feature_columns=feature_columns,
        num_outputs=1)
    logits, columns_to_variables, bias = weighted_sum

    _add_bias_column(feature_columns, features, bias, targets,
                     columns_to_variables)

    loss = None
    if mode != estimator.ModeKeys.INFER:
        loss = math_ops.reduce_mean(loss_fn(logits, targets), name="loss")
        logging_ops.scalar_summary("loss", loss)

    train_op = None
    if mode == estimator.ModeKeys.TRAIN:
        global_step = contrib_variables.get_global_step()
        train_op = optimizer.get_train_step(
            columns_to_variables, weight_column_name, loss_type, features,
            targets, global_step)

    logistic = math_ops.sigmoid(logits)
    # Prepend a zero column so the single logit becomes a two-class logit
    # pair.
    two_class_logits = array_ops.concat(
        1, [array_ops.zeros_like(logits), logits])
    predictions = {
        _LOGISTIC: logistic,
        _PROBABILITIES: nn.softmax(two_class_logits),
        _CLASSES: math_ops.argmax(two_class_logits, 1),
    }

    return predictions, loss, train_op
# Ensures consistency with LinearComposableModel.
def weighted_resample(inputs, weights, overall_rate, scope=None,
                      mean_decay=0.999, warmup=10, seed=None):
    """Performs an approximate weighted resampling of `inputs`.

    This method chooses elements from `inputs` where each item's rate of
    selection is proportional to its value in `weights`, and the average
    rate of selection across all inputs (and many invocations!) is
    `overall_rate`.

    Args:
        inputs: A list of tensors whose first dimension is `batch_size`.
        weights: A `[batch_size]`-shaped tensor with each batch member's
            weight.
        overall_rate: Desired overall rate of resampling.
        scope: Scope to use for the op.
        mean_decay: How quickly to decay the running estimate of the mean
            weight.
        warmup: Until the resulting tensor has been evaluated `warmup`
            times, the resampling method uses the true mean over all calls
            as its weight estimate, rather than a decayed mean.
        seed: Random seed.

    Returns:
        A list of tensors exactly like `inputs`, but with an unknown (and
        possibly zero) first dimension.
        A tensor containing the effective resampling rate used for each
        output.
    """
    # Algorithm: Just compute rates as weights/mean_weight * overall_rate.
    # This way the average weight corresponds to the overall rate, and a
    # weight twice the average has twice the rate, etc.
    with ops.name_scope(scope, 'weighted_resample', inputs) as opscope:
        # First: Maintain a running estimated mean weight, with decay
        # adjusted (by also maintaining an invocation count) during the
        # warmup period so that at the beginning, there aren't too many
        # zeros mixed in, throwing the average off.
        with variable_scope.variable_scope(scope, 'estimate_mean', inputs):
            call_counter = variable_scope.get_local_variable(
                'resample_count', initializer=0)
            running_mean = variable_scope.get_local_variable(
                'estimated_mean', initializer=0.0)

            calls = call_counter.assign_add(1)
            warmup_decay = math_ops.truediv(
                (calls - 1), math_ops.minimum(calls, warmup))
            effective_decay = math_ops.minimum(warmup_decay, mean_decay)
            batch_mean = math_ops.reduce_mean(weights)
            mean_weight = moving_averages.assign_moving_average(
                running_mean, batch_mean, effective_decay, zero_debias=False)

        # Then, normalize the weights into rates using the mean weight and
        # overall target rate:
        rates = weights * overall_rate / mean_weight

        # The rates are resampled together with the inputs so the effective
        # rate of every surviving element can be returned.
        resampled = resample_at_rate([rates] + inputs, rates,
                                     scope=opscope, seed=seed,
                                     back_prop=False)

        return (resampled[1:], resampled[0])
def moments(x, axes, shift=None, name=None, keep_dims=False):
    """Calculate the mean and variance of `x`.

    The mean and variance are calculated by aggregating the contents of `x`
    across `axes`. If `x` is 1-D and `axes = [0]` this is just the mean and
    variance of a vector.

    Note: for numerical stability, when shift=None, the true mean would be
    computed and used as shift.

    When using these moments for batch normalization (see
    `tf.nn.batch_normalization`):

      * for so-called "global normalization", used with convolutional
        filters with shape `[batch, height, width, depth]`, pass
        `axes=[0, 1, 2]`.
      * for simple batch normalization pass `axes=[0]` (batch only).

    Args:
        x: A `Tensor`.
        axes: Array of ints. Axes along which to compute mean and variance.
        shift: A `Tensor` containing the value by which to shift the data
            for numerical stability, or `None` in which case the true mean
            of the data is used as shift. A shift close to the true mean
            provides the most numerically stable results.
        name: Name passed to the ops that compute the moments. The
            sufficient-statistics op receives `name + '_statistics'`, or
            `None` when `name` is `None`.
        keep_dims: produce moments with the same dimensionality as the
            input.

    Returns:
        Two `Tensor` objects: `mean` and `variance`.
    """
    # NOTE(review): no name scope is opened here; the original
    # `ops.name_scope(name, "moments", ...)` call was disabled. Confirm that
    # this is intentional.

    # `name` defaults to None, and `None + '_statistics'` raises TypeError,
    # so the suffix is only appended when a name was actually given.
    statistics_name = None if name is None else name + '_statistics'

    # The dynamic range of fp16 is too limited to support the collection of
    # sufficient statistics. As a workaround we simply perform the operations
    # on 32-bit floats before converting the mean and variance back to fp16.
    y = math_ops.cast(x, dtypes.float32) if x.dtype == dtypes.float16 else x
    if shift is None:
        # Compute true mean while keeping the dims for proper broadcasting.
        shift = array_ops.stop_gradient(
            math_ops.reduce_mean(y, axes, keep_dims=True))
    else:
        shift = math_ops.cast(shift, y.dtype)
    counts, m_ss, v_ss, shift = nn.sufficient_statistics(
        y, axes, shift=shift, keep_dims=keep_dims, name=statistics_name)
    # Reshape shift as needed.
    shift = array_ops.reshape(shift, array_ops.shape(m_ss))
    shift.set_shape(m_ss.get_shape())
    with ops.control_dependencies([counts, m_ss, v_ss]):
        mean, variance = normalize_moments(
            counts, m_ss, v_ss, shift, name=name)
        if x.dtype == dtypes.float16:
            return (math_ops.cast(mean, dtypes.float16),
                    math_ops.cast(variance, dtypes.float16))
        else:
            return (mean, variance)