The following code examples, drawn from open-source Python projects, illustrate how to use tensorflow.python.ops.array_ops.stop_gradient().
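Before the examples, a minimal self-contained sketch (ours, not taken from any of the projects below; it assumes the TensorFlow 1.x API that the examples use) of what stop_gradient does: it acts as the identity in the forward pass but blocks backpropagation through its argument.

import tensorflow as tf
from tensorflow.python.ops import array_ops

w = tf.Variable(2.0)
frozen = array_ops.stop_gradient(w * 3.0)  # forward value 6.0, but no gradient path
loss = frozen * w
grad = tf.gradients(loss, w)[0]  # 6.0; without stop_gradient it would be 6 * w = 12.0

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(grad))  # 6.0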
def _extract_argmax_and_embed(embedding, output_projection=None,
                              update_embedding=True):
    """Get a loop_function that extracts the previous symbol and embeds it.

    Args:
      embedding: embedding tensor for symbols.
      output_projection: None or a pair (W, B). If provided, each fed previous
        output will first be multiplied by W and added B.
      update_embedding: Boolean; if False, the gradients will not propagate
        through the embeddings.

    Returns:
      A loop function.
    """
    def loop_function(prev, _):
        if output_projection is not None:
            prev = nn_ops.xw_plus_b(
                prev, output_projection[0], output_projection[1])
        prev_symbol = math_ops.argmax(prev, 1)
        # Note that gradients will not propagate through the second parameter of
        # embedding_lookup.
        emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
        if not update_embedding:
            emb_prev = array_ops.stop_gradient(emb_prev)
        return emb_prev

    return loop_function
def get_score_function_with_advantage(advantage_fn=None,
                                      name="ScoreFunctionWithAdvantage"):
    """Score function estimator with advantage function.

    Args:
      advantage_fn: callable that takes the `DistributionTensor` and the
        downstream `loss` and returns a `Tensor` advantage
        (e.g. `loss - baseline`).
      name: name to prepend ops with.

    Returns:
      Callable score function estimator that takes the `DistributionTensor`,
      the sampled `value`, and the downstream `loss`, and uses the provided
      advantage.
    """
    def score_function_with_advantage(dist_tensor, value, loss):
        with ops.name_scope(name, values=[value, loss]):
            advantage = advantage_fn(dist_tensor, loss)
            advantage = array_ops.stop_gradient(advantage)
            return dist_tensor.distribution.log_prob(value) * advantage

    return score_function_with_advantage
def loss(self, sample_loss):
    """Returns the term to add to the surrogate loss.

    This method is called by `surrogate_loss`. The input `sample_loss` should
    have already had `stop_gradient` applied to it. This is because the
    surrogate_loss usually provides a Monte Carlo sample term of the form
    `differentiable_surrogate * sample_loss` where `sample_loss` is considered
    constant with respect to the input for purposes of the gradient.

    Args:
      sample_loss: `Tensor`, sample loss downstream of this `StochasticTensor`.

    Returns:
      Either `None` or a `Tensor`.
    """
    raise NotImplementedError("surrogate_loss not implemented")
def get_score_function_with_advantage(advantage_fn=None,
                                      name="ScoreFunctionWithAdvantage"):
    """Score function estimator with advantage function.

    Args:
      advantage_fn: callable that takes the `StochasticTensor` and the
        downstream `loss` and returns a `Tensor` advantage
        (e.g. `loss - baseline`).
      name: name to prepend ops with.

    Returns:
      Callable score function estimator that takes the `StochasticTensor`, the
      sampled `value`, and the downstream `loss`, and uses the provided
      advantage.
    """
    def score_function_with_advantage(stochastic_tensor, value, loss):
        with ops.name_scope(name, values=[value, loss]):
            advantage = advantage_fn(stochastic_tensor, loss)
            advantage = array_ops.stop_gradient(advantage)
            return stochastic_tensor.distribution.log_prob(value) * advantage

    return score_function_with_advantage
def _create_value(self):
    """Create the value Tensor based on the value type, store as self._value."""
    if isinstance(self._value_type, MeanValue):
        value_tensor = self._dist.mean()
    elif isinstance(self._value_type, SampleValue):
        value_tensor = self._dist.sample(self._value_type.shape)
    else:
        raise TypeError(
            "Unrecognized Distribution Value Type: %s" % self._value_type)

    if self._value_type.stop_gradient:
        # stop_gradient is being enforced by the value type.
        return array_ops.stop_gradient(value_tensor)

    if isinstance(self._value_type, MeanValue):
        return value_tensor  # Using pathwise-derivative for this one.
    if self._dist.is_continuous and self._dist.is_reparameterized:
        return value_tensor  # Using pathwise-derivative for this one.
    else:
        # Will have to perform some variant of score function
        # estimation. Call stop_gradient on the sampler just in case we
        # may accidentally leak some gradient from it.
        return array_ops.stop_gradient(value_tensor)
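For contrast with the score-function branch above, a minimal sketch (ours, assuming the TensorFlow 1.x tf.distributions API) of the pathwise-derivative case: a fully reparameterized sample lets gradients flow through the sample itself, which is why that branch returns the value without stop_gradient.

import tensorflow as tf

loc = tf.Variable(0.0)
dist = tf.distributions.Normal(loc=loc, scale=1.0)
x = dist.sample()                  # reparameterized: x = loc + 1.0 * eps, eps ~ N(0, 1)
loss = tf.square(x - 3.0)
grad = tf.gradients(loss, loc)[0]  # pathwise derivative 2 * (x - 3), flowing through x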
def loss(self, final_loss, name="Loss"):
    # Return a loss based on final_loss and the distribution. Returns
    # None if pathwise derivatives are supported, if the loss_fn
    # was explicitly set to None, or if the value type is MeanValue.
    if self._loss_fn is None:
        return None

    if (self._dist.is_continuous and self._dist.is_reparameterized and
            not self._value_type.stop_gradient):
        # Can perform pathwise-derivative on this one; no additional loss needed.
        return None

    with ops.name_scope(self.name, values=[final_loss]):
        with ops.name_scope(name):
            if (self._value_type.stop_gradient or
                    isinstance(self._value_type, SampleValue)):
                return self._loss_fn(self, self._value, final_loss)
            elif isinstance(self._value_type, MeanValue):
                return None  # MeanValue generally provides its own gradient.
            else:
                raise TypeError(
                    "Unrecognized Distribution Value Type: %s" % self._value_type)
def _argmax_or_mcsearch(embedding, output_projection=None,
                        update_embedding=True, mc_search=False):
    def loop_function(prev, _):
        if output_projection is not None:
            prev = nn_ops.xw_plus_b(
                prev, output_projection[0], output_projection[1])

        if isinstance(mc_search, bool):
            # tf.multinomial samples the next symbol from the `prev`
            # distribution (reshaped to a 1-D batch of indices); otherwise
            # take the argmax for greedy decoding.
            prev_symbol = (tf.reshape(tf.multinomial(prev, 1), [-1])
                           if mc_search else math_ops.argmax(prev, 1))
        else:
            prev_symbol = tf.cond(
                mc_search,
                lambda: tf.reshape(tf.multinomial(prev, 1), [-1]),
                lambda: tf.argmax(prev, 1))

        emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
        # If update_embedding is False, do not backpropagate into the embeddings.
        if not update_embedding:
            emb_prev = array_ops.stop_gradient(emb_prev)
        return emb_prev

    return loop_function
def _extract_argmax_and_embed(embedding, output_projection=None,
                              update_embedding=True):
    """Get a loop_function that extracts the previous symbol and embeds it.

    Args:
      embedding: embedding tensor for symbols.
      output_projection: None or a pair (W, B). If provided, each fed previous
        output will first be multiplied by W and added B.
      update_embedding: Boolean; if False, the gradients will not propagate
        through the embeddings.

    Returns:
      A loop function.
    """
    def loop_function(prev, _):
        if output_projection is not None:
            prev = nn_ops.xw_plus_b(
                prev, output_projection[0], output_projection[1])
        prev_symbol = math_ops.argmax(prev, 1)  # greedily pick the most likely symbol
        # Note that gradients will not propagate through the second parameter of
        # embedding_lookup.
        emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)  # embed it
        if not update_embedding:
            emb_prev = array_ops.stop_gradient(emb_prev)
        return emb_prev

    return loop_function
def loss(self, final_loss, name="Loss"):
    # Return a loss based on final_loss and the distribution. Returns
    # None if pathwise derivatives are supported, if the loss_fn
    # was explicitly set to None, or if the value type is MeanValue.
    if self._loss_fn is None:
        return None

    if (self._dist.is_continuous and
            self._dist.reparameterization_type is
            distribution.FULLY_REPARAMETERIZED and
            not self._value_type.stop_gradient):
        # Can perform pathwise-derivative on this one; no additional loss needed.
        return None

    with ops.name_scope(self.name, values=[final_loss]):
        with ops.name_scope(name):
            if (self._value_type.stop_gradient or
                    isinstance(self._value_type, SampleValue)):
                return self._loss_fn(self, self._value, final_loss)
            elif isinstance(self._value_type, MeanValue):
                return None  # MeanValue generally provides its own gradient.
            else:
                raise TypeError(
                    "Unrecognized Distribution Value Type: %s" % self._value_type)
def _extract_argmax_and_embed(embedding, output_projection=None,
                              update_embedding=True):
    """Get a loop_function that extracts the previous symbol and embeds it.

    Args:
      embedding: embedding tensor for symbols.
      output_projection: None or a pair (W, B). If provided, each fed previous
        output will first be multiplied by W and added B.
      update_embedding: Boolean; if False, the gradients will not propagate
        through the embeddings.

    Returns:
      A loop function.
    """
    def loop_function(prev, _):
        if output_projection is not None:
            prev = nn_ops.xw_plus_b(prev, output_projection[0],
                                    output_projection[1])
        prev_symbol = math_ops.argmax(prev, 1)
        # Note that gradients will not propagate through the second parameter of
        # embedding_lookup.
        emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
        if not update_embedding:
            emb_prev = array_ops.stop_gradient(emb_prev)
        return emb_prev

    return loop_function
def apply_input_bias_and_extract_argmax_fn_factory(input_bias):
    """
    :param encoder_inputs: list of length equal to the input bucket length of
        1-D tensors (of length equal to the batch size) whose elements consist
        of the token index of each sample in the batch at a given index in the
        input.
    :return:
    """
    def fn_factory(embedding, output_projection=None, update_embedding=True):
        """Get a loop_function that extracts the previous symbol and embeds it.

        Args:
          embedding: embedding tensor for symbols.
          output_projection: None or a pair (W, B). If provided, each fed
            previous output will first be multiplied by W and added B.
          update_embedding: Boolean; if False, the gradients will not propagate
            through the embeddings.

        Returns:
          A loop function.
        """
        def loop_function(prev, _):
            prev = project_and_apply_input_bias(prev, output_projection,
                                                input_bias)
            prev_symbol = math_ops.argmax(prev, 1)
            # Note that gradients will not propagate through the second
            # parameter of embedding_lookup.
            emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
            if not update_embedding:
                emb_prev = array_ops.stop_gradient(emb_prev)
            return emb_prev, prev_symbol

        return loop_function

    return fn_factory
def _extract_argmax_and_embed(embedding, output_projection=None,
                              update_embedding=True):
    """Get a loop_function that extracts the previous symbol and embeds it.

    Args:
      embedding: embedding tensor for symbols.
      output_projection: None or a pair (W, B). If provided, each fed previous
        output will first be multiplied by W and added B.
      update_embedding: Boolean; if False, the gradients will not propagate
        through the embeddings.

    Returns:
      A loop function.
    """
    def loop_function(prev, _):
        # decoder outputs thus far.
        if output_projection is not None:
            prev = nn_ops.xw_plus_b(
                prev, output_projection[0], output_projection[1])
        prev_symbol = math_ops.argmax(prev, 1)
        # Note that gradients will not propagate through the second parameter of
        # embedding_lookup.
        emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
        if not update_embedding:
            emb_prev = array_ops.stop_gradient(emb_prev)
        return emb_prev, prev_symbol

    return loop_function
def stop_gradient(variables):
    """Returns `variables` but with zero gradient w.r.t. every other variable.

    Arguments:
      variables: List of variables.

    Returns:
      The same list of variables.
    """
    return array_ops.stop_gradient(variables)


# CONTROL FLOW
def score_function(dist_tensor, value, loss, baseline=None,
                   name="ScoreFunction"):
    """Score function estimator.

    Computes the integrand of the score function with a baseline:
    `p.log_prob(value) * (loss - baseline)`.

    It will add a `stop_gradient` to the advantage `(loss - baseline)`.

    Args:
      dist_tensor: `DistributionTensor` p(x).
      value: `Tensor` x. Samples from p(x).
      loss: `Tensor`.
      baseline: `Tensor` broadcastable to `loss`.
      name: name to prepend ops with.

    Returns:
      `Tensor` `p.log_prob(x) * (loss - b)`. Taking the gradient yields the
      score function estimator.
    """
    with ops.name_scope(name, values=[value, loss, baseline]):
        value = ops.convert_to_tensor(value)
        loss = ops.convert_to_tensor(loss)
        if baseline is not None:
            baseline = ops.convert_to_tensor(baseline)
            advantage = loss - baseline
        else:
            advantage = loss

        advantage = array_ops.stop_gradient(advantage)
        return dist_tensor.distribution.log_prob(value) * advantage
def stop_gradient(self):
    """Whether the value should be wrapped in stop_gradient.

    StochasticTensors must respect this property.
    """
    pass
def __init__(self, stop_gradient=False):
    self._stop_gradient = stop_gradient
def __init__(self, n=1, stop_gradient=False):
    """Sample `n` times and concatenate along a new outer dimension.

    Args:
      n: A python integer or int32 tensor. The number of samples to take.
      stop_gradient: If `True`, StochasticTensors' values are wrapped in
        `stop_gradient`, to avoid backpropagation through.
    """
    self._n = n
    self._stop_gradient = stop_gradient
def stop_gradient(self):
    return self._stop_gradient
def __init__(self, n=1, stop_gradient=False):
    """Sample `n` times and reshape the outer 2 axes so rank does not change.

    Args:
      n: A python integer or int32 tensor. The number of samples to take.
      stop_gradient: If `True`, StochasticTensors' values are wrapped in
        `stop_gradient`, to avoid backpropagation through.
    """
    self._n = n
    self._stop_gradient = stop_gradient
def stop_gradient(self):
    return self._stop_gradient


# Keeps track of how a StochasticTensor's value should be accessed.
# Used by value_type and get_current_value_type below.
def _logspace_mean(log_values):
    """Evaluate `Log[E[values]]` in a stable manner.

    Args:
      log_values: `Tensor` holding `Log[values]`.

    Returns:
      `Tensor` of same `dtype` as `log_values`, reduced across dim 0.
      `Log[Mean[values]]`.
    """
    # center = Max[Log[values]], with stop-gradient.
    # The center hopefully keeps the exponentiated terms small. It is cancelled
    # from the final result, so putting stop_gradient on it will not change the
    # final result. We put stop_gradient on to eliminate unnecessary computation.
    center = array_ops.stop_gradient(_sample_max(log_values))

    # centered_values = exp{log_values - center}
    centered_values = math_ops.exp(log_values - center)

    # log_mean_of_values = Log[ E[centered_values] ] + center
    #                    = Log[ E[exp{log_values - center}] ] + center
    #                    = Log[ E[values] ] - center + center
    #                    = Log[ E[values] ]
    log_mean_of_values = math_ops.log(_sample_mean(centered_values)) + center

    return log_mean_of_values
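The algebra in those comments is easiest to check numerically. The following NumPy sketch (ours; the function name is hypothetical) applies the same max-centering trick in a case where the naive computation overflows:

import numpy as np

def log_mean_exp(log_values, axis=0):
    # Subtract the max (a constant that cancels out), exponentiate safely,
    # then add the max back after the log.
    center = np.max(log_values, axis=axis, keepdims=True)
    centered = np.exp(log_values - center)
    return np.log(np.mean(centered, axis=axis)) + np.squeeze(center, axis=axis)

x = np.array([1000.0, 1000.5, 999.5])
print(log_mean_exp(x))        # ~1000.08, finite
# np.log(np.mean(np.exp(x)))  # naive version: exp(1000.0) overflows to inf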
def score_function(stochastic_tensor, value, loss, baseline=None,
                   name="ScoreFunction"):
    """Score function estimator.

    Computes the integrand of the score function with a baseline:
    `p.log_prob(value) * (loss - baseline)`.

    It will add a `stop_gradient` to the advantage `(loss - baseline)`.

    Args:
      stochastic_tensor: `StochasticTensor` p(x).
      value: `Tensor` x. Samples from p(x).
      loss: `Tensor`.
      baseline: `Tensor` broadcastable to `loss`.
      name: name to prepend ops with.

    Returns:
      `Tensor` `p.log_prob(x) * (loss - b)`. Taking the gradient yields the
      score function estimator.
    """
    with ops.name_scope(name, values=[value, loss, baseline]):
        value = ops.convert_to_tensor(value)
        loss = ops.convert_to_tensor(loss)
        if baseline is not None:
            baseline = ops.convert_to_tensor(baseline)
            advantage = loss - baseline
        else:
            advantage = loss

        advantage = array_ops.stop_gradient(advantage)
        return stochastic_tensor.distribution.log_prob(value) * advantage
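For reference, the surrogate that score_function builds can be written directly against a plain distribution. This sketch (ours, assuming TensorFlow 1.x; the baseline value is hypothetical) stops gradients through both the sample and the advantage, mirroring _create_value and score_function above:

import tensorflow as tf

loc = tf.Variable(0.0)
dist = tf.distributions.Normal(loc=loc, scale=1.0)
value = tf.stop_gradient(dist.sample())   # treat the sample as a constant
loss = tf.square(value - 3.0)             # downstream loss f(x)
baseline = tf.constant(1.0)               # hypothetical baseline
advantage = tf.stop_gradient(loss - baseline)
surrogate = dist.log_prob(value) * advantage
# Differentiating the surrogate w.r.t. `loc` yields the score-function
# (REINFORCE) estimate of d E[f(x)] / d loc.
grad = tf.gradients(surrogate, loc)[0]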
def __init__(self, shape=(), stop_gradient=False):
    """Sample according to shape.

    For the given StochasticTensor `st` using this value type, the shape of
    `st.value()` will match that of `st.distribution.sample(shape)`.

    Args:
      shape: A shape tuple or int32 tensor. The sample shape. Default is a
        scalar: take one sample and do not change the size.
      stop_gradient: If `True`, StochasticTensors' values are wrapped in
        `stop_gradient`, to avoid backpropagation through.
    """
    self._shape = shape
    self._stop_gradient = stop_gradient
def _extract_argmax_and_embed(embedding, output_projection=None,
                              update_embedding=True):
    def loop_function(prev, _):
        if output_projection is not None:
            prev = nn_ops.xw_plus_b(
                prev, output_projection[0], output_projection[1])
        prev_symbol = math_ops.argmax(prev, 1)
        # Note that gradients will not propagate through the second parameter of
        # embedding_lookup.
        emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
        if not update_embedding:
            emb_prev = array_ops.stop_gradient(emb_prev)
        return emb_prev

    return loop_function