The following 24 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.VariableScope().
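Before the project examples, here is a minimal sketch of the basic pattern they all rely on. It assumes the TensorFlow 1.x graph-mode API (tf.VariableScope does not exist in TensorFlow 2.x); the variable name 'Policy' and shapes are arbitrary choices for illustration.

# Minimal sketch (assumes TensorFlow 1.x graph mode).
# A tf.VariableScope is constructed directly, entered with tf.variable_scope(),
# and later reopened with reuse=True so tf.get_variable() returns the same variable.
import tensorflow as tf

policy_scope = tf.VariableScope(reuse=False, name='Policy')
with tf.variable_scope(policy_scope):
    w = tf.get_variable('w', shape=[3, 3], dtype=tf.float32)

with tf.variable_scope(policy_scope, reuse=True):
    w_again = tf.get_variable('w')  # retrieves the existing 'Policy/w'

assert w is w_again
assert isinstance(tf.get_variable_scope(), tf.VariableScope)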
def variable_scope(self):
    """Returns the variable_scope declared by the module.

    It is valid for library users to access the internal templated
    variable_scope, but only makes sense to do so after connection. Therefore
    we raise an error here if the variable_scope is requested before
    connection.

    The only case where it does make sense to access the variable_scope before
    connection is to get the post-uniquification name, which we support using
    the separate .name property.

    Returns:
        variable_scope: `tf.VariableScope` instance of the internal
            `tf.Template`.

    Raises:
        NotConnectedError: If the module is not connected to the Graph.
    """
    self._ensure_is_connected()
    return self._template.variable_scope
def get_variable_scope_name(value):
    """Returns the name of the variable scope indicated by the given value.

    Args:
        value: String, variable scope, or object with `variable_scope`
            attribute (e.g., Sonnet module).

    Returns:
        The name (a string) of the corresponding variable scope.

    Raises:
        ValueError: If `value` does not identify a variable scope.
    """
    # If the object has a "variable_scope" property, use it.
    value = getattr(value, "variable_scope", value)
    if isinstance(value, tf.VariableScope):
        return value.name
    elif isinstance(value, six.string_types):
        return value
    else:
        raise ValueError("Not a variable scope: {}".format(value))
def get_variables_in_scope(scope, collection=tf.GraphKeys.TRAINABLE_VARIABLES):
    """Returns a tuple of `tf.Variable`s in a scope for a given collection.

    Args:
        scope: `tf.VariableScope` or string to retrieve variables from.
        collection: Collection to restrict query to. By default this is
            `tf.GraphKeys.TRAINABLE_VARIABLES`, which doesn't include
            non-trainable variables such as moving averages.

    Returns:
        A tuple of `tf.Variable` objects.
    """
    scope_name = get_variable_scope_name(scope)
    # Escape the name in case it contains any "." characters. Add a closing
    # slash so we will not search any scopes that have this scope name as a
    # prefix.
    scope_name = re.escape(scope_name) + "/"
    return tuple(tf.get_collection(collection, scope_name))
def _set_scope(self, scope):
    """Set the given scope as the scope of the layer.

    If not already present, set the scope for the layer. The name of such
    scope will be accessible through the `self.scope` property.

    Arguments:
        scope: the given scope, of type `str` or `tf.VariableScope`. If
            `None`, the one returned from the `self._default_scope()` method
            will be used.
    """
    if self._scope is None:
        if self._reuse:
            self._scope = next(tf.variable_scope(  # pylint: disable=I0011,E1101
                scope if scope is not None else self._default_scope()).gen)
        else:
            self._scope = next(tf.variable_scope(  # pylint: disable=I0011,E1101
                scope, default_name=self._default_scope().name).gen)
def __call__(self, values, weights=None, scope=None):
    """Computes the streaming average.

    This method builds the fragment of computational graph that computes the
    streaming average, returning a variable representing the actual streaming
    average value and an `Op` to update such value.

    Arguments:
        values: a `Tensor` of arbitrary dimensions.
        weights: optional `Tensor` whose rank is either `0`, or the same rank
            as `values`, and must be broadcastable to `values` (i.e., all
            dimensions must be either `1`, or the same as the corresponding
            `values` dimension). It contains the weights for summing up all
            the elements in `values`.
        scope: a `str` or a `tf.VariableScope` used for building the fragment
            of the computational graph that computes the streaming average.

    Returns:
        mean: a `Tensor` representing the current mean, which is a reference
            to `self.value`.
        update_op: an `Op` that updates the streaming value, which is a
            reference to `self.update_op`.
    """
    self.compute(values, weights=weights, scope=scope)
    return self.value, self.update_op
def get_variables_in_scope(scope, collection=tf.GraphKeys.TRAINABLE_VARIABLES):
    """Returns a tuple of `tf.Variable`s in a scope for a given collection.

    Args:
        scope: `tf.VariableScope` instance to retrieve variables from.
        collection: Collection to restrict query to. By default this is
            `tf.GraphKeys.TRAINABLE_VARIABLES`, which doesn't include
            non-trainable variables such as moving averages.

    Returns:
        A tuple of `tf.Variable` objects.
    """
    # Escape the name in case it contains any "." characters. Add a closing
    # slash so we will not search any scopes that have this scope name as a
    # prefix.
    scope_name = re.escape(scope.name) + "/"
    return tuple(tf.get_collection(collection, scope_name))
def __init__(self, input_type=None, output_type=None, name_or_scope=None):
    """Creates the layer.

    Args:
        input_type: A type.
        output_type: A type.
        name_or_scope: A string or variable scope. If a string, a new variable
            scope will be created by calling
            [`create_variable_scope`](#create_variable_scope), with defaults
            inherited from the current variable scope. If no caching device is
            set, it will be set to `lambda op: op.device`. This is because
            `tf.while` can be very inefficient if the variables it uses are
            not cached locally.
    """
    if name_or_scope is None:
        name_or_scope = type(self).__name__
    if isinstance(name_or_scope, tf.VariableScope):
        self._vscope = name_or_scope
        name = str(self._vscope.name)
    elif isinstance(name_or_scope, six.string_types):
        self._vscope = create_variable_scope(name_or_scope)
        name = name_or_scope
    else:
        raise TypeError('name_or_scope must be a tf.VariableScope or a string: '
                        '%s' % (name_or_scope,))
    if self._vscope.caching_device is None:
        self._vscope.set_caching_device(lambda op: op.device)
    super(Layer, self).__init__(input_type, output_type, name)
    if not hasattr(self, '_constructor_name'):
        self._constructor_name = '__.%s' % self.__class__.__name__
    if not hasattr(self, '_constructor_args'):
        self._constructor_args = None
    if not hasattr(self, '_constructor_kwargs'):
        self._constructor_kwargs = None
def __init__(self, states, inner_size, trainable=True, scope=None):
    """Initializes a new instance of the BahdanauAttention class.

    The attention mechanism implemented in this class is the one described by
    Bahdanau et al. here: https://arxiv.org/abs/1409.0473. The attention
    states and the query are projected to the attention inner size, then
    summed together, processed with a tanh and finally dot-producted with an
    attention vector. All the operations are performed on a reference size,
    named the attention size, which must be set during the initialization
    phase (with the `size` argument).

    Arguments:
        states: 3-D Tensor of shape [batch, timesteps, state] representing the
            states on which the attention scores will be computed; the third
            dimension of the tensor must be statically determined.
        inner_size: int representing the inner attention size;
        trainable: if True, variables will be trainable;
        scope: None, str or tf.VariableScope representing the variable scope
            of the layer which will be used to create all the needed variables.

    Raises:
        ValueError: if the last dimension of the `states` argument is not
            statically determined.
    """
    super(BahdanauAttention, self).__init__(trainable=trainable, scope=scope)
    self._states = states
    self._size = inner_size
    self._memory = None
    self._vector = None
    self._var_op_names = set()

    # Check that the last dimension of the `states` variable is fully defined.
    state_size = states.get_shape()[-1].value
    if state_size is None:
        raise ValueError('Last dimension of `states` must be defined, found %s'
                         % str(tf.shape(states)))
    self._state_size = state_size
def __init__(self, shortlist_size, decoder_out_size, state_size,
             trainable=True, scope='PointingSoftmaxOutput'):
    """Initializes a new instance.

    Arguments:
        shortlist_size: an `int` representing the dimension of the known
            output vocabulary.
        decoder_out_size: an `int` representing the output size of the
            decoder.
        state_size: an `int` representing the size of the attention states.
        trainable: if `True`, the created variables will be trainable.
        scope: VariableScope for the created subgraph.
    """
    super(PointingSoftmaxOutput, self).__init__(trainable=trainable, scope=scope)
    self._shortlist_size = shortlist_size
    self._decoder_out_size = decoder_out_size
    self._state_size = state_size
def as_scope(scope):
    """Get the proper variable scope.

    Given an object that can represent a `tf.VariableScope`, namely a `str`
    or a `tf.VariableScope`, performs type checking and returns a proper
    `tf.VariableScope` object. Such a function is handy when a function
    accepts an argument serving as a variable scope but doesn't know its
    proper type.

    Arguments:
        scope: a `str` or a `tf.VariableScope` representing a variable scope.

    Returns:
        a `tf.VariableScope` instance.

    Raises:
        ValueError: if `scope` is `None`.
        TypeError: if `scope` is neither `str` nor `tf.VariableScope`.

    Example:
    ```python
    from dket import utils

    def do_something(scope):
        scope = utils.as_scope(scope or 'DefaultScope')
        with tf.variable_scope(scope) as scope:
            # do something
            pass
    ```
    """
    if scope is None:
        raise ValueError('Cannot create a scope from a None.')
    if isinstance(scope, str):
        return next(tf.variable_scope(scope).gen)  # pylint: disable=I0011,E1101
    if isinstance(scope, tf.VariableScope):
        return scope
    raise TypeError('`scope` argument can be of type str or tf.VariableScope, '
                    'while %s was found.' % str(type(scope)))
def get_variables(prefix=None):
    """Get variables by their name prefix.

    Arguments:
        prefix: a `str` or a `tf.VariableScope` instance.

    Returns:
        a list of `tf.Variable` with their name starting with the given
        prefix, i.e. all those variables under the scope specified by the
        prefix.
    """
    prefix = prefix or tf.get_variable_scope().name
    if isinstance(prefix, tf.VariableScope):
        prefix = prefix.name  # accept a scope instance as well as its name
    return [var for var in tf.global_variables()
            if var.name.startswith(prefix)]
def __init__(self, subnet, name=None, scope=None):
    """Create the Shared operator.

    Use this as:

        f = Shared(Cr(100, 3))
        g = f | f | f

    Ordinarily, you do not need to provide either a name or a scope. Providing
    a name is useful if you want a well-defined namespace for the variables
    (e.g., for saving a subnet).

    Args:
        subnet: Definition of the shared network.
        name: Optional name for the shared context.
        scope: Optional shared scope (must be a VariableScope, not a string).

    Raises:
        ValueError: Scope is not of type tf.VariableScope, name is not of type
            string, or both scope and name are given together.
    """
    if scope is not None and not isinstance(scope, tf.VariableScope):
        raise ValueError("scope must be None or a VariableScope")
    if name is not None and not isinstance(name, str):
        raise ValueError("name must be None or a string")
    if scope is not None and name is not None:
        raise ValueError("cannot provide both a name and a scope")
    if name is None:
        name = "Shared_%d" % Shared.shared_number
        Shared.shared_number += 1
    self.subnet = subnet
    self.name = name
    self.scope = scope
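The remaining examples all come from reinforcement-learning agents that build their graphs the same way: a tf.VariableScope is constructed directly with reuse=False and a name, entered with tf.variable_scope(), and sometimes reopened with reuse=True so the same weights score a second input (for example the next state, or a "fixed" target network). A minimal sketch of that sharing pattern follows; it assumes TensorFlow 1.x graph mode, and the helper q_network, the feature size 4, and the two actions are hypothetical choices for illustration only.

# Minimal sketch (assumes TensorFlow 1.x graph mode; `q_network` is a
# hypothetical helper, not part of the projects quoted below).
import tensorflow as tf

def q_network(inputs, nb_actions):
    # Hypothetical linear Q-value head, for illustration only.
    w = tf.get_variable('w', shape=[int(inputs.get_shape()[-1]), nb_actions])
    b = tf.get_variable('b', shape=[nb_actions], initializer=tf.zeros_initializer())
    return tf.matmul(inputs, w) + b

states = tf.placeholder(tf.float32, shape=[None, 4], name='states')
next_states = tf.placeholder(tf.float32, shape=[None, 4], name='next_states')

q_scope = tf.VariableScope(reuse=False, name='QValues')
with tf.variable_scope(q_scope):
    q_values = q_network(states, nb_actions=2)
with tf.variable_scope(q_scope, reuse=True):
    next_q_values = q_network(next_states, nb_actions=2)  # shares the same weights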
def build_graph(self, graph):
    np.random.seed(self.random_seed)
    with graph.as_default():
        tf.set_random_seed(self.random_seed)

        # Dims: bs x num_steps x state_size
        self.inputs = tf.placeholder(
            tf.float32, shape=[None, None, self.policy_params['nb_inputs']], name='inputs')
        input_shape = tf.shape(self.inputs)
        dynamic_batch_size, dynamic_num_steps = input_shape[0], input_shape[1]

        policy_scope = tf.VariableScope(reuse=False, name='Policy')
        with tf.variable_scope(policy_scope):
            policy_inputs = tf.reshape(self.inputs, [-1, self.policy_params['nb_inputs']])
            probs, actions = capacities.policy(self.policy_params, policy_inputs)
            self.probs = tf.reshape(
                probs, [dynamic_batch_size, dynamic_num_steps, self.policy_params['nb_outputs']])
            self.actions = tf.reshape(actions, [dynamic_batch_size, dynamic_num_steps, 1])
            self.action_t = self.actions[0, 0, 0]

        with tf.variable_scope('Training'):
            self.rewards = tf.placeholder(tf.float32, shape=[None, None, 1], name="reward")
            self.mask_plh = tf.placeholder(tf.float32, shape=[None, None, 1], name="mask_plh")

            baseline = tf.reduce_mean(self.rewards)

            batch_size, num_steps = tf.shape(self.actions)[0], tf.shape(self.actions)[1]
            line_indices = tf.matmul(  # Line indice
                tf.reshape(tf.range(0, batch_size), [-1, 1]),
                tf.ones([1, num_steps], dtype=tf.int32))
            column_indices = tf.matmul(  # Column indice
                tf.ones([batch_size, 1], dtype=tf.int32),
                tf.reshape(tf.range(0, num_steps), [1, -1]))
            depth_indices = tf.cast(tf.squeeze(self.actions, 2), tf.int32)
            stacked_actions = tf.stack([line_indices, column_indices, depth_indices], 2)

            log_probs = tf.expand_dims(tf.log(tf.gather_nd(self.probs, stacked_actions)), 2)
            # We want to average on sequence
            self.loss = tf.reduce_mean(
                - tf.reduce_sum((log_probs * (self.rewards - baseline)) * self.mask_plh, 1))

            adam = tf.train.AdamOptimizer(self.lr)
            self.global_step = tf.Variable(
                0, trainable=False, name="global_step",
                collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES])
            self.train_op = adam.minimize(self.loss, global_step=self.global_step)

        self.score_plh = tf.placeholder(tf.float32, shape=[])
        self.score_sum_t = tf.summary.scalar('av_score', self.score_plh)
        self.loss_plh = tf.placeholder(tf.float32, shape=[])
        self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
        self.all_summary_t = tf.summary.merge_all()

        self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")

        # Playing part
        self.pscore_plh = tf.placeholder(tf.float32, shape=[])
        self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)

    return graph
def build_graph(self, graph):
    with graph.as_default():
        tf.set_random_seed(self.random_seed)

        self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")

        q_scope = tf.VariableScope(reuse=False, name='QValues')
        with tf.variable_scope(q_scope):
            self.Qs = tf.get_variable(
                'Qs',
                shape=[self.nb_state, self.action_space.n],
                initializer=tf.constant_initializer(self.initial_q_value),
                dtype=tf.float32)
            tf.summary.histogram('Qarray', self.Qs)
            self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)

        policy_scope = tf.VariableScope(reuse=False, name='Policy')
        with tf.variable_scope(policy_scope):
            self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
                self.inputs_plh, self.q_preds_t, self.nb_state,
                self.env.action_space.n, self.N0, self.min_eps)
            self.action_t = self.actions_t[0]
            self.q_value_t = self.q_preds_t[0][self.action_t]

        learning_scope = tf.VariableScope(reuse=False, name='TDLearning')
        with tf.variable_scope(learning_scope):
            self.rewards_plh = tf.placeholder(tf.float32, shape=[None], name="rewards_plh")
            self.targets_plh = tf.placeholder(tf.float32, shape=[None], name="targets_plh")

            self.loss, self.train_op = capacities.tabular_learning_with_lr(
                self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh,
                self.actions_t, self.targets_plh)

        self.score_plh = tf.placeholder(tf.float32, shape=[])
        self.score_sum_t = tf.summary.scalar('score', self.score_plh)
        self.loss_plh = tf.placeholder(tf.float32, shape=[])
        self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
        self.all_summary_t = tf.summary.merge_all()

        self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")

        # Playing part
        self.pscore_plh = tf.placeholder(tf.float32, shape=[])
        self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)

    return graph
def build_graph(self, graph):
    with graph.as_default():
        tf.set_random_seed(self.random_seed)

        self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")

        q_scope = tf.VariableScope(reuse=False, name='QValues')
        with tf.variable_scope(q_scope):
            self.Qs = tf.get_variable(
                'Qs',
                shape=[self.nb_state, self.action_space.n],
                initializer=tf.constant_initializer(self.initial_q_value),
                dtype=tf.float32)
            tf.summary.histogram('Qarray', self.Qs)
            self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)

        fixed_q_scope = tf.VariableScope(reuse=False, name='FixedQValues')
        with tf.variable_scope(fixed_q_scope):
            self.update_fixed_vars_op = capacities.fix_scope(q_scope)

        policy_scope = tf.VariableScope(reuse=False, name='Policy')
        with tf.variable_scope(policy_scope):
            if 'UCB' in self.config and self.config['UCB']:
                self.actions_t, self.probs_t = capacities.tabular_UCB(
                    self.Qs, self.inputs_plh)
            else:
                self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
                    self.inputs_plh, self.q_preds_t, self.nb_state,
                    self.env.action_space.n, self.N0, self.min_eps)
            self.action_t = self.actions_t[0]
            self.q_value_t = self.q_preds_t[0][self.action_t]

        # Experienced replay part
        with tf.variable_scope('Learning'):
            with tf.variable_scope(fixed_q_scope, reuse=True):
                fixed_Qs = tf.get_variable('Qs')

            self.rewards_plh = tf.placeholder(tf.float32, shape=[None], name="rewards_plh")
            self.next_states_plh = tf.placeholder(tf.int32, shape=[None], name="next_states_plh")

            # Note that we use the fixed Qs to create the targets
            self.targets_t = capacities.get_q_learning_target(
                fixed_Qs, self.rewards_plh, self.next_states_plh, self.discount)
            self.loss, self.train_op = capacities.tabular_learning_with_lr(
                self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh,
                self.actions_t, self.targets_t)

        self.score_plh = tf.placeholder(tf.float32, shape=[])
        self.score_sum_t = tf.summary.scalar('score', self.score_plh)
        self.loss_plh = tf.placeholder(tf.float32, shape=[])
        self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
        self.all_summary_t = tf.summary.merge_all()

        self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
        self.event_count, self.inc_event_count_op = capacities.counter("event_count")

        # Playing part
        self.pscore_plh = tf.placeholder(tf.float32, shape=[])
        self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)

    return graph
def build_graph(self, graph):
    with graph.as_default():
        tf.set_random_seed(self.random_seed)

        self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")

        q_scope = tf.VariableScope(reuse=False, name='QValues')
        with tf.variable_scope(q_scope):
            self.Qs = tf.get_variable(
                'Qs',
                shape=[self.nb_state, self.action_space.n],
                initializer=tf.constant_initializer(self.initial_q_value),
                dtype=tf.float32)
            tf.summary.histogram('Qarray', self.Qs)
            self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)

        policy_scope = tf.VariableScope(reuse=False, name='Policy')
        with tf.variable_scope(policy_scope):
            if 'UCB' in self.config and self.config['UCB']:
                self.actions_t, self.probs_t = capacities.tabular_UCB(
                    self.Qs, self.inputs_plh)
            else:
                self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
                    self.inputs_plh, self.q_preds_t, self.nb_state,
                    self.env.action_space.n, self.N0, self.min_eps)
            self.action_t = self.actions_t[0]
            self.q_value_t = self.q_preds_t[0][self.action_t]

        learning_scope = tf.VariableScope(reuse=False, name='Learning')
        with tf.variable_scope(learning_scope):
            self.rewards_plh = tf.placeholder(tf.float32, shape=[None], name="rewards_plh")
            self.next_states_plh = tf.placeholder(tf.int32, shape=[None], name="next_states_plh")
            self.next_probs_plh = tf.placeholder(
                tf.float32, shape=[None, self.action_space.n], name="next_probs_plh")

            self.targets_t = capacities.get_expected_sarsa_target(
                self.Qs, self.rewards_plh, self.next_states_plh,
                self.next_probs_plh, self.discount)
            self.loss, self.train_op = capacities.tabular_learning_with_lr(
                self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh,
                self.actions_t, self.targets_t)

        self.score_plh = tf.placeholder(tf.float32, shape=[])
        self.score_sum_t = tf.summary.scalar('score', self.score_plh)
        self.loss_plh = tf.placeholder(tf.float32, shape=[])
        self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
        self.all_summary_t = tf.summary.merge_all()

        self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")

        # Playing part
        self.pscore_plh = tf.placeholder(tf.float32, shape=[])
        self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)

    return graph
def build_graph(self, graph):
    with graph.as_default():
        tf.set_random_seed(self.random_seed)

        self.N0_t = tf.constant(self.N0, tf.float32, name='N_0')
        self.N = tf.Variable(0., dtype=tf.float32, name='N', trainable=False)
        self.min_eps_t = tf.constant(self.min_eps, tf.float32, name='min_eps')

        self.inputs = tf.placeholder(
            tf.float32, shape=[None, self.q_params['nb_inputs']], name='inputs')

        q_scope = tf.VariableScope(reuse=False, name='QValues')
        with tf.variable_scope(q_scope):
            self.q_values = tf.squeeze(capacities.value_f(self.q_params, self.inputs))
            self.action_t = capacities.eps_greedy(
                self.inputs, self.q_values, self.env.action_space.n,
                self.N0, self.min_eps)
            self.q_t = self.q_values[self.action_t]

        with tf.variable_scope('Training'):
            self.reward = tf.placeholder(tf.float32, shape=[], name="reward")
            self.next_state = tf.placeholder(
                tf.float32, shape=[1, self.q_params['nb_inputs']], name="nextState")
            self.next_action = tf.placeholder(tf.int32, shape=[], name="nextAction")

            with tf.variable_scope(q_scope, reuse=True):
                next_q_values = tf.squeeze(capacities.value_f(self.q_params, self.next_state))
            target_q1 = tf.stop_gradient(
                self.reward + self.discount * next_q_values[self.next_action])
            target_q2 = self.reward
            is_done = tf.cast(self.next_state[0, 4], tf.bool)
            target_q = tf.where(is_done, target_q2, target_q1)
            with tf.control_dependencies([target_q]):
                self.loss = 1/2 * tf.square(target_q - self.q_t)

            adam = tf.train.AdamOptimizer(self.lr)
            self.global_step = tf.Variable(
                0, trainable=False, name="global_step",
                collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES])
            self.train_op = adam.minimize(self.loss, global_step=self.global_step)

        self.score_plh = tf.placeholder(tf.float32, shape=[])
        self.score_sum_t = tf.summary.scalar('score', self.score_plh)
        self.loss_plh = tf.placeholder(tf.float32, shape=[])
        self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
        self.all_summary_t = tf.summary.merge_all()

        self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")

        # Playing part
        self.pscore_plh = tf.placeholder(tf.float32, shape=[])
        self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)

    return graph
def build_graph(self, graph):
    with graph.as_default():
        tf.set_random_seed(self.random_seed)

        self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")

        q_scope = tf.VariableScope(reuse=False, name='QValues')
        with tf.variable_scope(q_scope):
            self.Qs = tf.get_variable(
                'Qs',
                shape=[self.nb_state, self.action_space.n],
                initializer=tf.constant_initializer(self.initial_q_value),
                dtype=tf.float32)
            tf.summary.histogram('Qarray', self.Qs)
            self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)

        policy_scope = tf.VariableScope(reuse=False, name='Policy')
        with tf.variable_scope(policy_scope):
            if 'UCB' in self.config and self.config['UCB']:
                self.actions_t, self.probs_t = capacities.tabular_UCB(
                    self.Qs, self.inputs_plh)
            else:
                self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
                    self.inputs_plh, self.q_preds_t, self.nb_state,
                    self.env.action_space.n, self.N0, self.min_eps)
            self.action_t = self.actions_t[0]
            self.q_value_t = self.q_preds_t[0][self.action_t]

        learning_scope = tf.VariableScope(reuse=False, name='Learning')
        with tf.variable_scope(learning_scope):
            self.rewards_plh = tf.placeholder(tf.float32, shape=[None], name="rewards_plh")
            self.next_states_plh = tf.placeholder(tf.int32, shape=[None], name="next_states_plh")

            self.targets_t = capacities.get_q_learning_target(
                self.Qs, self.rewards_plh, self.next_states_plh, self.discount)
            self.loss, self.train_op = capacities.tabular_learning_with_lr(
                self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh,
                self.actions_t, self.targets_t)

        self.score_plh = tf.placeholder(tf.float32, shape=[])
        self.score_sum_t = tf.summary.scalar('score', self.score_plh)
        self.loss_plh = tf.placeholder(tf.float32, shape=[])
        self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
        self.all_summary_t = tf.summary.merge_all()

        self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")

        # Playing part
        self.pscore_plh = tf.placeholder(tf.float32, shape=[])
        self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)

    return graph
def build_graph(self, graph):
    with graph.as_default():
        tf.set_random_seed(self.random_seed)

        self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")

        q_scope = tf.VariableScope(reuse=False, name='QValues')
        with tf.variable_scope(q_scope):
            self.Qs = tf.get_variable(
                'Qs',
                shape=[self.nb_state, self.action_space.n],
                initializer=tf.constant_initializer(self.initial_q_value),
                dtype=tf.float32)
            tf.summary.histogram('Qarray', self.Qs)
            self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)

        policy_scope = tf.VariableScope(reuse=False, name='Policy')
        with tf.variable_scope(policy_scope):
            if 'UCB' in self.config and self.config['UCB']:
                self.actions_t, self.probs_t = capacities.tabular_UCB(
                    self.Qs, self.inputs_plh)
            else:
                self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
                    self.inputs_plh, self.q_preds_t, self.nb_state,
                    self.env.action_space.n, self.N0, self.min_eps)
            self.action_t = self.actions_t[0]
            self.q_value_t = self.q_preds_t[0][self.action_t]

        learning_scope = tf.VariableScope(reuse=False, name='Learning')
        with tf.variable_scope(learning_scope):
            self.rewards_plh = tf.placeholder(tf.float32, shape=[None], name="rewards_plh")

            self.targets_t = capacities.get_mc_target(self.rewards_plh, self.discount)
            self.loss, self.train_op = capacities.tabular_learning(
                self.Qs, self.inputs_plh, self.actions_t, self.targets_t)

        self.score_plh = tf.placeholder(tf.float32, shape=[])
        self.score_sum_t = tf.summary.scalar('score', self.score_plh)
        self.loss_plh = tf.placeholder(tf.float32, shape=[])
        self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
        self.all_summary_t = tf.summary.merge_all()

        self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")

        # Playing part
        self.pscore_plh = tf.placeholder(tf.float32, shape=[])
        self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)

    return graph
def build_graph(self, graph):
    with graph.as_default():
        tf.set_random_seed(self.random_seed)

        self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")

        q_scope = tf.VariableScope(reuse=False, name='QValues')
        with tf.variable_scope(q_scope):
            self.Qs = tf.get_variable(
                'Qs',
                shape=[self.nb_state, self.action_space.n],
                initializer=tf.constant_initializer(self.initial_q_value),
                dtype=tf.float32)
            tf.summary.histogram('Qarray', self.Qs)
            self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)

        policy_scope = tf.VariableScope(reuse=False, name='Policy')
        with tf.variable_scope(policy_scope):
            if 'UCB' in self.config and self.config['UCB']:
                self.actions_t, self.probs_t = capacities.tabular_UCB(
                    self.Qs, self.inputs_plh)
            else:
                self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
                    self.inputs_plh, self.q_preds_t, self.nb_state,
                    self.env.action_space.n, self.N0, self.min_eps)
            self.action_t = self.actions_t[0]
            self.q_value_t = self.q_preds_t[0, self.action_t]

        learning_scope = tf.VariableScope(reuse=False, name='Learning')
        with tf.variable_scope(learning_scope):
            self.targets_t = tf.placeholder(tf.float32, shape=[None], name="targets_t")

            self.loss, self.train_op = capacities.tabular_learning_with_lr(
                self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh,
                self.actions_t, self.targets_t)

        self.score_plh = tf.placeholder(tf.float32, shape=[])
        self.score_sum_t = tf.summary.scalar('score', self.score_plh)
        self.loss_plh = tf.placeholder(tf.float32, shape=[])
        self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
        self.all_summary_t = tf.summary.merge_all()

        self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")

        # Playing part
        self.pscore_plh = tf.placeholder(tf.float32, shape=[])
        self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)

    return graph
def build_graph(self, graph):
    with graph.as_default():
        tf.set_random_seed(self.random_seed)

        self.inputs_plh = tf.placeholder(tf.int32, shape=[None], name="inputs_plh")

        q_scope = tf.VariableScope(reuse=False, name='QValues')
        with tf.variable_scope(q_scope):
            self.Qs = tf.get_variable(
                'Qs',
                shape=[self.nb_state, self.action_space.n],
                initializer=tf.constant_initializer(self.initial_q_value),
                dtype=tf.float32)
            tf.summary.histogram('Qarray', self.Qs)
            self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)

        policy_scope = tf.VariableScope(reuse=False, name='Policy')
        with tf.variable_scope(policy_scope):
            if 'UCB' in self.config and self.config['UCB']:
                self.actions_t, self.probs_t = capacities.tabular_UCB(
                    self.Qs, self.inputs_plh)
            else:
                self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
                    self.inputs_plh, self.q_preds_t, self.nb_state,
                    self.env.action_space.n, self.N0, self.min_eps)
            self.action_t = self.actions_t[0]
            self.q_value_t = self.q_preds_t[0][self.action_t]

        self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")

        learning_scope = tf.VariableScope(reuse=False, name='Learning')
        with tf.variable_scope(learning_scope):
            self.rewards_plh = tf.placeholder(tf.float32, shape=[None], name="rewards_plh")
            self.next_states_plh = tf.placeholder(tf.int32, shape=[None], name="next_states_plh")
            self.next_actions_plh = tf.placeholder(tf.int32, shape=[None], name="next_actions_plh")
            self.next_probs_plh = tf.placeholder(
                tf.float32, shape=[None, self.action_space.n], name="next_probs_plh")

            sigma = tf.train.inverse_time_decay(
                tf.constant(1., dtype=tf.float32), self.episode_id,
                decay_steps=100, decay_rate=0.1)
            tf.summary.scalar('sigma', sigma)

            self.targets_t = capacities.get_sigma_target(
                self.Qs, sigma, self.rewards_plh, self.next_states_plh,
                self.next_actions_plh, self.next_probs_plh, self.discount)
            self.loss, self.train_op = capacities.tabular_learning_with_lr(
                self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh,
                self.actions_t, self.targets_t)

        self.score_plh = tf.placeholder(tf.float32, shape=[])
        self.score_sum_t = tf.summary.scalar('score', self.score_plh)
        self.loss_plh = tf.placeholder(tf.float32, shape=[])
        self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
        self.all_summary_t = tf.summary.merge_all()

        # Playing part
        self.pscore_plh = tf.placeholder(tf.float32, shape=[])
        self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)

    return graph