The following are 5 code examples, extracted from open source Python projects, that illustrate how to use tensorflow.clip_by_average_norm().
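Before the examples, here is a minimal sketch of what the op computes (assuming TensorFlow 1.x, where tf.clip_by_average_norm is still available): it rescales a tensor so that its average L2-norm, i.e. the tensor's L2-norm divided by its number of elements, is at most clip_norm; a tensor already under the threshold passes through unchanged.

import numpy as np
import tensorflow as tf  # TF 1.x graph-mode API

t = tf.constant([[3.0, 4.0]])              # L2-norm 5.0 over 2 elements -> average norm 2.5
clipped = tf.clip_by_average_norm(t, 1.0)  # rescale so the average norm is at most 1.0

with tf.Session() as sess:
    print(sess.run(clipped))               # [[1.2 1.6]], i.e. t scaled by 1.0 / 2.5

# the same computation in NumPy, for reference
arr = np.array([[3.0, 4.0]])
avg_norm = np.linalg.norm(arr) / arr.size  # 5.0 / 2 = 2.5
print(arr * min(1.0, 1.0 / avg_norm))      # matches the TensorFlow result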
def __init__(self, nA, learning_rate, decay, grad_clip, entropy_beta,
             state_shape=[84, 84, 4],
             master=None, device_name='/gpu:0', scope_name='master'):
    with tf.device(device_name):
        self.state = tf.placeholder(tf.float32, [None] + state_shape)
        block, self.scope = ActorCritic._build_shared_block(self.state, scope_name)
        self.policy, self.log_softmax_policy = ActorCritic._build_policy(block, nA, scope_name)
        self.value = ActorCritic._build_value(block, scope_name)

        self.train_vars = sorted(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope.name),
                                 key=lambda v: v.name)
        if master is not None:
            self.sync_op = self._sync_op(master)
            self.action = tf.placeholder(tf.int32, [None,])
            self.target_value = tf.placeholder(tf.float32, [None,])

            advantage = self.target_value - self.value
            entropy = tf.reduce_sum(-1. * self.policy * self.log_softmax_policy, axis=1)
            log_p_s_a = tf.reduce_sum(self.log_softmax_policy * tf.one_hot(self.action, nA), axis=1)

            self.policy_loss = tf.reduce_mean(tf.stop_gradient(advantage) * log_p_s_a)
            self.entropy_loss = tf.reduce_mean(entropy)
            self.value_loss = tf.reduce_mean(advantage ** 2)

            # maximize policy log-likelihood and entropy, minimize value error
            loss = -self.policy_loss - entropy_beta * self.entropy_loss + self.value_loss
            self.gradients = tf.gradients(loss, self.train_vars)
            # clip each worker gradient by its average L2-norm before applying
            # it to the master network's variables
            clipped_gs = [tf.clip_by_average_norm(g, grad_clip) for g in self.gradients]
            self.train_op = master.optimizer.apply_gradients(zip(clipped_gs, master.train_vars))
        else:
            #self.optimizer = tf.train.AdamOptimizer(learning_rate, beta1=BETA)
            self.optimizer = tf.train.RMSPropOptimizer(learning_rate, decay=decay, use_locking=True)
def _clip_grads(self, grads):
    if self.clip_norm_type == 'ignore':
        return grads
    elif self.clip_norm_type == 'global':
        return tf.clip_by_global_norm(grads, self.clip_norm)[0]
    elif self.clip_norm_type == 'avg':
        # tf.clip_by_average_norm operates on a single tensor,
        # so clip each gradient separately
        return [tf.clip_by_average_norm(g, self.clip_norm) for g in grads]
    elif self.clip_norm_type == 'local':
        return [tf.clip_by_norm(g, self.clip_norm) for g in grads]
def get_gradients(self, loss_or_grads, params):
    """
    Note
    ----
    The returned gradients may contain None value
    """
    # check valid algorithm
    if self.algorithm is None or \
    not hasattr(self.algorithm, 'compute_gradients') or \
    not hasattr(self.algorithm, 'apply_gradients'):
        raise RuntimeError("Optimizer is None, or doesn't have the attributes: "
                           "compute_gradients and apply_gradients.")
    with tf.variable_scope(self.name):
        # get the gradients, dropping any that are None
        grads_var = self.algorithm.compute_gradients(loss_or_grads, var_list=params)
        grads_var = {g: v for g, v in grads_var if g is not None}
        grads = list(grads_var.keys())
        params = list(grads_var.values())
        # ====== clipnorm ====== #
        if self.clipnorm is not None:
            if self.clip_alg == 'norm':
                grads = [tf.clip_by_norm(g, self.clipnorm) for g in grads]
            elif self.clip_alg == 'total_norm':
                grads, _ = tf.clip_by_global_norm(grads, self.clipnorm)
            elif self.clip_alg == 'avg_norm':
                grads = [tf.clip_by_average_norm(g, self.clipnorm) for g in grads]
        # ====== clipvalue ====== #
        if self.clipvalue is not None:
            grads = [tf.clip_by_value(g, -self.clipvalue, self.clipvalue)
                     for g in grads]
        # ====== get final norm value ====== #
        self._norm = add_role(tf.global_norm(grads, name="GradientNorm"),
                              GradientsNorm)
        return [(g, p) for g, p in zip(grads, params)]
def setup_gradients(self, prefix, opt, cost):
    grads = opt.compute_gradients(cost)
    ret_grads = []
    ret_names = []
    ret_apply = []
    for e in grads:
        grad, var = e
        if grad is None or var is None:
            continue
        # print("var: %s, gradient: %s" % (var, grad))
        if self.scope != get_scope_name(var.name):
            continue
        pname = get_param_name(var.name)
        gname = '%s/gradient_%s' % (prefix, pname)
        print("gradient %s -> %s" % (var, gname))
        # collect all gradients
        ret_grads.append(grad)
        ret_names.append(gname)
        pl = tf.placeholder(tf.float32, shape=var.get_shape(), name=gname)
        # clip the fed-in gradient so its average L2-norm is at most 1
        clip = tf.clip_by_average_norm(pl, 1)
        ret_apply.append((clip, var))
        ag = tf.summary.histogram('%s/%s/apply_%s' % (self.scope, prefix, gname), clip)
        self.summary_apply_gradients.append(ag)
    return ret_grads, ret_names, ret_apply
def _op(self, grad, var):
    # `var` is unused; return the gradient clipped by its average L2-norm
    return tf.clip_by_average_norm(grad, self._clip_norm)
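Since tf.clip_by_average_norm rescales t by min(1, clip_norm * size(t) / ||t||), the same effect can be obtained from tf.clip_by_norm by scaling the threshold with the element count, which is useful where the op is unavailable. A minimal compatibility sketch (the helper name clip_by_average_norm_compat is ours, not a TensorFlow API):

import tensorflow as tf

def clip_by_average_norm_compat(t, clip_norm):
    # an average-L2-norm threshold c on a tensor with n elements is the
    # same as a plain L2-norm threshold of c * n
    n = tf.cast(tf.size(t), tf.float32)
    return tf.clip_by_norm(t, clip_norm * n)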