我们从 Python 开源项目中,提取了以下 50 个代码示例,用于说明如何使用 tensorflow.trainable_variables()。
def __init__(self, sess, checkpoint_dir, log_dir, training_paths, testing_paths, roi, im_size, nclass,
             batch_size=1, layers=3, features_root=32, conv_size=3, dropout=0.5,
             testing_gt_available=True, loss_type='cross_entropy', class_weights=None):
    """Store trainer configuration, build the model graph, and create a Saver.

    Args:
        sess: active tf.Session used for all graph execution.
        checkpoint_dir: directory for saving/restoring checkpoints.
        log_dir: directory for summary/log output.
        training_paths: paths to the training data.
        testing_paths: paths to the testing data.
        roi: (roi_order, roi_name) pair identifying the region of interest.
        im_size: input image size.
        nclass: number of output classes.
        batch_size: mini-batch size.
        layers: number of network levels.
        features_root: number of feature maps at the first level.
        conv_size: convolution kernel size.
        dropout: dropout keep/drop rate used by the model.
        testing_gt_available: whether ground truth exists for the test set.
        loss_type: loss selector (e.g. 'cross_entropy').
        class_weights: optional per-class loss weights.
    """
    self.sess = sess
    self.checkpoint_dir = checkpoint_dir
    self.log_dir = log_dir
    self.training_paths = training_paths
    self.testing_paths = testing_paths
    self.testing_gt_available = testing_gt_available
    self.nclass = nclass
    self.im_size = im_size
    self.roi = roi  # (roi_order, roi_name)
    self.batch_size = batch_size
    self.layers = layers
    self.features_root = features_root
    self.conv_size = conv_size
    self.dropout = dropout
    self.loss_type = loss_type
    self.class_weights = class_weights
    self.build_model()
    # Save batch-norm statistics (kept in the custom 'bn_collections'
    # collection) alongside the trainable variables; otherwise restored
    # models would have uninitialized moving averages.
    self.saver = tf.train.Saver(tf.trainable_variables() + tf.get_collection_ref('bn_collections'))
def create_network(self, state_dim, action_dim, scope):
    """Build a recurrent policy network under `scope`.

    Returns:
        (state_input, action_output, net): the state placeholder, the tanh
        action output tensor, and the list of trainable variables whose
        names contain `scope`.
    """
    with tf.variable_scope(scope, reuse=False) as s:
        # [batch, time, state_dim] input sequence.
        state_input = tf.placeholder("float", [None, None, state_dim])
        # creating the recurrent part
        lstm_cell = rnn.BasicLSTMCell(LSTM_HIDDEN_UNIT)
        lstm_output, lstm_state = tf.nn.dynamic_rnn(cell=lstm_cell, inputs=state_input, dtype=tf.float32)
        W3 = tf.Variable(tf.random_uniform([lstm_cell.state_size, action_dim], -3e-3, 3e-3))
        b3 = tf.Variable(tf.random_uniform([action_dim], -3e-3, 3e-3))
        # NOTE(review): for a BasicLSTMCell with state_is_tuple=True,
        # `lstm_state` is an LSTMStateTuple and `state_size` is a tuple,
        # so this matmul assumes state_is_tuple=False — TODO confirm the
        # cell configuration before relying on this.
        action_output = tf.tanh(tf.matmul(lstm_state, W3) + b3)
        # Collect this scope's trainable variables by name substring.
        net = [v for v in tf.trainable_variables() if scope in v.name]
    return state_input, action_output, net
def get_forward_parameters(vocab_size=4716):
    """Return the four forward-layer parameter matrices, each with its bias
    row concatenated below the weight matrix.

    Args:
        vocab_size: output vocabulary size (row width of the final bias).

    Returns:
        A list [vars_1, vars_2, vars_3, vars_4] of stacked weight+bias
        tensors for hidden_1, hidden_2, output_1 and output_2.
    """
    all_vars = tf.trainable_variables()

    def _layer_params(layer_name):
        # First matching weight and bias variable for a named layer.
        weights = [v for v in all_vars if layer_name in v.name and 'weights' in v.name]
        biases = [v for v in all_vars if layer_name in v.name and 'biases' in v.name]
        return weights[0], biases[0]

    w_h1, b_h1 = _layer_params('hidden_1')
    w_h2, b_h2 = _layer_params('hidden_2')
    w_o1, b_o1 = _layer_params('output_1')
    w_o2, b_o2 = _layer_params('output_2')

    # Give each bias a leading row axis so it can be stacked under its weights.
    b_h1 = tf.reshape(b_h1, [1, FLAGS.hidden_size_1])
    b_h2 = tf.reshape(b_h2, [1, FLAGS.hidden_size_2])
    b_o1 = tf.reshape(b_o1, [1, FLAGS.hidden_size_1])
    b_o2 = tf.reshape(b_o2, [1, vocab_size])

    return [tf.concat((w_h1, b_h1), axis=0),
            tf.concat((w_h2, b_h2), axis=0),
            tf.concat((w_o1, b_o1), axis=0),
            tf.concat((w_o2, b_o2), axis=0)]
def build_model2(self):
    """Build the graph that trains the z-encoder (Ez).

    Generates fake images from (z, y), re-encodes them to e_z, and defines
    an L2 reconstruction loss between e_z and the original z. Also creates
    savers for the generator alone and for generator + encoder variables.
    """
    self.weights3, self.biases3 = self.get_en_z_variables()
    # Training Ez
    self.fake_images = self.generate(self.z, self.y, weights=self.weights1, biases=self.biases1)
    self.e_z = self.encode_z(self.fake_images, weights=self.weights3, biases=self.biases3)
    # L2 distance between the encoded latent and the true latent.
    self.loss_z = tf.reduce_mean(tf.square(tf.contrib.layers.flatten(self.e_z - self.z)))
    t_vars = tf.trainable_variables()
    # Variable partition by name substring: 'gen' -> generator, 'enz' -> z-encoder.
    self.g_vars = [var for var in t_vars if 'gen' in var.name]
    self.enz_vars = [var for var in t_vars if 'enz' in var.name]
    # Fix: the original used Python-2-only `print len(...)` statements;
    # the print() function works under both Python 2 and 3.
    print(len(self.g_vars))
    print(len(self.enz_vars))
    self.saver = tf.train.Saver(self.g_vars)
    self.saver_z = tf.train.Saver(self.g_vars + self.enz_vars)
def build_model4(self):
    """Build the graph that regenerates images with a shifted class label.

    Encodes real images into (e_z, e_y), shifts the predicted label by
    `extend_value`, and generates new images from the shifted pair.
    """
    self.weights3, self.biases3 = self.get_en_z_variables()
    self.weights4, self.biases4 = self.get_en_y_variables()
    self.e_z = self.encode_z(self.images, weights=self.weights3, biases=self.biases3)
    self.e_y = self.encode_y(self.images, weights=self.weights4, biases=self.biases4)
    # Changing y: +1 or +2 or +3 (shift the predicted class index, then one-hot).
    # Fix: tf.arg_max is a deprecated alias removed in later TF releases;
    # tf.argmax is the supported spelling with identical semantics.
    self.e_y = tf.one_hot(tf.argmax(self.e_y, 1) + self.extend_value, 10)
    self.fake_images = self.generate(self.e_z, self.e_y, weights=self.weights1, biases=self.biases1)
    t_vars = tf.trainable_variables()
    # Partition variables by name substring for per-component savers.
    self.g_vars = [var for var in t_vars if 'gen' in var.name]
    self.enz_vars = [var for var in t_vars if 'enz' in var.name]
    self.eny_vars = [var for var in t_vars if 'eny' in var.name]
    self.saver = tf.train.Saver(self.g_vars)
    self.saver_z = tf.train.Saver(self.g_vars + self.enz_vars)
    self.saver_y = tf.train.Saver(self.eny_vars)
def get_training_tensors(self, learning_rate=0.001, grad_clip=5):
    """Create the softmax cross-entropy loss and a gradient-clipped Adam
    training op for this model.

    Args:
        learning_rate: Adam learning rate.
        grad_clip: global-norm threshold for gradient clipping.

    Returns:
        (loss, optimizer): the scalar loss tensor and the apply-gradients op.
    """
    # Encode the integer targets as one-hot and match the logits' shape.
    with tf.name_scope('targets-encode'):
        encoded = tf.one_hot(self.targets, self.n_classes)
        targets = tf.reshape(encoded, self.logits.get_shape())

    # Mean cross-entropy over the batch, exported as a summary.
    with tf.name_scope('loss'):
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=targets))
        tf.summary.scalar('loss', loss)

    # Clip gradients by global norm before applying them with Adam.
    with tf.name_scope('optimizer'):
        tvars = tf.trainable_variables()
        clipped, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)
        optimizer = tf.train.AdamOptimizer(learning_rate).apply_gradients(zip(clipped, tvars))

    return loss, optimizer
def get_optimizer(self, learning_rate=0.001, grad_clip=5):
    """Create an MSE loss and a gradient-clipped Adam training op.

    Args:
        learning_rate: Adam learning rate.
        grad_clip: global-norm threshold for gradient clipping.

    Returns:
        (optimizer, loss): the apply-gradients op and the scalar loss tensor.
    """
    # Mean squared error between the network output and the target.
    with tf.variable_scope('loss'):
        loss = tf.losses.mean_squared_error(self.target, self.output)

    # Clip gradients by global norm before applying them with Adam.
    with tf.variable_scope('optimizer'):
        tvars = tf.trainable_variables()
        clipped, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)
        optimizer = tf.train.AdamOptimizer(learning_rate).apply_gradients(zip(clipped, tvars))

    return optimizer, loss
def trainable_variables_on_device(self, rel_device_num, abs_device_num, writable):
    """Return the set of trainable variables on the specified device.

    Args:
        rel_device_num: local worker device index.
        abs_device_num: global graph device index (unused here).
        writable: whether the returned variables is writable or read-only.

    Returns:
        Return the set of trainable variables on the specified device.
    """
    del abs_device_num
    params_refs = tf.trainable_variables()
    if writable:
        return params_refs
    # Read-only path: replace each variable reference with the get-op of
    # its staged copy on this device.
    params = []
    for param in params_refs:
        # Strip the ':0' output suffix to get the bare variable name.
        var_name = param.name.split(':')[0]
        _, var_get_op = self.variable_mgr.staging_vars_on_devices[rel_device_num][
            var_name]
        params.append(var_get_op)
    return params
def trainable_variables_on_device(self, rel_device_num, abs_device_num,
                                  writable=False):
    """Return the set of trainable variables on device.

    Args:
        rel_device_num: local worker device index (unused here).
        abs_device_num: global graph device index.
        writable: whether to get a reference to the underlying variable
            (unused here).

    Returns:
        The set of trainable variables on the specified device.
    """
    del rel_device_num, writable
    if not self.each_tower_has_variables():
        # Shared-variable mode: every device sees the same variable set.
        return tf.trainable_variables()
    # Per-tower mode: variables for device N live under the 'vN/' prefix.
    prefix = 'v%s/' % abs_device_num
    return [v for v in tf.trainable_variables() if v.name.startswith(prefix)]
def savable_variables(self):
    """Returns a list/dict of savable variables to pass to tf.train.Saver."""
    params = {}
    for v in tf.global_variables():
        # Only PS shadow copies of tower-0 variables (plus a few known
        # bookkeeping scalars) are expected among the globals.
        assert (v.name.startswith(variable_mgr_util.PS_SHADOW_VAR_PREFIX + '/v0/') or
                v.name in ('global_step:0', 'loss_scale:0',
                           'loss_scale_normal_steps:0')), (
                    'Invalid global variable: %s' % v)
        # We store variables in the checkpoint with the shadow variable prefix
        # removed so we can evaluate checkpoints in non-distributed replicated
        # mode. The checkpoints can also be loaded for training in
        # distributed_replicated mode.
        name = self._strip_port(self._remove_shadow_var_prefix_if_present(v.name))
        params[name] = v
    for v in tf.local_variables():
        # Non-trainable variables, such as batch norm moving averages, do not have
        # corresponding global shadow variables, so we add them here. Trainable
        # local variables have corresponding global shadow variables, which were
        # added in the global variable loop above.
        if v.name.startswith('v0/') and v not in tf.trainable_variables():
            params[self._strip_port(v.name)] = v
    return params
def _optimize(self):
    '''
    NOTE: The author said that there was no need for 100 d_iter per 100 iters.
    https://github.com/igul222/improved_wgan_training/issues/3
    '''
    global_step = tf.Variable(0, name='global_step')
    # Pull Adam hyper-parameters from the training section of the config.
    training_cfg = self.arch['training']
    optimizer = tf.train.AdamOptimizer(
        training_cfg['lr'], training_cfg['beta1'], training_cfg['beta2'])
    # Minimize the generator loss over every trainable variable.
    with tf.name_scope('Update'):
        opt_g = optimizer.minimize(
            self.loss['G'],
            var_list=tf.trainable_variables(),
            global_step=global_step)
    return {'g': opt_g, 'global_step': global_step}
def loss(self, l2_lambda=0.0001):  # 0.001
    """Mean sparse-softmax cross-entropy over the batch, plus an L2 penalty
    on all trainable variables whose names contain neither 'bias' nor 'alpha'.
    """
    with tf.name_scope("loss"):
        # input: `logits`:[batch_size, num_classes], and `labels`:[batch_size]
        # output: A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the softmax cross entropy loss.
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.input_y_label,logits=self.logits);  # sigmoid_cross_entropy_with_logits.#losses=tf.nn.softmax_cross_entropy_with_logits(labels=self.input_y,logits=self.logits)
        # print("1.sparse_softmax_cross_entropy_with_logits.losses:",losses) # shape=(?,)
        loss = tf.reduce_mean(losses)
        # print("2.loss.loss:", loss) #shape=()
        # L2 weight decay: biases and alpha parameters are excluded by name.
        l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if ('bias' not in v.name ) and ('alpha' not in v.name)]) * l2_lambda
        loss = loss + l2_losses
    return loss

#def loss_seq2seq(self):
#    with tf.variable_scope("loss"):
#        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.input_y_label, logits=self.logits);#losses:[batch_size,self.decoder_sent_length]
#        loss_batch=tf.reduce_sum(losses,axis=1)/self.decoder_sent_length #loss_batch:[batch_size]
#        loss=tf.reduce_mean(loss_batch)
#        l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name]) * self.l2_lambda
#        loss = loss + l2_losses
#        return loss
def loss_nce(self, l2_lambda=0.0001):  # 0.0001-->0.001
    """calculate loss using (NCE)cross entropy here"""
    # Compute the average NCE loss for the batch.
    # tf.nce_loss automatically draws a new sample of the negative labels each
    # time we evaluate the loss.
    if self.is_training:  # training
        # labels=tf.reshape(self.input_y,[-1]) #[batch_size,1]------>[batch_size,]
        labels = tf.expand_dims(self.input_y, 1)  # [batch_size,]----->[batch_size,1]
        loss = tf.reduce_mean(
            # inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations of the input network.
            tf.nn.nce_loss(weights=tf.transpose(self.W_projection),  # [hidden_size*2, num_classes]--->[num_classes,hidden_size*2]. nce_weights:A `Tensor` of shape `[num_classes, dim].O.K.
                           biases=self.b_projection,  # [label_size]. nce_biases:A `Tensor` of shape `[num_classes]`.
                           labels=labels,  # [batch_size,1]. train_labels, # A `Tensor` of type `int64` and shape `[batch_size,num_true]`. The target classes.
                           inputs=self.output_rnn_last,  # [batch_size,hidden_size*2] #A `Tensor` of shape `[batch_size, dim]`. The forward activations of the input network.
                           num_sampled=self.num_sampled,  # scalar. 100
                           num_classes=self.num_classes, partition_strategy="div"))  # scalar. 1999
    # L2 weight decay on every non-bias trainable variable.
    l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name]) * l2_lambda
    loss = loss + l2_losses
    return loss
def build_optim(self, loss):
    """Build the multi-tower training op: averaged gradients plus an
    exponential moving average of all trainable variables.

    Args:
        loss: unused here; gradients come from self.tower_grads.

    Returns:
        The grouped training op.
    """
    step = self.global_step
    # Averaging per-tower gradients is the synchronization point across towers.
    averaged_grads = self.average_gradients(self.tower_grads)
    apply_op = self.opt.apply_gradients(averaged_grads, global_step=step)
    # Track the moving averages of all trainable variables.
    ema = tf.train.ExponentialMovingAverage(0.999, step)
    ema_op = ema.apply(tf.trainable_variables())
    # Group all updates to into a single train op.
    # for m in self.sub_models:
    #     self.log.info(m.device)
    # self.log.fatal('haha')
    return tf.group(apply_op, ema_op)
def prepare_trainer(self, generator_loss, discriminator_loss):
    '''Helper function for init_opt'''
    # Partition trainables by name prefix: 'g_' -> generator, 'd_' -> discriminator.
    all_vars = tf.trainable_variables()
    g_vars = [var for var in all_vars if var.name.startswith('g_')]
    d_vars = [var for var in all_vars if var.name.startswith('d_')]
    # Separate Adam optimizers so each network trains only its own variables.
    generator_opt = tf.train.AdamOptimizer(self.generator_lr, beta1=0.5)
    self.generator_trainer = \
        pt.apply_optimizer(generator_opt, losses=[generator_loss], var_list=g_vars)
    discriminator_opt = tf.train.AdamOptimizer(self.discriminator_lr, beta1=0.5)
    self.discriminator_trainer = \
        pt.apply_optimizer(discriminator_opt, losses=[discriminator_loss], var_list=d_vars)
    # Log both learning rates alongside the losses.
    self.log_vars.append(("g_learning_rate", self.generator_lr))
    self.log_vars.append(("d_learning_rate", self.discriminator_lr))
def build_model(self, sess):
    """Initialize variables and optionally restore generator/discriminator
    weights from self.model_path.

    Returns:
        The iteration counter parsed from the checkpoint filename, or 0
        when starting from fresh parameters.
    """
    self.init_opt()
    # Fix: tf.initialize_all_variables() has been deprecated since TF 0.12
    # and removed in later releases; tf.global_variables_initializer() is
    # the supported equivalent (already used elsewhere in this codebase).
    sess.run(tf.global_variables_initializer())
    if len(self.model_path) > 0:
        print("Reading model parameters from %s" % self.model_path)
        all_vars = tf.trainable_variables()
        # all_vars = tf.all_variables()
        # Restore only generator ('g_') and discriminator ('d_') variables.
        restore_vars = []
        for var in all_vars:
            if var.name.startswith('g_') or var.name.startswith('d_'):
                restore_vars.append(var)
                # print(var.name)
        saver = tf.train.Saver(restore_vars)
        saver.restore(sess, self.model_path)
        # The checkpoint filename encodes the step count as '..._<counter>.<ext>'.
        istart = self.model_path.rfind('_') + 1
        iend = self.model_path.rfind('.')
        counter = self.model_path[istart:iend]
        counter = int(counter)
    else:
        print("Created model with fresh parameters.")
        counter = 0
    return counter
def train(self, loss, global_step):
    """Return a training step for the tensorflow graph.

    Args:
        loss: loss to do sgd on
        global_step: which step are we at
    """
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    apply_op = optimizer.apply_gradients(
        optimizer.compute_gradients(loss), global_step=global_step)
    # Maintain exponential moving averages of all trainable variables.
    ema = tf.train.ExponentialMovingAverage(self.moving_avg_decay, global_step)
    ema_op = ema.apply(tf.trainable_variables())
    # The train op itself is a no-op that merely forces both updates to run.
    with tf.control_dependencies([apply_op, ema_op]):
        return tf.no_op(name='train')
def load_trainable_vars(sess, filename):
    """load a .npz archive and assign the value of each loaded
    ndarray to the trainable variable whose name matches the
    archive key.  Any elements in the archive that do not have
    a corresponding trainable variable will be returned in a dict.
    """
    leftovers = {}
    try:
        by_name = {str(v.name): v for v in tf.trainable_variables()}
        for key, value in np.load(filename).items():
            if key in by_name:
                print('restoring ' + key)
                sess.run(tf.assign(by_name[key], value))
            else:
                # No matching variable; hand the array back to the caller.
                leftovers[key] = value
    except IOError:
        # Missing/unreadable archive: best-effort load, return what we have.
        pass
    return leftovers
def get_trainable_variables(trainable_scopes):
    """Returns a list of variables to train.

    Returns:
        A list of variables to train by the optimizer.
    """
    # No scope filter: train everything.
    if trainable_scopes is None:
        return tf.trainable_variables()
    # Otherwise collect the trainable variables of each (stripped) scope.
    selected = []
    for scope in (s.strip() for s in trainable_scopes):
        selected.extend(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope))
    return selected
def build_model(self):
    """Instantiate the configured model class, then build the RMSProp
    training op with global-norm gradient clipping and a Saver."""
    self.model = classmap[FLAGS.model_type](hidden_size=FLAGS.hidden,
                                            vocab_size=self.vocab_size,
                                            encoder_in_size=self.data.feats.shape[-1],
                                            encoder_in_length=self.data.feats.shape[1],
                                            decoder_in_length=self.data.decoder_in.shape[-1] - 1,
                                            word2vec_weight=self.w2v_W,
                                            embedding_size=FLAGS.embedding_dim,
                                            neg_sample_num=self.sample_num,
                                            start_id=self.vocab_processor._mapping['<BOS>'],
                                            end_id=self.vocab_processor._mapping['<EOS>'],
                                            Bk=FLAGS.K)
    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    self.optimizer = tf.train.RMSPropOptimizer(FLAGS.lr)
    tvars = tf.trainable_variables()
    # Clip gradients to a global norm of 5 before applying.
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.model.cost, tvars), 5)
    self.updates = self.optimizer.apply_gradients(
        zip(grads, tvars), global_step=self.global_step)
    # Save all global (not just trainable) variables.
    self.saver = tf.train.Saver(tf.global_variables())
def get_number_of_trainable_parameters():
    """Count the trainable parameters of the default graph, logging a
    per-variable breakdown at debug level.

    Returns:
        Total number of scalar parameters across tf.trainable_variables().
    """
    # https://stackoverflow.com/questions/38160940/ ...
    LOGGER.debug('Now compute total number of trainable params...')
    total_parameters = 0
    for variable in tf.trainable_variables():
        shape = variable.get_shape()
        # Product of the static dimension sizes = element count.
        n_params = 1
        for dim in shape:
            n_params *= dim.value
        LOGGER.debug(' layer name = {}, shape = {}, n_params = {}'.format(
            variable.name, shape, n_params
        ))
        total_parameters += n_params
    LOGGER.debug('Total parameters = %d' % total_parameters)
    return total_parameters
def testCreateLogisticClassifier(self):
    """Single-clone deployment of a logistic classifier: expect 2 variables,
    no update ops, and one grad/var pair per trainable variable."""
    g = tf.Graph()
    with g.as_default():
        tf.set_random_seed(0)
        tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
        tf_labels = tf.constant(self._labels, dtype=tf.float32)
        model_fn = LogisticClassifier
        clone_args = (tf_inputs, tf_labels)
        deploy_config = model_deploy.DeploymentConfig(num_clones=1)
        # Graph starts empty; creating clones adds the model variables.
        self.assertEqual(slim.get_variables(), [])
        clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
        self.assertEqual(len(slim.get_variables()), 2)
        # A logistic classifier has no batch norm, hence no update ops.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        self.assertEqual(update_ops, [])
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
        total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
                                                                  optimizer)
        self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
        self.assertEqual(total_loss.op.name, 'total_loss')
        # Gradients are unplaced; variables live on the CPU.
        for g, v in grads_and_vars:
            self.assertDeviceEqual(g.device, '')
            self.assertDeviceEqual(v.device, 'CPU:0')
def testCreateSingleclone(self):
    """Single-clone deployment of a batch-norm classifier: expect 5
    variables, 2 batch-norm update ops, and one grad/var pair per
    trainable variable."""
    g = tf.Graph()
    with g.as_default():
        tf.set_random_seed(0)
        tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
        tf_labels = tf.constant(self._labels, dtype=tf.float32)
        model_fn = BatchNormClassifier
        clone_args = (tf_inputs, tf_labels)
        deploy_config = model_deploy.DeploymentConfig(num_clones=1)
        # Graph starts empty; creating clones adds the model variables.
        self.assertEqual(slim.get_variables(), [])
        clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
        self.assertEqual(len(slim.get_variables()), 5)
        # Batch norm contributes two moving-average update ops.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        self.assertEqual(len(update_ops), 2)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
        total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
                                                                  optimizer)
        self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
        self.assertEqual(total_loss.op.name, 'total_loss')
        # Gradients are unplaced; variables live on the CPU.
        for g, v in grads_and_vars:
            self.assertDeviceEqual(g.device, '')
            self.assertDeviceEqual(v.device, 'CPU:0')
def get_update_op(self, loss, opts, global_step=None, max_gradient_norm=None, freeze_variables=None):
    """Build one apply-gradients op per optimizer in `opts`, skipping
    frozen variables and optionally clipping gradients by global norm.

    Args:
        loss: scalar loss tensor, or None to build nothing.
        opts: iterable of optimizer instances; one update op is built each.
        global_step: optional step variable incremented by each update op.
        max_gradient_norm: if truthy, clip gradients to this global norm.
        freeze_variables: regex patterns; matching variables get no updates.

    Returns:
        List of update ops, or None when loss is None.
    """
    if loss is None:
        return None
    freeze_variables = freeze_variables or []
    # compute gradient only for variables that are not frozen
    # (a variable is frozen when any pattern matches its full name).
    frozen_parameters = [var.name for var in tf.trainable_variables()
                         if any(re.match(var_, var.name) for var_ in freeze_variables)]
    params = [var for var in tf.trainable_variables() if var.name not in frozen_parameters]
    self.params = params
    gradients = tf.gradients(loss, params)
    if max_gradient_norm:
        gradients, _ = tf.clip_by_global_norm(gradients, max_gradient_norm)
    update_ops = []
    for opt in opts:
        # Name the gradient scope after this model when it has a name, so
        # multiple models in one graph don't collide.
        with tf.variable_scope('gradients' if self.name is None else 'gradients_{}'.format(self.name)):
            update_op = opt.apply_gradients(list(zip(gradients, params)), global_step=global_step)
            update_ops.append(update_op)
    return update_ops
def _create_optimizer(self):
    """Create a gradient-clipped SGD training op for every bucket.

    Skipped entirely in forward-only mode. Populates self.gradient_norms
    and self.train_ops with one entry per bucket in config.BUCKETS.
    """
    print('Create optimizer... ')
    with tf.variable_scope('training'):
        self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False,
                                       name='global_step')
        if not self.fw_only:
            self.optimizer = tf.train.GradientDescentOptimizer(config.LR)
            trainable_vars = tf.trainable_variables()
            self.gradient_norms = []
            self.train_ops = []
            start = time.time()
            # One clipped-gradient train op per bucket; each bucket has its
            # own loss tensor in self.losses.
            for bucket_id in range(len(config.BUCKETS)):
                clipped_grads, norm = tf.clip_by_global_norm(
                    tf.gradients(self.losses[bucket_id], trainable_vars),
                    config.MAX_GRAD_NORM)
                self.gradient_norms.append(norm)
                self.train_ops.append(self.optimizer.apply_gradients(
                    zip(clipped_grads, trainable_vars),
                    global_step=self.global_step))
                print('Creating opt for bucket {:d} took {:.2f} seconds.'.format(
                    bucket_id, time.time() - start))
                start = time.time()
def build_model(self): self.inputs = tf.placeholder(tf.float32, [self.batch_size, self.input_size, self.input_size, 3], name='real_images') # self.inputs = tf.placeholder(tf.float32, [None, self.input_size, self.input_size, 3], name='real_images') try: self.up_inputs = tf.image.resize_images(self.inputs, self.image_shape[0], self.image_shape[1], tf.image.ResizeMethod.NEAREST_NEIGHBOR) except ValueError: # newer versions of tensorflow self.up_inputs = tf.image.resize_images(self.inputs, [self.image_shape[0], self.image_shape[1]], tf.image.ResizeMethod.NEAREST_NEIGHBOR) self.images = tf.placeholder(tf.float32, [self.batch_size] + self.image_shape, name='real_images') # self.images = tf.placeholder(tf.float32, [None] + self.image_shape, name='real_images') self.sample_images= tf.placeholder(tf.float32, [self.sample_size] + self.image_shape, name='sample_images') # self.sample_images = tf.placeholder(tf.float32, [None] + self.image_shape, name='sample_images') self.G = self.generator(self.inputs) self.G_sum = tf.image_summary("G", self.G) self.g_loss = tf.reduce_mean(tf.square(self.images-self.G)) self.g_loss_sum = tf.scalar_summary("g_loss", self.g_loss) t_vars = tf.trainable_variables() self.g_vars = [var for var in t_vars if 'g_' in var.name] self.saver = tf.train.Saver()
def _flatgrad(self, loss, var_list):
    """ A Tensorflow version of John Schulman's `flatgrad` function. It
    computes the gradients but does NOT apply them (for now).

    This is only called during the `init` of the TRPO graph, so I think
    it's OK. Otherwise, wouldn't it be constantly rebuilding the
    computational graph? Or doing something else? Eh, for now I think
    it's OK.

    Params:
        loss: The loss function we're optimizing, which I assume is
            always scalar-valued.
        var_list: The list of variables (from `tf.trainable_variables()`)
            to take gradients. This should only be for the policynets.

    Returns:
        A single flat vector with all gradients concatenated.
    """
    flattened = [tf.reshape(grad, [-1])
                 for grad in tf.gradients(loss, var_list)]
    return tf.concat(flattened, axis=0)
def init_optimizer(self):
    """Select the optimizer named by self.optimizer, then build a
    gradient-clipped update op over all trainable variables."""
    print("setting optimizer..")
    # Gradients and SGD update operation for training the model.
    trainable_params = tf.trainable_variables()
    # Dispatch on the (case-insensitive) optimizer name; SGD is the fallback.
    optimizer_classes = {
        'adadelta': tf.train.AdadeltaOptimizer,
        'adam': tf.train.AdamOptimizer,
        'rmsprop': tf.train.RMSPropOptimizer,
    }
    opt_class = optimizer_classes.get(self.optimizer.lower(),
                                      tf.train.GradientDescentOptimizer)
    self.opt = opt_class(learning_rate=self.learning_rate)
    # Compute gradients of loss w.r.t. all trainable variables and clip
    # them to the configured maximum global norm.
    gradients = tf.gradients(self.loss, trainable_params)
    clip_gradients, _ = tf.clip_by_global_norm(gradients, self.max_gradient_norm)
    # Update the model.
    self.updates = self.opt.apply_gradients(
        zip(clip_gradients, trainable_params), global_step=self.global_step)
def create(config):
    """Build a softmax classifier graph and register its tensors.

    Args:
        config: dict with 'batch_size' and 'learning_rate'.

    Side effects:
        Registers x, y, loss, optimizer and accuracy via set_tensor().
    """
    batch_size = config["batch_size"]
    x = tf.placeholder(tf.float32, [batch_size, X_DIMS[0], X_DIMS[1], 1], name="x")
    y = tf.placeholder(tf.float32, [batch_size, Y_DIMS], name="y")
    hidden = hidden_layers(config, x)
    output = output_layer(config, hidden)
    # Fix: softmax_cross_entropy_with_logits was being called with
    # positional (output, y) arguments; newer TF requires the keyword form,
    # and keywords make the logits/labels roles explicit.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=y),
        name="loss")
    output = tf.nn.softmax(output)
    correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # (Removed an unused `variables = tf.trainable_variables()` local.)
    optimizer = tf.train.GradientDescentOptimizer(config['learning_rate']).minimize(loss)
    set_tensor("x", x)
    set_tensor("y", y)
    set_tensor("loss", loss)
    set_tensor("optimizer", optimizer)
    set_tensor("accuracy", accuracy)
def _build_train_op(self):
    """Build training specific ops for the graph."""
    self.lrn_rate = tf.constant(self.hps.lrn_rate, tf.float32)
    tf.summary.scalar('learning_rate', self.lrn_rate)

    tvars = tf.trainable_variables()
    grads = tf.gradients(self.cost, tvars)

    # Pick the optimizer named in the hyper-parameters.
    if self.hps.optimizer == 'sgd':
        optimizer = tf.train.GradientDescentOptimizer(self.lrn_rate)
    elif self.hps.optimizer == 'mom':
        optimizer = tf.train.MomentumOptimizer(self.lrn_rate, 0.9)

    apply_op = optimizer.apply_gradients(
        zip(grads, tvars),
        global_step=self.global_step,
        name='train_step')

    # Bundle the gradient step with any extra ops (e.g. batch-norm updates).
    self.train_op = tf.group(*([apply_op] + self._extra_train_ops))
    # TODO(xpan): Consider batch_norm in contrib/layers/python/layers/layers.py
def summarize_variables(train_vars=None, summary_collection="tflearn_summ"):
    """ summarize_variables.

    Arguemnts:
        train_vars: list of `Variable`. The variable weights to monitor.
        summary_collection: A collection to add this summary to and
            also used for returning a merged summary over all its elements.
            Default: 'tflearn_summ'.

    Returns:
        `Tensor`. Merge of all summary in 'summary_collection'
    """
    # Default to every trainable variable in the graph.
    if not train_vars:
        train_vars = tf.trainable_variables()
    summaries.add_trainable_vars_summary(train_vars, "", "", summary_collection)
    return merge_summary(tf.get_collection(summary_collection))
def testUsage(self):
    """The stop_gradient custom getter keeps variables trainable but makes
    their gradients None."""
    with tf.variable_scope("", custom_getter=snt.custom_getters.stop_gradient):
        lin1 = snt.Linear(10, name="linear1")

    x = tf.placeholder(tf.float32, [10, 10])
    y = lin1(x)

    # The Linear still registers its weight and bias as trainable.
    variables = tf.trainable_variables()
    variable_names = [v.name for v in variables]

    self.assertEqual(2, len(variables))

    self.assertIn("linear1/w:0", variable_names)
    self.assertIn("linear1/b:0", variable_names)

    grads = tf.gradients(y, variables)
    names_to_grads = {var.name: grad for var, grad in zip(variables, grads)}

    # stop_gradient severs the backward path: both gradients are None.
    self.assertEqual(None, names_to_grads["linear1/w:0"])
    self.assertEqual(None, names_to_grads["linear1/b:0"])
def testCustomGetter(self):
    """Check that custom getters work appropriately."""

    def custom_getter(getter, *args, **kwargs):
        # Force every variable created through this getter to be non-trainable.
        kwargs["trainable"] = False
        return getter(*args, **kwargs)

    inputs = tf.placeholder(tf.float32, shape=[self.batch_size, self.in_size])

    # Make w and b non-trainable.
    lin1 = snt.Linear(output_size=self.out_size,
                      custom_getter=custom_getter)
    lin1(inputs)
    self.assertEqual(0, len(tf.trainable_variables()))
    self.assertEqual(2, len(tf.global_variables()))

    # Make w non-trainable. A per-variable getter dict applies only to 'w',
    # so 'b' stays trainable.
    lin2 = snt.Linear(output_size=self.out_size,
                      custom_getter={"w": custom_getter})
    lin2(inputs)
    self.assertEqual(1, len(tf.trainable_variables()))
    self.assertEqual(4, len(tf.global_variables()))
def _get_variables_to_train():
    """Returns a list of variables to train.

    Returns:
        A list of variables to train by the optimizer.
    """
    # No scope restriction configured: train everything.
    if FLAGS.trainable_scopes is None:
        return tf.trainable_variables()
    # Otherwise gather the trainables of each comma-separated scope.
    selected = []
    for raw_scope in FLAGS.trainable_scopes.split(','):
        selected.extend(
            tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, raw_scope.strip()))
    return selected

########################################################################
def _load(self, sess):
    """Restore model weights from a checkpoint chosen by the config,
    optionally mapping EMA shadow names onto the live variables."""
    config = self.config
    # Map bare variable names (no ':0' suffix) to variables.
    vars_ = {var.name.split(":")[0]: var for var in tf.all_variables()}
    if config.load_ema:
        # Load the EMA shadow values INTO the trainable variables: drop the
        # raw entry and re-key each trainable variable by its EMA name.
        ema = self.model.var_ema
        for var in tf.trainable_variables():
            del vars_[var.name.split(":")[0]]
            vars_[ema.average_name(var)] = var
    saver = tf.train.Saver(vars_, max_to_keep=config.max_to_keep)

    # Checkpoint selection precedence: explicit path > explicit step >
    # latest checkpoint in save_dir.
    if config.load_path:
        save_path = config.load_path
    elif config.load_step > 0:
        save_path = os.path.join(config.save_dir,
                                 "{}-{}".format(config.model_name, config.load_step))
    else:
        save_dir = config.save_dir
        checkpoint = tf.train.get_checkpoint_state(save_dir)
        assert checkpoint is not None, "cannot load checkpoint at {}".format(save_dir)
        save_path = checkpoint.model_checkpoint_path
    print("Loading saved model from {}".format(save_path))
    saver.restore(sess, save_path)
def build_model(self):
    """Build the encoder/generator graph with a KL + reconstruction loss,
    plus both alternating and one-shot Adam optimizers."""
    self.x = tf.placeholder(tf.float32, [self.reader.vocab_size], name="input")
    self.x_idx = tf.placeholder(tf.int32, [None], name="x_idx")

    self.build_encoder()
    self.build_generator()

    # Kullback Leibler divergence
    self.e_loss = -0.5 * tf.reduce_sum(1 + self.log_sigma_sq - tf.square(self.mu) - tf.exp(self.log_sigma_sq))

    # Log likelihood (small epsilon guards against log(0)).
    self.g_loss = -tf.reduce_sum(tf.log(tf.gather(self.p_x_i, self.x_idx) + 1e-10))

    self.loss = self.e_loss + self.g_loss

    # Partition trainables by name for the alternating update scheme.
    self.encoder_var_list, self.generator_var_list = [], []
    for var in tf.trainable_variables():
        if "encoder" in var.name:
            self.encoder_var_list.append(var)
        elif "generator" in var.name:
            self.generator_var_list.append(var)

    # optimizer for alternative update
    self.optim_e = tf.train.AdamOptimizer(learning_rate=self.lr) \
        .minimize(self.e_loss, global_step=self.step, var_list=self.encoder_var_list)
    self.optim_g = tf.train.AdamOptimizer(learning_rate=self.lr) \
        .minimize(self.g_loss, global_step=self.step, var_list=self.generator_var_list)

    # optimizer for one shot update
    self.optim = tf.train.AdamOptimizer(learning_rate=self.lr) \
        .minimize(self.loss, global_step=self.step)

    _ = tf.scalar_summary("encoder loss", self.e_loss)
    _ = tf.scalar_summary("generator loss", self.g_loss)
    _ = tf.scalar_summary("total loss", self.loss)
def _add_train_graph(self):
    """Define the training operation."""
    mc = self.mc

    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    # Staircase exponential learning-rate decay.
    lr = tf.train.exponential_decay(mc.LEARNING_RATE,
                                    self.global_step,
                                    mc.DECAY_STEPS,
                                    mc.LR_DECAY_FACTOR,
                                    staircase=True)

    tf.summary.scalar('learning_rate', lr)

    _add_loss_summaries(self.loss)

    opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=mc.MOMENTUM)
    grads_vars = opt.compute_gradients(self.loss, tf.trainable_variables())

    with tf.variable_scope('clip_gradient') as scope:
        for i, (grad, var) in enumerate(grads_vars):
            # Fix: compute_gradients returns (None, var) for variables not
            # on the loss path, and tf.clip_by_norm(None, ...) raises. The
            # summary loop below already guards `grad is not None`, so the
            # same guard belongs here.
            if grad is not None:
                grads_vars[i] = (tf.clip_by_norm(grad, mc.MAX_GRAD_NORM), var)

    apply_gradient_op = opt.apply_gradients(grads_vars, global_step=self.global_step)

    # Histograms of every trainable variable and every defined gradient.
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)

    for grad, var in grads_vars:
        if grad is not None:
            tf.summary.histogram(var.op.name + '/gradients', grad)

    # The train op is a no-op that merely forces the gradient step to run.
    with tf.control_dependencies([apply_gradient_op]):
        self.train_op = tf.no_op(name='train')
def compute_gradients(loss, learning_rate, gradient_clipping=-1):
    """ Create optimizer, compute gradients and (optionally) apply
    gradient clipping """
    opt = tf.train.AdamOptimizer(learning_rate)
    if gradient_clipping <= 0:
        # No clipping requested: let the optimizer pair gradients itself.
        return opt, opt.compute_gradients(loss)
    # Clip by global norm across every trainable variable.
    tvars = tf.trainable_variables()
    clipped, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars),
                                        clip_norm=gradient_clipping)
    return opt, list(zip(clipped, tvars))
def build_model1(self):
    """Construct the GAN graph: generator output, discriminator losses on
    real and fake images, per-network variable lists, and summaries."""
    # Constructing the Gan
    # Get the variables
    self.fake_images = self.generate(self.z, self.y, weights=self.weights1, biases=self.biases1)

    # the loss of dis network
    self.D_pro = self.discriminate(self.images, self.y, self.weights2, self.biases2, False)
    self.G_pro = self.discriminate(self.fake_images, self.y, self.weights2, self.biases2, True)

    # TINY keeps the log arguments strictly positive.
    self.G_fake_loss = -tf.reduce_mean(tf.log(self.G_pro + TINY))
    self.loss = -tf.reduce_mean(tf.log(1. - self.G_pro + TINY) + tf.log(self.D_pro + TINY))

    self.log_vars.append(("generator_loss", self.G_fake_loss))
    self.log_vars.append(("discriminator_loss", self.loss))

    t_vars = tf.trainable_variables()
    # Partition by name substring: 'dis' -> discriminator, 'gen' -> generator.
    self.d_vars = [var for var in t_vars if 'dis' in var.name]
    self.g_vars = [var for var in t_vars if 'gen' in var.name]

    self.saver = tf.train.Saver(self.g_vars)
    for k, v in self.log_vars:
        tf.summary.scalar(k, v)
def build_model3(self):
    """Build the y-encoder branch and its MSE training loss.

    Sets the encoder-y weights/biases, the encoded output ``self.e_y``,
    the squared-error loss against ``self.y``, the 'eny' variable subset,
    and a Saver restricted to those variables.
    """
    self.weights4, self.biases4 = self.get_en_y_variables()

    # Encode images back to their conditioning labels y.
    self.e_y = self.encode_y(self.images,
                             weights=self.weights4,
                             biases=self.biases4)

    # Mean squared error between the encoding and the true labels.
    self.loss_y = tf.reduce_mean(tf.square(self.e_y - self.y))

    # Only variables created under the 'eny' naming convention are trained
    # and checkpointed here.
    self.eny_vars = [v for v in tf.trainable_variables() if 'eny' in v.name]
    self.saver_y = tf.train.Saver(self.eny_vars)

# Test model
def __initialize_DDQN(self):
    """Initialize Double DQN.

    Resets the default graph, builds identical main and target Q-networks,
    creates the target-sync ops, opens a session, initializes all variables,
    and copies the main network into the target network once.
    """
    tf.reset_default_graph()

    # Main and target networks share the same architecture/hyperparameters.
    net_kwargs = dict(clip=self.grad_clip,
                      activation_fn=tf.nn.relu,
                      hidden_layer_sizes=self.ddqn_hidden_layer_sizes)
    self.mainDQN = Qnetwork(self.num_dims, self.num_actions, **net_kwargs)
    self.targetDQN = Qnetwork(self.num_dims, self.num_actions, **net_kwargs)

    init = tf.global_variables_initializer()
    self.trainables = tf.trainable_variables()
    self.targetOps = self.__update_target_graph()

    self.sess = tf.Session()
    self.sess.run(init)
    # Start training with target == main.
    self.__update_target()
def get_optimizers(self, learning_rate=0.002, smooth=0.1):
    """Build GAN losses and per-network Adam optimizers.

    Args:
        learning_rate: Adam learning rate shared by both optimizers.
        smooth: one-sided label smoothing applied to the real labels.

    Returns:
        (dsc_train_opt, gen_train_opt, dsc_loss, gen_loss)
    """
    #-----------------------------------------------------------------------
    # Define loss functions
    #-----------------------------------------------------------------------
    # BUGFIX: scope name typo 'loses' -> 'losses'. The scope contains only
    # loss ops (no variables), so checkpoints are unaffected; op-name
    # prefixes in summaries/graph visualizations change accordingly.
    with tf.variable_scope('losses'):
        # Real labels smoothed to (1 - smooth) to regularize the
        # discriminator.
        dsc_real_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.dsc_real_logits,
                labels=tf.ones_like(self.dsc_real_logits) * (1 - smooth)))

        dsc_fake_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.dsc_fake_logits,
                labels=tf.zeros_like(self.dsc_fake_logits)))

        dsc_loss = (dsc_real_loss + dsc_fake_loss)/2

        # Generator wants the discriminator to label fakes as real.
        gen_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.dsc_fake_logits,
                labels=tf.ones_like(self.dsc_fake_logits)))

    #-----------------------------------------------------------------------
    # Optimizers
    #-----------------------------------------------------------------------
    # Split trainables by name prefix so each optimizer only updates
    # its own network.
    trainable_vars = tf.trainable_variables()
    gen_vars = [var for var in trainable_vars
                if var.name.startswith('generator')]
    dsc_vars = [var for var in trainable_vars
                if var.name.startswith('discriminator')]

    with tf.variable_scope('optimizers'):
        # BUGFIX: scope name typo 'deiscriminator_optimizer' ->
        # 'discriminator_optimizer'. NOTE(review): this renames the Adam
        # slot-variable scope — confirm no existing checkpoint must be
        # restored under the old misspelled name before deploying.
        with tf.variable_scope('discriminator_optimizer'):
            dsc_train_opt = tf.train.AdamOptimizer(learning_rate) \
                .minimize(dsc_loss, var_list=dsc_vars)
        with tf.variable_scope('generator_optimizer'):
            gen_train_opt = tf.train.AdamOptimizer(learning_rate) \
                .minimize(gen_loss, var_list=gen_vars)

    return dsc_train_opt, gen_train_opt, dsc_loss, gen_loss
def _optimize(self):
    '''
    NOTE: The author said that there was no need for 100 d_iter per 100 iters.
    https://github.com/igul222/improved_wgan_training/issues/3

    Builds Adam updates for the discriminator, encoder, and generator
    (generator gated on the encoder update). Returns a dict of the three
    train ops plus the global step.
    '''
    # BUGFIX: tf.Variable defaults to trainable=True, which would put the
    # step counter into tf.trainable_variables(). Mark it non-trainable
    # (consistent with the usual global_step convention elsewhere in this
    # codebase).
    global_step = tf.Variable(0, name='global_step', trainable=False)
    lr = self.arch['training']['lr']
    b1 = self.arch['training']['beta1']
    b2 = self.arch['training']['beta2']

    optimizer = tf.train.AdamOptimizer(lr, b1, b2)

    # Partition trainables by sub-network name.
    trainables = tf.trainable_variables()
    g_vars = [v for v in trainables
              if 'Generator' in v.name or 'y_emb' in v.name]
    d_vars = [v for v in trainables if 'Discriminator' in v.name]
    e_vars = [v for v in trainables if 'Encoder' in v.name]

    # # Debug ===============
    # debug(['Generator', 'Discriminator'], [g_vars, d_vars])
    # # ============================

    with tf.name_scope('Update'):
        opt_d = optimizer.minimize(self.loss['l_D'], var_list=d_vars)
        opt_e = optimizer.minimize(self.loss['l_E'], var_list=e_vars)
        # Generator step runs only after the encoder step; it also
        # advances global_step.
        with tf.control_dependencies([opt_e]):
            opt_g = optimizer.minimize(self.loss['l_G'], var_list=g_vars,
                                       global_step=global_step)
    return {
        'd': opt_d,
        'g': opt_g,
        'e': opt_e,
        'global_step': global_step,
    }
def _optimize(self):
    '''
    NOTE: The author said that there was no need for 100 d_iter per 100 iters.
    https://github.com/igul222/improved_wgan_training/issues/3

    Builds independent Adam updates for the generator and discriminator.
    Returns a dict with both train ops and the global step.
    '''
    # BUGFIX: tf.Variable defaults to trainable=True, which would put the
    # step counter into tf.trainable_variables(). Mark it non-trainable
    # (standard global_step convention).
    global_step = tf.Variable(0, name='global_step', trainable=False)
    lr = self.arch['training']['lr']
    b1 = self.arch['training']['beta1']
    b2 = self.arch['training']['beta2']

    optimizer = tf.train.AdamOptimizer(lr, b1, b2)

    # Partition trainables by sub-network name.
    trainables = tf.trainable_variables()
    g_vars = [v for v in trainables
              if 'Generator' in v.name or 'y_emb' in v.name]
    d_vars = [v for v in trainables if 'Discriminator' in v.name]

    # # Debug ===============
    # debug(['Generator', 'Discriminator'], [g_vars, d_vars])
    # # ============================

    with tf.name_scope('Update'):
        # Only the generator step advances global_step.
        opt_g = optimizer.minimize(self.loss['l_G'], var_list=g_vars,
                                   global_step=global_step)
        opt_d = optimizer.minimize(self.loss['l_D'], var_list=d_vars)
    return {
        'd': opt_d,
        'g': opt_g,
        'global_step': global_step,
    }
def _optimize(self):
    '''
    NOTE: The author said that there was no need for 100 d_iter per 100 iters.
    https://github.com/igul222/improved_wgan_training/issues/3

    Builds Adam updates for generator/discriminator plus a gradient-ascent
    step (via minimizing the negated loss) on the 'lambda' multiplier
    variables; the discriminator step is gated on the lambda step.
    '''
    # BUGFIX: tf.Variable defaults to trainable=True, which would put the
    # step counter into tf.trainable_variables(). Mark it non-trainable
    # (standard global_step convention).
    global_step = tf.Variable(0, name='global_step', trainable=False)
    lr = self.arch['training']['lr']
    b1 = self.arch['training']['beta1']
    b2 = self.arch['training']['beta2']
    rho = self.arch['training']['rho']

    optimizer = tf.train.AdamOptimizer(lr, b1, b2)
    optimizer_l = tf.train.GradientDescentOptimizer(rho)

    # Partition trainables by sub-network name.
    trainables = tf.trainable_variables()
    g_vars = [v for v in trainables
              if 'Generator' in v.name or 'y_emb' in v.name]
    d_vars = [v for v in trainables if 'Discriminator' in v.name]
    l_vars = [v for v in trainables if 'lambda' in v.name]

    # # Debug ===============
    # debug(['G', 'D', 'lambda'], [g_vars, d_vars, l_vars])
    # # ============================

    with tf.name_scope('Update'):
        opt_g = optimizer.minimize(self.loss['l_G'], var_list=g_vars,
                                   global_step=global_step)
        # Minimizing -l_D ascends l_D w.r.t. the lambda multipliers.
        opt_l = optimizer_l.minimize(- self.loss['l_D'], var_list=l_vars)
        # Discriminator step runs only after the lambda step.
        with tf.control_dependencies([opt_l]):
            opt_d = optimizer.minimize(self.loss['l_D'], var_list=d_vars)
    return {
        'd': opt_d,
        'g': opt_g,
        'l': opt_l,
        'global_step': global_step,
    }
def _save(self):
    """Save model weights in .npy format.

    Evaluates every trainable variable plus the SAVE_TENSORS collection in
    one session run and dumps a {variable_name: value} dict to a file named
    from the run's flags and current epoch. Subclasses may override this.
    """
    to_save = (tf.trainable_variables()
               + tf.get_collection(tf.GraphKeys.SAVE_TENSORS))
    values = self.sess.run(to_save)
    weights = {var.name: val for var, val in zip(to_save, values)}

    name = "{}/{}_{}_{}_{}.npy".format(self.flags.save_path,
                                       self.flags.task,
                                       self.flags.run_name,
                                       self.flags.net,
                                       self.flags.pre_epochs + int(self.epoch))
    np.save(name, weights)