The following 39 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.RunOptions().
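Before the extracted examples, here is a minimal, self-contained sketch of the pattern most of them share: pass a tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) together with a tf.RunMetadata() into Session.run, then convert the collected step stats into a Chrome trace. This is only a sketch under the TF 1.x API; the toy graph and the output path timeline.json are illustrative assumptions, not taken from any of the projects below.

import tensorflow as tf
from tensorflow.python.client import timeline

# Toy graph; the examples below profile real training ops instead.
x = tf.random_normal([1000, 1000])
y = tf.matmul(x, x)

with tf.Session() as sess:
    # Ask the runtime to record a full trace of this single step.
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    sess.run(y, options=run_options, run_metadata=run_metadata)

    # Convert the recorded step stats to chrome://tracing JSON.
    tl = timeline.Timeline(run_metadata.step_stats)
    with open('timeline.json', 'w') as f:
        f.write(tl.generate_chrome_trace_format())
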
def _refresh_status(self, sess):
    fetches = {
        "l_D": self.loss['l_D'],
        "l_G": self.loss['l_G'],
        "step": self.opt['global_step'],
    }
    result = sess.run(
        fetches=fetches,
        # options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
        # run_metadata=run_metadata,
    )

    # trace = timeline.Timeline(step_stats=run_metadata.step_stats)
    # with open(os.path.join(dirs['logdir'], 'timeline.ctf.json'), 'w') as fp:
    #     fp.write(trace.generate_chrome_trace_format())

    # Message
    msg = 'Iter {:05d}: '.format(result['step'])
    msg += 'l_D={:.3e} '.format(result['l_D'])
    msg += 'l_G={:.3e} '.format(result['l_G'])
    print('\r{}'.format(msg), end='', flush=True)
    logging.info(msg)

def _refresh_status(self, sess):
    fetches = {
        "D_KL": self.loss['D_KL'],
        "logP": self.loss['logP'],
        "step": self.opt['global_step'],
    }
    result = sess.run(
        fetches=fetches,
        # options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
        # run_metadata=run_metadata,
    )

    # trace = timeline.Timeline(step_stats=run_metadata.step_stats)
    # with open(os.path.join(dirs['logdir'], 'timeline.ctf.json'), 'w') as fp:
    #     fp.write(trace.generate_chrome_trace_format())

    # Message
    msg = 'Iter {:05d}: '.format(result['step'])
    msg += 'log P(x|z, y) = {:.3e} '.format(result['logP'])
    msg += 'D_KL(z) = {:.3e} '.format(result['D_KL'])
    print('\r{}'.format(msg), end='', flush=True)
    logging.info(msg)

def __init__(self, output_file_name=None, output_directory=None,
             each_time=None, **kwargs):
    self.output_file_name = output_file_name
    self.output_directory = output_directory
    self.each_time = each_time
    self.local_run_metadata = None

    if self.each_time:
        warnings.warn("Outputting a trace for each run. "
                      "May result in large disk usage.")

    super(TracerSession, self).__init__(**kwargs)
    self.counter = 0
    self.profiler_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)

    if self.output_directory is not None:
        if os.path.isfile(self.output_directory):
            raise IOError("In tracer: given directory name is a file.")
        if not os.path.isdir(self.output_directory):
            os.mkdir(self.output_directory)

def run(self, fetches, feed_dict=None):
    """like Session.run, but return a Timeline object in Chrome trace format (JSON).

    Save the json to a file, go to chrome://tracing, and open the file.

    Args:
        fetches
        feed_dict

    Returns:
        dict: a JSON dict
    """
    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    super(ProfiledSession, self).run(fetches, feed_dict,
                                     options=options, run_metadata=run_metadata)

    # Create the Timeline object, and write it to a json
    tl = timeline.Timeline(run_metadata.step_stats)
    ctf = tl.generate_chrome_trace_format()
    return json.loads(ctf)

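A brief, hedged usage sketch for the ProfiledSession above, assuming it can be constructed like an ordinary tf.Session; the toy fetch and the output file name are illustrative assumptions:

import json
import tensorflow as tf

# ProfiledSession.run (above) returns a chrome://tracing dict,
# so it can be dumped straight to disk and opened in chrome://tracing.
y = tf.matmul(tf.random_normal([64, 64]), tf.random_normal([64, 64]))
sess = ProfiledSession()      # assumes Session-style construction
trace_dict = sess.run(y)
with open('profile.json', 'w') as f:
    json.dump(trace_dict, f)
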
def train_it(sess, step=1):
    _pat_chars_i, _pat_lens = get_batch(__batch_size)
    inputs = {
        pat_chars_i: _pat_chars_i,
        pat_lens: _pat_lens}
    # Run optimization op (backprop)
    #run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    #run_metadata = tf.RunMetadata()
    #sess.run(optimizer, feed_dict=inputs, options=run_options, run_metadata=run_metadata)
    sess.run(optimizer, feed_dict=inputs)
    #with open('timeline.json', 'w') as f:
    #    f.write(
    #        timeline.Timeline(run_metadata.step_stats)
    #        .generate_chrome_trace_format())
    if step % display_step == 0:
        # Calculate batch loss
        cost_f = sess.run(cost, feed_dict=inputs)
        print("Iter {}, cost= {:.6f}".format(
            str(step * __batch_size), cost_f))

def optimize(self, data, with_metrics=False, with_trace=False):
    """ Optimize a single batch """
    run_metadata = tf.RunMetadata() if with_trace else None
    trace = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) if with_trace else None
    _, metrics = self.run(
        self.training_operation, data,
        run_options=trace, run_metadata=run_metadata)

    if with_metrics:
        self.timer_update()
        steps, elapsed = self.elapsed()
        num_devices = len(self.towers)
        examples = steps * self.batch_size * num_devices
        print('Step {}, examples/sec {:.3f}, ms/batch {:.1f}'.format(
            self.global_step, examples / elapsed, 1000 * elapsed / num_devices))

        self.output_metrics(data, metrics)
        self.write_summaries(data)

    if with_trace:
        step = '{}/step{}'.format(self.name, self.global_step)
        self.summary_writer.add_run_metadata(run_metadata, step,
                                             global_step=self.global_step)

def train(self, images, labels, summaries=False, run_metadata=False):
    if (summaries or run_metadata) and not self.summary_writer:
        raise ValueError("Logdir is required for summaries or run_metadata.")

    args = {"feed_dict": {self.images: images, self.labels: labels}}
    targets = [self.training]
    if summaries:
        targets.append(self.summaries["training"])
    if run_metadata:
        args["options"] = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        args["run_metadata"] = tf.RunMetadata()

    results = self.session.run(targets, **args)
    if summaries:
        self.summary_writer.add_summary(results[-1], self.training_step - 1)
    if run_metadata:
        self.summary_writer.add_run_metadata(args["run_metadata"],
                                             "step{:05}".format(self.training_step - 1))

def _createTestGraphAndRunOptions(self, sess, gated_grpc=True):
    a = tf.Variable([1.0], name='a')
    b = tf.Variable([2.0], name='b')
    c = tf.Variable([3.0], name='c')
    d = tf.Variable([4.0], name='d')
    x = tf.add(a, b, name='x')
    y = tf.add(c, d, name='y')
    z = tf.add(x, y, name='z')

    run_options = tf.RunOptions(output_partition_graphs=True)
    debug_op = 'DebugIdentity'
    if gated_grpc:
        debug_op += '(gated_grpc=True)'
    tf_debug.watch_graph(run_options,
                         sess.graph,
                         debug_ops=debug_op,
                         debug_urls=self.debug_server_url)
    return z, run_options

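The example above is the one case in this listing that sets a RunOptions field other than trace_level. As a side note: when output_partition_graphs=True is set, the graphs that were actually executed are returned on the RunMetadata. A minimal sketch (TF 1.x; the toy graph is an illustrative assumption):

import tensorflow as tf

# Toy graph; only the RunOptions/RunMetadata wiring matters here.
a = tf.constant(1.0, name='a')
b = tf.constant(2.0, name='b')
c = tf.add(a, b, name='c')

with tf.Session() as sess:
    run_options = tf.RunOptions(output_partition_graphs=True)
    run_metadata = tf.RunMetadata()
    sess.run(c, options=run_options, run_metadata=run_metadata)
    # One GraphDef per graph partition that was executed.
    print(len(run_metadata.partition_graphs))
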
def before_run(self, _run_context):
    if not self.is_chief or self._done:
        return

    if not self._active:
        return tf.train.SessionRunArgs(self._global_step)
    else:
        tf.logging.info("Performing full trace on next step.")
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)  #pylint: disable=E1101
        return tf.train.SessionRunArgs(self._global_step, options=run_options)

def _refresh_status(self, sess):
    fetches = {
        "D_KL": self.loss['D_KL'],
        "logP": self.loss['logP'],
        "step": self.opt['global_step'],
    }
    result = sess.run(
        fetches=fetches,
        # options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
        # run_metadata=run_metadata,
    )

    # trace = timeline.Timeline(step_stats=run_metadata.step_stats)
    # with open(os.path.join(dirs['logdir'], 'timeline.ctf.json'), 'w') as fp:
    #     fp.write(trace.generate_chrome_trace_format())

    # Message
    msg = 'Iter {:05d}: '.format(result['step'])
    msg += 'log P(x|z, y) = {:.3e} '.format(result['logP'])
    msg += 'D_KL(z) = {:.3e} '.format(result['D_KL'])
    print('\r{}'.format(msg), end='', flush=True)
    logging.info(msg)

# def _validate(self, machine, n=10):
#     N = n * n

#     # same row same z
#     z = tf.random_normal(shape=[n, self.arch['z_dim']])
#     z = tf.tile(z, [1, n])
#     z = tf.reshape(z, [N, -1])
#     z = tf.Variable(z, trainable=False, dtype=tf.float32)

#     # same column same y
#     y = tf.range(0, 10, 1, dtype=tf.int64)
#     y = tf.reshape(y, [-1,])
#     y = tf.tile(y, [n,])

#     Xh = machine.generate(z, y)  # 100, 64, 64, 3
#     Xh = make_png_thumbnail(Xh, n)
#     return Xh

def _refresh_status(self, sess):
    fetches = {
        "D_KL": self.loss['D_KL'],
        "logP": self.loss['logP'],
        "W_dist": self.loss['W_dist'],
        "gp": self.loss['gp'],
        "step": self.opt['global_step'],
    }
    result = sess.run(
        fetches=fetches,
        # options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
        # run_metadata=run_metadata,
    )

    # trace = timeline.Timeline(step_stats=run_metadata.step_stats)
    # with open(os.path.join(dirs['logdir'], 'timeline.ctf.json'), 'w') as fp:
    #     fp.write(trace.generate_chrome_trace_format())

    # Message
    msg = 'Iter {:05d}: '.format(result['step'])
    msg += 'W_dist = {:.4e} '.format(result['W_dist'])
    msg += 'log P(x|z, y) = {:.4e} '.format(result['logP'])
    msg += 'D_KL(z) = {:.4e} '.format(result['D_KL'])
    msg += 'GP = {:.4e} '.format(result['gp'])
    print('\r{}'.format(msg), end='', flush=True)
    logging.info(msg)

def before_run(self, _run_context):
    if not self.is_chief or self._done:
        return

    if not self._active:
        return tf.train.SessionRunArgs(self._global_step)
    else:
        log.info("Performing full trace on next step.")
        run_options = tf.RunOptions(
            trace_level=tf.RunOptions.FULL_TRACE)
        return tf.train.SessionRunArgs(self._global_step, options=run_options)

def __init__(self, config_path=None):
    if config_path is not None:
        self.load(config_path)

    if self.time_trace:
        self.run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        self.run_metadata = tf.RunMetadata()

    # set workspace
    self.workspace = os.path.join(self.workspace, self.dataset_name)
    self.dataset_path = os.path.join(self.workspace, self.file_name)
    self.map_path = os.path.join(self.workspace, "map/")
    self.__set_save_path()

    if self.eval_mode and self.save_ckpt:
        print("Warning, in evaluation mode, automatically set config.save_ckpt to False")
        self.save_ckpt = False

def run_op(op):
    start_time = time.time()
    print("%10.2f ms: starting op %s\n" % ((start_time - start_time0) * 1000, op.name),
          flush=True, end='')
    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    sess.run(op, options=options, run_metadata=run_metadata)
    end_time = time.time()
    print("%10.2f ms: ending op %s\n" % ((end_time - start_time0) * 1000, op.name),
          flush=True, end='')
    run_metadatas.append(run_metadata)

def sessrun(*args, **kwargs):
    """Helper to do sess.run and save run_metadata"""
    global sess, run_metadata

    run_metadata = tf.RunMetadata()
    kwargs['options'] = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    kwargs['run_metadata'] = run_metadata
    result = sess.run(*args, **kwargs)

    first_entry = args[0]
    # have to do this because sess.run(tensor) is same as sess.run([tensor])
    if isinstance(first_entry, list):
        if len(first_entry) == 0 and len(args) == 1:
            return None
        first_entry = first_entry[0]

def run_shit():
    sess = tf.Session()
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()

    sess.run(tf.initialize_all_variables())
    train_step_ = sess.run([train_step],
                           options=run_options,
                           run_metadata=run_metadata,
                           )  # feed_dict={x: [[2,3],[5,1]]})
    tl = timeline.Timeline(run_metadata.step_stats)
    ctf = tl.generate_chrome_trace_format()
    with open('o_100.json', 'w') as f:
        f.write(ctf)

def train(self, images, labels):
    self.steps += 1
    feed_dict = {self.images: images, self.labels: labels}
    if self.steps == 1:
        metadata = tf.RunMetadata()
        self.session.run(self.training, feed_dict,
                         options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
                         run_metadata=metadata)
        self.summary_writer.add_run_metadata(metadata, 'step1')
    elif self.steps % 100 == 0:
        _, summary = self.session.run([self.training, self.summaries['training']],
                                      feed_dict)
        self.summary_writer.add_summary(summary, self.steps)
    else:
        self.session.run(self.training, feed_dict)

def run_batch(sess, graph, batch_data, learning_rate, do_summary=True,
              is_training=True, profiler=None):
    for stack in graph.stacks:
        stack.reset(sess)

    # each batch data element has leading batch axis
    # X: (B, buffer_size, num_stacks)
    # transitions: (B, num_timesteps, num_stacks)
    # num_transitions: (B, num_stacks)
    X, transitions, num_transitions, ys = batch_data

    # Prepare feed dict
    feed = {
        graph.ys: ys,
        graph.learning_rate: learning_rate,
        graph.is_training: is_training,
    }
    for i, stack in enumerate(graph.stacks):
        # Swap batch axis to front.
        X_i = X[:, :, i].T
        transitions_i = transitions[:, :, i].T

        feed.update({stack.transitions[t]: transitions_i[t]
                     for t in range(graph.num_timesteps)})
        feed[stack.buff] = X_i
        feed[stack.num_transitions] = num_transitions[:, i]

    # Sub in a no-op for summary op if we don't want to compute summaries.
    summary_op_ = graph.summary_op
    if not do_summary:
        summary_op_ = graph.train_op

    kwargs = {}
    if profiler is not None:
        kwargs["options"] = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        kwargs["run_metadata"] = profiler

    _, summary = sess.run([graph.train_op, summary_op_], feed, **kwargs)
    return summary

def generate_run(self, run_name, include_graph):
    """Create a run with a text summary, metadata, and optionally a graph."""
    tf.reset_default_graph()
    k1 = tf.constant(math.pi, name='k1')
    k2 = tf.constant(math.e, name='k2')
    result = (k1 ** k2) - k1
    expected = tf.constant(20.0, name='expected')
    error = tf.abs(result - expected, name='error')
    message_prefix_value = 'error ' * 1000
    true_length = len(message_prefix_value)
    assert true_length > self._MESSAGE_PREFIX_LENGTH_LOWER_BOUND, true_length
    message_prefix = tf.constant(message_prefix_value, name='message_prefix')
    error_message = tf.string_join([message_prefix,
                                    tf.as_string(error, name='error_string')],
                                   name='error_message')
    summary_message = tf.summary.text('summary_message', error_message)

    sess = tf.Session()
    writer = tf.summary.FileWriter(os.path.join(self.logdir, run_name))
    if include_graph:
        writer.add_graph(sess.graph)
    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    s = sess.run(summary_message, options=options, run_metadata=run_metadata)
    writer.add_summary(s)
    writer.add_run_metadata(run_metadata, self._METADATA_TAG)
    writer.close()

def basic_train(loss_op, update_op, profile=0, save_dir='asset/unamed', **kwargs):
    profile_state = _ShouldProfile(profile)

    @stf.sg_train_func
    def train_func(sess, arg):
        profile_state.increment()

        if profile_state.should_profile():
            options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
        else:
            options = None
            run_metadata = None

        loss = sess.run([loss_op] + update_op,
                        options=options,
                        run_metadata=run_metadata)[0]

        if profile_state.should_profile():
            tl = tf_timeline.Timeline(run_metadata.step_stats)
            ctf = tl.generate_chrome_trace_format()
            with open(path.join(save_dir, 'timeline.json'), 'w') as fd:
                print(ctf, file=fd)

        return loss

    # run train function
    train_func(save_dir=save_dir, **kwargs)

def main(argv=None):
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    with tf.device('/gpu:2'):
        real_data, z, opt_g, opt_d = build_graph()
        summary_op = tf.merge_all_summaries()
    saver = tf.train.Saver()
    npad = ((0, 0), (2, 2), (2, 2))
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, sess.graph)
        for i in xrange(FLAGS.max_iter_step):
            train_img = mnist.train.next_batch(FLAGS.batch_size)[0]
            train_img = np.reshape(train_img, (-1, 28, 28))
            train_img = np.pad(train_img, pad_width=npad,
                               mode='constant', constant_values=0)
            train_img = np.expand_dims(train_img, -1)
            batch_z = np.random.normal(0, 1.0, [FLAGS.batch_size, FLAGS.z_dim]) \
                .astype(np.float32)
            feed_dict = {real_data: train_img, z: batch_z}
            if i % 100 == 99:
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                _, merged = sess.run([opt_g, summary_op], feed_dict=feed_dict,
                                     options=run_options, run_metadata=run_metadata)
                _, merged = sess.run([opt_g, summary_op], feed_dict=feed_dict,
                                     options=run_options, run_metadata=run_metadata)
                summary_writer.add_summary(merged, i)
                summary_writer.add_run_metadata(
                    run_metadata, 'generator_metadata{}'.format(i), i)
                _, merged = sess.run([opt_d, summary_op], feed_dict=feed_dict,
                                     options=run_options, run_metadata=run_metadata)
                summary_writer.add_summary(merged, i)
                summary_writer.add_run_metadata(
                    run_metadata, 'discriminator_metadata{}'.format(i), i)
            else:
                sess.run(opt_g, feed_dict=feed_dict)
                sess.run(opt_d, feed_dict=feed_dict)
            if i % 1000 == 999:
                saver.save(sess, os.path.join(
                    FLAGS.ckpt_dir, "model.ckpt"), global_step=i)

def train(session):
    batch_size = 200
    session.run(tf.global_variables_initializer())

    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)  # (*)
    run_metadata = tf.RunMetadata()

    # Training cycle
    for epoch in range(10):
        epoch_loss = 0.0
        batch_steps = mnist.train.num_examples / batch_size
        for step in range(batch_steps):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            _, c = session.run(
                [train_op, loss],
                feed_dict={x: batch_x, y: batch_y},
                options=run_options,
                run_metadata=run_metadata  # (*)
            )
            epoch_loss += c / batch_steps
        print "[%s] Epoch %02d, Loss = %.6f" % (datetime.now(), epoch, epoch_loss)

    # Dump profiling data (*)
    prof_timeline = tf.python.client.timeline.Timeline(run_metadata.step_stats)
    prof_ctf = prof_timeline.generate_chrome_trace_format()
    with open('./prof_ctf.json', 'w') as fp:
        print 'Dumped to prof_ctf.json'
        fp.write(prof_ctf)

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print "Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels})

def run_training_batch(self, session, batch):
    """
    A batch contains input tensors for words, pos, lemmas, preds, preds_idx,
    and labels (in that order)
    Runs the model on the batch (through train_op if train=True)
    Returns the loss
    """
    feed_dict = self.batch_to_feed(batch)
    feed_dict[self.use_dropout_placeholder] = 1.0

    fetches = [self.loss, self.train_op]

    # options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    # run_metadata = tf.RunMetadata()
    loss, _ = session.run(fetches, feed_dict=feed_dict)
    # loss, _ = session.run(fetches,
    #                       feed_dict=feed_dict,
    #                       options=options,
    #                       run_metadata=run_metadata)
    # fetched_timeline = timeline.Timeline(run_metadata.step_stats)
    # chrome_trace = fetched_timeline.generate_chrome_trace_format()
    # with open('timeline.json', 'w') as f:
    #     f.write(chrome_trace)
    return loss

def run_step(self):
    """ Simply run self.train_op"""
    self.sess.run(self.train_op)

    #run_metadata = tf.RunMetadata()
    #self.sess.run([self.train_op],
    #    options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
    #    run_metadata=run_metadata
    #)
    #from tensorflow.python.client import timeline
    #trace = timeline.Timeline(step_stats=run_metadata.step_stats)
    #trace_file = open('timeline.ctf.json', 'w')
    #trace_file.write(trace.generate_chrome_trace_format())
    #import sys; sys.exit()

def trace(config, sess, model, train_data):
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    X, Q, Y = random_batch(*train_data, config.batch_size)
    model.batch_fit(X, Q, Y, learning_rate, run_options, run_metadata)
    train_writer.add_run_metadata(run_metadata, 'step%d' % step)

    from tensorflow.python.client import timeline
    tl = timeline.Timeline(run_metadata.step_stats)
    ctf = tl.generate_chrome_trace_format()
    with open('timeline.json', 'w') as f:
        f.write(ctf)
    return

def benchmark_one_step(sess, fetches, step, batch_size, step_train_times,
                       trace_filename, image_producer, params, summary_op=None):
    """Advance one step of benchmarking."""
    if trace_filename and step == -1:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
    else:
        run_options = None
        run_metadata = None
    summary_str = None
    start_time = time.time()
    if summary_op is None:
        results = sess.run(fetches, options=run_options, run_metadata=run_metadata)
    else:
        (results, summary_str) = sess.run(
            [fetches, summary_op], options=run_options, run_metadata=run_metadata)
    if not params.forward_only:
        lossval = results['total_loss']
    else:
        lossval = 0.
    image_producer.notify_image_consumption()
    train_time = time.time() - start_time
    step_train_times.append(train_time)
    if step >= 0 and (step == 0 or (step + 1) % params.display_every == 0):
        log_str = '%i\t%s\t%.3f' % (
            step + 1, get_perf_timing_str(batch_size, step_train_times), lossval)
        if 'top_1_accuracy' in results:
            log_str += '\t%.3f\t%.3f' % (results['top_1_accuracy'],
                                         results['top_5_accuracy'])
        log_fn(log_str)
    if trace_filename and step == -1:
        log_fn('Dumping trace to %s' % trace_filename)
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        with gfile.Open(trace_filename, 'w') as trace_file:
            trace_file.write(trace.generate_chrome_trace_format(show_memory=True))
    return summary_str

def benchmark_one_step(sess, fetches, step, batch_size, step_train_times,
                       trace_filename, image_producer, params, summary_op=None):
    """Advance one step of benchmarking."""
    if trace_filename is not None and step == -1:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
    else:
        run_options = None
        run_metadata = None
    summary_str = None
    start_time = time.time()
    if summary_op is None:
        results = sess.run(fetches, options=run_options, run_metadata=run_metadata)
    else:
        (results, summary_str) = sess.run(
            [fetches, summary_op], options=run_options, run_metadata=run_metadata)
    if not params.forward_only:
        lossval = results['total_loss']
    else:
        lossval = 0.
    image_producer.notify_image_consumption()
    train_time = time.time() - start_time
    step_train_times.append(train_time)
    if step >= 0 and (step == 0 or (step + 1) % params.display_every == 0):
        log_str = '%i\t%s\t%.3f' % (
            step + 1, get_perf_timing_str(batch_size, step_train_times), lossval)
        if 'top_1_accuracy' in results:
            log_str += '\t%.3f\t%.3f' % (results['top_1_accuracy'],
                                         results['top_5_accuracy'])
        log_fn(log_str)
    if trace_filename is not None and step == -1:
        log_fn('Dumping trace to %s' % trace_filename)
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        with gfile.Open(trace_filename, 'w') as trace_file:
            trace_file.write(trace.generate_chrome_trace_format(show_memory=True))
    return summary_str

def load_data(self, sess, inputs, full_trace=False):
    """Bulk loads the specified inputs into device memory.

    The shape of the inputs must conform to the shapes of the input
    placeholders this optimizer was constructed with.

    The data is split equally across all the devices. If the data is not
    evenly divisible by the batch size, excess data will be discarded.

    Args:
        sess: TensorFlow session.
        inputs: List of Tensors matching the input placeholders specified
            at construction time of this optimizer.
        full_trace: Whether to profile data loading.

    Returns:
        The number of tuples loaded per device.
    """
    feed_dict = {}
    assert len(self.input_placeholders) == len(inputs)
    for ph, arr in zip(self.input_placeholders, inputs):
        truncated_arr = make_divisible_by(arr, self.batch_size)
        feed_dict[ph] = truncated_arr
        truncated_len = len(truncated_arr)

    if full_trace:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    else:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.NO_TRACE)
    run_metadata = tf.RunMetadata()

    sess.run(
        [t.init_op for t in self._towers],
        feed_dict=feed_dict,
        options=run_options,
        run_metadata=run_metadata)

    if full_trace:
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        trace_file = open(os.path.join(self.logdir, "timeline-load.json"), "w")
        trace_file.write(trace.generate_chrome_trace_format())

    tuples_per_device = truncated_len / len(self.devices)
    assert tuples_per_device > 0, \
        "Too few tuples per batch, trying increasing the training " \
        "batch size or decreasing the sgd batch size. Tried to split up " \
        "{} rows {}-ways in batches of {} (total across devices).".format(
            len(arr), len(self.devices), self.batch_size)
    assert tuples_per_device % self.per_device_batch_size == 0
    return tuples_per_device

def optimize(self, sess, batch_index, extra_ops=[], extra_feed_dict={},
             file_writer=None):
    """Run a single step of SGD.

    Runs a SGD step over a slice of the preloaded batch with size given by
    self.per_device_batch_size and offset given by the batch_index argument.

    Updates shared model weights based on the averaged per-device gradients.

    Args:
        sess: TensorFlow session.
        batch_index: Offset into the preloaded data. This value must be
            between `0` and `tuples_per_device`. The amount of data to
            process is always fixed to `per_device_batch_size`.
        extra_ops: Extra ops to run with this step (e.g. for metrics).
        extra_feed_dict: Extra args to feed into this session run.
        file_writer: If specified, tf metrics will be written out using this.

    Returns:
        The outputs of extra_ops evaluated over the batch.
    """
    if file_writer:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    else:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.NO_TRACE)
    run_metadata = tf.RunMetadata()

    feed_dict = {self._batch_index: batch_index}
    feed_dict.update(extra_feed_dict)
    outs = sess.run(
        [self._train_op] + extra_ops,
        feed_dict=feed_dict,
        options=run_options,
        run_metadata=run_metadata)

    if file_writer:
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        trace_file = open(os.path.join(self.logdir, "timeline-sgd.json"), "w")
        trace_file.write(trace.generate_chrome_trace_format())
        file_writer.add_run_metadata(
            run_metadata, "sgd_train_{}".format(batch_index))

    return outs[1:]

def main():
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    with tf.device('/gpu:1'):
        g_loss_sum, d_loss_sum, img_sum, opt_g, opt_d, z, real_data = build_graph()
        summary_g = tf.merge_summary([g_loss_sum, img_sum])
        summary_d = tf.merge_summary([d_loss_sum, img_sum])
    saver = tf.train.Saver()
    npad = ((0, 0), (2, 2), (2, 2))
    with tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True)) as sess:
        sess.run(tf.initialize_all_variables())
        summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, sess.graph)
        for i in xrange(FLAGS.max_iter_step):
            train_data = mnist.train.next_batch(FLAGS.batch_size)
            train_img = np.reshape(train_data[0], (-1, 28, 28))
            train_img = np.pad(train_img, pad_width=npad,
                               mode='constant', constant_values=0)
            train_img = np.expand_dims(train_img, -1)
            batch_z = np.random.uniform(-1, 1, [FLAGS.batch_size, FLAGS.z_dim]) \
                .astype(np.float32)
            feed_dict = {real_data[0]: train_img, z: batch_z,
                         real_data[1]: train_data[1]}
            if i % 100 == 99:
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                _, merged = sess.run([opt_g, summary_g], feed_dict=feed_dict,
                                     options=run_options, run_metadata=run_metadata)
                summary_writer.add_summary(merged, i)
                summary_writer.add_run_metadata(
                    run_metadata, 'generator_metadata {}'.format(i), i)
                _, merged = sess.run([opt_g, summary_g], feed_dict=feed_dict,
                                     options=run_options, run_metadata=run_metadata)
                summary_writer.add_summary(merged, i)
                summary_writer.add_run_metadata(
                    run_metadata, 'second_generator_metadata {}'.format(i), i)
                _, merged = sess.run([opt_d, summary_d], feed_dict=feed_dict,
                                     options=run_options, run_metadata=run_metadata)
                summary_writer.add_summary(merged, i)
                summary_writer.add_run_metadata(
                    run_metadata, 'discriminator_metadata {}'.format(i), i)
            else:
                sess.run(opt_g, feed_dict=feed_dict)
                sess.run(opt_g, feed_dict=feed_dict)
                sess.run(opt_d, feed_dict=feed_dict)
            if i % 1000 == 999:
                saver.save(sess, os.path.join(
                    FLAGS.ckpt_dir, "model.ckpt"), global_step=i)

def train():
    batch_size = 10
    print "Starting ABC-CNN training"
    vqa = dl.load_questions_answers('data')

    # Create subset of data for over-fitting
    sub_vqa = {}
    sub_vqa['training'] = vqa['training'][:10]
    sub_vqa['validation'] = vqa['validation'][:10]
    sub_vqa['answer_vocab'] = vqa['answer_vocab']
    sub_vqa['question_vocab'] = vqa['question_vocab']
    sub_vqa['max_question_length'] = vqa['max_question_length']

    train_size = len(vqa['training'])
    max_itr = (train_size // batch_size) * 10

    with tf.Session() as sess:
        image, ques, ans, optimizer, loss, accuracy = abc.model(sess, batch_size)
        print "Defined ABC model"

        train_loader = util.get_batch(sess, vqa, batch_size, 'training')
        print "Created train dataset generator"

        valid_loader = util.get_batch(sess, vqa, batch_size, 'validation')
        print "Created validation dataset generator"

        writer = abc.write_tensorboard(sess)
        init = tf.global_variables_initializer()
        merged = tf.summary.merge_all()
        sess.run(init)
        print "Initialized Tensor variables"

        itr = 1
        while itr < max_itr:
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()

            _, vgg_batch, ques_batch, answer_batch = train_loader.next()
            _, valid_vgg_batch, valid_ques_batch, valid_answer_batch = valid_loader.next()
            sess.run(optimizer,
                     feed_dict={image: vgg_batch, ques: ques_batch, ans: answer_batch})
            [train_summary, train_loss, train_accuracy] = sess.run(
                [merged, loss, accuracy],
                feed_dict={image: vgg_batch, ques: ques_batch, ans: answer_batch},
                options=run_options, run_metadata=run_metadata)
            [valid_loss, valid_accuracy] = sess.run(
                [loss, accuracy],
                feed_dict={image: valid_vgg_batch, ques: valid_ques_batch,
                           ans: valid_answer_batch})

            writer.add_run_metadata(run_metadata, 'step%03d' % itr)
            writer.add_summary(train_summary, itr)
            writer.flush()
            print "Iteration:%d\tTraining Loss:%f\tTraining Accuracy:%f\tValidation Loss:%f\tValidation Accuracy:%f" % (
                itr, train_loss, 100. * train_accuracy, valid_loss, 100. * valid_accuracy)
            itr += 1

def train_step(sess, train_op, global_step, train_step_kwargs):
    """Function that takes a gradient step and specifies whether to stop.

    Args:
        sess: The current session.
        train_op: A dictionary of `Operation` that evaluates the gradients and
            returns the total loss (for first) in case of iter_size > 1.
        global_step: A `Tensor` representing the global training step.
        train_step_kwargs: A dictionary of keyword arguments.

    Returns:
        The total loss and a boolean indicating whether or not to stop training.
    """
    start_time = time.time()
    if FLAGS.iter_size == 1:
        # for debugging specific endpoint values,
        # set the train file to one image and use
        # pdb here
        # import pdb
        # pdb.set_trace()
        if FLAGS.profile_iterations:
            run_options = tf.RunOptions(
                trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            total_loss, np_global_step = sess.run([train_op, global_step],
                                                  options=run_options,
                                                  run_metadata=run_metadata)
            tl = timeline.Timeline(run_metadata.step_stats)
            ctf = tl.generate_chrome_trace_format()
            with open(os.path.join(FLAGS.train_dir,
                                   'timeline_%08d.json' % np_global_step), 'w') as f:
                f.write(ctf)
        else:
            total_loss, np_global_step = sess.run([train_op, global_step])
    else:
        for j in range(FLAGS.iter_size - 1):
            sess.run([train_op[j]])
        total_loss, np_global_step = sess.run(
            [train_op[FLAGS.iter_size - 1], global_step])
    time_elapsed = time.time() - start_time

    if 'should_log' in train_step_kwargs:
        if sess.run(train_step_kwargs['should_log']):
            logging.info('%s: global step %d: loss = %.4f (%.2f sec)',
                         datetime.now(), np_global_step, total_loss, time_elapsed)

    if 'should_stop' in train_step_kwargs:
        should_stop = sess.run(train_step_kwargs['should_stop'])
    else:
        should_stop = False

    return total_loss, should_stop