我们从Python开源项目中,提取了以下44个代码示例,用于说明如何使用tensorflow.RunMetadata()。
def run(self, fetches, feed_dict=None, options=None, run_metadata=None):
    """Run `fetches` with the profiler's options and write a Chrome trace.

    Args:
        fetches: Forwarded to the parent session's `run`.
        feed_dict: Forwarded to the parent session's `run`.
        options: Optional run options. Its `trace_level` must match
            `self.profiler_options.trace_level`; it is then merged through
            `self.profiler_options.update`.
        run_metadata: Optional metadata object supplied by the caller. When
            given, it is the object that gets populated (previously it was
            silently ignored); otherwise a fresh `tf.RunMetadata` is used.

    Returns:
        Whatever the parent session's `run` returns for `fetches`.

    Raises:
        ValueError: If `options.trace_level` disagrees with the profiler's.
    """
    # Make sure there is no disagreement doing this.
    if options is not None:
        if options.trace_level != self.profiler_options.trace_level:  # pragma: no cover
            raise ValueError(
                'In profiler session. Inconsistent trace '
                'level from run call')  # pragma: no cover
        self.profiler_options.update(options)  # pragma: no cover

    # Honour the caller's metadata object so that it receives the step stats.
    if run_metadata is None:
        run_metadata = tf.RunMetadata()
    self.local_run_metadata = run_metadata

    output = super(TracerSession, self).run(
        fetches,
        feed_dict=feed_dict,
        options=self.profiler_options,
        run_metadata=self.local_run_metadata)

    trace_time = timeline.Timeline(self.local_run_metadata.step_stats)
    ctf = trace_time.generate_chrome_trace_format()
    with open(self._trace_filename(), 'w') as trace_file:
        trace_file.write(ctf)

    if self.each_time:
        self.counter += 1
    return output
def run(self, fetches, feed_dict=None):
    """Run `fetches` with full tracing and return the trace as parsed JSON.

    The values produced by `fetches` are discarded; only the Chrome-trace
    timeline is returned. To inspect it, save the JSON to a file, open
    chrome://tracing and load that file.

    Args:
        fetches: Forwarded to the parent session's `run`.
        feed_dict: Forwarded to the parent session's `run`.

    Returns:
        The result of `json.loads` on the Chrome trace string.
    """
    trace_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    metadata = tf.RunMetadata()
    super(ProfiledSession, self).run(
        fetches, feed_dict, options=trace_options, run_metadata=metadata)

    # Turn the collected step stats into Chrome trace JSON text, then parse it.
    chrome_trace = timeline.Timeline(
        metadata.step_stats).generate_chrome_trace_format()
    return json.loads(chrome_trace)
def traced_run(fetches):
    """Run `fetches` and dump timeline files into `<cwd>/data`.

    Writes `timeline_<n>.json` (Chrome trace) and `stepstats_<n>.pbtxt`
    (string form of the step stats), where `<n>` is the module-level
    `timeline_counter`, then increments that counter.

    Args:
        fetches: Passed to the module-level `sess.run`.

    Returns:
        The value returned by `sess.run`.
    """
    global sess
    assert sess
    global timeline_counter
    from tensorflow.python.client import timeline

    run_metadata = tf.RunMetadata()
    root = os.getcwd() + "/data"

    results = sess.run(fetches,
                       options=run_options,
                       run_metadata=run_metadata)
    tl = timeline.Timeline(step_stats=run_metadata.step_stats)
    ctf = tl.generate_chrome_trace_format(show_memory=True,
                                          show_dataflow=False)

    # Context managers make sure both files are flushed and closed.
    with open(root + "/timeline_%d.json" % (timeline_counter,), "w") as f:
        f.write(ctf)
    with open(root + "/stepstats_%d.pbtxt" % (timeline_counter,), "w") as f:
        f.write(str(run_metadata.step_stats))

    timeline_counter += 1
    return results
def sessrun(*args, **kwargs):
    """Run the default session, dumping a timeline when profiling is enabled.

    When the module-level `GLOBAL_PROFILE` is falsy this is a plain
    `sess.run(*args, **kwargs)`. Otherwise the call is traced and two files
    are written under `timelines/`, named after the first fetch (with `/`
    replaced by `-`): a Chrome trace `.json` and the metadata as `.pbtxt`.

    Returns:
        The result of `sess.run`, or None when profiling is on and the only
        positional argument is an empty list.
    """
    session = u.get_default_session()
    if not GLOBAL_PROFILE:
        return session.run(*args, **kwargs)

    run_metadata = tf.RunMetadata()
    kwargs['options'] = full_trace_options
    kwargs['run_metadata'] = run_metadata
    result = session.run(*args, **kwargs)

    # The first fetch (or the first element of a fetch list) names the files.
    target = args[0]
    if isinstance(target, list):
        if len(args) == 1 and len(target) == 0:
            return None
        target = target[0]
    name = target.name.replace('/', '-')

    chrome_trace = timeline.Timeline(
        run_metadata.step_stats).generate_chrome_trace_format()
    with open('timelines/%s.json'%(name,), 'w') as json_file:
        json_file.write(chrome_trace)
    with open('timelines/%s.pbtxt'%(name,), 'w') as pbtxt_file:
        pbtxt_file.write(str(run_metadata))
    return result
def traced_run(fetches):
    """Run `fetches` and dump timeline files in the current directory.

    Writes `timeline_<n>.json` (Chrome trace) and `stepstats_<n>.pbtxt`
    (string form of the step stats), where `<n>` is the module-level
    `timeline_counter`, then increments that counter.

    Args:
        fetches: Passed to the module-level `sess.run`.

    Returns:
        The value returned by `sess.run`.
    """
    from tensorflow.python.client import timeline
    global timeline_counter

    run_metadata = tf.RunMetadata()
    results = sess.run(fetches,
                       options=run_options,
                       run_metadata=run_metadata)
    tl = timeline.Timeline(step_stats=run_metadata.step_stats)
    ctf = tl.generate_chrome_trace_format(show_memory=True,
                                          show_dataflow=False)

    # Context managers make sure both files are flushed and closed.
    with open("timeline_%d.json" % (timeline_counter,), "w") as f:
        f.write(ctf)
    with open("stepstats_%d.pbtxt" % (timeline_counter,), "w") as f:
        f.write(str(run_metadata.step_stats))

    timeline_counter += 1
    return results
def train_it(sess, step=1):
    """Run one optimizer step on a fresh batch and periodically print cost.

    Args:
        sess: Session used to run the module-level `optimizer` and `cost`.
        step: Step counter; the cost is printed whenever it is a multiple of
            the module-level `display_step`.
    """
    batch_chars, batch_lens = get_batch(__batch_size)
    feed = {pat_chars_i: batch_chars, pat_lens: batch_lens}

    # Run optimization op (backprop).
    sess.run(optimizer, feed_dict=feed)

    if step % display_step != 0:
        return

    # Calculate batch loss on the same batch that was just trained on.
    batch_cost = sess.run(cost, feed_dict=feed)
    print("Iter {}, cost= {:.6f}".format(str(step * __batch_size), batch_cost))
def optimize(self, data, with_metrics=False, with_trace=False):
    """Optimize a single batch.

    Args:
        data: Batch forwarded to `self.run` and to the metric/summary hooks.
        with_metrics: When true, update the timer, print throughput and emit
            metrics and summaries.
        with_trace: When true, run with full tracing and hand the collected
            run metadata to `self.summary_writer`.
    """
    if with_trace:
        run_metadata = tf.RunMetadata()
        trace = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    else:
        run_metadata = None
        trace = None

    _, metrics = self.run(
        self.training_operation,
        data,
        run_options=trace,
        run_metadata=run_metadata)

    if with_metrics:
        self.timer_update()
        steps, elapsed = self.elapsed()
        num_devices = len(self.towers)
        examples = steps * self.batch_size * num_devices
        report = 'Step {}, examples/sec {:.3f}, ms/batch {:.1f}'.format(
            self.global_step,
            examples / elapsed,
            1000 * elapsed / num_devices)
        print(report)
        self.output_metrics(data, metrics)
        self.write_summaries(data)

    if with_trace:
        tag = '{}/step{}'.format(self.name, self.global_step)
        self.summary_writer.add_run_metadata(
            run_metadata, tag, global_step=self.global_step)
def train(self, images, labels, summaries=False, run_metadata=False):
    """Run one training step, optionally recording summaries and a trace.

    Args:
        images: Value fed to `self.images`.
        labels: Value fed to `self.labels`.
        summaries: When true, also fetch the training summaries and write
            them to `self.summary_writer`.
        run_metadata: When true, run with full tracing and write the
            collected metadata to `self.summary_writer`.

    Raises:
        ValueError: If summaries or run metadata are requested while
            `self.summary_writer` is falsy.
    """
    wants_logging = summaries or run_metadata
    if wants_logging and not self.summary_writer:
        raise ValueError("Logdir is required for summaries or run_metadata.")

    run_kwargs = {"feed_dict": {self.images: images, self.labels: labels}}
    fetches = [self.training]
    if summaries:
        fetches.append(self.summaries["training"])
    if run_metadata:
        run_kwargs["options"] = tf.RunOptions(
            trace_level=tf.RunOptions.FULL_TRACE)
        run_kwargs["run_metadata"] = tf.RunMetadata()

    results = self.session.run(fetches, **run_kwargs)

    if summaries:
        # The summary fetch was appended last, so it is the last result.
        self.summary_writer.add_summary(results[-1], self.training_step - 1)
    if run_metadata:
        tag = "step{:05}".format(self.training_step - 1)
        self.summary_writer.add_run_metadata(run_kwargs["run_metadata"], tag)
def RunMetadata(self, tag):
    """Look up the session.run() metadata stored under `tag`.

    Args:
      tag: Key into `self._tagged_metadata`.

    Raises:
      ValueError: If `tag` is not present in `self._tagged_metadata`.

    Returns:
      A `tf.RunMetadata` proto parsed from the stored serialized bytes.
    """
    tagged = self._tagged_metadata
    if tag not in tagged:
        raise ValueError('There is no run metadata with this tag name')

    proto = tf.RunMetadata()
    proto.ParseFromString(tagged[tag])
    return proto
def load_metadata(model_dir):
    """Load RunMetadata, Graph and OpLog from files under `model_dir`.

    Each artifact is optional: when its file is missing a message is printed
    and the corresponding empty object is returned instead.

    Args:
        model_dir: Directory containing `metadata/run_meta`, `graph.pbtxt`
            and `metadata/tfprof_log`.

    Returns:
        A `(run_meta, graph, op_log)` tuple.
    """
    # RunMetadata (binary proto).
    run_meta = tf.RunMetadata()
    run_meta_path = os.path.join(model_dir, "metadata/run_meta")
    if not gfile.Exists(run_meta_path):
        print("RunMetadata does not exist a {}. Skipping.".format(run_meta_path))
    else:
        with gfile.GFile(run_meta_path, "rb") as handle:
            run_meta.MergeFromString(handle.read())
        print("Loaded RunMetadata from {}".format(run_meta_path))

    # Graph (text proto), imported into a fresh graph.
    graph = tf.Graph()
    graph_def_path = os.path.join(model_dir, "graph.pbtxt")
    if not gfile.Exists(graph_def_path):
        print("Graph does not exist a {}. Skipping.".format(graph_def_path))
    else:
        with graph.as_default():
            _register_function_ops(CUSTOM_OP_FUNCTIONS)
            graph_def = tf.GraphDef()
            with gfile.GFile(graph_def_path, "rb") as handle:
                text_format.Parse(handle.read(), graph_def)
            tf.import_graph_def(graph_def, name="")
            print("Loaded Graph from {}".format(graph_def_path))

    # OpLog (binary proto).
    op_log = tfprof_log_pb2.OpLog()
    op_log_path = os.path.join(model_dir, "metadata/tfprof_log")
    if not gfile.Exists(op_log_path):
        print("OpLog does not exist a {}. Skipping.".format(op_log_path))
    else:
        with gfile.GFile(op_log_path, "rb") as handle:
            op_log.MergeFromString(handle.read())
        print("Loaded OpLog from {}".format(op_log_path))

    return run_meta, graph, op_log
def train(self, nIter, machine=None, summary_op=None):
    """Run the generator update op under a `tf.train.Supervisor` session.

    Loops for `self.arch['training']['max_iter']` steps, or until the
    supervisor asks to stop, running `self.opt['g']` once per step.

    NOTE(review): `nIter`, `machine`, `summary_op` and the local
    `run_metadata` are not used in this variant.
    """
    # Xh = self._validate(machine=machine, n=10)
    run_metadata = tf.RunMetadata()

    sv = tf.train.Supervisor(
        logdir=self.dirs['logdir'],
        # summary_writer=summary_writer,
        # summary_op=None,
        # is_chief=True,
        save_model_secs=300,
        global_step=self.opt['global_step'])

    # sess_config = configure_gpu_settings(args.gpu_cfg)
    sess_config = tf.ConfigProto(
        allow_soft_placement=True,
        gpu_options=tf.GPUOptions(allow_growth=True))

    with sv.managed_session(config=sess_config) as sess:
        # Have the supervisor call self._refresh_status(sess) on a 60-unit timer.
        sv.loop(60, self._refresh_status, (sess,))
        for step in range(self.arch['training']['max_iter']):
            if sv.should_stop():
                break

            # main loop
            sess.run(self.opt['g'])

            # # output img
            # if step % 1000 == 0:
            #     xh = sess.run(Xh)
            #     with tf.gfile.GFile(
            #         os.path.join(
            #             self.dirs['logdir'],
            #             'img-anime-{:03d}k.png'.format(step // 1000),
            #         ),
            #         mode='wb',
            #     ) as fp:
            #         fp.write(xh)
def train(self, nIter, machine=None, summary_op=None):
    """Train the generator and periodically write a validation image.

    Runs `self.opt['g']` once per step inside a `tf.train.Supervisor`
    managed session. Every 1000 steps the tensor returned by
    `self._validate` is evaluated and its value written to
    `img-anime-<step // 1000>k.png` in the log directory.

    Args:
        nIter: Not used in this function.
        machine: Forwarded to `self._validate`.
        summary_op: Not used in this function.
    """
    Xh = self._validate(machine=machine, n=10)
    run_metadata = tf.RunMetadata()  # not used below

    supervisor = tf.train.Supervisor(
        logdir=self.dirs['logdir'],
        global_step=self.opt['global_step'])

    gpu_options = tf.GPUOptions(allow_growth=True)
    sess_config = tf.ConfigProto(
        allow_soft_placement=True,
        gpu_options=gpu_options)

    with supervisor.managed_session(config=sess_config) as sess:
        supervisor.loop(60, self._refresh_status, (sess,))
        for step in range(self.arch['training']['max_iter']):
            if supervisor.should_stop():
                break

            # main loop
            sess.run(self.opt['g'])

            # output img
            if step % 1000 != 0:
                continue
            rendered = sess.run(Xh)
            image_path = os.path.join(
                self.dirs['logdir'],
                'img-anime-{:03d}k.png'.format(step // 1000))
            with tf.gfile.GFile(image_path, mode='wb') as out:
                out.write(rendered)
def __init__(self, config_path = None):
    """Initialise the configuration and derive workspace paths.

    Args:
        config_path: Optional path handed to `self.load`; when None no
            config file is loaded.
    """
    if config_path is not None:
        self.load(config_path)

    if self.time_trace:
        # Tracing objects are only created when time tracing is enabled.
        self.run_metadata_options = None
        del self.run_metadata_options
        self.run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        self.run_metadata = tf.RunMetadata()

    # set workspace
    workspace = os.path.join(self.workspace, self.dataset_name)
    self.workspace = workspace
    self.dataset_path = os.path.join(workspace, self.file_name)
    self.map_path = os.path.join(workspace, "map/")
    self.__set_save_path()

    # Checkpoints are never saved in evaluation mode.
    if self.eval_mode and self.save_ckpt:
        print("Warning, in evaluation mode, automatically set config.save_ckpt to False")
        self.save_ckpt = False
def run_op(op):
    """Run `op` with full tracing, logging start/end times.

    Times are printed in milliseconds relative to the module-level
    `start_time0`. The collected run metadata is appended to the
    module-level `run_metadatas` list.

    Args:
        op: The op passed to the module-level `sess.run`; its `name` is used
            in the log lines.
    """
    began = time.time()
    print("%10.2f ms: starting op %s\n" % ((began-start_time0)*1000, op.name),
          flush=True, end='')

    trace_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    metadata = tf.RunMetadata()
    sess.run(op, options=trace_options, run_metadata=metadata)

    finished = time.time()
    print("%10.2f ms: ending op %s\n" % ((finished-start_time0)*1000, op.name),
          flush=True, end='')
    run_metadatas.append(metadata)
def sessrun(*args, **kwargs):
    """Helper to do sess.run and save run_metadata.

    Runs the module-level `sess` with full tracing and stores the collected
    metadata in the module-level `run_metadata`.

    Returns:
        The result of `sess.run` (previously computed but dropped), or None
        when the only positional argument is an empty list.
    """
    global sess, run_metadata
    run_metadata = tf.RunMetadata()

    kwargs['options'] = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    kwargs['run_metadata'] = run_metadata
    result = sess.run(*args, **kwargs)

    first_entry = args[0]
    # have to do this because sess.run(tensor) is same as sess.run([tensor])
    if isinstance(first_entry, list):
        if len(first_entry) == 0 and len(args) == 1:
            return None
    return result
def traced_run(fetches):
    """Run `fetches` and dump timeline files into `<cwd>/data`.

    Files are named `timeline_<tag>.json` and `stepstats_<tag>.pbtxt`, where
    `<tag>` is `<task_type>-<task_id>-<timeline_counter>` built from
    `load_config()` and the module-level counter, which is then incremented.

    Args:
        fetches: Passed to the default session's `run`.

    Returns:
        The value returned by `sess.run`.
    """
    global timeline_counter
    run_metadata = tf.RunMetadata()

    config = load_config()
    log_fn = "%s-%s-%s" % (config.task_type, config.task_id, timeline_counter)
    sess = tf.get_default_session()

    root = os.getcwd() + "/data"
    # Create the output directory without shelling out to `mkdir -p`.
    if not os.path.isdir(root):
        os.makedirs(root)

    from tensorflow.python.client import timeline

    results = sess.run(fetches,
                       options=run_options,
                       run_metadata=run_metadata)
    tl = timeline.Timeline(step_stats=run_metadata.step_stats)
    ctf = tl.generate_chrome_trace_format(show_memory=True,
                                          show_dataflow=False)

    # Context managers make sure both files are flushed and closed.
    with open(root + "/timeline_%s.json" % (log_fn,), "w") as f:
        f.write(ctf)
    with open(root + "/stepstats_%s.pbtxt" % (log_fn,), "w") as f:
        f.write(str(run_metadata.step_stats))

    timeline_counter += 1
    return results
def run_shit():
    """Run the module-level `train_step` once with full tracing.

    Initialises all variables, runs `train_step`, and writes the resulting
    Chrome trace to `o_100.json`. The session is closed on exit.
    """
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()

    # The context manager releases the session's resources when done.
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        sess.run([train_step],
                 options=run_options,
                 run_metadata=run_metadata)

    tl = timeline.Timeline(run_metadata.step_stats)
    ctf = tl.generate_chrome_trace_format()
    with open('o_100.json', 'w') as f:
        f.write(ctf)
def train(self, images, labels):
    """Run one training step, tracing the first and summarising every 100th.

    Args:
        images: Value fed to `self.images`.
        labels: Value fed to `self.labels`.
    """
    self.steps += 1
    feed_dict = {self.images: images, self.labels: labels}

    if self.steps == 1:
        # First step: collect a full trace and store it under 'step1'.
        metadata = tf.RunMetadata()
        trace_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        self.session.run(self.training, feed_dict,
                         options = trace_options,
                         run_metadata = metadata)
        self.summary_writer.add_run_metadata(metadata, 'step1')
        return

    if self.steps % 100 == 0:
        fetches = [self.training, self.summaries['training']]
        _, summary = self.session.run(fetches, feed_dict)
        self.summary_writer.add_summary(summary, self.steps)
        return

    self.session.run(self.training, feed_dict)
def testExtractGatedGrpcTensorsFoundGatedGrpcOps(self):
    """Gated-gRPC debug ops are reported for every node of the test graph."""
    with tf.Session() as sess:
        z, run_options = self._createTestGraphAndRunOptions(sess, gated_grpc=True)
        sess.run(tf.global_variables_initializer())

        run_metadata = tf.RunMetadata()
        z_value = sess.run(z, options=run_options, run_metadata=run_metadata)
        self.assertAllClose([10.0], z_value)

        graph_wrapper = debug_graphs_helper.DebugGraphWrapper(
            run_metadata.partition_graphs[0])
        gated_debug_ops = graph_wrapper.get_gated_grpc_tensors()

        # Verify that the op types are available.
        for entry in gated_debug_ops:
            self.assertTrue(entry[1])

        # Strip out the op types before further checks, because op type names
        # can change in the future (e.g., 'VariableV2' --> 'VariableV3').
        stripped = [(entry[0], entry[2], entry[3]) for entry in gated_debug_ops]

        expected_nodes = (
            'a', 'a/read',
            'b', 'b/read',
            'c', 'c/read',
            'd', 'd/read',
            'x', 'y', 'z',
        )
        for node_name in expected_nodes:
            self.assertIn((node_name, 0, 'DebugIdentity'), stripped)
def testGraphDefProperty(self):
    """`graph_def` exposes the partition graph the wrapper was built from."""
    with tf.Session() as sess:
        z, run_options = self._createTestGraphAndRunOptions(sess, gated_grpc=True)
        sess.run(tf.global_variables_initializer())

        run_metadata = tf.RunMetadata()
        z_value = sess.run(z, options=run_options, run_metadata=run_metadata)
        self.assertAllClose([10.0], z_value)

        graph_wrapper = debug_graphs_helper.DebugGraphWrapper(
            run_metadata.partition_graphs[0])
        self.assertProtoEquals(
            run_metadata.partition_graphs[0], graph_wrapper.graph_def)
def testExtractGatedGrpcTensorsFoundNoGatedGrpcOps(self):
    """No gated-gRPC tensors are reported when the graph has none."""
    with tf.Session() as sess:
        z, run_options = self._createTestGraphAndRunOptions(sess, gated_grpc=False)
        sess.run(tf.global_variables_initializer())

        run_metadata = tf.RunMetadata()
        z_value = sess.run(z, options=run_options, run_metadata=run_metadata)
        self.assertAllClose([10.0], z_value)

        graph_wrapper = debug_graphs_helper.DebugGraphWrapper(
            run_metadata.partition_graphs[0])
        self.assertEqual([], graph_wrapper.get_gated_grpc_tensors())
def generate_run(self, run_name, include_graph):
    """Create a run with a text summary, metadata, and optionally a graph.

    Args:
        run_name: Sub-directory of `self.logdir` that receives the events.
        include_graph: When true, the session graph is written as well.
    """
    tf.reset_default_graph()
    k1 = tf.constant(math.pi, name='k1')
    k2 = tf.constant(math.e, name='k2')
    result = (k1 ** k2) - k1
    expected = tf.constant(20.0, name='expected')
    error = tf.abs(result - expected, name='error')

    # Build a long message so its length exceeds the configured lower bound.
    message_prefix_value = 'error ' * 1000
    true_length = len(message_prefix_value)
    assert true_length > self._MESSAGE_PREFIX_LENGTH_LOWER_BOUND, true_length
    message_prefix = tf.constant(message_prefix_value, name='message_prefix')
    error_message = tf.string_join(
        [message_prefix, tf.as_string(error, name='error_string')],
        name='error_message')
    summary_message = tf.summary.text('summary_message', error_message)

    # The context manager closes the session, which was previously leaked.
    with tf.Session() as sess:
        writer = tf.summary.FileWriter(os.path.join(self.logdir, run_name))
        if include_graph:
            writer.add_graph(sess.graph)
        options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        s = sess.run(summary_message,
                     options=options,
                     run_metadata=run_metadata)
        writer.add_summary(s)
        writer.add_run_metadata(run_metadata, self._METADATA_TAG)
        writer.close()
def test_run_metadata(self):
    """The plugin serves run metadata as parseable protobuf text."""
    self.set_up_with_runs()
    response = self.plugin.run_metadata_impl(
        self._RUN_WITH_GRAPH, self._METADATA_TAG)
    (metadata_pbtxt, mime_type) = response
    self.assertEqual(mime_type, 'text/x-protobuf')
    # If it parses, we're happy.
    text_format.Parse(metadata_pbtxt, tf.RunMetadata())
def basic_train(loss_op, update_op, profile=0, save_dir='asset/unamed', **kwargs):
    """Build and run a training function that can dump a profiling timeline.

    Args:
        loss_op: Op whose value is returned from each training call.
        update_op: List of ops run together with `loss_op`.
        profile: Passed to `_ShouldProfile` to decide which calls to profile.
        save_dir: Directory for `timeline.json`; also forwarded to the
            decorated training function.
        **kwargs: Forwarded to the decorated training function.
    """
    profile_state = _ShouldProfile(profile)

    @stf.sg_train_func
    def train_func(sess, arg):
        profile_state.increment()

        options = None
        run_metadata = None
        if profile_state.should_profile():
            options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()

        fetched = sess.run([loss_op] + update_op,
                           options=options,
                           run_metadata=run_metadata)
        loss = fetched[0]

        if profile_state.should_profile():
            chrome_trace = tf_timeline.Timeline(
                run_metadata.step_stats).generate_chrome_trace_format()
            with open(path.join(save_dir, 'timeline.json'), 'w') as fd:
                print(chrome_trace, file=fd)
        return loss

    # run train function
    train_func(save_dir=save_dir, **kwargs)
def main(argv=None):
    """Train the generator/discriminator pair on zero-padded MNIST images.

    Every 100th iteration the ops are run with full tracing and summaries
    plus run metadata are written; every 1000th iteration a checkpoint is
    saved.

    Args:
        argv: Not used in this function.
    """
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    with tf.device('/gpu:2'):
        real_data, z, opt_g, opt_d = build_graph()
    summary_op = tf.merge_all_summaries()
    saver = tf.train.Saver()
    # Pad each 28x28 image by 2 pixels on every side.
    npad = ((0, 0), (2, 2), (2, 2))

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, sess.graph)

        for i in xrange(FLAGS.max_iter_step):
            train_img = mnist.train.next_batch(FLAGS.batch_size)[0]
            train_img = np.reshape(train_img, (-1, 28, 28))
            train_img = np.pad(train_img, pad_width=npad,
                               mode='constant', constant_values=0)
            train_img = np.expand_dims(train_img, -1)
            batch_z = np.random.normal(
                0, 1.0, [FLAGS.batch_size, FLAGS.z_dim]).astype(np.float32)
            feed_dict = {real_data: train_img, z: batch_z}

            if i % 100 != 99:
                sess.run(opt_g, feed_dict=feed_dict)
                sess.run(opt_d, feed_dict=feed_dict)
            else:
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                traced = dict(feed_dict=feed_dict,
                              options=run_options,
                              run_metadata=run_metadata)

                # Two generator updates; only the second one is recorded.
                _, merged = sess.run([opt_g, summary_op], **traced)
                _, merged = sess.run([opt_g, summary_op], **traced)
                summary_writer.add_summary(merged, i)
                summary_writer.add_run_metadata(
                    run_metadata, 'generator_metadata{}'.format(i), i)

                _, merged = sess.run([opt_d, summary_op], **traced)
                summary_writer.add_summary(merged, i)
                summary_writer.add_run_metadata(
                    run_metadata, 'discriminator_metadata{}'.format(i), i)

            if i % 1000 == 999:
                checkpoint_path = os.path.join(FLAGS.ckpt_dir, "model.ckpt")
                saver.save(sess, checkpoint_path, global_step=i)
def train(session):
    """Train for 10 epochs with full tracing, dump a profile, report accuracy.

    Every step is run with FULL_TRACE into one shared `run_metadata` object;
    the Chrome trace written to `./prof_ctf.json` is built from that object
    after training finishes.

    Args:
        session: Session used to run the module-level `train_op` and `loss`.
    """
    batch_size = 200
    session.run(tf.global_variables_initializer())

    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)  # (*)
    run_metadata = tf.RunMetadata()

    # Training cycle
    for epoch in range(10):
        epoch_loss = 0.0
        # Floor division keeps the step count an int, as range() requires.
        batch_steps = mnist.train.num_examples // batch_size
        for step in range(batch_steps):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            _, c = session.run(
                [train_op, loss],
                feed_dict={x: batch_x, y: batch_y},
                options=run_options, run_metadata=run_metadata  # (*)
            )
            epoch_loss += c / batch_steps
        print("[%s] Epoch %02d, Loss = %.6f" % (datetime.now(), epoch, epoch_loss))

    # Dump profiling data (*)
    prof_timeline = tf.python.client.timeline.Timeline(run_metadata.step_stats)
    prof_ctf = prof_timeline.generate_chrome_trace_format()
    with open('./prof_ctf.json', 'w') as fp:
        print('Dumped to prof_ctf.json')
        fp.write(prof_ctf)

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    test_accuracy = accuracy.eval({x: mnist.test.images, y: mnist.test.labels})
    print("Accuracy: %s" % (test_accuracy,))
def run_training_batch(self, session, batch):
    """Run one training step on `batch` and return its loss.

    The batch is converted to a feed dict by `self.batch_to_feed`, the
    dropout placeholder is fed 1.0, and `self.loss` is fetched together
    with `self.train_op`.

    Args:
        session: Session used to run the fetches.
        batch: Batch accepted by `self.batch_to_feed`.

    Returns:
        The fetched value of `self.loss`.
    """
    feed = self.batch_to_feed(batch)
    feed[self.use_dropout_placeholder] = 1.0

    loss_value, _ = session.run([self.loss, self.train_op], feed_dict=feed)
    return loss_value
def run_step(self):
    """ Simply run self.train_op"""
    self.sess.run(self.train_op)
    # Disabled profiling variant: runs train_op with full tracing, writes
    # the Chrome trace to 'timeline.ctf.json' and exits the process.
    #run_metadata = tf.RunMetadata()
    #self.sess.run([self.train_op],
        #options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
        #run_metadata=run_metadata
        #)
    #from tensorflow.python.client import timeline
    #trace = timeline.Timeline(step_stats=run_metadata.step_stats)
    #trace_file = open('timeline.ctf.json', 'w')
    #trace_file.write(trace.generate_chrome_trace_format())
    #import sys; sys.exit()
def trace(config, sess, model, train_data):
    """Fit one random batch with full tracing and save the timeline.

    The collected run metadata is added to the module-level `train_writer`
    under `step<step>` and the Chrome trace is written to `timeline.json`.

    Args:
        config: Provides `batch_size` for the random batch.
        sess: Not used in this function.
        model: Object whose `batch_fit` performs the traced step.
        train_data: Sequence unpacked into `random_batch`.
    """
    trace_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    metadata = tf.RunMetadata()

    X, Q, Y = random_batch(*train_data, config.batch_size)
    model.batch_fit(X, Q, Y, learning_rate, trace_options, metadata)
    train_writer.add_run_metadata(metadata, 'step%d' % step)

    from tensorflow.python.client import timeline
    chrome_trace = timeline.Timeline(
        metadata.step_stats).generate_chrome_trace_format()
    with open('timeline.json', 'w') as out:
        out.write(chrome_trace)
def benchmark_one_step(sess, fetches, step, batch_size, step_train_times,
                       trace_filename, image_producer, params,
                       summary_op=None):
    """Advance one step of benchmarking.

    Args:
        sess: Session used to run `fetches`.
        fetches: Fetches to run; the result is indexed by string keys such
            as 'total_loss'.
        step: Step index; a trace is captured only when it equals -1.
        batch_size: Forwarded to `get_perf_timing_str` for the log line.
        step_train_times: List that receives this step's wall time.
        trace_filename: Trace output path; falsy disables tracing.
        image_producer: Notified once the step has consumed its images.
        params: Provides `forward_only` and `display_every`.
        summary_op: Optional summary op fetched alongside `fetches`.

    Returns:
        The fetched summary, or None when `summary_op` is None.
    """
    tracing = bool(trace_filename and step == -1)
    if tracing:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
    else:
        run_options = None
        run_metadata = None

    summary_str = None
    start_time = time.time()
    if summary_op is not None:
        (results, summary_str) = sess.run(
            [fetches, summary_op],
            options=run_options, run_metadata=run_metadata)
    else:
        results = sess.run(fetches,
                           options=run_options,
                           run_metadata=run_metadata)

    lossval = 0. if params.forward_only else results['total_loss']
    image_producer.notify_image_consumption()
    step_train_times.append(time.time() - start_time)

    # Log on the first step and then on every `display_every`-th step.
    if step >= 0 and (step == 0 or (step + 1) % params.display_every == 0):
        log_str = '%i\t%s\t%.3f' % (
            step + 1,
            get_perf_timing_str(batch_size, step_train_times),
            lossval)
        if 'top_1_accuracy' in results:
            log_str += '\t%.3f\t%.3f' % (results['top_1_accuracy'],
                                         results['top_5_accuracy'])
        log_fn(log_str)

    if tracing:
        log_fn('Dumping trace to %s' % trace_filename)
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        with gfile.Open(trace_filename, 'w') as trace_file:
            trace_file.write(
                trace.generate_chrome_trace_format(show_memory=True))
    return summary_str
def train(self, nIter, machine=None, summary_op=None):
    """Alternate discriminator and generator updates under a Supervisor.

    For each of `self.arch['training']['max_iter']` steps (or until the
    supervisor asks to stop), runs `self.opt['d']`
    `self.arch['training']['nIterD']` times and then `self.opt['g']` once.

    NOTE(review): `nIter`, `machine`, `summary_op` and the local
    `run_metadata` are not used in this variant.
    """
    # Xh = self._validate(machine=machine, n=10)
    run_metadata = tf.RunMetadata()

    # summary_op = tf.summary.merge_all()
    sv = tf.train.Supervisor(
        logdir=self.dirs['logdir'],
        # summary_writer=summary_writer,
        # summary_op=None,
        # is_chief=True,
        # save_model_secs=600,
        global_step=self.opt['global_step'])

    # sess_config = configure_gpu_settings(args.gpu_cfg)
    sess_config = tf.ConfigProto(
        allow_soft_placement=True,
        gpu_options=tf.GPUOptions(allow_growth=True))

    with sv.managed_session(config=sess_config) as sess:
        # Have the supervisor call self._refresh_status(sess) on a 60-unit timer.
        sv.loop(60, self._refresh_status, (sess,))
        for step in range(self.arch['training']['max_iter']):
            if sv.should_stop():
                break

            # main loop
            for _ in range(self.arch['training']['nIterD']):
                sess.run(self.opt['d'])
            sess.run(self.opt['g'])

            # # output img
            # if step % 1000 == 0:
            #     xh = sess.run(Xh)
            #     with tf.gfile.GFile(
            #         os.path.join(
            #             self.dirs['logdir'],
            #             'img-anime-{:03d}k.png'.format(step // 1000),
            #         ),
            #         mode='wb',
            #     ) as fp:
            #         fp.write(xh)
def train(self, nIter, machine=None, summary_op=None):
    """Alternate D/G updates and periodically write a validation image.

    Each step runs `self.opt['d']` `self.arch['training']['nIterD']` times
    followed by one run of `self.opt['g']`. Every 1000 steps the tensor
    returned by `self._validate` is evaluated and its value written to
    `img-anime-<step // 1000>k.png` in the log directory.

    Args:
        nIter: Not used in this function.
        machine: Forwarded to `self._validate`.
        summary_op: Not used in this function.
    """
    Xh = self._validate(machine=machine, n=10)
    run_metadata = tf.RunMetadata()  # not used below

    supervisor = tf.train.Supervisor(
        logdir=self.dirs['logdir'],
        global_step=self.opt['global_step'])

    gpu_options = tf.GPUOptions(allow_growth=True)
    sess_config = tf.ConfigProto(
        allow_soft_placement=True,
        gpu_options=gpu_options)

    with supervisor.managed_session(config=sess_config) as sess:
        supervisor.loop(60, self._refresh_status, (sess,))
        for step in range(self.arch['training']['max_iter']):
            if supervisor.should_stop():
                break

            # main loop: several discriminator updates, then one generator update
            for _ in range(self.arch['training']['nIterD']):
                sess.run(self.opt['d'])
            sess.run(self.opt['g'])

            # output img
            if step % 1000 != 0:
                continue
            rendered = sess.run(Xh)
            image_path = os.path.join(
                self.dirs['logdir'],
                'img-anime-{:03d}k.png'.format(step // 1000))
            with tf.gfile.GFile(image_path, mode='wb') as out:
                out.write(rendered)
def load_data(self, sess, inputs, full_trace=False):
    """Bulk loads the specified inputs into device memory.

    The shape of the inputs must conform to the shapes of the input
    placeholders this optimizer was constructed with.

    The data is split equally across all the devices. If the data is not
    evenly divisible by the batch size, excess data will be discarded.

    Args:
        sess: TensorFlow session.
        inputs: List of Tensors matching the input placeholders specified
            at construction time of this optimizer.
        full_trace: Whether to profile data loading.

    Returns:
        The number of tuples loaded per device (an integer).
    """
    feed_dict = {}
    assert len(self.input_placeholders) == len(inputs)
    for ph, arr in zip(self.input_placeholders, inputs):
        truncated_arr = make_divisible_by(arr, self.batch_size)
        feed_dict[ph] = truncated_arr
        truncated_len = len(truncated_arr)

    if full_trace:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    else:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.NO_TRACE)
    run_metadata = tf.RunMetadata()

    sess.run(
        [t.init_op for t in self._towers],
        feed_dict=feed_dict,
        options=run_options,
        run_metadata=run_metadata)

    if full_trace:
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        # Close the trace file deterministically instead of leaking it.
        trace_path = os.path.join(self.logdir, "timeline-load.json")
        with open(trace_path, "w") as trace_file:
            trace_file.write(trace.generate_chrome_trace_format())

    # Floor division keeps the count an integer on Python 3 too, so the
    # "> 0" check below is not satisfied by a fractional value.
    tuples_per_device = truncated_len // len(self.devices)
    assert tuples_per_device > 0, \
        "Too few tuples per batch, trying increasing the training " \
        "batch size or decreasing the sgd batch size. Tried to split up " \
        "{} rows {}-ways in batches of {} (total across devices).".format(
            len(arr), len(self.devices), self.batch_size)
    assert tuples_per_device % self.per_device_batch_size == 0
    return tuples_per_device
def optimize(self, sess, batch_index, extra_ops=[], extra_feed_dict={},
             file_writer=None):
    """Run a single step of SGD.

    Runs a SGD step over a slice of the preloaded batch with size given by
    self.per_device_batch_size and offset given by the batch_index argument.

    Updates shared model weights based on the averaged per-device gradients.

    Args:
        sess: TensorFlow session.
        batch_index: Offset into the preloaded data. This value must be
            between `0` and `tuples_per_device`. The amount of data to
            process is always fixed to `per_device_batch_size`.
        extra_ops: Extra ops to run with this step (e.g. for metrics).
        extra_feed_dict: Extra args to feed into this session run.
        file_writer: If specified, tf metrics will be written out using this.

    Returns:
        The outputs of extra_ops evaluated over the batch.
    """
    # NOTE: the mutable defaults are only read here (concatenated / copied
    # from), never mutated, so they are kept for interface compatibility.
    if file_writer:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    else:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.NO_TRACE)
    run_metadata = tf.RunMetadata()

    feed_dict = {self._batch_index: batch_index}
    feed_dict.update(extra_feed_dict)
    outs = sess.run(
        [self._train_op] + extra_ops,
        feed_dict=feed_dict,
        options=run_options,
        run_metadata=run_metadata)

    if file_writer:
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        # Close the trace file deterministically instead of leaking it.
        trace_path = os.path.join(self.logdir, "timeline-sgd.json")
        with open(trace_path, "w") as trace_file:
            trace_file.write(trace.generate_chrome_trace_format())
        file_writer.add_run_metadata(
            run_metadata, "sgd_train_{}".format(batch_index))

    # Drop the train op's own output; callers only want the extra ops.
    return outs[1:]
def main():
    """Train the generator/discriminator pair on labelled, padded MNIST.

    Each iteration runs the generator op twice and the discriminator op
    once. Every 100th iteration the runs are fully traced and summaries plus
    run metadata are written; every 1000th iteration a checkpoint is saved.
    """
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    with tf.device('/gpu:1'):
        g_loss_sum, d_loss_sum, img_sum, opt_g, opt_d, z, real_data = build_graph()
    summary_g = tf.merge_summary([g_loss_sum, img_sum])
    summary_d = tf.merge_summary([d_loss_sum, img_sum])
    saver = tf.train.Saver()
    # Pad each 28x28 image by 2 pixels on every side.
    npad = ((0, 0), (2, 2), (2, 2))
    with tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True)) as sess:
        sess.run(tf.initialize_all_variables())
        summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, sess.graph)
        for i in xrange(FLAGS.max_iter_step):
            train_data = mnist.train.next_batch(FLAGS.batch_size)
            train_img = np.reshape(train_data[0], (-1, 28, 28))
            train_img = np.pad(train_img, pad_width=npad,
                               mode='constant', constant_values=0)
            train_img = np.expand_dims(train_img, -1)
            batch_z = np.random.uniform(-1, 1, [FLAGS.batch_size, FLAGS.z_dim]) \
                .astype(np.float32)
            # real_data[0] receives the images, real_data[1] the labels.
            feed_dict = {real_data[0]: train_img, z: batch_z, real_data[1]:train_data[1]}
            if i % 100 == 99:
                # Traced iteration: every run is recorded with its own tag.
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                _, merged = sess.run([opt_g, summary_g], feed_dict=feed_dict,
                                     options=run_options, run_metadata=run_metadata)
                summary_writer.add_summary(merged, i)
                summary_writer.add_run_metadata(
                    run_metadata, 'generator_metadata {}'.format(i), i)
                _, merged = sess.run([opt_g, summary_g], feed_dict=feed_dict,
                                     options=run_options, run_metadata=run_metadata)
                summary_writer.add_summary(merged, i)
                summary_writer.add_run_metadata(
                    run_metadata, 'second_generator_metadata {}'.format(i), i)
                _, merged = sess.run([opt_d, summary_d], feed_dict=feed_dict,
                                     options=run_options, run_metadata=run_metadata)
                summary_writer.add_summary(merged, i)
                summary_writer.add_run_metadata(
                    run_metadata, 'discriminator_metadata {}'.format(i), i)
            else:
                # Plain iteration: same op sequence, no tracing or summaries.
                sess.run(opt_g, feed_dict=feed_dict)
                sess.run(opt_g, feed_dict=feed_dict)
                sess.run(opt_d, feed_dict=feed_dict)
            if i % 1000 == 999:
                saver.save(sess, os.path.join(
                    FLAGS.ckpt_dir, "model.ckpt"), global_step=i)
def train():
    """Train the ABC-CNN model, logging traced summaries every iteration.

    Each iteration runs the optimizer on one training batch, re-evaluates
    summary/loss/accuracy on that batch with full tracing, evaluates
    loss/accuracy on one validation batch, and writes the run metadata and
    summary through the writer returned by `abc.write_tensorboard`.
    """
    batch_size = 10

    print("Starting ABC-CNN training")
    vqa = dl.load_questions_answers('data')

    # Create subset of data for over-fitting
    # NOTE(review): sub_vqa is built but the loaders below use the full `vqa`.
    sub_vqa = {}
    sub_vqa['training'] = vqa['training'][:10]
    sub_vqa['validation'] = vqa['validation'][:10]
    sub_vqa['answer_vocab'] = vqa['answer_vocab']
    sub_vqa['question_vocab'] = vqa['question_vocab']
    sub_vqa['max_question_length'] = vqa['max_question_length']

    train_size = len(vqa['training'])
    max_itr = (train_size // batch_size) * 10

    with tf.Session() as sess:
        image, ques, ans, optimizer, loss, accuracy = abc.model(sess, batch_size)
        print("Defined ABC model")

        train_loader = util.get_batch(sess, vqa, batch_size, 'training')
        print("Created train dataset generator")

        valid_loader = util.get_batch(sess, vqa, batch_size, 'validation')
        print("Created validation dataset generator")

        writer = abc.write_tensorboard(sess)
        init = tf.global_variables_initializer()
        merged = tf.summary.merge_all()
        sess.run(init)
        print("Initialized Tensor variables")

        itr = 1
        while itr < max_itr:
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()

            # next() works with both Python 2 and Python 3 iterators.
            _, vgg_batch, ques_batch, answer_batch = next(train_loader)
            _, valid_vgg_batch, valid_ques_batch, valid_answer_batch = next(valid_loader)

            sess.run(optimizer,
                     feed_dict={image: vgg_batch, ques: ques_batch, ans: answer_batch})
            [train_summary, train_loss, train_accuracy] = sess.run(
                [merged, loss, accuracy],
                feed_dict={image: vgg_batch, ques: ques_batch, ans: answer_batch},
                options=run_options, run_metadata=run_metadata)
            [valid_loss, valid_accuracy] = sess.run(
                [loss, accuracy],
                feed_dict={image: valid_vgg_batch,
                           ques: valid_ques_batch,
                           ans: valid_answer_batch})

            writer.add_run_metadata(run_metadata, 'step%03d' % itr)
            writer.add_summary(train_summary, itr)
            writer.flush()

            print("Iteration:%d\tTraining Loss:%f\tTraining Accuracy:%f\tValidation Loss:%f\tValidation Accuracy:%f"%(
                itr, train_loss, 100.*train_accuracy, valid_loss, 100.*valid_accuracy))
            itr += 1
def train_step(sess, train_op, global_step, train_step_kwargs):
    """Take one gradient step and report whether training should stop.

    Args:
        sess: The current session.
        train_op: When `FLAGS.iter_size` is 1, run directly as the train op;
            otherwise indexed by sub-iteration, with the last entry giving
            the total loss.
        global_step: A `Tensor` representing the global training step.
        train_step_kwargs: A dictionary of keyword arguments; the optional
            'should_log' and 'should_stop' entries are run in the session.

    Returns:
        The total loss and a boolean indicating whether or not to stop
        training.
    """
    start_time = time.time()
    iter_size = FLAGS.iter_size

    if iter_size != 1:
        # Run all but the last sub-iteration, then fetch loss and step.
        for j in range(iter_size-1):
            sess.run([train_op[j]])
        total_loss, np_global_step = sess.run(
            [train_op[iter_size-1], global_step])
    elif not FLAGS.profile_iterations:
        total_loss, np_global_step = sess.run([train_op, global_step])
    else:
        # Profiled step: also dump a Chrome trace named after the step.
        run_options = tf.RunOptions(
            trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        total_loss, np_global_step = sess.run([train_op, global_step],
                                              options=run_options,
                                              run_metadata=run_metadata)
        chrome_trace = timeline.Timeline(
            run_metadata.step_stats).generate_chrome_trace_format()
        trace_path = os.path.join(FLAGS.train_dir,
                                  'timeline_%08d.json' % np_global_step)
        with open(trace_path, 'w') as f:
            f.write(chrome_trace)

    time_elapsed = time.time() - start_time

    if 'should_log' in train_step_kwargs:
        if sess.run(train_step_kwargs['should_log']):
            logging.info('%s: global step %d: loss = %.4f (%.2f sec)',
                         datetime.now(), np_global_step, total_loss,
                         time_elapsed)

    should_stop = False
    if 'should_stop' in train_step_kwargs:
        should_stop = sess.run(train_step_kwargs['should_stop'])
    return total_loss, should_stop