The following code examples, extracted from open-source Python projects, illustrate how to use data_utils.prepare_custom_data().
Example 1

import math
import os
import sys
import time

import numpy as np
import tensorflow as tf  # TensorFlow 1.x API (tf.Session, tf.ConfigProto)
from six.moves import xrange

import data_utils

# working_directory, train_enc, train_dec, enc_vocab_size, dec_vocab_size,
# num_layers, layer_size, max_train_data_size, steps_per_checkpoint, _buckets,
# create_model and read_data are module-level names defined elsewhere in the
# project this excerpt was taken from.

def train():
    # Prepare the dataset: tokenize the raw files and build the vocabularies.
    print("Starting to train from " + working_directory)
    enc_train, dec_train, _, _ = data_utils.prepare_custom_data(
        working_directory, train_enc, train_dec, enc_vocab_size, dec_vocab_size)

    # Cap per-process GPU memory and use the best-fit-with-coalescing allocator.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.666)
    config = tf.ConfigProto(gpu_options=gpu_options)
    config.gpu_options.allocator_type = 'BFC'

    with tf.Session(config=config) as sess:
        print("Creating model with %d layers and %d cells." % (num_layers, layer_size))
        model = create_model(sess, False)

        # Read the training data into buckets and compute each bucket's
        # cumulative share of the total, so buckets can be sampled
        # proportionally to their size.
        train_set = read_data(enc_train, dec_train, max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            # Pick a bucket at random, weighted by bucket size.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Run one training step on a batch drawn from that bucket.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / steps_per_checkpoint
            loss += step_loss / steps_per_checkpoint
            current_step += 1

            if current_step % steps_per_checkpoint == 0:
                #perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("Saved model at step %d with time %.2f " %
                      (model.global_step.eval(), step_time))
                # Decay the learning rate if the loss has stopped improving.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                checkpoint_path = os.path.join(working_directory, "seq2seq.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                sys.stdout.flush()
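The bucket-selection logic in the loop above is self-contained and worth seeing in isolation: a cumulative-fraction table over the bucket sizes turns a single uniform draw into a sample weighted by how much training data each bucket holds. A minimal runnable sketch, using hypothetical bucket sizes:

import numpy as np

train_bucket_sizes = [120, 300, 450, 130]  # hypothetical pair counts per bucket
train_total_size = float(sum(train_bucket_sizes))

# Cumulative fraction of the data covered by buckets 0..i; the last entry is 1.0.
train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                       for i in range(len(train_bucket_sizes))]

# A uniform draw in [0, 1) lands in bucket i with probability
# train_bucket_sizes[i] / train_total_size.
random_number_01 = np.random.random_sample()
bucket_id = min(i for i in range(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01)
print("sampled bucket:", bucket_id)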
Example 2

This variant is nearly identical, but it also prints a running step counter and reports perplexity (the exponential of the average per-step loss) at each checkpoint instead of the step time.

# Same imports and module-level globals as Example 1.

def train():
    # Prepare the dataset: tokenize the raw files and build the vocabularies.
    print("Starting to train from " + working_directory)
    enc_train, dec_train, _, _ = data_utils.prepare_custom_data(
        working_directory, train_enc, train_dec, enc_vocab_size, dec_vocab_size)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.666)
    config = tf.ConfigProto(gpu_options=gpu_options)
    config.gpu_options.allocator_type = 'BFC'

    with tf.Session(config=config) as sess:
        print("Creating model with %d layers and %d cells." % (num_layers, layer_size))
        model = create_model(sess, False)

        train_set = read_data(enc_train, dec_train, max_train_data_size)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        count = 0
        while True:
            count += 1
            print('Step: ' + str(count))

            # Pick a bucket at random, weighted by bucket size.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / steps_per_checkpoint
            loss += step_loss / steps_per_checkpoint
            current_step += 1

            if current_step % steps_per_checkpoint == 0:
                # Report perplexity, guarding against overflow in math.exp.
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("Saved model at step %d with perplexity %.2f " %
                      (model.global_step.eval(), perplexity))
                # Decay the learning rate if the loss has stopped improving.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                checkpoint_path = os.path.join(working_directory, "seq2seq.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                sys.stdout.flush()
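Both examples unpack four return values from data_utils.prepare_custom_data() and discard the last two. Judging from the call sites, the function takes a working directory, the raw encoder- and decoder-side training files, and the two vocabulary sizes, and returns the paths to the token-ID versions of the training files; the last two return values are presumably vocabulary paths, but that is an assumption the excerpts do not confirm. A hypothetical call site with placeholder paths and sizes:

import data_utils

working_directory = "working_dir/"  # hypothetical: where ID files and vocabularies are written
train_enc = "data/train.enc"        # hypothetical: raw encoder-side text, one example per line
train_dec = "data/train.dec"        # hypothetical: raw decoder-side text, one example per line

# The names of the last two return values are assumptions; the examples above
# discard them with `_, _`.
enc_train, dec_train, enc_vocab_path, dec_vocab_path = data_utils.prepare_custom_data(
    working_directory, train_enc, train_dec, 20000, 20000)

print("token-ID training files:", enc_train, dec_train)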