The following 10 code examples, extracted from open-source Python projects, illustrate how to use utils.TextLoader().
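Across these examples, the most common pattern is the char-rnn style loader: construct TextLoader with a data directory, batch size, and sequence length; it builds the vocabulary and then serves (input, target) batches for language-model training. The sketch below shows that minimal flow. It is illustrative only: the data path is made up, and it assumes the loader exposes vocab_size, num_batches, reset_batch_pointer(), and next_batch(), as in most of the examples that follow (some projects instead construct it as TextLoader(args, train=False)).

```python
# Minimal sketch of the common utils.TextLoader usage pattern (assumed API, illustrative path).
from utils import TextLoader

data_loader = TextLoader('data/tinyshakespeare', batch_size=50, seq_length=50)
print("vocabulary size:", data_loader.vocab_size)     # vocabulary built from the input text
print("batches per epoch:", data_loader.num_batches)

data_loader.reset_batch_pointer()   # rewind to the first batch of the epoch
x, y = data_loader.next_batch()     # integer arrays of shape (batch_size, seq_length)
```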
```python
def main(_):
    # Sample from a trained CharRNN: rebuild the vocabulary with TextLoader,
    # restore the latest checkpoint, and generate 100 characters from the
    # prime string given on the command line.
    if len(sys.argv) < 2:
        print("Please enter a prime")
        sys.exit()
    prime = sys.argv[1]
    prime = prime.decode('utf-8')

    with open("./log/hyperparams.pkl", 'rb') as f:
        config = cPickle.load(f)

    if not os.path.exists(config['checkpoint_dir']):
        print(" [*] Creating checkpoint directory...")
        os.makedirs(config['checkpoint_dir'])

    data_loader = TextLoader(os.path.join(config['data_dir'], config['dataset_name']),
                             config['batch_size'], config['seq_length'])
    vocab_size = data_loader.vocab_size

    with tf.variable_scope('model'):
        model = CharRNN(vocab_size, 1, config['rnn_size'], config['layer_depth'],
                        config['num_units'], 1, config['keep_prob'],
                        config['grad_clip'], is_training=False)

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(config['checkpoint_dir'] + '/' + config['dataset_name'])
        tf.train.Saver().restore(sess, ckpt.model_checkpoint_path)
        res = model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 100, prime)
        print(res)
```
```python
def test(test_args):
    # Evaluate a trained word-level language model: TextLoader is rebuilt from
    # the saved config and reads the held-out test file.
    start = time.time()
    with open(os.path.join(test_args.save_dir, 'config.pkl')) as f:
        args = cPickle.load(f)
    data_loader = TextLoader(args, train=False)
    test_data = data_loader.read_dataset(test_args.test_file)
    args.word_vocab_size = data_loader.word_vocab_size
    print "Word vocab size: " + str(data_loader.word_vocab_size) + "\n"

    # Model
    lm_model = WordLM

    print "Begin testing..."
    # If using gpu:
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    # gpu_config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    # add parameters to the tf session -> tf.Session(config=gpu_config)
    with tf.Graph().as_default(), tf.Session() as sess:
        initializer = tf.random_uniform_initializer(-args.init_scale, args.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            mtest = lm_model(args, is_training=False, is_testing=True)

        # save only the last model
        saver = tf.train.Saver(tf.all_variables())
        tf.initialize_all_variables().run()
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        test_perplexity = run_epoch(sess, mtest, test_data, data_loader, tf.no_op())
        print("Test Perplexity: %.3f" % test_perplexity)
        print("Test time: %.0f" % (time.time() - start))
```
```python
def setUp(self):
    self.data_loader = TextLoader("tests/test_data", batch_size=2, seq_length=5)
```
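A setUp like this is normally followed by test methods that exercise the loader. The method below is a hypothetical companion, not part of the original test suite; it assumes next_batch() returns integer arrays of shape (batch_size, seq_length), as in the char-rnn style loader used elsewhere on this page.

```python
# Hypothetical test method (not from the original project): checks that batches
# come back with the configured dimensions.
def test_batch_shapes(self):
    self.data_loader.reset_batch_pointer()
    x, y = self.data_loader.next_batch()
    self.assertEqual(x.shape, (2, 5))   # (batch_size, seq_length)
    self.assertEqual(y.shape, (2, 5))   # targets are the inputs shifted by one step
```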
```python
def train(args):
    # Train a character-level Model: TextLoader supplies the vocabulary and batches;
    # losses are logged to CSV and checkpoints saved every args.save_every batches.
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        train_loss_iterations = {'iteration': [], 'epoch': [], 'train_loss': [], 'val_loss': []}

        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)

            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()

                batch_idx = e * data_loader.num_batches + b
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                      .format(batch_idx, args.num_epochs * data_loader.num_batches,
                              e, train_loss, end - start))
                train_loss_iterations['iteration'].append(batch_idx)
                train_loss_iterations['epoch'].append(e)
                train_loss_iterations['train_loss'].append(train_loss)

                if batch_idx % args.save_every == 0:
                    # evaluate on the validation batches (cost only, no optimizer step)
                    state_val = sess.run(model.initial_state)
                    avg_val_loss = 0
                    for x_val, y_val in data_loader.val_batches:
                        feed_val = {model.input_data: x_val, model.targets: y_val,
                                    model.initial_state: state_val}
                        val_loss, state_val = sess.run([model.cost, model.final_state], feed_val)
                        avg_val_loss += val_loss / len(list(data_loader.val_batches))
                    print('val_loss: {:.3f}'.format(avg_val_loss))
                    train_loss_iterations['val_loss'].append(avg_val_loss)

                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
                else:
                    train_loss_iterations['val_loss'].append(None)

                pd.DataFrame(data=train_loss_iterations,
                             columns=train_loss_iterations.keys()).to_csv(os.path.join(args.save_dir, 'log.csv'))
```
```python
def test(test_args):
    start = time.time()
    with open(os.path.join(test_args.save_dir, 'config.pkl'), 'rb') as f:
        args = pickle.load(f)
    args.save_dir = test_args.save_dir
    data_loader = TextLoader(args, train=False)
    test_data = data_loader.read_dataset(test_args.test_file)

    print(args.save_dir)
    print("Unit: " + args.unit)
    print("Composition: " + args.composition)

    args.word_vocab_size = data_loader.word_vocab_size
    if args.unit != "word":
        args.subword_vocab_size = data_loader.subword_vocab_size

    # Statistics of words
    print("Word vocab size: " + str(data_loader.word_vocab_size))
    # Statistics of sub units
    if args.unit != "word":
        print("Subword vocab size: " + str(data_loader.subword_vocab_size))

    if args.composition == "bi-lstm":
        if args.unit == "char":
            args.bilstm_num_steps = data_loader.max_word_len
            print("Max word length:", data_loader.max_word_len)
        elif args.unit == "char-ngram":
            args.bilstm_num_steps = data_loader.max_ngram_per_word
            print("Max ngrams per word:", data_loader.max_ngram_per_word)
        elif args.unit == "morpheme" or args.unit == "oracle":
            args.bilstm_num_steps = data_loader.max_morph_per_word
            print("Max morphemes per word", data_loader.max_morph_per_word)

    if args.unit == "word":
        lm_model = WordModel
    elif args.composition == "addition":
        lm_model = AdditiveModel
    elif args.composition == "bi-lstm":
        lm_model = BiLSTMModel
    else:
        sys.exit("Unknown unit or composition.")

    print("Begin testing...")
    with tf.Graph().as_default(), tf.Session() as sess:
        with tf.variable_scope("model"):
            mtest = lm_model(args, is_training=False, is_testing=True)

        # save only the last model
        saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)
        tf.initialize_all_variables().run()
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        test_perplexity = run_epoch(sess, mtest, test_data, data_loader, tf.no_op())
        print("Test Perplexity: %.3f" % test_perplexity)
        print("Test time: %.0f\n" % (time.time() - start))
    print("\n")
```
```python
def train(args):
    # Word-level training loop: TextLoader provides the vocabulary and batches;
    # training can resume from a previously saved model via args.init_from.
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from), "%s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "config.pkl")), \
            "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "words_vocab.pkl")), \
            "words_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(args)[checkme], \
                "Command line argument and saved model disagree on '%s'" % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = cPickle.load(f)
        assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)

        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)

            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                      .format(e * data_loader.num_batches + b,
                              args.num_epochs * data_loader.num_batches,
                              e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e == args.num_epochs - 1 and b == data_loader.num_batches - 1):
                    # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
```
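A train() function like the one above is typically driven by an argparse namespace. The wrapper below is a hypothetical sketch, not taken from any of the projects: the flag names mirror the args.* attributes the last example reads (data_dir, batch_size, seq_length, save_dir, num_epochs, learning_rate, decay_rate, save_every, init_from, plus the model/rnn_size/num_layers fields it checks for compatibility), while the default values are purely illustrative.

```python
# Hypothetical command-line wrapper for the word-level train() above.
# Flag names follow the attributes the function reads; defaults are illustrative.
import argparse

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='data/tinyshakespeare')
    parser.add_argument('--save_dir', type=str, default='save')
    parser.add_argument('--model', type=str, default='lstm')
    parser.add_argument('--rnn_size', type=int, default=128)
    parser.add_argument('--num_layers', type=int, default=2)
    parser.add_argument('--batch_size', type=int, default=50)
    parser.add_argument('--seq_length', type=int, default=25)
    parser.add_argument('--num_epochs', type=int, default=50)
    parser.add_argument('--learning_rate', type=float, default=0.002)
    parser.add_argument('--decay_rate', type=float, default=0.97)
    parser.add_argument('--save_every', type=int, default=1000)
    parser.add_argument('--init_from', type=str, default=None,
                        help='directory of a previous run to continue training from')
    args = parser.parse_args()
    train(args)

if __name__ == '__main__':
    main()
```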