The following 50 code examples, extracted from open-source Python projects, illustrate how to use data_utils.sentence_to_token_ids().
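For reference, sentence_to_token_ids() maps a raw sentence to a list of integer ids by tokenizing it and looking each token up in a vocabulary dict, falling back to an unknown-word id. Below is a minimal sketch of the tutorial-style implementation, simplified to operate on str rather than bytes; the constants, the digit regex, and basic_tokenizer here are illustrative, not the exact library code:

import re

# Assumed tutorial-style special ids: PAD=0, GO=1, EOS=2, UNK=3.
UNK_ID = 3
_DIGIT_RE = re.compile(r"\d")

def basic_tokenizer(sentence):
  # Very simple whitespace tokenizer; the real helper also splits punctuation.
  return sentence.strip().split()

def sentence_to_token_ids(sentence, vocabulary, tokenizer=None, normalize_digits=True):
  """Convert a sentence into a list of integer token ids using `vocabulary`."""
  words = tokenizer(sentence) if tokenizer else basic_tokenizer(sentence)
  if not normalize_digits:
    return [vocabulary.get(w, UNK_ID) for w in words]
  # Map every digit to "0" so that numbers share a few vocabulary entries.
  return [vocabulary.get(_DIGIT_RE.sub("0", w), UNK_ID) for w in words]

For example, sentence_to_token_ids("how are you", {"how": 4, "are": 5, "you": 6}) returns [4, 5, 6], and any out-of-vocabulary word becomes UNK_ID. The original tutorial implementation operates on byte strings, which is why several examples below wrap the input in tf.compat.as_bytes(sentence).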
def decode_line(sess, model, enc_vocab, rev_dec_vocab, sentence):
  # Get token-ids for the input sentence.
  token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), enc_vocab)
  # Which bucket does it belong to?
  bucket_id = min([b for b in xrange(len(_buckets)) if _buckets[b][0] > len(token_ids)])
  # Get a 1-element batch to feed the sentence to the model.
  encoder_inputs, decoder_inputs, target_weights = model.get_batch(
      {bucket_id: [(token_ids, [])]}, bucket_id)
  # Get output logits for the sentence.
  _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                   target_weights, bucket_id, True)
  # This is a greedy decoder - outputs are just argmaxes of output_logits.
  outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
  # If there is an EOS symbol in outputs, cut them at that point.
  if data_utils.EOS_ID in outputs:
    outputs = outputs[:outputs.index(data_utils.EOS_ID)]
  return " ".join([tf.compat.as_str(rev_dec_vocab[output]) for output in outputs])
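One caveat with the bucket lookup in this example: min() over an empty list raises ValueError, which happens whenever the sentence is longer than the largest bucket's encoder size. The examples below handle this in different ways (a try/except that skips the sentence, appending len(_buckets) - 1 as a fallback, or a for/else loop that logs a truncation warning). A small sketch of the fallback variant, with hypothetical bucket shapes:

_buckets = [(5, 10), (10, 15), (20, 25), (40, 50)]  # hypothetical (encoder, decoder) sizes

def pick_bucket(token_ids, buckets=_buckets):
  """Smallest bucket whose encoder side fits the sentence, else the last bucket."""
  candidates = [b for b in range(len(buckets)) if buckets[b][0] > len(token_ids)]
  return min(candidates) if candidates else len(buckets) - 1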
def decode_line(sess, model, enc_vocab, rev_dec_vocab, sentence):
  # Get token-ids for the input sentence.
  #print(sentence)
  #token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), enc_vocab)
  token_ids = data_utils.sentence_to_token_ids(sentence, enc_vocab)
  print(token_ids)
  # Which bucket does it belong to?
  bucket_id = min([b for b in xrange(len(_buckets)) if _buckets[b][0] > len(token_ids)])
  # Get a 1-element batch to feed the sentence to the model.
  encoder_inputs, decoder_inputs, target_weights = model.get_batch(
      {bucket_id: [(token_ids, [])]}, bucket_id)
  _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                   target_weights, bucket_id, True)
  # This is a greedy decoder - outputs are just argmaxes of output_logits.
  outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
  # If there is an EOS symbol in outputs, cut them at that point.
  if data_utils.EOS_ID in outputs:
    outputs = outputs[:outputs.index(data_utils.EOS_ID)]
  return "".join([tf.compat.as_str(rev_dec_vocab[output]) for output in outputs])
def run(self, sentence):
  # Get token-ids for the input sentence.
  token_ids = data_utils.sentence_to_token_ids(sentence, self.en_vocab)
  # Which bucket does it belong to?
  bucket_id = min([b for b in xrange(len(_buckets)) if _buckets[b][0] > len(token_ids)])
  # Get a 1-element batch to feed the sentence to the model.
  encoder_inputs, decoder_inputs, target_weights = self.model.get_batch(
      {bucket_id: [(token_ids, [])]}, bucket_id)
  # Get output logits for the sentence.
  _, _, output_logits = self.model.step(self.sess, encoder_inputs, decoder_inputs,
                                        target_weights, bucket_id, True)
  # This is a greedy decoder - outputs are just argmaxes of output_logits.
  outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
  # If there is an EOS symbol in outputs, cut them at that point.
  if data_utils.EOS_ID in outputs:
    outputs = outputs[:outputs.index(data_utils.EOS_ID)]
  # Print out French sentence corresponding to outputs.
  return "".join([self.rev_fr_vocab[output] for output in outputs])
def get_predicted_sentence(input_sentence, vocab, rev_vocab, model, sess):
  input_token_ids = data_utils.sentence_to_token_ids(input_sentence, vocab)
  print(input_token_ids)
  # Which bucket does it belong to?
  if len(input_token_ids) >= BUCKETS[-1][0]:
    input_token_ids = input_token_ids[:BUCKETS[-1][0] - 1]
  bucket_id = min([b for b in xrange(len(BUCKETS)) if BUCKETS[b][0] > len(input_token_ids)])
  outputs = []
  feed_data = {bucket_id: [(input_token_ids, outputs)]}
  # Get a 1-element batch to feed the sentence to the model.
  encoder_inputs, decoder_inputs, target_weights = model.get_batch(feed_data, bucket_id)
  global_memory['inp'] = 1
  # Get output logits for the sentence.
  _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs, target_weights,
                                   bucket_id, forward_only=True, beam_search=True)
  #print('global_output:')
  #print(global_output)
  outputs = []
  # This is a greedy decoder - outputs are just argmaxes of output_logits.
  for logit in output_logits:
    selected_token_id = int(np.argmax(logit, axis=1))
    if selected_token_id == data_utils.EOS_ID:
      break
    else:
      outputs.append(selected_token_id)
  # Forming output sentence on natural language
  outputs = ' '.join([rev_vocab[i] for i in outputs])
  return outputs
def decode():
  with tf.Session() as sess:
    # Create model and load parameters.
    model = create_model(sess, True)
    model.batch_size = 1  # We decode one sentence at a time.

    # Load vocabularies.
    src_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.source" % FLAGS.src_vocab_size)
    tar_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.target" % FLAGS.tar_vocab_size)
    src_vocab, _ = data_utils.initialize_vocabulary(src_vocab_path)
    _, rev_tar_vocab = data_utils.initialize_vocabulary(tar_vocab_path)

    # Decode from standard input.
    sys.stdout.write("> ")
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    tokenizer = data_utils.basic_char_tokenizer if FLAGS.char else None
    while sentence:
      # Get token-ids for the input sentence.
      token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), src_vocab, tokenizer)
      # Which bucket does it belong to?
      bucket_id = min([b for b in xrange(len(_buckets)) if _buckets[b][0] > len(token_ids)])
      # Get a 1-element batch to feed the sentence to the model.
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          {bucket_id: [(token_ids, [])]}, bucket_id)
      # Get output logits for the sentence.
      _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                       target_weights, bucket_id, True)
      # This is a greedy decoder - outputs are just argmaxes of output_logits.
      outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
      # If there is an EOS symbol in outputs, cut them at that point.
      if data_utils.EOS_ID in outputs:
        outputs = outputs[:outputs.index(data_utils.EOS_ID)]
      # Print out target-text sentence corresponding to outputs.
      print(("" if FLAGS.char else " ").join([tf.compat.as_str(rev_tar_vocab[output]) for output in outputs]))
      print("> ", end="")
      sys.stdout.flush()
      sentence = sys.stdin.readline()
def decode():
  with tf.Session() as sess:
    # Create model and load parameters.
    model = create_model(sess, True)
    model.batch_size = 1  # We decode one sentence at a time.

    # Load vocabularies.
    _, _, _, _, en_vocab_path, fr_vocab_path = data_utils.prepare_my_data(
        FLAGS.data_dir, FLAGS.en_vocab_size, FLAGS.fr_vocab_size)
    en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)
    _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path)

    # Decode from standard input.
    sys.stdout.write("> ")
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    while sentence:
      # Get token-ids for the input sentence.
      token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), en_vocab)
      # Which bucket does it belong to?
      bucket_id = min([b for b in xrange(len(_buckets)) if _buckets[b][0] > len(token_ids)])
      # Get a 1-element batch to feed the sentence to the model.
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          {bucket_id: [(token_ids, [])]}, bucket_id)
      # Get output logits for the sentence.
      _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                       target_weights, bucket_id, True)
      # This is a greedy decoder - outputs are just argmaxes of output_logits.
      outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
      # If there is an EOS symbol in outputs, cut them at that point.
      if data_utils.EOS_ID in outputs:
        outputs = outputs[:outputs.index(data_utils.EOS_ID)]
      # Print out French sentence corresponding to outputs.
      print(" ".join([tf.compat.as_str(rev_fr_vocab[output]) for output in outputs]))
      print("> ", end="")
      sys.stdout.flush()
      sentence = sys.stdin.readline()
def decode(): with tf.Session() as sess: # Create model and load parameters. model = create_model(sess, True) model.batch_size = 1 # We decode one sentence at a time. # Load vocabularies. enc_vocab_path = os.path.join(gConfig['working_directory'],"vocab%d.enc" % gConfig['enc_vocab_size']) dec_vocab_path = os.path.join(gConfig['working_directory'],"vocab%d.dec" % gConfig['dec_vocab_size']) enc_vocab, _ = data_utils.initialize_vocabulary(enc_vocab_path) _, rev_dec_vocab = data_utils.initialize_vocabulary(dec_vocab_path) # Decode from standard input. sys.stdout.write("> ") sys.stdout.flush() sentence = sys.stdin.readline() while sentence: # Get token-ids for the input sentence. token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), enc_vocab) # Which bucket does it belong to? bucket_id = min([b for b in xrange(len(_buckets)) if _buckets[b][0] > len(token_ids)]) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, target_weights = model.get_batch( {bucket_id: [(token_ids, [])]}, bucket_id) # Get output logits for the sentence. _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True) # This is a greedy decoder - outputs are just argmaxes of output_logits. outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits] # If there is an EOS symbol in outputs, cut them at that point. if data_utils.EOS_ID in outputs: outputs = outputs[:outputs.index(data_utils.EOS_ID)] # Print out French sentence corresponding to outputs. print(" ".join([tf.compat.as_str(rev_dec_vocab[output]) for output in outputs])) print("> ", end="") sys.stdout.flush() sentence = sys.stdin.readline()
def decode(): """ Decode file sentence-by-sentence """ with tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=NUM_THREADS)) as sess: # Create model and load parameters. with tf.variable_scope("model"): model, steps_done = create_model(sess, True, attention=FLAGS.attention, model_path=FLAGS.model_path) model.batch_size = 1 # We decode one sentence at a time. # Load vocabularies. sents_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.sents" % FLAGS.input_vocab_size) parse_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.parse" % FLAGS.output_vocab_size) sents_vocab, _ = data_utils.initialize_vocabulary(sents_vocab_path) _, rev_parse_vocab = data_utils.initialize_vocabulary(parse_vocab_path) start_time = time.time() # Decode with open(FLAGS.decode_input_path, 'r') as fin, open(FLAGS.decode_output_path, 'w') as fout: for line in fin: sentence = line.strip() token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), sents_vocab) try: bucket_id = min([b for b in xrange(len(_buckets)) if _buckets[b][0] > len(token_ids)]) except: print("Input sentence does not fit in any buckets. Skipping... ") print("\t", line) continue encoder_inputs, decoder_inputs, target_weights = model.get_batch({bucket_id: [(token_ids, [])]}, bucket_id) _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,target_weights, bucket_id, True) outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits] if data_utils.EOS_ID in outputs: outputs = outputs[:outputs.index(data_utils.EOS_ID)] decoded_sentence = " ".join([tf.compat.as_str(rev_parse_vocab[output]) for output in outputs]) + '\n' fout.write(decoded_sentence) time_elapsed = time.time() - start_time print("Decoding time: ", time_elapsed)
def post(self):
  data_received = request.json
  if not data_received:
    data_received = eval(request.form["payload"])
  sentence = data_received["text"]
  print(sentence)
  token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), en_vocab)
  # Which bucket does it belong to?
  bucket_id = len(_buckets) - 1
  for i, bucket in enumerate(_buckets):
    if bucket[0] >= len(token_ids):
      bucket_id = i
      break
  else:
    logging.warning("Sentence truncated: %s", sentence)
  # Get a 1-element batch to feed the sentence to the model.
  encoder_inputs, decoder_inputs, target_weights = model.get_batch(
      {bucket_id: [(token_ids, [])]}, bucket_id)
  # Get output logits for the sentence.
  _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                   target_weights, bucket_id, True)
  # This is a greedy decoder - outputs are just argmaxes of output_logits.
  outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
  # If there is an EOS symbol in outputs, cut them at that point.
  if data_utils.EOS_ID in outputs:
    outputs = outputs[:outputs.index(data_utils.EOS_ID)]
  # Print out French sentence corresponding to outputs.
  response = " ".join([tf.compat.as_str(rev_fr_vocab[output]) for output in outputs])
  print(response)
  return jsonify({"text": response})
def decode(): with tf.Session() as sess: # Create model and load parameters. model = create_model(sess, True) model.batch_size = 1 # We decode one sentence at a time. # Load vocabularies. en_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.en" % FLAGS.en_vocab_size) fr_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.fr" % FLAGS.fr_vocab_size) en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path) _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path) # Decode from standard input. sys.stdout.write("> ") sys.stdout.flush() sentence = sys.stdin.readline() while sentence: # Get token-ids for the input sentence. token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), en_vocab) # Which bucket does it belong to? bucket_id = min([b for b in xrange(len(_buckets)) if _buckets[b][0] > len(token_ids)]) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, target_weights = model.get_batch( {bucket_id: [(token_ids, [])]}, bucket_id) # Get output logits for the sentence. _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True) # This is a greedy decoder - outputs are just argmaxes of output_logits. outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits] # If there is an EOS symbol in outputs, cut them at that point. if data_utils.EOS_ID in outputs: outputs = outputs[:outputs.index(data_utils.EOS_ID)] # Print out French sentence corresponding to outputs. print(" ".join([tf.compat.as_str(rev_fr_vocab[output]) for output in outputs])) print("> ", end="") sys.stdout.flush() sentence = sys.stdin.readline()
def decode(): gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2) config = tf.ConfigProto(gpu_options=gpu_options) with tf.Session(config=config) as sess: model = create_model(sess, True) model.batch_size = 1 enc_vocab_path = os.path.join(working_directory,"vocab%d.enc" % enc_vocab_size) dec_vocab_path = os.path.join(working_directory,"vocab%d.dec" % dec_vocab_size) enc_vocab, _ = data_utils.initialize_vocabulary(enc_vocab_path) _, rev_dec_vocab = data_utils.initialize_vocabulary(dec_vocab_path) sys.stdout.write("> ") sys.stdout.flush() sentence = sys.stdin.readline() while sentence: token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), enc_vocab) bucket_id = min([b for b in xrange(len(_buckets)) if _buckets[b][0] > len(token_ids)]) encoder_inputs, decoder_inputs, target_weights = model.get_batch( {bucket_id: [(token_ids, [])]}, bucket_id) _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True) outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits] if data_utils.EOS_ID in outputs: outputs = outputs[:outputs.index(data_utils.EOS_ID)] print(" ".join([tf.compat.as_str(rev_dec_vocab[output]) for output in outputs])) print("> ", end="") sys.stdout.flush() sentence = sys.stdin.readline()
def decode(): gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2) config = tf.ConfigProto(gpu_options=gpu_options) with tf.Session(config=config) as sess: model = create_model(sess, True) model.batch_size = 1 enc_vocab_path = os.path.join(working_directory,"vocab%d.enc" % enc_vocab_size) dec_vocab_path = os.path.join(working_directory,"vocab%d.dec" % dec_vocab_size) enc_vocab, _ = data_utils.initialize_vocabulary(enc_vocab_path) _, rev_dec_vocab = data_utils.initialize_vocabulary(dec_vocab_path) print('Start chatting...') @bot.message_handler(func=lambda sentence: True) def reply_all(message): sentence = (message.text).lower() token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), enc_vocab) bucket_id = min([b for b in xrange(len(_buckets)) if _buckets[b][0] > len(token_ids)]) encoder_inputs, decoder_inputs, target_weights = model.get_batch( {bucket_id: [(token_ids, [])]}, bucket_id) _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True) outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits] if data_utils.EOS_ID in outputs: outputs = outputs[:outputs.index(data_utils.EOS_ID)] message_text = " ".join([tf.compat.as_str(rev_dec_vocab[output]) for output in outputs]) bot.reply_to(message, message_text) while True: try: bot.polling(none_stop=True) except Exception as ex: print(str(ex)) bot.stop_polling() time.sleep(5) bot.polling()
def decode(): gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2) config = tf.ConfigProto(gpu_options=gpu_options) with tf.Session(config=config) as sess: model = create_model(sess, True) model.batch_size = 1 enc_vocab_path = os.path.join(working_directory,"vocab%d.enc" % enc_vocab_size) dec_vocab_path = os.path.join(working_directory,"vocab%d.dec" % dec_vocab_size) enc_vocab, _ = data_utils.initialize_vocabulary(enc_vocab_path) _, rev_dec_vocab = data_utils.initialize_vocabulary(dec_vocab_path) sys.stdout.write("> ") sys.stdout.flush() sentence = sys.stdin.readline() while sentence: token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), enc_vocab) bucket_id = min([b for b in xrange(len(_buckets)) if _buckets[b][0] > len(token_ids)]) encoder_inputs, decoder_inputs, target_weights = model.get_batch( {bucket_id: [(token_ids, [])]}, bucket_id) _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True) outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits] if data_utils.EOS_ID in outputs: outputs = outputs[:outputs.index(data_utils.EOS_ID)] if sentence[:-1] in lines: temp_output = " ".join([tf.compat.as_str(rev_dec_vocab[output]) for output in outputs]) trigger_check = trigger_activator(temp_output) if trigger_check == True: print(" ".join([tf.compat.as_str(rev_dec_vocab[output]) for output in outputs[:-1]])) else: print(temp_output) else: print('i dont understand you') print("> ", end="") sys.stdout.flush() sentence = sys.stdin.readline() #Check if there is a trigger in the decoded sentence
def decode(): with tf.Session() as sess: # Create model and load parameters. model = create_model(sess, True) model.batch_size = 1 # We decode one sentence at a time. # Load vocabularies. enc_vocab_path = os.path.join(gConfig['working_directory'],"vocab%d_enc.txt" % gConfig['enc_vocab_size']) dec_vocab_path = os.path.join(gConfig['working_directory'],"vocab%d_dec.txt" % gConfig['dec_vocab_size']) enc_vocab, _ = data_utils.initialize_vocabulary(enc_vocab_path) _, rev_dec_vocab = data_utils.initialize_vocabulary(dec_vocab_path) # Decode sentence and store it with open(gConfig["test_enc"], 'r') as test_enc: with open(gConfig["output"], 'w') as predicted_headline: sentence_count = 0 for sentence in test_enc: # Get token-ids for the input sentence. token_ids = data_utils.sentence_to_token_ids(sentence, enc_vocab) # Which bucket does it belong to? And place the sentence to the last bucket if its token length is larger then X. bucket_id = min([b for b in range(len(_buckets)) if _buckets[b][0] > len(token_ids)] + [len(_buckets)-1]) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, target_weights = model.get_batch( {bucket_id: [(token_ids, [])]}, bucket_id) # Get output logits for the sentence. _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True) # This is a greedy decoder - outputs are just argmaxes of output_logits. outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits] # If there is an EOS symbol in outputs, cut them at that point. if data_utils.EOS_ID in outputs: outputs = outputs[:outputs.index(data_utils.EOS_ID)] # Write predicted headline corresponding to article. predicted_headline.write(" ".join([tf.compat.as_str(rev_dec_vocab[output]) for output in outputs])+'\n') sentence_count += 1 if sentence_count % 100 == 0: print("predicted data line %d" % sentence_count) sys.stdout.flush() predicted_headline.close() test_enc.close() print("Finished decoding and stored predicted results in %s!" % gConfig["output"])
def decode_input():
  with tf.Session() as sess:
    # Create model and load parameters.
    model = create_model(sess, True)
    model.batch_size = 1  # We decode one sentence at a time.

    # Load vocabularies.
    enc_vocab_path = os.path.join(gConfig['working_directory'], "vocab%d_enc.txt" % gConfig['enc_vocab_size'])
    dec_vocab_path = os.path.join(gConfig['working_directory'], "vocab%d_dec.txt" % gConfig['dec_vocab_size'])
    enc_vocab, _ = data_utils.initialize_vocabulary(enc_vocab_path)
    _, rev_dec_vocab = data_utils.initialize_vocabulary(dec_vocab_path)

    # Decode from standard input.
    sys.stdout.write("> ")
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    while sentence:
      # Get token-ids for the input sentence.
      token_ids = data_utils.sentence_to_token_ids(sentence, enc_vocab)
      # Which bucket does it belong to? Fall back to the last bucket
      # if the token length is larger than the largest bucket.
      bucket_id = min([b for b in range(len(_buckets)) if _buckets[b][0] > len(token_ids)] + [len(_buckets) - 1])
      # Get a 1-element batch to feed the sentence to the model.
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          {bucket_id: [(token_ids, [])]}, bucket_id)
      # Get output logits for the sentence.
      _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                       target_weights, bucket_id, True)
      # This is a greedy decoder - outputs are just argmaxes of output_logits.
      outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
      # If there is an EOS symbol in outputs, cut them at that point.
      if data_utils.EOS_ID in outputs:
        outputs = outputs[:outputs.index(data_utils.EOS_ID)]
      # Print out the decoded sentence corresponding to outputs.
      print(" ".join([tf.compat.as_str(rev_dec_vocab[output]) for output in outputs]))
      print("> ", end="")
      sys.stdout.flush()
      sentence = sys.stdin.readline()
def decode(): with tf.Session() as sess: # Create model and load parameters. model = create_model(sess, True) model.batch_size = 1 # We decode one sentence at a time. # Load vocabularies. en_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.en" % FLAGS.en_vocab_size) fr_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.fr" % FLAGS.fr_vocab_size) en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path) _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path) # Decode from standard input. sys.stdout.write("> ") sys.stdout.flush() sentence = sys.stdin.readline() while sentence: # Get token-ids for the input sentence. token_idsgb = data_utils.sentence_to_token_ids(sentence, en_vocab) # Truncate to the maximum bucket size token_ids = token_idsgb[0:119] # Which bucket does it belong to? bucket_id = min([b for b in xrange(len(_buckets)) if _buckets[b][0] > len(token_ids)]) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, target_weights = model.get_batch( {bucket_id: [(token_ids, [])]}, bucket_id) # Get output logits for the sentence. _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True) # This is a greedy decoder - outputs are just argmaxes of output_logits. outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits] # If there is an EOS symbol in outputs, cut them at that point. if data_utils.EOS_ID in outputs: outputs = outputs[:outputs.index(data_utils.EOS_ID)] # Print out French sentence corresponding to outputs. print(" ".join([rev_fr_vocab[output] for output in outputs])) print("> ", end="") sys.stdout.flush() sentence = sys.stdin.readline()
def decode(): with tf.Session() as sess: # Create model and load parameters. model = create_model(sess, True) model.batch_size = 1 # We decode one sentence at a time. # Load vocabularies. en_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.en" % FLAGS.en_vocab_size) fr_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.fr" % FLAGS.fr_vocab_size) en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path) _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path) # Decode from standard input. sys.stdout.write("> ") sys.stdout.flush() sentence = sys.stdin.readline() while sentence: # Get token-ids for the input sentence. token_idsgb = data_utils.sentence_to_token_ids(sentence, en_vocab) # Truncate sentence to the maximum bucket size token_ids = token_idsgb[0:479] # Which bucket does it belong to? bucket_id = min([b for b in xrange(len(_buckets)) if _buckets[b][0] > len(token_ids)]) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, target_weights = model.get_batch( {bucket_id: [(token_ids, [])]}, bucket_id) # Get output logits for the sentence. _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True) # This is a greedy decoder - outputs are just argmaxes of output_logits. outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits] # If there is an EOS symbol in outputs, cut them at that point. if data_utils.EOS_ID in outputs: outputs = outputs[:outputs.index(data_utils.EOS_ID)] # Print out French sentence corresponding to outputs. print(" ".join([rev_fr_vocab[output] for output in outputs])) print("> ", end="") sys.stdout.flush() sentence = sys.stdin.readline()
def decode():
  # Only allocate part of the gpu memory when predicting.
  gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
  config = tf.ConfigProto(gpu_options=gpu_options)
  with tf.Session(config=config) as sess:
    # Create model and load parameters.
    model = create_model(sess, True)
    model.batch_size = 1  # We decode one sentence at a time.

    # Load vocabularies.
    enc_vocab_path = os.path.join(gConfig['working_directory'], "vocab%d.enc" % gConfig['enc_vocab_size'])
    dec_vocab_path = os.path.join(gConfig['working_directory'], "vocab%d.dec" % gConfig['dec_vocab_size'])
    enc_vocab, _ = data_utils.initialize_vocabulary(enc_vocab_path)
    _, rev_dec_vocab = data_utils.initialize_vocabulary(dec_vocab_path)

    # Decode from standard input.
    sys.stdout.write("> ")
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    while sentence:
      # Get token-ids for the input sentence.
      token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), enc_vocab)
      # Which bucket does it belong to?
      bucket_id = min([b for b in xrange(len(_buckets)) if _buckets[b][0] > len(token_ids)])
      # Get a 1-element batch to feed the sentence to the model.
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          {bucket_id: [(token_ids, [])]}, bucket_id)
      # Get output logits for the sentence.
      _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                       target_weights, bucket_id, True)
      # This is a greedy decoder - outputs are just argmaxes of output_logits.
      outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
      # If there is an EOS symbol in outputs, cut them at that point.
      if data_utils.EOS_ID in outputs:
        outputs = outputs[:outputs.index(data_utils.EOS_ID)]
      # Print out the decoded sentence corresponding to outputs.
      print(" ".join([tf.compat.as_str(rev_dec_vocab[output]) for output in outputs]))
      print("> ", end="")
      sys.stdout.flush()
      sentence = sys.stdin.readline()
def decode(): with tf.Session() as sess: # Create model and load parameters. model = create_model(sess, True) model.batch_size = 1 # We decode one sentence at a time. # Load vocabularies. en_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.from" % FLAGS.from_vocab_size) fr_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.to" % FLAGS.to_vocab_size) en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path) _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path) # Decode from standard input. sys.stdout.write("> ") sys.stdout.flush() sentence = sys.stdin.readline() while sentence: # Get token-ids for the input sentence. token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), en_vocab) # Which bucket does it belong to? bucket_id = len(_buckets) - 1 for i, bucket in enumerate(_buckets): if bucket[0] >= len(token_ids): bucket_id = i break else: logging.warning("Sentence truncated: %s", sentence) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, target_weights = model.get_batch( {bucket_id: [(token_ids, [])]}, bucket_id) # Get output logits for the sentence. _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True) # This is a greedy decoder - outputs are just argmaxes of output_logits. outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits] # If there is an EOS symbol in outputs, cut them at that point. if data_utils.EOS_ID in outputs: outputs = outputs[:outputs.index(data_utils.EOS_ID)] # Print out French sentence corresponding to outputs. print(" ".join([tf.compat.as_str(rev_fr_vocab[output]) for output in outputs])) print("> ", end="") sys.stdout.flush() sentence = sys.stdin.readline()
def decode(): with tf.Session() as sess: # Create model and load parameters. model = create_model(sess, True) model.batch_size = 1 # We decode one sentence at a time. # Load vocabularies. en_vocab_path = os.path.join(FLAGS.data_dir, "vocab") fr_vocab_path = os.path.join(FLAGS.data_dir, "vocab") en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path) _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path) # Decode from standard input. sys.stdout.write("> ") sys.stdout.flush() sentence = sys.stdin.readline() while sentence: # Get token-ids for the input sentence. token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), en_vocab) # Which bucket does it belong to? bucket_id = len(_buckets) - 1 for i, bucket in enumerate(_buckets): if bucket[0] >= len(token_ids): bucket_id = i break else: logging.warning("Sentence truncated: %s", sentence) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, target_weights = model.get_batch( {bucket_id: [(token_ids, [])]}, bucket_id) # Get output logits for the sentence. _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True) # This is a greedy decoder - outputs are just argmaxes of output_logits. outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits] # If there is an EOS symbol in outputs, cut them at that point. if data_utils.EOS_ID in outputs: outputs = outputs[:outputs.index(data_utils.EOS_ID)] # Print out French sentence corresponding to outputs. print(" ".join([tf.compat.as_str(rev_fr_vocab[output]) for output in outputs])) print("> ", end="") sys.stdout.flush() sentence = sys.stdin.readline()
def decode():
  with tf.Session() as sess:
    # Create model and load parameters.
    model = create_model(sess, True)
    model.batch_size = 1  # We decode one sentence at a time.

    # Load vocabularies.
    first_vocab_path = os.path.join(FLAGS.train_dir, "vocab%d.first" % FLAGS.first_vocab_size)
    last_vocab_path = os.path.join(FLAGS.train_dir, "vocab%d.last" % FLAGS.last_vocab_size)
    first_vocab, _ = data_utils.initialize_vocabulary(first_vocab_path)
    _, rev_last_vocab = data_utils.initialize_vocabulary(last_vocab_path)

    # Decode the sentence passed via the input flag.
    sys.stdout.write("> ")
    sys.stdout.flush()
    sentence = FLAGS.input
    # Get token-ids for the input sentence.
    token_ids = data_utils.sentence_to_token_ids(sentence, first_vocab)
    # Which bucket does it belong to?
    bucket_id = min([b for b in xrange(len(_buckets)) if _buckets[b][0] > len(token_ids)])
    # Get a 1-element batch to feed the sentence to the model.
    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
        {bucket_id: [(token_ids, [])]}, bucket_id)
    # Get output logits for the sentence.
    _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                     target_weights, bucket_id, True)
    # This is a greedy decoder - outputs are just argmaxes of output_logits.
    outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
    # If there is an EOS symbol in outputs, cut them at that point.
    if data_utils.EOS_ID in outputs:
      outputs = outputs[:outputs.index(data_utils.EOS_ID)]
    # Print out the output sentence corresponding to outputs and save it to a file.
    result = " ".join([rev_last_vocab[output] for output in outputs])
    print(result)
    output = os.path.join(FLAGS.output_dir, str(int(time.time())) + ".txt")
    with open(output, "w") as text_file:
      text_file.write(result)
    print(output)
    sys.stdout.flush()
def decode():
  config_ = tf.ConfigProto()
  config_.gpu_options.allow_growth = True
  config_.allow_soft_placement = True
  with tf.Session(config=config_) as sess:
    # Create model and load parameters.
    model = create_model(sess, True, False)
    model.batch_size = 1  # We decode one sentence at a time.

    # Load vocabularies.
    from_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.from" % FLAGS.from_vocab_size)
    to_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.to" % FLAGS.to_vocab_size)
    from_vocab, _ = data_utils.initialize_vocabulary(from_vocab_path)
    _, rev_to_vocab = data_utils.initialize_vocabulary(to_vocab_path)

    # Decode from standard input.
    sys.stdout.write("> ")
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    while sentence:
      # Get token-ids for the input sentence.
      token_ids = data_utils.sentence_to_token_ids(sentence, from_vocab)
      print(token_ids)
      # Which bucket does it belong to?
      bucket_id = len(_buckets) - 1
      for i, bucket in enumerate(_buckets):
        if bucket[0] >= len(token_ids):
          bucket_id = i
          break
      else:
        logging.warning("Sentence truncated: %s", sentence)
      # Get a 1-element batch to feed the sentence to the model.
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          {bucket_id: [(token_ids, [])]}, bucket_id)
      # Get output logits for the sentence.
      _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                       target_weights, bucket_id, True)
      # This is a greedy decoder - outputs are just argmaxes of output_logits.
      outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
      # If there is an EOS symbol in outputs, cut them at that point.
      if data_utils.EOS_ID in outputs:
        outputs = outputs[:outputs.index(data_utils.EOS_ID)]
      # Print out logical form corresponding to outputs.
      print(" ".join([tf.compat.as_str(rev_to_vocab[output]) for output in outputs]))
      print("> ", end="")
      sys.stdout.flush()
      sentence = sys.stdin.readline()
def decode(): with tf.Session() as sess: # Create model and load parameters. model = create_model(sess, True) model.batch_size = 1 # We decode one sentence at a time. # Load vocabularies. en_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.en" % FLAGS.en_vocab_size) amr_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d.amr" % FLAGS.fr_vocab_size) en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path) _, rev_fr_vocab = data_utils.initialize_vocabulary(amr_vocab_path) # Decode from standard input. sys.stdout.write("> ") sys.stdout.flush() sentence = sys.stdin.readline() while sentence: # Get token-ids for the input sentence. token_idsgb = data_utils.sentence_to_token_ids(sentence, en_vocab, normalize_digits=True) # Truncate to the maximum bucket size token_ids = token_idsgb[0:119] # Which bucket does it belong to? bucket_id = min([b for b in xrange(len(_buckets)) if _buckets[b][0] > len(token_ids)]) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, target_weights = model.get_batch( {bucket_id: [(token_ids, [])]}, bucket_id) # Get output logits for the sentence. _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True) # print(output_logits) # This is a greedy decoder - outputs are just argmaxes of output_logits. outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits] # If there is an EOS symbol in outputs, cut them at that point. if data_utils.EOS_ID in outputs: outputs = outputs[:outputs.index(data_utils.EOS_ID)] # Print out French sentence corresponding to outputs. # print("Length: %s" % (len(outputs))) print(" ".join([rev_fr_vocab[output] for output in outputs])) print("> ", end="") sys.stdout.flush() sentence = sys.stdin.readline()
def decode():
  '''
  Manually input sentence interactively and the headline will be printed out
  '''
  with tf.Session() as sess:
    # Create model and load parameters.
    model = create_model(sess, True)
    model.batch_size = FLAGS.batch_size  # repeat single sentence 10 times as one batch
    # We decode one sentence at a time.

    # Load vocabularies.
    vocab_path = os.path.join(FLAGS.data_dir, "vocab")
    vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

    # Decode from standard input interactively
    sys.stdout.write("> ")
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    while sentence:
      if len(sentence.strip('\n')) == 0:
        sys.stdout.flush()
        sentence = sys.stdin.readline()
      # Get token-ids for the input sentence.
      token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), vocab)
      # print(token_ids)  # print token ids
      # Which bucket does it belong to?
      bucket_id = min([b for b in xrange(len(buckets)) if buckets[b][0] > len(token_ids)])
      # Get a 1-element batch to feed the sentence to the model.
      # print("current bucket id" + str(bucket_id))
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          {bucket_id: [(token_ids, [])]}, bucket_id)
      # Get output logits for the sentence.
      _, _, output_logits_batch = model.step(sess, encoder_inputs, decoder_inputs,
                                             target_weights, bucket_id, True)
      # This is a greedy decoder - outputs are just argmaxes of output_logits.
      # Keep only the first element of the batch at each step.
      output_logits = []
      for item in output_logits_batch:
        output_logits.append(item[0])
      #print (output_logits)
      #print (len(output_logits))
      #print (output_logits[0])
      outputs = [int(np.argmax(logit)) for logit in output_logits]
      # If there is an EOS symbol in outputs, cut them at that point.
      if data_utils.EOS_ID in outputs:
        outputs = outputs[:outputs.index(data_utils.EOS_ID)]
      print(" ".join([tf.compat.as_str(rev_vocab[output]) for output in outputs]))
      print("> ", end="")
      sys.stdout.flush()
      sentence = sys.stdin.readline()
def test_decoder(config):
  train_path = os.path.join(config.train_dir, "chitchat.train")
  data_path_list = [train_path + ".answer", train_path + ".query"]
  vocab_path = os.path.join(config.train_dir, "vocab%d.all" % config.vocab_size)
  data_utils.create_vocabulary(vocab_path, data_path_list, config.vocab_size)
  vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

  with tf.Session() as sess:
    if config.name_model in [gst_config.name_model, gcc_config.name_model, gbk_config.name_model]:
      model = create_st_model(sess, config, forward_only=True, name_scope=config.name_model)
    elif config.name_model in [grl_config.name_model, pre_grl_config.name_model]:
      model = create_rl_model(sess, config, forward_only=True, name_scope=config.name_model)
    model.batch_size = 1

    sys.stdout.write("> ")
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    while sentence:
      token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), vocab)
      print("token_id: ", token_ids)
      bucket_id = len(config.buckets) - 1
      for i, bucket in enumerate(config.buckets):
        if bucket[0] >= len(token_ids):
          bucket_id = i
          break
      else:
        print("Sentence truncated: %s" % sentence)
      encoder_inputs, decoder_inputs, target_weights, _, _ = model.get_batch(
          {bucket_id: [(token_ids, [1])]}, bucket_id)

      # st_model step
      if config.name_model in [gst_config.name_model, gcc_config.name_model, gbk_config.name_model]:
        output_logits, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                      target_weights, bucket_id, True)
        outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
        if data_utils.EOS_ID in outputs:
          outputs = outputs[:outputs.index(data_utils.EOS_ID)]
        print(" ".join([str(rev_vocab[output]) for output in outputs]))

      # beam_search step
      elif config.name_model in [grl_config.name_model, pre_grl_config.name_model]:
        _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, reward=1,
                                         bucket_id=bucket_id, forward_only=True)
        for i, output in enumerate(output_logits):
          print("index: %d, answer tokens: %s" % (i, str(output)))
          if data_utils.EOS_ID in output:
            output = output[:output.index(data_utils.EOS_ID)]
          print(" ".join([str(rev_vocab[out]) for out in output]))

      print("> ", end="")
      sys.stdout.flush()
      sentence = sys.stdin.readline()
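Almost every example above repeats the same greedy extraction step: take the argmax of each time-step's logits, cut at the first EOS, and join the reverse-vocabulary entries. A minimal sketch of that shared logic, assuming the tutorial-style convention EOS_ID = 2 and per-step logits of shape (1, vocab_size):

import numpy as np

EOS_ID = 2  # assumed tutorial convention (PAD=0, GO=1, EOS=2, UNK=3)

def logits_to_sentence(output_logits, rev_vocab, eos_id=EOS_ID):
  """Greedy decode: argmax at each step, stop at the first EOS, join the words."""
  outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
  if eos_id in outputs:
    outputs = outputs[:outputs.index(eos_id)]
  return " ".join(rev_vocab[token] for token in outputs)

Factoring this out is purely a readability choice; each example above inlines the same few lines after model.step().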