The following 38 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.contrib.rnn.LSTMCell().
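Before the project snippets, here is a minimal, self-contained sketch of the basic pattern most of the examples below follow: create an LSTMCell, optionally wrap it in a DropoutWrapper, and unroll it over a batch of sequences with tf.nn.dynamic_rnn. It assumes a TensorFlow 1.x environment (tf.contrib was removed in 2.x); the placeholder names and sizes are illustrative only.

# Minimal usage sketch (TensorFlow 1.x only).
import tensorflow as tf
from tensorflow.contrib import rnn

max_time, input_dim, num_units = 20, 50, 128

# [batch, time, features] inputs and their true sequence lengths
inputs = tf.placeholder(tf.float32, [None, max_time, input_dim], name="inputs")
lengths = tf.placeholder(tf.int32, [None], name="lengths")

cell = rnn.LSTMCell(num_units, forget_bias=1.0)
cell = rnn.DropoutWrapper(cell, output_keep_prob=0.9)  # optional regularization

# outputs: [batch, max_time, num_units]; state: LSTMStateTuple(c, h)
outputs, state = tf.nn.dynamic_rnn(cell, inputs, sequence_length=lengths, dtype=tf.float32)
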
def _create_loss(self):
    '''
    Risk estimation loss function. The output is the planned position we should hold until the next day.
    The change rate of the next day is self.y, so we lose money in two ways:
        - self.y * self.position is the trading loss;
        - cost * self.position is a constant loss due to tax and the missed profit of buying national debt.
    Therefore, the loss function is formulated as:
        100 * (- self.y * self.position + cost * self.position)
          = -100 * ((self.y - cost) * self.position)
    :return:
    '''
    # with tf.device("/cpu:0"):
    xx = tf.unstack(self.x, self.step, 1)
    lstm_cell = rnn.LSTMCell(self.hidden_size, forget_bias=1.0, initializer=orthogonal_initializer())
    dropout_cell = DropoutWrapper(lstm_cell, input_keep_prob=self.keep_rate,
                                  output_keep_prob=self.keep_rate, state_keep_prob=self.keep_rate)
    outputs, states = rnn.static_rnn(dropout_cell, xx, dtype=tf.float32)
    signal = tf.matmul(outputs[-1], self.weights['out']) + self.biases['out']
    scope = "activation_batch_norm"
    norm_signal = self.batch_norm_layer(signal, scope=scope)
    # batch_norm(signal, 0.9, center=True, scale=True, epsilon=0.001, activation_fn=tf.nn.relu6,
    #            is_training=is_training, scope="activation_batch_norm", reuse=False)
    self.position = tf.nn.relu6(norm_signal, name="relu_limit") / 6.
    self.avg_position = tf.reduce_mean(self.position)
    # self.cost = 0.0002
    self.loss = -100. * tf.reduce_mean(tf.multiply((self.y - self.cost), self.position, name="estimated_risk"))

def _create_rnn_cell(self):
    """
    Creates a single RNN cell according to the architecture of this RNN.

    Returns
    -------
    rnn cell
        A single RNN cell according to the architecture of this RNN
    """
    keep_prob = 1.0 if self.keep_prob is None else self.keep_prob
    if self.cell_type == CellType.GRU:
        return DropoutWrapper(GRUCell(self.num_units), keep_prob, keep_prob)
    elif self.cell_type == CellType.LSTM:
        return DropoutWrapper(LSTMCell(self.num_units), keep_prob, keep_prob)
    else:
        raise ValueError("unknown cell type: {}".format(self.cell_type))

def build_permutation(self):
    with tf.variable_scope("encoder"):

        with tf.variable_scope("embedding"):
            # Embed input sequence
            W_embed = tf.get_variable("weights", [1, self.input_dimension + 2, self.input_embed],
                                      initializer=self.initializer)  # +2 for TW feat. here too
            embedded_input = tf.nn.conv1d(self.input_, W_embed, 1, "VALID", name="embedded_input")
            # Batch Normalization
            embedded_input = tf.layers.batch_normalization(embedded_input, axis=2,
                                                           training=self.is_training, name='layer_norm', reuse=None)

        with tf.variable_scope("dynamic_rnn"):
            # Encode input sequence
            cell1 = LSTMCell(self.num_neurons, initializer=self.initializer)  # BNLSTMCell(self.num_neurons, self.training) or cell1 = DropoutWrapper(cell1, output_keep_prob=0.9)
            # Return the output activations [Batch size, Sequence Length, Num_neurons] and last hidden state as tensors.
            encoder_output, encoder_state = tf.nn.dynamic_rnn(cell1, embedded_input, dtype=tf.float32)

    with tf.variable_scope('decoder'):
        # Ptr-net returns permutations (self.positions), with their log-probability for backprop
        self.ptr = Pointer_decoder(encoder_output, self.config)
        self.positions, self.log_softmax, self.attending, self.pointing = self.ptr.loop_decode(encoder_state)
        variable_summaries('log_softmax', self.log_softmax, with_max_min=True)

def inference(self, X, reuse=None, trainMode=True):
    word_vectors = tf.nn.embedding_lookup(self.words, X)
    length = self.length(word_vectors)
    length_64 = tf.cast(length, tf.int64)
    if trainMode:
        word_vectors = tf.nn.dropout(word_vectors, 0.5)
    with tf.variable_scope('rnn_fwbw', reuse=reuse) as scope:
        lstm_fw = rnn.LSTMCell(self.numHidden)
        lstm_bw = rnn.LSTMCell(self.numHidden)
        inputs = tf.unstack(word_vectors, nlp_segment.flags.max_sentence_len, 1)
        output, _, _ = rnn.static_bidirectional_rnn(lstm_fw, lstm_bw, inputs,
                                                    sequence_length=length_64, dtype=tf.float32)
    output = tf.reshape(output, [-1, self.numHidden * 2])
    matricized_unary_scores = tf.matmul(output, self.W) + self.b
    unary_scores = tf.reshape(matricized_unary_scores,
                              [-1, nlp_segment.flags.max_sentence_len, self.distinctTagNum])
    return unary_scores, length

def inference(self, X, reuse=None, trainMode=True):
    word_vectors = tf.nn.embedding_lookup(self.words, X)
    length = self.length(word_vectors)
    length_64 = tf.cast(length, tf.int64)
    if trainMode:
        word_vectors = tf.nn.dropout(word_vectors, 0.5)
    with tf.variable_scope('rnn_fwbw', reuse=reuse) as scope:
        lstm_fw = rnn.LSTMCell(self.numHidden)
        lstm_bw = rnn.LSTMCell(self.numHidden)
        inputs = tf.unstack(word_vectors, self.sentence_length, 1)
        output, _, _ = rnn.static_bidirectional_rnn(lstm_fw, lstm_bw, inputs,
                                                    sequence_length=length_64, dtype=tf.float32)
    output = tf.reshape(output, [-1, self.numHidden * 2])
    matricized_unary_scores = tf.matmul(output, self.W) + self.b
    unary_scores = tf.reshape(matricized_unary_scores,
                              [-1, self.sentence_length, self.distinctTagNum])
    return unary_scores, length

def _set_train_model(self):
    """
    define train graph
    :return:
    """
    # Create the internal multi-layer cell for our RNN.
    if use_lstm:
        single_cell1 = LSTMCell(self.enc_hidden_size)
        single_cell2 = LSTMCell(self.dec_hidden_size)
    else:
        single_cell1 = GRUCell(self.enc_hidden_size)
        single_cell2 = GRUCell(self.dec_hidden_size)
    enc_cell = MultiRNNCell([single_cell1 for _ in range(self.enc_num_layers)])
    dec_cell = MultiRNNCell([single_cell2 for _ in range(self.dec_num_layers)])

    self.encoder_cell = enc_cell
    self.decoder_cell = dec_cell

    self._make_graph(forward_only)
    self.saver = tf.train.Saver(tf.global_variables())

def RNN(_X, _weights, _biases, lens):
    if FLAGS.unit == 'PLSTM':
        cell = PhasedLSTMCell(FLAGS.n_hidden, use_peepholes=True)
    elif FLAGS.unit == 'GRU':
        cell = GRUCell(FLAGS.n_hidden)
    elif FLAGS.unit == 'LSTM':
        cell = LSTMCell(FLAGS.n_hidden, use_peepholes=True)
    else:
        raise ValueError('Unit {} not implemented.'.format(FLAGS.unit))

    outputs, states = tf.nn.dynamic_rnn(cell, _X, dtype=tf.float32, sequence_length=lens)

    # TODO better (?) in lack of smart indexing
    batch_size = tf.shape(outputs)[0]
    max_len = tf.shape(outputs)[1]
    out_size = int(outputs.get_shape()[2])
    index = tf.range(0, batch_size) * max_len + (lens - 1)
    flat = tf.reshape(outputs, [-1, out_size])
    relevant = tf.gather(flat, index)

    return tf.nn.bias_add(tf.matmul(relevant, _weights['out']), _biases['out'])

def BiLSTM(input, input_mask, name):
    with tf.variable_scope(name):
        lstm_fw_cell = rnn.LSTMCell(n_hidden, forget_bias=1.0)
        lstm_fw_cell = tf.contrib.rnn.DropoutWrapper(lstm_fw_cell,
                                                     state_keep_prob=1.0 - dropout,
                                                     # input_keep_prob=1.0-dropout,
                                                     input_size=tf.shape(input)[1:],
                                                     variational_recurrent=True,
                                                     dtype=tf.float32)
        lstm_bw_cell = rnn.LSTMCell(n_hidden, forget_bias=1.0)
        lstm_bw_cell = tf.contrib.rnn.DropoutWrapper(lstm_bw_cell,
                                                     state_keep_prob=1.0 - dropout,
                                                     # input_keep_prob=1.0-dropout,
                                                     input_size=tf.shape(input)[1:],
                                                     variational_recurrent=True,
                                                     dtype=tf.float32)
        outputs, states = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell, input, dtype=tf.float32)
        outputs = tf.concat(outputs, axis=-1) * input_mask
    return outputs

def __init__(self):
    print("tensorflow version: ", tf.__version__)
    tf.reset_default_graph()

    self.encoder_vec_file = "./preprocessing/enc.vec"
    self.decoder_vec_file = "./preprocessing/dec.vec"
    self.encoder_vocabulary = "./preprocessing/enc.vocab"
    self.decoder_vocabulary = "./preprocessing/dec.vocab"
    self.dictFile = './word_dict.txt'
    self.batch_size = 1
    self.max_batches = 10000
    self.show_epoch = 100
    self.model_path = './model/'

    # load the jieba user dictionary
    jieba.load_userdict(self.dictFile)

    self.model = dynamicSeq2seq(encoder_cell=LSTMCell(20),
                                decoder_cell=LSTMCell(40),
                                encoder_vocab_size=540,
                                decoder_vocab_size=1600,
                                embedding_size=20,
                                attention=True,
                                bidirectional=True,
                                debug=False,
                                time_major=True)
    self.location = ["??", "??", "??", "??", "??"]
    self.user_info = {"__username__": "Stephen", "__location__": "??"}
    self.robot_info = {"__robotname__": "JiJi"}
    self.dec_vocab = {}
    self.enc_vocab = {}
    tag_location = ''
    with open(self.encoder_vocabulary, "r") as enc_vocab_file:
        for index, word in enumerate(enc_vocab_file.readlines()):
            self.enc_vocab[word.strip()] = index
    with open(self.decoder_vocabulary, "r") as dec_vocab_file:
        for index, word in enumerate(dec_vocab_file.readlines()):
            self.dec_vocab[index] = word.strip()

def encoder_pipeline(
        sess, data_stream,
        token2id,
        embedding_size, encoder_size, bidirectional,
        decoder_size, attention,
        checkpoint_path,
        batch_size=32,
        use_norm=False,
        lstm_connection=1):
    encoder_args = {
        "cell": rnn.LSTMCell(encoder_size),
        "bidirectional": bidirectional,
    }

    # @TODO: rewrite save-load for no-decoder usage
    decoder_args = {
        "cell": rnn.LSTMCell(decoder_size),
        "attention": attention,
    }

    spec_symbols_bias = 3
    model = create_model(
        len(token2id) + spec_symbols_bias,
        embedding_size,
        encoder_args, decoder_args)

    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    for embedding_matr in rnn_encoder_encode_stream(
            sess, data_stream, model, batch_size, use_norm,
            lstm_connection=lstm_connection):
        yield embedding_matr

def __call__(self, inputs, name, training=False):
    """
    Runs the bidirectional LSTM, produces outputs and saves both forward and backward states as well as gradients.
    :param inputs: The inputs should be a list of shape [sequence_length, batch_size, 64]
    :param name: Name to give to the tensorflow op
    :param training: Flag that indicates if this is a training or evaluation stage
    :return: Returns the LSTM outputs, as well as the forward and backward hidden states.
    """
    with tf.name_scope('bid-lstm' + name), tf.variable_scope('bid-lstm', reuse=self.reuse):
        with tf.variable_scope("encoder"):
            fw_lstm_cells_encoder = [rnn.LSTMCell(num_units=self.layer_sizes[i], activation=tf.nn.tanh)
                                     for i in range(len(self.layer_sizes))]
            bw_lstm_cells_encoder = [rnn.LSTMCell(num_units=self.layer_sizes[i], activation=tf.nn.tanh)
                                     for i in range(len(self.layer_sizes))]

            outputs, output_state_fw, output_state_bw = rnn.stack_bidirectional_rnn(
                fw_lstm_cells_encoder,
                bw_lstm_cells_encoder,
                inputs,
                dtype=tf.float32
            )
        print("out shape", tf.stack(outputs, axis=0).get_shape().as_list())
        with tf.variable_scope("decoder"):
            fw_lstm_cells_decoder = [rnn.LSTMCell(num_units=self.layer_sizes[i], activation=tf.nn.tanh)
                                     for i in range(len(self.layer_sizes))]
            bw_lstm_cells_decoder = [rnn.LSTMCell(num_units=self.layer_sizes[i], activation=tf.nn.tanh)
                                     for i in range(len(self.layer_sizes))]
            outputs, output_state_fw, output_state_bw = rnn.stack_bidirectional_rnn(
                fw_lstm_cells_decoder,
                bw_lstm_cells_decoder,
                outputs,
                dtype=tf.float32
            )

    self.reuse = True
    self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='bid-lstm')
    return outputs, output_state_fw, output_state_bw

def create_model(session, restore_only=False):
    # with bidirectional encoder, decoder state size should be
    # 2x encoder state size
    is_training = tf.placeholder(dtype=tf.bool, name='is_training')
    encoder_cell = LSTMCell(64)
    encoder_cell = MultiRNNCell([encoder_cell] * 5)
    decoder_cell = LSTMCell(128)
    decoder_cell = MultiRNNCell([decoder_cell] * 5)
    model = Seq2SeqModel(encoder_cell=encoder_cell,
                         decoder_cell=decoder_cell,
                         vocab_size=wiki.vocab_size,
                         embedding_size=300,
                         attention=True,
                         bidirectional=True,
                         is_training=is_training,
                         device=args.device,
                         debug=False)

    saver = tf.train.Saver(tf.global_variables(), keep_checkpoint_every_n_hours=1)
    checkpoint = tf.train.get_checkpoint_state(checkpoint_dir)
    if checkpoint:
        print("Reading model parameters from %s" % checkpoint.model_checkpoint_path)
        saver.restore(session, checkpoint.model_checkpoint_path)
    elif restore_only:
        raise FileNotFoundError("Cannot restore model")
    else:
        print("Created model with fresh parameters")
        session.run(tf.global_variables_initializer())
    tf.get_default_graph().finalize()
    return model, saver

def make_seq2seq_model(**kwargs):
    args = dict(encoder_cell=LSTMCell(10),
                decoder_cell=LSTMCell(20),
                vocab_size=10,
                embedding_size=10,
                attention=True,
                bidirectional=True,
                debug=False)
    args.update(kwargs)
    return Seq2SeqModel(**args)

def build_cell(units, cell_type='lstm', num_layers=1):
    if num_layers > 1:
        cell = rnn.MultiRNNCell([
            build_cell(units, cell_type, 1) for _ in range(num_layers)
        ])
    else:
        if cell_type == "lstm":
            cell = rnn.LSTMCell(units)
        elif cell_type == "gru":
            cell = rnn.GRUCell(units)
        else:
            raise ValueError('Do not support %s' % cell_type)
    return cell

def bi_lstm_layer(self, inputs):
    with tf.variable_scope("BILSTM"):
        fw_cell = rnn.LSTMCell(self.params.neaurl_hidden_dim, use_peepholes=True, initializer=self.initializer)
        bw_cell = rnn.LSTMCell(self.params.neaurl_hidden_dim, use_peepholes=True, initializer=self.initializer)
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(fw_cell, bw_cell, inputs, dtype=tf.float32, time_major=True)
    lstm_features = tf.reshape(outputs, [-1, self.params.neaurl_hidden_dim * 2])
    return lstm_features

def lstm_cell(self):
    cell = rnn.LSTMCell(self.num_hidden)
    cell = rnn.DropoutWrapper(cell, self.dropout)
    return cell

def biLSTM_layer(self, inputs, lengths):
    with tf.variable_scope('bi_lstm'):
        lstm_cell = {}
        for direction in ['forward', 'backward']:
            with tf.variable_scope(direction):
                lstm_cell[direction] = rnn.LSTMCell(self.num_hidden, use_peepholes=True,
                                                    initializer=self.initializer)
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(lstm_cell['forward'], lstm_cell['backward'],
                                                     inputs, dtype=tf.float32, sequence_length=lengths)
    return tf.concat(outputs, axis=2)

def __init__(self):
    self.embeddingSize = nlp_segment.flags.embedding_size
    self.num_tags = nlp_segment.flags.num_tags
    self.num_hidden = nlp_segment.flags.num_hidden
    self.learning_rate = nlp_segment.flags.learning_rate
    self.batch_size = nlp_segment.flags.batch_size
    self.model_save_path = nlp_segment.model_save_path

    self.input = tf.placeholder(tf.int32, shape=[None, FLAGS.max_sentence_len], name="input_placeholder")
    self.label = tf.placeholder(tf.int32, shape=[None, FLAGS.max_sentence_len], name="label_placeholder")
    self.dropout = tf.placeholder(tf.float32, name="dropout")

    with tf.name_scope("embedding_layer"):
        self.word_embedding = tf.Variable(data_loader.load_w2v(nlp_segment.word_vec_path), name="word_embedding")
        inputs_embed = tf.nn.embedding_lookup(self.word_embedding, self.input)
        length = self.length(self.input)
        self.length_64 = tf.cast(length, tf.int64)
        reuse = None  # if self.trainMode else True

    # if trainMode:
    #     word_vectors = tf.nn.dropout(word_vectors, 0.5)
    with tf.name_scope("rnn_fwbw") as scope:
        lstm_fw = rnn.LSTMCell(self.num_hidden, use_peepholes=True)
        lstm_bw = rnn.LSTMCell(self.num_hidden, use_peepholes=True)
        inputs = tf.unstack(inputs_embed, nlp_segment.flags.max_sentence_len, 1)
        outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw, lstm_bw, inputs,
                                                     sequence_length=self.length_64, dtype=tf.float32)
        output = tf.reshape(outputs, [-1, self.num_hidden * 2])
        # if self.trainMode:
        output = tf.nn.dropout(output, self.dropout)

    with tf.variable_scope('Softmax') as scope:
        self.W = tf.get_variable(shape=[self.num_hidden * 2, self.num_tags],
                                 initializer=tf.truncated_normal_initializer(stddev=0.01),
                                 name='weights',
                                 regularizer=l2_regularizer(0.001))
        self.b = tf.Variable(tf.zeros([self.num_tags], name='bias'))
        matricized_unary_scores = tf.matmul(output, self.W) + self.b
        # matricized_unary_scores = tf.nn.log_softmax(matricized_unary_scores)
        self.unary_scores = tf.reshape(matricized_unary_scores,
                                       [-1, FLAGS.max_sentence_len, self.num_tags])

    with tf.name_scope("crf"):
        self.transition_params = tf.get_variable(
            "transitions",
            shape=[self.num_tags, self.num_tags],
            initializer=self.initializer)
        log_likelihood, self.transition_params = crf.crf_log_likelihood(self.unary_scores, self.label,
                                                                        self.length_64, self.transition_params)
        self.loss = tf.reduce_mean(-log_likelihood)

    self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
    self.saver = tf.train.Saver()

def lstm_cell(self):
    cell = rnn.LSTMCell(self.hidden_neural_size, use_peepholes=True, initializer=self.initializer)
    cell = rnn.DropoutWrapper(cell, self.dropout)
    return cell

def lstm_cell(self):
    lstm_cell = rnn.LSTMCell(self.hidden_neural_size, forget_bias=1.0)
    lstm_cell = rnn.DropoutWrapper(lstm_cell, output_keep_prob=self.dropout_keep_prob)
    return lstm_cell

def lstm_fw(self):
    lstm_fw = rnn.LSTMCell(self.hidden_neural_size)
    lstm_fw = rnn.DropoutWrapper(lstm_fw, self.dropout)
    return lstm_fw

def __init__(self):
    print("tensorflow version: ", tf.__version__)
    tf.reset_default_graph()

    self.encoder_vec_file = "./preprocessing/enc.vec"
    self.decoder_vec_file = "./preprocessing/dec.vec"
    self.encoder_vocabulary = "./preprocessing/enc.vocab"
    self.decoder_vocabulary = "./preprocessing/dec.vocab"
    self.dictFile = './word_dict.txt'
    self.batch_size = 1
    self.max_batches = 100000
    self.show_epoch = 100
    self.model_path = './model/'

    # load the jieba user dictionary
    jieba.load_userdict(self.dictFile)

    self.model = dynamicSeq2seq(encoder_cell=LSTMCell(40),
                                decoder_cell=LSTMCell(40),
                                encoder_vocab_size=600,
                                decoder_vocab_size=1600,
                                embedding_size=20,
                                attention=False,
                                bidirectional=False,
                                debug=False,
                                time_major=True)
    self.location = ["??", "??", "??", "??"]
    self.user_info = {"__username__": "yw", "__location__": "??"}
    self.robot_info = {"__robotname__": "Rr"}
    self.dec_vocab = {}
    self.enc_vocab = {}
    self.dec_vecToSeg = {}
    tag_location = ''
    with open(self.encoder_vocabulary, "r") as enc_vocab_file:
        for index, word in enumerate(enc_vocab_file.readlines()):
            self.enc_vocab[word.strip()] = index
    with open(self.decoder_vocabulary, "r") as dec_vocab_file:
        for index, word in enumerate(dec_vocab_file.readlines()):
            self.dec_vecToSeg[index] = word.strip()
            self.dec_vocab[word.strip()] = index

def create_base(self, inputs, is_training):
    def single_cell(size):
        if is_training:
            return tf.contrib.rnn.DropoutWrapper(LSTMCell(size), output_keep_prob=self._config.keep_prob)
        else:
            return tf.contrib.rnn.DropoutWrapper(LSTMCell(size), 1.0)

    with tf.name_scope('Model'):
        cell = tf.contrib.rnn.MultiRNNCell([single_cell(size) for size in self._config.lstm_params['hidden_sizes']])
        cell.zero_state(self._config.batch_size, tf.float32)

        input_list = tf.unstack(tf.expand_dims(inputs, axis=2), axis=1)
        outputs, _ = tf.nn.static_rnn(cell, input_list, dtype=tf.float32)

        # take the last output in the sequence
        output = outputs[-1]

    with tf.name_scope("final_layer"):
        with tf.name_scope("Wx_plus_b"):
            softmax_w = tf.get_variable("softmax_w",
                                        [self._config.lstm_params['hidden_sizes'][-1], self._config.num_classes],
                                        initializer=tf.contrib.layers.xavier_initializer())
            softmax_b = tf.get_variable("softmax_b", [self._config.num_classes],
                                        initializer=tf.constant_initializer(0.1))
            logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b, "logits")

    with tf.name_scope('output'):
        predicted_classes = tf.to_int32(tf.argmax(logits, dimension=1), name='y')

    return logits, predicted_classes

def fast_dlstm(s_t, state_in):

    def dilate_one_time_step(one_h, switcher, num_chunks):
        h_slices = []
        h_size = 256
        chunk_step_size = h_size // num_chunks
        for switch_step, h_step in zip(range(num_chunks), range(0, h_size, chunk_step_size)):
            one_switch = switcher[switch_step]
            h_s = conditional_backprop(one_switch, one_h[h_step: h_step + chunk_step_size])
            h_slices.append(h_s)
        dh = tf.stack(h_slices)
        dh = tf.reshape(dh, [-1, 256])
        return dh

    lstm = rnn.LSTMCell(256, state_is_tuple=True)
    chunks = 8

    def dlstm_scan_fn(previous_output, current_input):
        out, state_out = lstm(current_input, previous_output[1])
        i = previous_output[2]
        basis_i = tf.one_hot(i, depth=chunks)
        state_out_dilated = dilate_one_time_step(tf.squeeze(state_out[0]), basis_i, chunks)
        state_out = rnn.LSTMStateTuple(state_out_dilated, state_out[1])
        i += tf.constant(1)
        new_i = tf.mod(i, chunks)
        return out, state_out, new_i

    rnn_outputs, final_states, mod_idxs = tf.scan(dlstm_scan_fn,
                                                  tf.transpose(s_t, [1, 0, 2]),
                                                  initializer=(
                                                      state_in[1],
                                                      rnn.LSTMStateTuple(*state_in),
                                                      tf.constant(0)))

    state_out = [final_states[0][-1, 0, :], final_states[1][-1, 0, :]]
    cell_states = final_states[0][:, 0, :]
    out_states = final_states[1][:, 0, :]
    return out_states, cell_states, state_out

def cell_create(self, scope_name):
    with tf.variable_scope(scope_name):
        if self.cell_type == 'tanh':
            cells = rnn.MultiRNNCell([rnn.BasicRNNCell(self.n_hidden[i]) for i in range(self.n_layers)],
                                     state_is_tuple=True)
        elif self.cell_type == 'LSTM':
            cells = rnn.MultiRNNCell([rnn.BasicLSTMCell(self.n_hidden[i]) for i in range(self.n_layers)],
                                     state_is_tuple=True)
        elif self.cell_type == 'GRU':
            cells = rnn.MultiRNNCell([rnn.GRUCell(self.n_hidden[i]) for i in range(self.n_layers)],
                                     state_is_tuple=True)
        elif self.cell_type == 'LSTMP':
            cells = rnn.MultiRNNCell([rnn.LSTMCell(self.n_hidden[i]) for i in range(self.n_layers)],
                                     state_is_tuple=True)
        cells = rnn.DropoutWrapper(cells, input_keep_prob=self.dropout_ph, output_keep_prob=self.dropout_ph)
    return cells

def rnn_segment(features, targets, mode, params):
    seq_feature = features['seq_feature']
    seq_length = features['seq_length']
    with tf.variable_scope("emb"):
        embeddings = tf.get_variable("char_emb", shape=[params['num_char'], params['emb_size']])
    seq_emb = tf.nn.embedding_lookup(embeddings, seq_feature)
    batch_size = tf.shape(seq_feature)[0]
    time_step = tf.shape(seq_feature)[1]
    flat_seq_emb = tf.reshape(seq_emb, shape=[batch_size, time_step, (params['k'] + 1) * params['emb_size']])
    cell = rnn.LSTMCell(params['rnn_units'])
    if mode == ModeKeys.TRAIN:
        cell = rnn.DropoutWrapper(cell, params['input_keep_prob'], params['output_keep_prob'])
    projection_cell = rnn.OutputProjectionWrapper(cell, params['num_class'])
    logits, _ = tf.nn.dynamic_rnn(projection_cell, flat_seq_emb, sequence_length=seq_length, dtype=tf.float32)
    weight_mask = tf.to_float(tf.sequence_mask(seq_length))
    loss = seq2seq.sequence_loss(logits, targets, weights=weight_mask)
    train_op = layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=params["learning_rate"],
        optimizer=tf.train.AdamOptimizer,
        clip_gradients=params['grad_clip'],
        summaries=[
            "learning_rate",
            "loss",
            "gradients",
            "gradient_norm",
        ])
    pred_classes = tf.to_int32(tf.argmax(input=logits, axis=2))
    pred_words = tf.logical_or(tf.equal(pred_classes, 0), tf.equal(pred_classes, 3))
    target_words = tf.logical_or(tf.equal(targets, 0), tf.equal(targets, 3))
    precision = metrics.streaming_precision(pred_words, target_words, weights=weight_mask)
    recall = metrics.streaming_recall(pred_words, target_words, weights=weight_mask)
    predictions = {
        "classes": pred_classes
    }
    eval_metric_ops = {
        "precision": precision,
        "recall": recall
    }
    return learn.ModelFnOps(mode, predictions, loss, train_op, eval_metric_ops=eval_metric_ops)

def __init__(self, num_units, use_peepholes=False, forget_bias=1.0):
    super(Grid1LSTMCell, self).__init__(
        num_units=num_units, num_dims=1,
        input_dims=0, output_dims=0, priority_dims=0,
        cell_fn=lambda n, i: rnn.LSTMCell(
            num_units=n, input_size=i,
            use_peepholes=use_peepholes,
            forget_bias=forget_bias, state_is_tuple=False))

def __init__(self,
             num_units,
             tied=False,
             non_recurrent_fn=None,
             use_peepholes=False,
             forget_bias=1.0):
    super(Grid2LSTMCell, self).__init__(
        num_units=num_units, num_dims=2,
        input_dims=0, output_dims=0, priority_dims=0, tied=tied,
        non_recurrent_dims=None if non_recurrent_fn is None else 0,
        cell_fn=lambda n, i: rnn.LSTMCell(
            num_units=n, input_size=i,
            forget_bias=forget_bias, use_peepholes=use_peepholes,
            state_is_tuple=False),
        non_recurrent_fn=non_recurrent_fn)

def __init__(self,
             num_units,
             tied=False,
             non_recurrent_fn=None,
             use_peepholes=False,
             forget_bias=1.0):
    super(Grid3LSTMCell, self).__init__(
        num_units=num_units, num_dims=3,
        input_dims=0, output_dims=0, priority_dims=0, tied=tied,
        non_recurrent_dims=None if non_recurrent_fn is None else 0,
        cell_fn=lambda n, i: rnn.LSTMCell(
            num_units=n, input_size=i,
            forget_bias=forget_bias, use_peepholes=use_peepholes,
            state_is_tuple=False),
        non_recurrent_fn=non_recurrent_fn)

def test_switchable_dropout_wrapper_state_is_tuple(self):
    tf.set_random_seed(0)
    batch_size = 3
    sequence_len = 3
    word_embedding_dim = 5
    lstm_input = tf.random_normal([batch_size, sequence_len, word_embedding_dim])
    sequence_length = tf.constant(np.array([2, 1, 3]), dtype="int32")
    is_train = tf.placeholder('bool', [])
    rnn_hidden_size = 3
    output_keep_prob = 0.75
    rnn_cell = LSTMCell(rnn_hidden_size, state_is_tuple=True)
    d_rnn_cell = SwitchableDropoutWrapper(rnn_cell, is_train,
                                          output_keep_prob=output_keep_prob)
    rnn_output, (rnn_c_state, rnn_m_state) = tf.nn.dynamic_rnn(
        cell=d_rnn_cell, dtype="float", sequence_length=sequence_length, inputs=lstm_input)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        output_no_train = rnn_output.eval(feed_dict={is_train: False})
        expected_output_no_train = np.array(
            [[[0.10523333, -0.03578992, 0.16407447],
              [-0.07642615, -0.1346959, 0.07218226],
              [0.0, 0.0, 0.0]],
             [[-0.31979755, -0.12604457, -0.24436688],
              [0.0, 0.0, 0.0],
              [0.0, 0.0, 0.0]],
             [[0.27140033, -0.01063369, 0.11808267],
              [0.15138564, -0.10808259, 0.13118345],
              [0.20397078, -0.06317351, 0.21408504]]])
        assert_allclose(output_no_train,
                        expected_output_no_train * d_rnn_cell._output_keep_prob,
                        rtol=1e-06)

        output_train = rnn_output.eval(feed_dict={is_train: True})
        expected_output_train = np.array(
            [[[-0.0, -0.21935862, -0.11160457],
              [-0.0, -0.0, 0.09479073],
              [0.0, 0.0, 0.0]],
             [[0.02565068, 0.21709232, -0.0],
              [0.0, 0.0, 0.0],
              [0.0, 0.0, 0.0]],
             [[0.0, 0.0, 0.07740743],
              [0.04682902, -0.14770079, 0.14597748],
              [0.0, 0.09399685, 0.0]]])
        # low precision test, this one seems flaky
        assert_allclose(output_train, expected_output_train, rtol=1e-06)

def test_switchable_dropout_wrapper_state_is_not_tuple(self):
    tf.set_random_seed(0)
    batch_size = 3
    sequence_len = 3
    word_embedding_dim = 5
    lstm_input = tf.random_normal([batch_size, sequence_len, word_embedding_dim])
    sequence_length = tf.constant(np.array([2, 1, 3]), dtype="int32")
    is_train = tf.placeholder('bool', [])
    rnn_hidden_size = 3
    output_keep_prob = 0.75
    rnn_cell = LSTMCell(rnn_hidden_size, state_is_tuple=False)
    d_rnn_cell = SwitchableDropoutWrapper(rnn_cell, is_train,
                                          output_keep_prob=output_keep_prob)
    rnn_output, rnn_state = tf.nn.dynamic_rnn(
        cell=d_rnn_cell, dtype="float", sequence_length=sequence_length, inputs=lstm_input)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        output_no_train = rnn_output.eval(feed_dict={is_train: False})
        expected_output_no_train = np.array(
            [[[-0.10366952, -0.01751264, -0.02237115],
              [-0.07636562, 0.06660741, 0.02946584],
              [0.0, 0.0, 0.0]],
             [[-0.09134783, 0.15928121, 0.05786164],
              [0.0, 0.0, 0.0],
              [0.0, 0.0, 0.0]],
             [[-0.00575439, -0.22505699, -0.27295753],
              [-0.12970942, -0.16395324, -0.06502352],
              [-0.16302694, -0.27601245, -0.20045257]]])
        assert_allclose(output_no_train,
                        expected_output_no_train * d_rnn_cell._output_keep_prob,
                        rtol=1e-06)

        output_train = rnn_output.eval(feed_dict={is_train: True})
        expected_output_train = np.array(
            [[[-0.0, 0.13120674, -0.02568678],
              [-0.0, 0.0, -0.20105337],
              [0.0, 0.0, 0.0]],
             [[-0.02063255, 0.25306353, 0.0],
              [0.0, 0.0, 0.0],
              [0.0, 0.0, 0.0]],
             [[0.0, -0.0, -0.0595048],
              [0.03207482, -0.07930075, -0.09382694],
              [0.0, -0.00405498, -0.0]]])
        assert_allclose(output_train, expected_output_train, rtol=1e-04)

def build_infer_graph(x, batch_size, vocab_size=VOCAB_SIZE, embedding_size=32,
                      rnn_size=128, num_layers=2, p_keep=1.0):
    """
    builds inference graph
    """
    infer_args = {"batch_size": batch_size, "vocab_size": vocab_size,
                  "embedding_size": embedding_size, "rnn_size": rnn_size,
                  "num_layers": num_layers, "p_keep": p_keep}
    logger.debug("building inference graph: %s.", infer_args)

    # other placeholders
    p_keep = tf.placeholder_with_default(p_keep, [], "p_keep")
    batch_size = tf.placeholder_with_default(batch_size, [], "batch_size")

    # embedding layer
    embed_seq = layers.embed_sequence(x, vocab_size, embedding_size)
    # shape: [batch_size, seq_len, embedding_size]
    embed_seq = tf.nn.dropout(embed_seq, keep_prob=p_keep)
    # shape: [batch_size, seq_len, embedding_size]

    # RNN layers
    cells = [rnn.LSTMCell(rnn_size) for _ in range(num_layers)]
    cells = [rnn.DropoutWrapper(cell, output_keep_prob=p_keep) for cell in cells]
    cells = rnn.MultiRNNCell(cells)
    input_state = cells.zero_state(batch_size, tf.float32)
    # shape: [num_layers, 2, batch_size, rnn_size]
    rnn_out, output_state = tf.nn.dynamic_rnn(cells, embed_seq, initial_state=input_state)
    # rnn_out shape: [batch_size, seq_len, rnn_size]
    # output_state shape: [num_layers, 2, batch_size, rnn_size]
    with tf.name_scope("lstm"):
        tf.summary.histogram("outputs", rnn_out)
        for c_state, h_state in output_state:
            tf.summary.histogram("c_state", c_state)
            tf.summary.histogram("h_state", h_state)

    # fully connected layer
    logits = layers.fully_connected(rnn_out, vocab_size, activation_fn=None)
    # shape: [batch_size, seq_len, vocab_size]

    # predictions
    with tf.name_scope("softmax"):
        probs = tf.nn.softmax(logits)
        # shape: [batch_size, seq_len, vocab_size]

    with tf.name_scope("sequence"):
        tf.summary.histogram("embeddings", embed_seq)
        tf.summary.histogram("logits", logits)

    model = {"logits": logits, "probs": probs,
             "input_state": input_state, "output_state": output_state,
             "p_keep": p_keep, "batch_size": batch_size, "infer_args": infer_args}
    return model

def predict_rewards(self, input_):
    with tf.variable_scope("encoder"):

        with tf.variable_scope("embedding"):
            # Embed input sequence
            W_embed = tf.get_variable("weights", [1, self.input_dimension + 2, self.input_embed],
                                      initializer=self.initializer)
            embedded_input = tf.nn.conv1d(input_, W_embed, 1, "VALID", name="embedded_input")
            # Batch Normalization
            embedded_input = tf.layers.batch_normalization(embedded_input, axis=2,
                                                           training=self.is_training, name='layer_norm', reuse=None)

        with tf.variable_scope("dynamic_rnn"):
            # Encode input sequence
            cell1 = LSTMCell(self.num_neurons, initializer=self.initializer)
            # Return the output activations [Batch size, Sequence Length, Num_neurons]
            # and last hidden state (c,h) as tensors.
            encoder_output, encoder_state = tf.nn.dynamic_rnn(cell1, embedded_input, dtype=tf.float32)

    # frame = tf.reduce_mean(encoder_output, 1)  # [Batch size, Sequence Length, Num_neurons] to [Batch size, Num_neurons]
    frame = encoder_state[0]  # [Batch size, Num_neurons]

    # Glimpse
    with tf.variable_scope("glimpse"):
        self.W_ref_g = tf.get_variable("W_ref_g", [1, self.num_neurons, self.num_neurons], initializer=self.initializer)
        self.W_q_g = tf.get_variable("W_q_g", [self.num_neurons, self.num_neurons], initializer=self.initializer)
        self.v_g = tf.get_variable("v_g", [self.num_neurons], initializer=self.initializer)

        # Attending mechanism
        encoded_ref_g = tf.nn.conv1d(encoder_output, self.W_ref_g, 1, "VALID", name="encoded_ref_g")  # [Batch size, seq_length, n_hidden]
        encoded_query_g = tf.expand_dims(tf.matmul(frame, self.W_q_g, name="encoded_query_g"), 1)  # [Batch size, 1, n_hidden]
        scores_g = tf.reduce_sum(self.v_g * tf.tanh(encoded_ref_g + encoded_query_g), [-1], name="scores_g")  # [Batch size, seq_length]
        attention_g = tf.nn.softmax(scores_g, name="attention_g")

        # 1 glimpse = Linear combination of reference vectors (defines new query vector)
        glimpse = tf.multiply(encoder_output, tf.expand_dims(attention_g, 2))
        glimpse = tf.reduce_sum(glimpse, 1)

    with tf.variable_scope("ffn"):
        # ffn 1
        h0 = tf.layers.dense(glimpse, self.num_neurons, activation=tf.nn.relu, kernel_initializer=self.initializer)
        # ffn 2
        w1 = tf.get_variable("w1", [self.num_neurons, 1], initializer=self.initializer)
        b1 = tf.Variable(self.init_baseline, name="b1")
        self.predictions = tf.squeeze(tf.matmul(h0, w1) + b1)

def __init__(self, encoder_size, decoder_size, encoder_vocab_size,
             decoder_vocab_size, encoder_layer_size, decoder_layer_size,
             RNN_type='LSTM', encoder_input_keep_prob=1.0,
             encoder_output_keep_prob=1.0, decoder_input_keep_prob=1.0,
             decoder_output_keep_prob=1.0, learning_rate=0.01, hidden_size=128):
    self.encoder_size = encoder_size
    self.decoder_size = decoder_size
    self.encoder_vocab_size = encoder_vocab_size
    self.decoder_vocab_size = decoder_vocab_size
    self.encoder_layer_size = encoder_layer_size
    self.decoder_layer_size = decoder_layer_size
    self.encoder_input_keep_prob = encoder_input_keep_prob
    self.encoder_output_keep_prob = encoder_output_keep_prob
    self.decoder_input_keep_prob = decoder_input_keep_prob
    self.decoder_output_keep_prob = decoder_output_keep_prob
    self.learning_rate = learning_rate
    self.hidden_size = hidden_size

    self.encoder_input = tf.placeholder(tf.float32, shape=(None, self.encoder_size, self.encoder_vocab_size))
    self.decoder_input = tf.placeholder(tf.float32, shape=(None, self.decoder_size, self.decoder_vocab_size))
    self.target_input = tf.placeholder(tf.int32, shape=(None, self.decoder_size))

    self.weight = tf.get_variable(shape=[self.hidden_size, self.decoder_vocab_size],
                                  initializer=tf.contrib.layers.xavier_initializer(),
                                  dtype=tf.float32, name='weight')
    self.bias = tf.get_variable(shape=[self.decoder_vocab_size],
                                initializer=tf.contrib.layers.xavier_initializer(),
                                dtype=tf.float32, name='bias')

    self.logits = None
    self.cost = None
    self.train_op = None
    self.RNNCell = None
    self.outputs = None
    self.merged = None

    if RNN_type == 'LSTM':
        self.RNNCell = rnn.LSTMCell
    elif RNN_type == 'GRU':
        self.RNNCell = rnn.GRUCell
    else:
        raise Exception('not support {} RNN type'.format(RNN_type))

    self.build_model()
    self.saver = tf.train.Saver(tf.global_variables())

def __init__(self, n_hidden, cell="GRU"):
    """
    qa_rnn module init.
    :param n_hidden: num of hidden units
    :param cell: gru|lstm|basic_rnn
    """
    self.rnn_cell = rnn.BasicRNNCell(num_units=n_hidden)
    if cell == "GRU":
        self.rnn_cell = rnn.GRUCell(num_units=n_hidden)
    elif cell == "LSTM":
        self.rnn_cell = rnn.LSTMCell(num_units=n_hidden)
    else:
        raise Exception(cell + " not supported.")

def __init__(self, numberOfUnits, dictionarySize, maximumLength, inputFeatures=None, alwaysProvideInput=False):
    self.model = rnn.LSTMCell(numberOfUnits)
    self.loadingMatrix = tf.Variable(tf.random_uniform([numberOfUnits, dictionarySize], -1.0, 1.0),
                                     name='LOADINGMATRIX')
    self.lengthPlaceholder = tf.placeholder(tf.int32, shape=[None], name='LENGTH')

    self.maximumLength = maximumLength
    self.dictionarySize = dictionarySize

    if inputFeatures != None:
        self.transformedInputFeatures = [tf.layers.dense(inputs=inputFeatures,
                                                         units=s,
                                                         activation=tf.nn.tanh)
                                         for s in self.model.state_size]
        self.transformedInputFeatures = rnn.LSTMStateTuple(*self.transformedInputFeatures)
        if alwaysProvideInput:
            self.alwaysProvidedInput = tf.layers.dense(inputs=inputFeatures,
                                                       units=numberOfUnits,
                                                       activation=tf.nn.tanh)
        else:
            self.alwaysProvidedInput = None
    else:
        self.transformedInputFeatures = None
        self.alwaysProvidedInput = None

    # Unrolls some number of steps maximumLength
    self.inputPlaceholder = tf.placeholder(tf.int32, shape=[None, maximumLength], name='INPUT')
    embeddedInputs = tf.nn.embedding_lookup(tf.transpose(self.loadingMatrix), self.inputPlaceholder)
    if alwaysProvideInput:
        # alwaysProvidedInput: [None, numberOfUnits]
        # we want to duplicate it along the time axis to get [None, numberOfTimesSteps, numberOfUnits]
        alwaysProvidedInput2 = tf.reshape(self.alwaysProvidedInput, [-1, 1, numberOfUnits])
        alwaysProvidedInput3 = tf.tile(alwaysProvidedInput2, [1, maximumLength, 1])
        embeddedInputs = embeddedInputs + alwaysProvidedInput3
    self.outputs, self.states = tf.nn.dynamic_rnn(self.model,
                                                  inputs=embeddedInputs,
                                                  dtype=tf.float32,
                                                  sequence_length=self.lengthPlaceholder,
                                                  initial_state=self.transformedInputFeatures)

    # projectedOutputs: None x timeSteps x dictionarySize
    projectedOutputs = tf.tensordot(self.outputs, self.loadingMatrix, axes=[[2], [0]])
    self.outputDistribution = tf.nn.log_softmax(projectedOutputs)
    self.hardOutputs = tf.cast(tf.argmax(projectedOutputs, dimension=2), tf.int32)

    # A small graph for running the recurrence network forward one step
    self.statePlaceholders = [tf.placeholder(tf.float32, [None, numberOfUnits], name='state0'),
                              tf.placeholder(tf.float32, [None, numberOfUnits], name='state1')]
    self.oneInputPlaceholder = tf.placeholder(tf.int32, shape=[None], name='inputForOneStep')
    projectedInputs = tf.nn.embedding_lookup(tf.transpose(self.loadingMatrix), self.oneInputPlaceholder)
    if alwaysProvideInput:
        projectedInputs = projectedInputs + self.alwaysProvidedInput
    self.oneOutput, self.oneNewState = self.model(projectedInputs,
                                                  rnn.LSTMStateTuple(*self.statePlaceholders))
    self.oneNewState = [self.oneNewState[0], self.oneNewState[1]]
    self.oneOutputDistribution = tf.nn.log_softmax(tf.matmul(self.oneOutput, self.loadingMatrix))

# sequence prediction model with prediction fed into input

def __init__(self, num_units, num_dims=1, input_dims=None, output_dims=None,
             priority_dims=None, non_recurrent_dims=None, tied=False,
             cell_fn=None, non_recurrent_fn=None):
    """Initialize the parameters of a Grid RNN cell

    Args:
      num_units: int, The number of units in all dimensions of this GridRNN cell
      num_dims: int, Number of dimensions of this grid.
      input_dims: int or list, List of dimensions which will receive input data.
      output_dims: int or list, List of dimensions from which the output will be recorded.
      priority_dims: int or list, List of dimensions to be considered as priority dimensions.
              If None, no dimension is prioritized.
      non_recurrent_dims: int or list, List of dimensions that are not recurrent.
              The transfer function for non-recurrent dimensions is specified via `non_recurrent_fn`,
              which is default to be `tensorflow.nn.relu`.
      tied: bool, Whether to share the weights among the dimensions of this GridRNN cell.
              If there are non-recurrent dimensions in the grid, weights are shared between each
              group of recurrent and non-recurrent dimensions.
      cell_fn: function, a function which returns the recurrent cell object. Has to be in the following signature:
              def cell_func(num_units, input_size):
                # ...
              and returns an object of type `RNNCell`. If None, LSTMCell with default parameters will be used.
      non_recurrent_fn: a tensorflow Op that will be the transfer function of the non-recurrent dimensions
    """
    if num_dims < 1:
        raise ValueError('dims must be >= 1: {}'.format(num_dims))

    self._config = _parse_rnn_config(num_dims, input_dims, output_dims,
                                     priority_dims, non_recurrent_dims,
                                     non_recurrent_fn or nn.relu, tied,
                                     num_units)

    cell_input_size = (self._config.num_dims - 1) * num_units
    if cell_fn is None:
        self._cell = rnn.LSTMCell(
            num_units=num_units, input_size=cell_input_size, state_is_tuple=False)
    else:
        self._cell = cell_fn(num_units, cell_input_size)
        if not isinstance(self._cell, rnn.RNNCell):
            raise ValueError('cell_fn must return an object of type RNNCell')