The following 23 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.python.ops.rnn_cell.BasicLSTMCell().
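Before the project examples, here is a minimal sketch of the usage pattern most of them share, assuming the TensorFlow 0.x-era APIs these snippets target (tensorflow.python.ops.rnn_cell and rnn.rnn, later moved to tf.nn.rnn_cell and tf.nn.static_rnn); the sizes and tensor names are illustrative only:

    import tensorflow as tf
    from tensorflow.python.ops import rnn, rnn_cell

    n_steps, n_input, n_hidden, batch_size = 28, 28, 128, 32

    # Input shaped (batch_size, n_steps, n_input), as in the image examples below.
    x = tf.placeholder(tf.float32, [batch_size, n_steps, n_input])

    # Convert to the list-of-steps layout that rnn.rnn() expects:
    # n_steps tensors of shape (batch_size, n_input).
    x_ = tf.transpose(x, [1, 0, 2])     # (n_steps, batch_size, n_input)
    x_ = tf.reshape(x_, [-1, n_input])  # (n_steps * batch_size, n_input)
    x_ = tf.split(0, n_steps, x_)       # list of n_steps tensors

    cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    outputs, states = rnn.rnn(cell, x_, dtype=tf.float32)
    # outputs[-1] has shape (batch_size, n_hidden) and is usually fed
    # to a final linear layer, as the examples below do.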
def baseline_forward(self, X, size, n_class):
    shape = X.get_shape()
    _X = tf.transpose(X, [1, 0, 2])  # batch_size x sentence_length x word_length
                                     # -> sentence_length x batch_size x word_length
    _X = tf.reshape(_X, [-1, int(shape[2])])  # (batch_size x sentence_length) x word_length
    seq = tf.split(0, int(shape[1]), _X)      # sentence_length x (batch_size x word_length)

    with tf.name_scope("LSTM"):
        lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=1.0)
        outputs, states = rnn.rnn(lstm_cell, seq, dtype=tf.float32)

    with tf.name_scope("LSTM-Classifier"):
        W = tf.Variable(tf.random_normal([size, n_class]), name="W")
        b = tf.Variable(tf.random_normal([n_class]), name="b")
        output = tf.matmul(outputs[-1], W) + b

    return output
def BiRNN(x, n_input, n_steps, n_hidden):
    # Prepare data shape to match `bidirectional_rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(0, n_steps, x)

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Get lstm cell output
    outputs, _, _ = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x, dtype=tf.float32)
    return outputs
def BiRNN(x, n_hidden):
    # Prepare data shape to match `bidirectional_rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
    # Note: n_input and n_steps are module-level globals in the original project.

    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(0, n_steps, x)

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Get lstm cell output
    outputs, _, _ = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x, dtype=tf.float32)
    return outputs
def __init__(self, dim_image, dim_embed, dim_hidden, batch_size, n_lstm_steps, n_words, bias_init_vector=None):
    self.dim_image = np.int(dim_image)
    self.dim_embed = np.int(dim_embed)
    self.dim_hidden = np.int(dim_hidden)
    self.batch_size = np.int(batch_size)
    self.n_lstm_steps = np.int(n_lstm_steps)
    self.n_words = np.int(n_words)

    with tf.device("/cpu:0"):
        self.Wemb = tf.Variable(tf.random_uniform([n_words, dim_embed], -0.1, 0.1), name='Wemb')

    self.bemb = self.init_bias(dim_embed, name='bemb')
    self.lstm = rnn_cell.BasicLSTMCell(dim_hidden)

    self.encode_img_W = tf.Variable(tf.random_uniform([dim_image, dim_hidden], -0.1, 0.1), name='encode_img_W')
    self.encode_img_b = self.init_bias(dim_hidden, name='encode_img_b')

    self.embed_word_W = tf.Variable(tf.random_uniform([dim_hidden, n_words], -0.1, 0.1), name='embed_word_W')

    if bias_init_vector is not None:
        self.embed_word_b = tf.Variable(bias_init_vector.astype(np.float32), name='embed_word_b')
    else:
        self.embed_word_b = self.init_bias(n_words, name='embed_word_b')
def rnn_model(x, weights, biases):
    """RNN (LSTM or GRU) model for image"""
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(0, n_steps, x)

    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
    return tf.matmul(outputs[-1], weights) + biases
def rnn_model(x, weights, biases):
    """Build an RNN model for image"""
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(0, n_steps, x)

    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
    return tf.matmul(outputs[-1], weights) + biases
def __init__(self, num_units, forget_bias=1):
    super(Grid1BasicLSTMCell, self).__init__(
        num_units=num_units, num_dims=1,
        input_dims=0, output_dims=0, priority_dims=0, tied=False,
        cell_fn=lambda n, i: rnn_cell.BasicLSTMCell(
            num_units=n, forget_bias=forget_bias, input_size=i,
            state_is_tuple=False))
def __init__(self, num_units, tied=False, non_recurrent_fn=None, forget_bias=1):
    super(Grid2BasicLSTMCell, self).__init__(
        num_units=num_units, num_dims=2,
        input_dims=0, output_dims=0, priority_dims=0, tied=tied,
        non_recurrent_dims=None if non_recurrent_fn is None else 0,
        cell_fn=lambda n, i: rnn_cell.BasicLSTMCell(
            num_units=n, forget_bias=forget_bias, input_size=i,
            state_is_tuple=False),
        non_recurrent_fn=non_recurrent_fn)
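Both constructors pass a cell_fn so that the grid machinery can instantiate one BasicLSTMCell per grid dimension. A hedged usage sketch, assuming these classes match the ones shipped in TensorFlow's contrib grid_rnn module of the same era:

    from tensorflow.contrib import grid_rnn

    # A 2-D grid LSTM built from BasicLSTMCell sub-cells;
    # the result behaves like any other RNNCell.
    cell = grid_rnn.Grid2BasicLSTMCell(num_units=256)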
def __init__(self, num_units, forget_bias=1.0, input_size=None, state_is_tuple=False,
             activation=tanh, hyper_num_units=128, hyper_embedding_size=32,
             is_layer_norm=True):
    """Initialize the basic LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell.
      hyper_num_units: int, The number of units in the HyperLSTM cell.
      forget_bias: float, The bias added to forget gates (see above).
      input_size: Deprecated and unused.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`. By default (False), they are concatenated
        along the column axis. This default behavior will soon be deprecated.
      activation: Activation function of the inner states.
    """
    if not state_is_tuple:
        print("%s: Using a concatenated state is slower and will soon be "
              "deprecated. Use state_is_tuple=True." % self)
    if input_size is not None:
        print("%s: The input_size parameter is deprecated." % self)

    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation
    self.hyper_num_units = hyper_num_units
    self.total_num_units = self._num_units + self.hyper_num_units
    self.hyper_cell = rnn_cell.BasicLSTMCell(hyper_num_units)
    self.hyper_embedding_size = hyper_embedding_size
    self.is_layer_norm = is_layer_norm
def __init__(self, args):
    self.args = args

    if args.disc_model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.disc_model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.disc_model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.disc_model))

    self.embedding = tf.Variable(
        tf.random_uniform([self.args.vocab_size, self.args.rnn_size],
                          minval=-.05, maxval=.05, dtype=tf.float32),
        name='embedding')

    with tf.variable_scope('DISC') as scope:
        cell = cell_fn(args.rnn_size)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        # If the input data is given as word tokens, feed this value
        self.input_data_text = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length], name='input_data_text')
        #self.input_data_text = tf.Variable(tf.zeros((args.batch_size, args.seq_length), dtype=tf.int32), name='input_data_text')
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        # Fully connected layer is applied to the final state to determine the output class
        self.fc_layer = tf.Variable(
            tf.random_normal([args.rnn_size, 1], stddev=0.35, dtype=tf.float32),
            name='disc_fc_layer')
        self.lr = tf.Variable(0.0, trainable=False, name='learning_rate')
        self.has_init_seq2seq = False
def __init__(self, args):
    self.args = args

    if args.gen_model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.gen_model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.gen_model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.gen_model))

    with tf.variable_scope('GEN') as scope:
        cell = cell_fn(args.rnn_size)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        # sequence of word tokens taken as input
        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length], name='input_data')
        self.latent_state = tf.placeholder(tf.float32, [args.batch_size, args.latent_size])

        # Weights to map the latent state into the (usually) bigger initial state.
        # Right now this only works for rnn (other more complex models have more
        # than one initial state which needs to be given a value), and we support
        # up to two layers (state1 and state2).
        self.latent_to_initial_state1 = tf.Variable(
            tf.random_normal([args.latent_size, args.rnn_size], stddev=0.35, dtype=tf.float32),
            name='latent_to_intial_state1')
        self.latent_to_initial_state2 = tf.Variable(
            tf.random_normal([args.latent_size, args.rnn_size], stddev=0.35, dtype=tf.float32),
            name='latent_to_intial_state2')
        self.initial_state1 = tf.matmul(self.latent_state, self.latent_to_initial_state1)
        self.initial_state2 = tf.matmul(self.latent_state, self.latent_to_initial_state2)

        # these are the actual approximate word vectors generated by the model
        self.outputs = tf.placeholder(tf.float32, [args.seq_length, args.batch_size, args.rnn_size])
        self.lr = tf.Variable(0.0, trainable=False, name='learning_rate')
        self.has_init_seq2seq = False
def RNN(x, weights, biases):
    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshape into (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors
    x = tf.split(0, n_steps, x)

    with tf.variable_scope('n_steps4'):
        lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
        outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)

    return tf.matmul(outputs[-1], weights['out']) + biases['out']
def get_output_for(self):
    """Unroll the LSTM cells, apply the activation and return the output ``tf.Tensor``.

    Returns
    -------
    ``tf.Tensor``
        Output tensor of this layer.
    """
    states = []
    outputs = []
    lstm = rnn_cell.BasicLSTMCell(self.num_units, state_is_tuple=True)
    # batch_size is assumed to be defined at module level in the original project
    initial_state = state = lstm.zero_state(batch_size, tf.float32)

    with tf.name_scope(self.name) as scope:
        for _id in xrange(self.num_of_cells):
            if _id > 0:
                scope.reuse_variables()
            output, state = lstm(self.input_layer, state)
            if self.activation is not None:
                output = self.activation(output)
            outputs.append(output)
            states.append(state)
        final_state = state
        if self.return_cell_out:
            output = tf.reshape(tf.concat(1, outputs), [-1, self.num_units])
        else:
            output = outputs[-1]

    tf.add_to_collection(BerryKeys.LAYER_OUTPUTS, output)
    return output
def BiRNN(x, weights, biases):
    # Prepare data shape to match `bidirectional_rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(0, n_steps, x)

    # Define lstm cells with tensorflow: three stacked bidirectional layers
    with tf.variable_scope("lstm1") as scope1:
        lstm_fw_cell_1 = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
        lstm_bw_cell_1 = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
        outputs_1, _, _ = rnn.bidirectional_rnn(lstm_fw_cell_1, lstm_bw_cell_1, x, dtype=tf.float32)
    with tf.variable_scope("lstm2") as scope2:
        lstm_fw_cell_2 = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
        lstm_bw_cell_2 = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
        outputs_2, _, _ = rnn.bidirectional_rnn(lstm_fw_cell_2, lstm_bw_cell_2, outputs_1, dtype=tf.float32)
    with tf.variable_scope("lstm3") as scope3:
        lstm_fw_cell_3 = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
        lstm_bw_cell_3 = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
        outputs_3, _, _ = rnn.bidirectional_rnn(lstm_fw_cell_3, lstm_bw_cell_3, outputs_2, dtype=tf.float32)

    outputs = outputs_3
    outputs = tf.reshape(tf.concat(0, outputs), [MAX_LEN * BATCH_SIZE, n_hidden * 2])

    # Linear activation over every timestep's output
    return tf.matmul(outputs, weights['out']) + biases['out']
def __init__(self, args, data, infer=False):
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    with tf.name_scope('inputs'):
        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.target_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])

    with tf.name_scope('model'):
        self.cell = rnn_cell.BasicLSTMCell(args.state_size)
        self.cell = rnn_cell.MultiRNNCell([self.cell] * args.num_layers)
        self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            w = tf.get_variable('softmax_w', [args.state_size, data.vocab_size])
            b = tf.get_variable('softmax_b', [data.vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable('embedding', [data.vocab_size, args.state_size])
                inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        outputs, last_state = tf.nn.dynamic_rnn(self.cell, inputs, initial_state=self.initial_state)

    with tf.name_scope('loss'):
        output = tf.reshape(outputs, [-1, args.state_size])
        self.logits = tf.matmul(output, w) + b
        self.probs = tf.nn.softmax(self.logits)
        self.last_state = last_state

        targets = tf.reshape(self.target_data, [-1])
        loss = seq2seq.sequence_loss_by_example([self.logits],
                                                [targets],
                                                [tf.ones_like(targets, dtype=tf.float32)])
        self.cost = tf.reduce_sum(loss) / args.batch_size
        tf.summary.scalar('loss', self.cost)

    with tf.name_scope('optimize'):
        self.lr = tf.placeholder(tf.float32, [])
        tf.summary.scalar('learning_rate', self.lr)

        optimizer = tf.train.AdamOptimizer(self.lr)
        tvars = tf.trainable_variables()
        grads = tf.gradients(self.cost, tvars)
        for g in grads:
            tf.summary.histogram(g.name, g)
        grads, _ = tf.clip_by_global_norm(grads, args.grad_clip)

        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        self.merged_op = tf.summary.merge_all()
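For context, a sketch of how a model like the one above might be driven during one training step; the session loop, the batch arrays x_batch/y_batch, and the learning-rate value are illustrative assumptions, while the fed and fetched attributes all come from the constructor above:

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        feed = {model.input_data: x_batch,    # int32, (batch_size, seq_length)
                model.target_data: y_batch,   # int32, (batch_size, seq_length)
                model.lr: 0.002}              # value for the learning-rate placeholder
        summary, loss, _ = sess.run(
            [model.merged_op, model.cost, model.train_op], feed)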
def build_graph(self):
    config = self.config
    self.reader = utils.DataReader(seq_len=config.seq_length,
                                   batch_size=config.batch_size,
                                   data_filename=config.data_filename)

    self.cell = rnn_cell.BasicLSTMCell(config.rnn_size, state_is_tuple=True)

    self.input_data = tf.placeholder(tf.int32, [None, config.input_length])
    self.targets = tf.placeholder(tf.int32, [None, 1])
    self.initial_state = self.cell.zero_state(tf.shape(self.targets)[0], tf.float32)

    with tf.variable_scope("input_embedding"):
        embedding = tf.get_variable("embedding", [config.vocab_size, config.rnn_size])
        inputs = tf.split(1, config.input_length, tf.nn.embedding_lookup(embedding, self.input_data))
        inputs = [tf.squeeze(input, [1]) for input in inputs]

    with tf.variable_scope("send_to_rnn"):
        state = self.initial_state
        output = None
        for i, input in enumerate(inputs):
            if i > 0:
                tf.get_variable_scope().reuse_variables()
            output, state = self.cell(input, state)

    with tf.variable_scope("softmax"):
        softmax_w = tf.get_variable("softmax_w", [config.rnn_size, config.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [config.vocab_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        self.output = tf.cast(tf.reshape(tf.arg_max(self.probs, 1), [-1, 1]), tf.int32)
        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.output, self.targets), tf.float32))

    loss = seq2seq.sequence_loss_by_example([self.logits],
                                            [tf.reshape(self.targets, [-1])],
                                            [tf.ones([config.batch_size])],
                                            config.vocab_size)
    self.cost = tf.reduce_mean(loss)
    self.final_state = state

    # self.lr = tf.Variable(0.001, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), config.grad_clip)
    optimizer = tf.train.AdamOptimizer()  # self.lr
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    self.summary_accuracy = tf.scalar_summary('accuracy', self.accuracy)
    tf.scalar_summary('cost', self.cost)
    self.summary_all = tf.merge_all_summaries()
def __init__(self, args, infer=False):
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    cell = cell_fn(args.rnn_size)
    self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
            inputs = tf.split(1, args.seq_length, tf.nn.embedding_lookup(embedding, self.input_data))
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                              loop_function=loop if infer else None,
                                              scope='rnnlm')
    output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    loss = seq2seq.sequence_loss_by_example([self.logits],
                                            [tf.reshape(self.targets, [-1])],
                                            [tf.ones([args.batch_size * args.seq_length])],
                                            args.vocab_size)
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def __init__(self, args, infer=False):
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    if args.rnncell == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.rnncell == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.rnncell == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise Exception("rnncell type not supported: {}".format(args.rnncell))

    cell = cell_fn(args.rnn_size)
    self.cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = build_weight([args.rnn_size, args.vocab_size], name='soft_w')
        softmax_b = build_weight([args.vocab_size], name='soft_b')
        word_embedding = build_weight([args.vocab_size, args.embedding_size], name='word_embedding')
        inputs_list = tf.split(1, args.seq_length, tf.nn.embedding_lookup(word_embedding, self.input_data))
        inputs_list = [tf.squeeze(input_, [1]) for input_ in inputs_list]

    def loop(prev, _):
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(word_embedding, prev_symbol)

    if not args.attention:
        outputs, last_state = seq2seq.rnn_decoder(inputs_list, self.initial_state, self.cell,
                                                  loop_function=loop if infer else None,
                                                  scope='rnnlm')
    else:
        self.attn_length = 5
        self.attn_size = 32
        self.attention_states = build_weight([args.batch_size, self.attn_length, self.attn_size])
        outputs, last_state = seq2seq.attention_decoder(inputs_list, self.initial_state,
                                                        self.attention_states, self.cell,
                                                        loop_function=loop if infer else None,
                                                        scope='rnnlm')

    self.final_state = last_state
    output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    loss = seq2seq.sequence_loss_by_example([self.logits],
                                            [tf.reshape(self.targets, [-1])],
                                            [tf.ones([args.batch_size * args.seq_length])],
                                            args.vocab_size)
    # average loss for each word of each timestep
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.lr = tf.Variable(0.0, trainable=False)
    self.var_trainable_op = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, self.var_trainable_op), args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, self.var_trainable_op))
    self.initial_op = tf.initialize_all_variables()
    self.saver = tf.train.Saver(tf.all_variables(), max_to_keep=5, keep_checkpoint_every_n_hours=1)
    self.logfile = args.log_dir + str(datetime.datetime.strftime(datetime.datetime.now(),
                                      '%Y-%m-%d %H:%M:%S') + '.txt').replace(' ', '').replace('/', '')
    self.var_op = tf.all_variables()
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM) with layer normalization."""
    with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(1, 2, state)

        # Layer-norm gain and bias parameters. (An earlier version created
        # these with tf.Variable instead of vs.get_variable.)
        s1 = vs.get_variable("s1", initializer=tf.ones([4 * self._num_units]), dtype=tf.float32)
        s2 = vs.get_variable("s2", initializer=tf.ones([4 * self._num_units]), dtype=tf.float32)
        s3 = vs.get_variable("s3", initializer=tf.ones([self._num_units]), dtype=tf.float32)

        b1 = vs.get_variable("b1", initializer=tf.zeros([4 * self._num_units]), dtype=tf.float32)
        b2 = vs.get_variable("b2", initializer=tf.zeros([4 * self._num_units]), dtype=tf.float32)
        b3 = vs.get_variable("b3", initializer=tf.zeros([self._num_units]), dtype=tf.float32)

        input_below_ = rnn_cell._linear([inputs], 4 * self._num_units, False, scope="out_1")
        input_below_ = ln(input_below_, s1, b1)
        state_below_ = rnn_cell._linear([h], 4 * self._num_units, False, scope="out_2")
        state_below_ = ln(state_below_, s2, b2)
        lstm_matrix = tf.add(input_below_, state_below_)

        i, j, f, o = array_ops.split(1, 4, lstm_matrix)

        new_c = (c * sigmoid(f) + sigmoid(i) * self._activation(j))
        # Normalizing c currently causes a lot of NaNs in the model,
        # so it is left unnormalized for now.
        # new_c_ = ln(new_c, s3, b3)
        new_c_ = new_c
        new_h = self._activation(new_c_) * sigmoid(o)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat(1, [new_c, new_h])
        return new_h, new_state
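The __call__ above depends on a layer-normalization helper ln that the excerpt does not include. A minimal sketch of what such a helper typically looks like, following the standard layer-normalization recipe; the signature is inferred from the call sites and the epsilon value is an assumption:

    def ln(inputs, scale, shift, epsilon=1e-5):
        # Normalize each row across its feature dimension, then apply the
        # learned element-wise scale and shift (assumed signature).
        mean, var = tf.nn.moments(inputs, [1], keep_dims=True)
        return scale * (inputs - mean) / tf.sqrt(var + epsilon) + shift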
def __init__(self, args, embedding):
    self.args = args

    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    cell = cell_fn(args.rnn_size)
    self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length], name='STAND_input')
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length], name='STAND_targets')
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)
    self.embedding = embedding

    with tf.variable_scope('STAND'):
        softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        inputs = tf.split(1, args.seq_length, tf.nn.embedding_lookup(self.embedding, self.input_data))
        inputs = map(lambda i: tf.nn.l2_normalize(i, 1),
                     [tf.squeeze(input_, [1]) for input_ in inputs])

    def loop(prev, i):
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.l2_normalize(tf.nn.embedding_lookup(embedding, prev_symbol), 1)

    o, _ = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                               loop_function=None, scope='STAND')
    with tf.variable_scope('STAND', reuse=True) as scope:
        sf_o, _ = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                      loop_function=loop, scope=scope)

    output = tf.reshape(tf.concat(1, o), [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)

    sf_output = tf.reshape(tf.concat(1, sf_o), [-1, args.rnn_size])
    self_feed_logits = tf.matmul(sf_output, softmax_w) + softmax_b
    self.self_feed_probs = tf.nn.softmax(self_feed_logits)

    loss = seq2seq.sequence_loss_by_example([self.logits],
                                            [tf.reshape(self.targets, [-1])],
                                            [tf.ones([args.batch_size * args.seq_length])],
                                            args.vocab_size)
    self.loss = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), args.grad_clip)
    for g, v in zip(grads, tvars):
        print v.name
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))