The following 50 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.python.ops.math_ops.tanh().
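As a quick orientation before the project-specific examples, here is a minimal sketch of calling the function directly. It assumes a TensorFlow 1.x graph-mode environment (which the snippets below also target); the tensor values are illustrative only.

import tensorflow as tf
from tensorflow.python.ops import math_ops

# Element-wise hyperbolic tangent; math_ops.tanh is the op behind tf.tanh / tf.nn.tanh.
x = tf.constant([-1.0, 0.0, 1.0])
y = math_ops.tanh(x)

# Evaluate in graph mode (TF 1.x); prints approximately [-0.7616  0.      0.7616].
with tf.Session() as sess:
    print(sess.run(y))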
def __init__(self, n_in, hidden_layer_size, n_out, hidden_layer_type, output_type="linear",
             dropout_rate=0, loss_function="mse", optimizer="adam"):
    #self.session=tf.InteractiveSession()
    self.n_in = int(n_in)
    self.n_out = int(n_out)
    self.n_layers = len(hidden_layer_size)
    self.hidden_layer_size = hidden_layer_size
    self.hidden_layer_type = hidden_layer_type
    assert len(self.hidden_layer_size) == len(self.hidden_layer_type)
    self.output_type = output_type
    self.dropout_rate = dropout_rate
    self.loss_function = loss_function
    self.optimizer = optimizer
    #self.activation = {"tanh": tf.nn.tanh, "sigmoid": tf.nn.sigmoid}
    self.graph = tf.Graph()
    #self.saver=tf.train.Saver()
def encoder(self, inputs, inputs_sequence_length):
    with tf.variable_scope("encoder"):
        basic_cell = []
        for i in xrange(len(self.hidden_layer_size)):
            if self.hidden_layer_type[i] == "tanh":
                basic_cell.append(tf.contrib.rnn.BasicRNNCell(num_units=self.encoder_layer_size[i]))
            if self.hidden_layer_type[i] == "lstm":
                basic_cell.append(tf.contrib.rnn.BasicLSTMCell(num_units=self.encoder_layer_size[i]))
            if self.hidden_layer_type[i] == "gru":
                basic_cell.append(GRUCell(num_units=self.encoder_layer_size[i]))
        multicell = MultiRNNCell(basic_cell)
        enc_output, enc_state = tf.nn.bidirectional_dynamic_rnn(cell_fw=multicell, cell_bw=multicell, inputs=inputs,
                                                                sequence_length=inputs_sequence_length, dtype=tf.float32)
        enc_output = tf.concat(enc_output, 2)
        #enc_state=(tf.concat(enc_state[0])
        return enc_output, enc_state
def decoder(self, decoder_inputs, enc_output, enc_states, target_sequence_length):
    """Memory is a tuple containing the forward and backward final states
    (output_states_fw, output_states_bw)"""
    with tf.variable_scope("decoder"):
        basic_cell = []
        for i in xrange(len(self.hidden_layer_size)):
            if self.hidden_layer_type[i] == "tanh":
                basic_cell.append(tf.contrib.rnn.BasicRNNCell(num_units=self.encoder_layer_size[i]))
            if self.hidden_layer_type[i] == "lstm":
                basic_cell.append(tf.contrib.rnn.BasicLSTMCell(num_units=self.encoder_layer_size[i]))
            if self.hidden_layer_type[i] == "gru":
                basic_cell.append(GRUCell(num_units=self.encoder_layer_size[i]))
        multicell = MultiRNNCell(basic_cell)
        if not self.attention:
            dec_output, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=multicell, cell_bw=multicell, inputs=decoder_inputs,
                                                            initial_state_fw=enc_states[0],
                                                            sequence_length=target_sequence_length,
                                                            initial_state_bw=enc_states[1])
        else:
            attention_size = decoder_inputs.get_shape().as_list()[-1]
            attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(attention_size, enc_output, target_sequence_length,
                                                                       normalize=True, probability_fn=tf.nn.softmax)
            cell_with_attention = tf.contrib.seq2seq.AttentionWrapper(multicell, attention_mechanism, attention_size)
            dec_output, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=cell_with_attention, cell_bw=cell_with_attention,
                                                            inputs=decoder_inputs, dtype=tf.float32)
        return dec_output
def _attention(self, query, attn_states):
    conv2d = nn_ops.conv2d
    reduce_sum = math_ops.reduce_sum
    softmax = nn_ops.softmax
    tanh = math_ops.tanh

    with vs.variable_scope("Attention"):
        k = vs.get_variable("AttnW", [1, 1, self._attn_size, self._attn_vec_size])
        v = vs.get_variable("AttnV", [self._attn_vec_size])
        hidden = array_ops.reshape(attn_states,
                                   [-1, self._attn_length, 1, self._attn_size])
        hidden_features = conv2d(hidden, k, [1, 1, 1, 1], "SAME")
        y = _linear(query, self._attn_vec_size, True)
        y = array_ops.reshape(y, [-1, 1, 1, self._attn_vec_size])
        s = reduce_sum(v * tanh(hidden_features + y), [2, 3])
        a = softmax(s)
        d = reduce_sum(
            array_ops.reshape(a, [-1, self._attn_length, 1, 1]) * hidden, [1, 2])
        new_attns = array_ops.reshape(d, [-1, self._attn_size])
        new_attn_states = array_ops.slice(attn_states, [0, 1, 0], [-1, -1, -1])
        return new_attns, new_attn_states
def __call__(self, inputs, state, mask, scope=None):
    """Long short-term memory cell (LSTM)."""
    with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        # Parameters of gates are concatenated into one multiply for efficiency.
        c, h = array_ops.split(1, 2, state)
        concat = linear([inputs, h], 4 * self._num_units, True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(1, 4, concat)

        new_c = c * sigmoid(f + self._forget_bias) + sigmoid(i) * tanh(j)
        mask = array_ops.expand_dims(mask, 1)
        new_c = mask * new_c + (1. - mask) * c

        new_h = tanh(new_c) * sigmoid(o)
        new_h = mask * new_h + (1. - mask) * h

        return new_h, array_ops.concat(1, [new_c, new_h])
def __init__(self,
             num_units,
             memory,
             params,
             self_matching=False,
             memory_len=None,
             reuse=None,
             kernel_initializer=None,
             bias_initializer=None,
             is_training=True,
             use_SRU=False):
    super(gated_attention_Wrapper, self).__init__(_reuse=reuse)
    cell = SRUCell if use_SRU else GRUCell
    self._cell = cell(num_units, is_training=is_training)
    self._num_units = num_units
    self._activation = math_ops.tanh
    self._kernel_initializer = kernel_initializer
    self._bias_initializer = bias_initializer
    self._attention = memory
    self._params = params
    self._self_matching = self_matching
    self._memory_len = memory_len
    self._is_training = is_training
def _attention(self, query, attn_states):
    conv2d = nn_ops.conv2d
    reduce_sum = math_ops.reduce_sum
    softmax = nn_ops.softmax
    tanh = math_ops.tanh

    with tf.variable_scope("attention"):
        k = tf.get_variable(
            "attn_w", [1, 1, self._attn_size, self._attn_vec_size])
        v = tf.get_variable("attn_v", [self._attn_vec_size])
        hidden = array_ops.reshape(attn_states,
                                   [-1, self._attn_length, 1, self._attn_size])
        hidden_features = conv2d(hidden, k, [1, 1, 1, 1], "SAME")
        y = _linear(query, self._attn_vec_size, True)
        y = array_ops.reshape(y, [-1, 1, 1, self._attn_vec_size])
        s = reduce_sum(v * tanh(hidden_features + y), [2, 3])
        a = softmax(s)
        d = reduce_sum(
            array_ops.reshape(a, [-1, self._attn_length, 1, 1]) * hidden, [1, 2])
        new_attns = array_ops.reshape(d, [-1, self._attn_size])
        new_attn_states = array_ops.slice(attn_states, [0, 1, 0], [-1, -1, -1])
        return new_attns, new_attn_states
def __init__(self, num_units, forget_bias=1.0, input_size=None,
             state_is_tuple=False, activation=tanh):
    """Initialize the basic LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell.
      forget_bias: float, The bias added to forget gates (see above).
      input_size: Deprecated and unused.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`. By default (False), they are concatenated
        along the column axis. This default behavior will soon be deprecated.
      activation: Activation function of the inner states.
    """
    if not state_is_tuple:
        print("%s: Using a concatenated state is slower and will soon be "
              "deprecated. Use state_is_tuple=True." % self)
    if input_size is not None:
        print("%s: The input_size parameter is deprecated." % self)
    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation
def __init__(self, num_units, forget_bias=1.0, input_size=None,
             state_is_tuple=False, activation=tanh):
    """Initialize the basic LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell.
      forget_bias: float, The bias added to forget gates (see above).
      input_size: Deprecated and unused.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`. By default (False), they are concatenated
        along the column axis. This default behavior will soon be deprecated.
      activation: Activation function of the inner states.
    """
    if not state_is_tuple:
        logging.warn("%s: Using a concatenated state is slower and will soon be "
                     "deprecated. Use state_is_tuple=True.", self)
    if input_size is not None:
        logging.warn("%s: The input_size parameter is deprecated.", self)
    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation
def __init__(self, num_units, forget_bias=1.0, input_size=None,
             state_is_tuple=True, activation=tanh):
    """Initialize the basic LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell.
      forget_bias: float, The bias added to forget gates (see above).
      input_size: Deprecated and unused.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`. If False, they are concatenated
        along the column axis. The latter behavior will soon be deprecated.
      activation: Activation function of the inner states.
    """
    if not state_is_tuple:
        logging.warn("%s: Using a concatenated state is slower and will soon be "
                     "deprecated. Use state_is_tuple=True.", self)
    if input_size is not None:
        logging.warn("%s: The input_size parameter is deprecated.", self)
    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation
def __call__(self, inputs, state, scope=None):
    with _checked_scope(self, scope or "rwa_cell", reuse=self._reuse):
        h, n, d = state

        with vs.variable_scope("u"):
            u = linear(inputs, self._num_units, True, normalize=self._normalize)

        with vs.variable_scope("g"):
            g = linear([inputs, h], self._num_units, True, normalize=self._normalize)

        with vs.variable_scope("a"):
            # The bias term when factored out of the numerator and denominator cancels and is unnecessary
            a = tf.exp(linear([inputs, h], self._num_units, True, normalize=self._normalize))

        with vs.variable_scope("discount_factor"):
            discount_factor = tf.nn.sigmoid(linear([inputs, h], self._num_units, True, normalize=self._normalize))

        z = tf.multiply(u, tanh(g))

        n = tf.multiply(n, discount_factor) + tf.multiply(z, a)  # Numerically stable update of numerator
        d = tf.multiply(d, discount_factor) + a  # Numerically stable update of denominator
        h_new = self._activation(tf.div(n, d))

        new_state = RDACellTuple(h_new, n, d)

    return h_new, new_state
def __init__(self, num_units, input_size=None,
             use_peepholes=False, cell_clip=None,
             initializer=None, num_proj=None, proj_clip=None,
             num_unit_shards=1, num_proj_shards=1,
             forget_bias=1.0, state_is_tuple=False,
             activation=tanh):
    # if not state_is_tuple:
    #     logging.warn(
    #         "%s: Using a concatenated state is slower and will soon be "
    #         "deprecated. Use state_is_tuple=True." % self)
    if input_size is not None:
        logging.warn("%s: The input_size parameter is deprecated." % self)

    #self._use_peepholes = use_peepholes
    #self._cell_clip = cell_clip
    #self._initializer = initializer
    #self._num_proj = num_proj
    #self._num_unit_shards = num_unit_shards
    #self._num_proj_shards = num_proj_shards
    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation
def __call__(self, inputs, state, scope=None):
    with vs.variable_scope(scope or type(self).__name__):  # "SCRNNCell"
        if self._state_is_tuple:
            s, h = state
        else:
            s, h = array_ops.split(1, 2, state)
        new_s = tf.nn.rnn_cell._linear([(1 - self._alpha) * inputs, self._alpha * s],
                                       self._num_units, True, scope="SlowLinear")
        new_h = tanh(tf.nn.rnn_cell._linear([inputs, new_s, h],
                                            self._num_units, True, scope="FastLinear"))
        if self._state_is_tuple:
            new_state = tf.nn.rnn_cell.LSTMStateTuple(new_s, new_h)
        else:
            new_state = array_ops.concat(1, [new_s, new_h])
        return new_h, new_state
def __init__(self, num_units, forget_bias=1.0, state_is_tuple=True,
             activation=None, reuse=None):
    """Initialize the basic LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell.
      forget_bias: float, The bias added to forget gates (see above).
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`. If False, they are concatenated
        along the column axis. The latter behavior will soon be deprecated.
      activation: Activation function of the inner states. Default: `tanh`.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope. If not `True`, and the existing scope already has
        the given variables, an error is raised.
    """
    super(BasicLSTMCell, self).__init__(_reuse=reuse)
    if not state_is_tuple:
        logging.warn("%s: Using a concatenated state is slower and will soon be "
                     "deprecated. Use state_is_tuple=True.", self)
    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation or math_ops.tanh
def __init__(self, num_units, input_size=None, activation=tanh,
             is_training=True, batch_norm=True):
    self._is_training = is_training
    self._batch_norm = batch_norm
    super().__init__(num_units, input_size, activation)
def __init__(self, num_units, factor_size, initializer=None, num_proj=None,
             forget_bias=1.0, activation=tanh):
    """
    Initializes parameters of F-LSTM cell
    :param num_units: int, The number of units in the G-LSTM cell
    :param initializer: (optional) The initializer to use for the weight and
        projection matrices.
    :param num_proj: (optional) int, The output dimensionality for the projection
        matrices. If None, no projection is performed.
    :param factor_size: factorization size
    :param forget_bias: Biases of the forget gate are initialized by default to 1
        in order to reduce the scale of forgetting at the beginning of the training.
    :param activation: Activation function of the inner states.
    """
    self._num_units = num_units
    self._initializer = initializer
    self._num_proj = num_proj
    self._forget_bias = forget_bias
    self._activation = activation
    self._factor_size = factor_size
    assert (self._num_units > self._factor_size)
    if self._num_proj:
        assert (self._num_proj > self._factor_size)
    if num_proj:
        self._state_size = (LSTMStateTuple(num_units, num_proj))
        self._output_size = num_proj
    else:
        self._state_size = (LSTMStateTuple(num_units, num_units))
        self._output_size = num_units
def __call__(self, inputs, state, episodic_gate, scope=None):
    """Gated recurrent unit (GRU) with nunits cells."""
    with vs.variable_scope("MGRUCell"):  # "GRUCell"
        with vs.variable_scope("Gates"):  # Reset gate and update gate.
            # We start with bias of 1.0 to not reset and not update.
            r = rnn_cell.linear([inputs, state], self._num_units, True, 1.0, scope=scope)
            r = sigmoid(r)
        with vs.variable_scope("Candidate"):
            c = tanh(rnn_cell.linear([inputs, r * state], self._num_units, True))
        new_h = tf.mul(episodic_gate, c) + tf.mul((1 - episodic_gate), state)
    return new_h, new_h
def __init__(self, state_size, input_size, scope=None, activation=tanh):
    self._state_size = state_size
    self._output_size = state_size
    self._input_size = input_size
    self._activation = activation
    self._scope = scope
def __init__(self, num_units, activation=tanh, use_fp16=False):
    self._num_units = num_units
    self._activation = activation
    self.use_fp16 = use_fp16
def __init__(self, num_units, input_size=None, activation=tanh, normalize=False, reuse=None):
    if input_size is not None:
        logging.warn("%s: The input_size parameter is deprecated.", self)
    self._num_units = num_units
    self._activation = activation
    self._normalize = normalize
    self._reuse = reuse
def __init__(self, num_units, input_size=None, activation=tanh, reuse=None):
    if input_size is not None:
        logging.warn("%s: The input_size parameter is deprecated.", self)
    self._num_units = num_units
    self._activation = activation
    self._reuse = reuse
def __call__(self, inputs, state, scope=None):
    with _checked_scope(self, scope or "rwa_cell", reuse=self._reuse):
        h, n, d, a_max = state

        with vs.variable_scope("u"):
            u = _linear(inputs, self._num_units, True)

        with vs.variable_scope("g"):
            g = _linear([inputs, h], self._num_units, True)

        with vs.variable_scope("a"):
            # The bias term when factored out of the numerator and denominator cancels and is unnecessary
            a = _linear([inputs, h], self._num_units, False)

        z = tf.multiply(u, tanh(g))

        a_newmax = tf.maximum(a_max, a)
        exp_diff = tf.exp(a_max - a_newmax)
        exp_scaled = tf.exp(a - a_newmax)

        n = tf.multiply(n, exp_diff) + tf.multiply(z, exp_scaled)  # Numerically stable update of numerator
        d = tf.multiply(d, exp_diff) + exp_scaled  # Numerically stable update of denominator
        h_new = self._activation(tf.div(n, d))

        new_state = RWACellTuple(h_new, n, d, a_newmax)

    return h_new, new_state
def __init__(self, num_units, forget_bias=1.0, input_size=None,
             activation=math_ops.tanh, layer_norm=True,
             norm_gain=1.0, norm_shift=0.0,
             dropout_keep_prob=1.0, dropout_prob_seed=None):
    """Initializes the basic LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell.
      forget_bias: float, The bias added to forget gates (see above).
      input_size: Deprecated and unused.
      activation: Activation function of the inner states.
      layer_norm: If `True`, layer normalization will be applied.
      norm_gain: float, The layer normalization gain initial value. If
        `layer_norm` has been set to `False`, this argument will be ignored.
      norm_shift: float, The layer normalization shift initial value. If
        `layer_norm` has been set to `False`, this argument will be ignored.
      dropout_keep_prob: unit Tensor or float between 0 and 1 representing the
        recurrent dropout probability value. If float and 1.0, no dropout will
        be applied.
      dropout_prob_seed: (optional) integer, the randomness seed.
    """
    if input_size is not None:
        logging.warn("%s: The input_size parameter is deprecated.", self)

    self._num_units = num_units
    self._activation = activation
    self._forget_bias = forget_bias
    self._keep_prob = dropout_keep_prob
    self._seed = dropout_prob_seed
    self._layer_norm = layer_norm
    self._g = norm_gain
    self._b = norm_shift
def __init__(self, num_units, input_size=None, activation=tanh):
    if input_size is not None:
        logging.warn("%s: The input_size parameter is deprecated.", self)
    self._num_units = num_units
    self._activation = activation
def __init__(self, batch_size, mem_size, mem_dim, name, activation=tanh,
             dummy_value=0.0, stddev=0.5):
    self._mem_size = mem_size
    self._mem_dim = mem_dim
    self._activation = activation
    self._batch_size = batch_size

    # memory
    M_init = tf.get_variable("%s_M_init" % name, [1, self._mem_size, self._mem_dim], tf.float32,
                             tf.random_normal_initializer(stddev=stddev))
    self._memory = tf.tile(M_init, [batch_size, 1, 1], name='%s_Tile_M' % name)
def __init__(self, batch_size, num_mem, num_round, input_offset, cell=None, echocell=None,
             mem_size=2, mem_dim=1024, activation=tanh, dummy_value=0.0):
    """
    args:
        num_mem: number of cells
        mem_size: number of memory lines, only work for MemGrid
        mem_dim: length of memory line, only work for MemGrid
        num_round: the round number of processing in the cell
    """
    self._batch_size = batch_size
    self._num_mem = num_mem
    self._mem_dim = mem_dim
    self._num_round = num_round
    self._input_offset = input_offset
    if cell is None:
        self.check = True
        self._mem_cells = [MemGrid(batch_size, mem_size, mem_dim, "Mem_%d" % i,
                                   activation=activation, dummy_value=dummy_value)
                           for i in xrange(num_mem)]
    else:
        self.check = False
        self._mem_cells = [cell] * num_mem
    self.echocell = echocell
def __call__(self, inputs, state, scope=None):
    """Most basic RNN: output = new_state = tanh(W * input + U * state + B)."""
    with vs.variable_scope(scope or type(self).__name__):  # "BasicRNNCell"
        output = tanh(linear([inputs, state], self._num_units, True))
    return output, output
def __call__(self, inputs, state, scope=None):
    """Gated recurrent unit (GRU) with nunits cells."""
    with vs.variable_scope(scope or type(self).__name__):  # "GRUCell"
        with vs.variable_scope("Gates"):  # Reset gate and update gate.
            # We start with bias of 1.0 to not reset and not update.
            r, u = array_ops.split(1, 2, linear([inputs, state],
                                                2 * self._num_units, True, 1.0))
            r, u = sigmoid(r), sigmoid(u)
        with vs.variable_scope("Candidate"):
            c = tanh(linear([inputs, r * state], self._num_units, True))
        new_h = u * state + (1 - u) * c
    return new_h, new_h
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM)."""
    with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        # Parameters of gates are concatenated into one multiply for efficiency.
        c, h = array_ops.split(1, 2, state)
        concat = linear([inputs, h], 4 * self._num_units, True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(1, 4, concat)

        new_c = c * sigmoid(f + self._forget_bias) + sigmoid(i) * tanh(j)
        new_h = tanh(new_c) * sigmoid(o)

    return new_h, array_ops.concat(1, [new_c, new_h])
def __init__(self, num_units, k_size=3, height=23, width=30,
             input_size=None, activation=tanh, initializer=None):
    if input_size is not None:
        logging.warn("%s: The input_size parameter is deprecated.", self)
    self._num_units = num_units
    self._activation = activation
    self._initializer = initializer
    self._k_size = k_size
    self._height = height
    self._width = width
def __init__(self, num_units, k_size=3, batch_size=4, height=23, width=30,
             input_size=None, use_peepholes=False, cell_clip=None,
             initializer=None, num_proj=None, proj_clip=None,
             num_unit_shards=1, num_proj_shards=1,
             forget_bias=1.0, state_is_tuple=False,
             activation=tanh):
    if not state_is_tuple:
        logging.warn(
            "%s: Using a concatenated state is slower and will soon be "
            "deprecated. Use state_is_tuple=True." % self)
    if input_size is not None:
        logging.warn("%s: The input_size parameter is deprecated." % self)
    #self._use_peepholes = use_peepholes
    #self._cell_clip = cell_clip
    #self._initializer = initializer
    #self._num_proj = num_proj
    #self._num_unit_shards = num_unit_shards
    #self._num_proj_shards = num_proj_shards
    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation
    self._initializer = initializer
    self._k_size = k_size
    self._height = height
    self._width = width
    self._batch_size = batch_size
def __init__(self, num_units, activation=None, is_training=True, reuse=None):
    self._num_units = num_units
    self._activation = activation or tf.tanh
    self._is_training = is_training
def __init__(self, num_units, activation=None, reuse=None,
             kernel_initializer=None, bias_initializer=None, is_training=True):
    super(GRUCell, self).__init__(_reuse=reuse)
    self._num_units = num_units
    self._activation = activation or math_ops.tanh
    self._kernel_initializer = kernel_initializer
    self._bias_initializer = bias_initializer
    self._is_training = is_training
def __init__(self, num_units, kernel_initializer=None,
             bias_initializer=tf.constant_initializer(value=0.),
             activation=None, reuse=None):
    super(BasicRNNCell, self).__init__(_reuse=reuse)
    self._num_units = num_units
    self._activation = activation or tf.nn.tanh
    self._bias_initializer = bias_initializer
    self._kernel_initializer = kernel_initializer
def __init__(self, num_units, input_size=None, activation=tanh):
    if input_size is not None:
        print("%s: The input_size parameter is deprecated." % self)
    self._num_units = num_units
    self._activation = activation
def __init__(self, num_units, forget_bias=1.0, input_size=None,
             state_is_tuple=False, activation=tanh,
             hyper_num_units=128, hyper_embedding_size=32,
             is_layer_norm=True):
    """Initialize the basic LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell.
      hyper_num_units: int, The number of units in the HyperLSTM cell.
      forget_bias: float, The bias added to forget gates (see above).
      input_size: Deprecated and unused.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`. By default (False), they are concatenated
        along the column axis. This default behavior will soon be deprecated.
      activation: Activation function of the inner states.
    """
    if not state_is_tuple:
        print("%s: Using a concatenated state is slower and will soon be "
              "deprecated. Use state_is_tuple=True." % self)
    if input_size is not None:
        print("%s: The input_size parameter is deprecated." % self)
    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation
    self.hyper_num_units = hyper_num_units
    self.total_num_units = self._num_units + self.hyper_num_units
    self.hyper_cell = rnn_cell.BasicLSTMCell(hyper_num_units)
    self.hyper_embedding_size = hyper_embedding_size
    self.is_layer_norm = is_layer_norm
def call(self, inputs, state, scope=None):
    with vs.variable_scope(scope or type(self).__name__):  # "GruRcnCell"
        with vs.variable_scope("Gates"):  # Reset gate and update gate.
            # We start with bias of 1.0.
            w_zrw = self._conv(inputs, self._num_outputs * 3, self._ih_filter_h_length, self._ih_filter_w_length,
                               self._ih_strides, self._ih_pandding,
                               init_ops.truncated_normal_initializer(stddev=0.01), scope="WzrwConv")
            u_zr = self._conv(state, self._num_outputs * 2, self._hh_filter_h_length, self._hh_filter_w_length,
                              [1, 1, 1, 1], "SAME",
                              init_ops.truncated_normal_initializer(stddev=0.01), scope="UzrConv")
            w_z, w_r, w = tf.split(value=w_zrw, num_or_size_splits=3, axis=3, name="w_split")
            u_z, u_r = tf.split(value=u_zr, num_or_size_splits=2, axis=3, name="u_split")
            z_bias = tf.get_variable(
                name="z_biases",
                shape=[self._num_outputs],
                initializer=init_ops.ones_initializer()
            )
            z_gate = math_ops.sigmoid(tf.nn.bias_add(w_z + u_z, z_bias))
            r_bias = tf.get_variable(
                name="r_biases",
                shape=[self._num_outputs],
                initializer=init_ops.ones_initializer())
            r_gate = math_ops.sigmoid(tf.nn.bias_add(w_r + u_r, r_bias))
        with vs.variable_scope("Candidate"):
            # w = self._conv(inputs, self._num_outputs, self._ih_filter_h_length, self._ih_filter_w_length,
            #                self._ih_strides, self._ih_pandding,
            #                init_ops.truncated_normal_initializer(stddev=0.01), scope="WConv")
            u = self._conv(r_gate * state, self._num_outputs, self._hh_filter_h_length, self._hh_filter_w_length,
                           [1, 1, 1, 1], "SAME",
                           init_ops.truncated_normal_initializer(stddev=0.01), scope="UConv")
            c_bias = tf.get_variable(
                name="c_biases",
                shape=[self._num_outputs],
                initializer=init_ops.ones_initializer())
            c = math_ops.tanh(tf.nn.bias_add(w + u, c_bias))
        new_h = z_gate * state + (1 - z_gate) * c
    return new_h, new_h