We extracted the following 50 code examples from open source Python projects to illustrate how to use tensorflow.sigmoid().
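As a quick orientation before the extracted examples, here is a minimal, self-contained sketch (assuming TensorFlow 1.x graph mode, which these examples target) of what tf.sigmoid does: it applies the logistic function 1 / (1 + exp(-x)) element-wise to a tensor.

import tensorflow as tf

# tf.sigmoid maps each element x to 1 / (1 + exp(-x)),
# squashing arbitrary real values into the range (0, 1).
x = tf.constant([-2.0, 0.0, 2.0])
y = tf.sigmoid(x)

with tf.Session() as sess:
    print(sess.run(y))  # roughly [0.12, 0.5, 0.88]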
def model(self, features, labels):
    x = features["observation"]
    x = tf.contrib.layers.convolution2d(x, 2, kernel_size=[3, 3], stride=[2, 2], activation_fn=tf.nn.elu)
    x = tf.contrib.layers.convolution2d(x, 2, kernel_size=[3, 3], stride=[2, 2], activation_fn=tf.nn.elu)
    actions = tf.one_hot(tf.reshape(features["action"], [-1]), depth=6, on_value=1.0, off_value=0.0, axis=1)
    x = tf.concat(1, [tf.contrib.layers.flatten(x), actions])
    x = tf.contrib.layers.fully_connected(x, 100, activation_fn=tf.nn.elu)
    x = tf.contrib.layers.fully_connected(x, 100, activation_fn=tf.nn.elu)
    logits = tf.contrib.layers.fully_connected(x, 1, activation_fn=None)
    prediction = tf.sigmoid(logits, name="prediction")
    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits, tf.expand_dims(labels, axis=1)),
        name="loss")
    train_op = tf.contrib.layers.optimize_loss(
        loss, tf.contrib.framework.get_global_step(), optimizer='Adam',
        learning_rate=self.learning_rate)
    tf.add_to_collection('prediction', prediction)
    tf.add_to_collection('loss', loss)
    return prediction, loss, train_op
def ae(x):
    if nonlinearity_name == 'relu':
        f = tf.nn.relu
    elif nonlinearity_name == 'elu':
        f = tf.nn.elu
    elif nonlinearity_name == 'gelu':
        # def gelu(x):
        #     return tf.mul(x, tf.erfc(-x / tf.sqrt(2.)) / 2.)
        # f = gelu
        def gelu_fast(_x):
            return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (_x + 0.044715 * tf.pow(_x, 3))))
        f = gelu_fast
    elif nonlinearity_name == 'silu':
        def silu(_x):
            return _x * tf.sigmoid(_x)
        f = silu
    # elif nonlinearity_name == 'soi':
    #     def soi_map(x):
    #         u = tf.random_uniform(tf.shape(x))
    #         mask = tf.to_float(tf.less(u, (1 + tf.erf(x / tf.sqrt(2.))) / 2.))
    #         return tf.cond(is_training, lambda: tf.mul(mask, x),
    #                        lambda: tf.mul(x, tf.erfc(-x / tf.sqrt(2.)) / 2.))
    #     f = soi_map
    else:
        raise NameError("Need 'relu', 'elu', 'gelu', or 'silu' for nonlinearity_name")

    h1 = f(tf.matmul(x, W['1']) + b['1'])
    h2 = f(tf.matmul(h1, W['2']) + b['2'])
    h3 = f(tf.matmul(h2, W['3']) + b['3'])
    h4 = f(tf.matmul(h3, W['4']) + b['4'])
    h5 = f(tf.matmul(h4, W['5']) + b['5'])
    h6 = f(tf.matmul(h5, W['6']) + b['6'])
    h7 = f(tf.matmul(h6, W['7']) + b['7'])
    return tf.matmul(h7, W['8']) + b['8']
def __call__(self, left_state, right_state, extra_input=None):
    with tf.variable_scope('TreeLSTM'):
        c1, h1 = left_state
        c2, h2 = right_state
        if extra_input is not None:
            input_concat = tf.concat((extra_input, h1, h2), axis=1)
        else:
            input_concat = tf.concat((h1, h2), axis=1)
        concat = tf.layers.dense(input_concat, 5 * self._num_cells)
        i, f1, f2, o, g = tf.split(concat, 5, axis=1)
        i = tf.sigmoid(i)
        f1 = tf.sigmoid(f1)
        f2 = tf.sigmoid(f2)
        o = tf.sigmoid(o)
        g = tf.tanh(g)
        cnew = f1 * c1 + f2 * c2 + i * g
        hnew = o * cnew
        newstate = LSTMStateTuple(c=cnew, h=hnew)
        return hnew, newstate
def highway(self, input_1, input_2, size_1, size_2, l2_penalty=1e-8, layer_size=1):
    output = input_2
    for idx in range(layer_size):
        with tf.name_scope('output_lin_%d' % idx):
            W = tf.Variable(tf.truncated_normal([size_2, size_1], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[size_1]), name="b")
            tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty * tf.nn.l2_loss(W))
            tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty * tf.nn.l2_loss(b))
            output = tf.nn.relu(tf.nn.xw_plus_b(output, W, b))
        with tf.name_scope('transform_lin_%d' % idx):
            W = tf.Variable(tf.truncated_normal([size_1, size_1], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[size_1]), name="b")
            tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty * tf.nn.l2_loss(W))
            tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty * tf.nn.l2_loss(b))
            transform_gate = tf.sigmoid(tf.nn.xw_plus_b(input_1, W, b))
        carry_gate = tf.constant(1.0) - transform_gate
        output = transform_gate * output + carry_gate * input_1
    return output
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM)."""
    with tf.variable_scope(scope or type(self).__name__):
        c, h = state

        # Parameters of gates are concatenated into one multiply for efficiency.
        concat = rnn_ops.linear([inputs, h], 4 * self._num_units, True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = tf.split(value=concat, num_or_size_splits=4, axis=1)

        if self._layer_norm:
            i = rnn_ops.layer_norm(i, name="i")
            j = rnn_ops.layer_norm(j, name="j")
            f = rnn_ops.layer_norm(f, name="f")
            o = rnn_ops.layer_norm(o, name="o")

        new_c = (c * tf.sigmoid(f + self._forget_bias) + tf.sigmoid(i) * self._activation(j))
        new_h = self._activation(new_c) * tf.sigmoid(o)

        new_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h)

        return new_h, new_state
def model(self, features, labels):
    x = features["observation"]
    x = tf.contrib.layers.convolution2d(x, 2, kernel_size=[3, 3], stride=[2, 2], activation_fn=tf.nn.elu)
    x = tf.contrib.layers.convolution2d(x, 2, kernel_size=[3, 3], stride=[2, 2], activation_fn=tf.nn.elu)
    x = tf.contrib.layers.flatten(x)
    x = tf.contrib.layers.fully_connected(x, 100, activation_fn=tf.nn.elu)
    x = tf.contrib.layers.fully_connected(x, 100, activation_fn=tf.nn.elu)
    logits = tf.contrib.layers.fully_connected(x, 1, activation_fn=None)
    prediction = tf.sigmoid(logits)
    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits, tf.expand_dims(labels, axis=1)))
    train_op = tf.contrib.layers.optimize_loss(
        loss, tf.contrib.framework.get_global_step(), optimizer='Adam',
        learning_rate=0.01)
    tf.add_to_collection('prediction', prediction)
    tf.add_to_collection('loss', loss)
    return prediction, loss, train_op
def __discriminator(self, x, scope, reuse):
    with tf.variable_scope(scope, reuse=reuse):
        x1 = tf.layers.conv2d(x, 64, 5, strides=2, padding='same')
        x1 = LeakyReLU(x1, self.alpha)
        # 16x16x64

        x2 = tf.layers.conv2d(x1, 128, 5, strides=2, padding='same')
        x2 = tf.layers.batch_normalization(x2, training=self.training)
        x2 = LeakyReLU(x2, self.alpha)
        # 8x8x128

        x3 = tf.layers.conv2d(x2, 256, 5, strides=2, padding='same')
        x3 = tf.layers.batch_normalization(x3, training=self.training)
        x3 = LeakyReLU(x3, self.alpha)
        # 4x4x256

        # Flatten it
        flat = tf.reshape(x3, (-1, 4 * 4 * 256))
        logits = tf.layers.dense(flat, 1)
        out = tf.sigmoid(logits)

        return out, logits

#---------------------------------------------------------------------------
def __init__(self, sigma=0.1, beta_sampling=True, **kwargs):
    """
    sigma: Standard deviation of input data, for use in sampling.

    beta_sampling: Use beta distribution for sampling, instead of Gaussian.
    """
    RBM.__init__(self, **kwargs)
    if not kwargs.get('fromfile'):
        self.sigma = sigma
        self.beta_sampling = beta_sampling

    if self.sigma is None:
        raise AssertionError('Need to supply sigma param.')

    self.hidden = tf.placeholder(self.dtype, name='hidden',
                                 shape=[None, self.n_hidden])
    self.mean_v = tf.sigmoid(
        tf.matmul(self.hidden, self.params['W'], transpose_b=True) +
        self.params['bvis'])
def update(self, state, input, output=True):
    u_gate = tf.matmul(input, self.params['Uxh'])
    if state is not None:
        u_gate += tf.matmul(state, self.params['Uhh'])
        r_gate = tf.sigmoid(tf.matmul(input, self.params['Rxh']) +
                            tf.matmul(state, self.params['Rhh']))
    u_gate = tf.sigmoid(u_gate)

    operand = tf.matmul(input, self.params['Wxh']) + self.params['bhid']
    if state is not None:
        operand += tf.matmul(state * r_gate, self.params['Whh'])
    new_state = self.coding(operand) * (1. - u_gate)
    if state is not None:
        new_state += state * u_gate

    if not output:
        return new_state
    return new_state, self.get_output(new_state)
def custom_loss(y_true, y_pred):
    # Get prediction
    pred_box_xy = tf.sigmoid(y_pred[..., :2])
    pred_box_wh = y_pred[..., 2:4]
    pred_box_conf = tf.sigmoid(y_pred[..., 4])

    # Get ground truth
    true_box_xy = y_true[..., :2]
    true_box_wh = y_true[..., 2:4]
    true_box_conf = y_true[..., 4]

    # Determine the mask: simply the position of the ground truth boxes (the predictors)
    true_mask = tf.expand_dims(y_true[..., 4], axis=-1)

    # Calculate the loss. A scale can be associated with each loss, indicating
    # how important the loss is. The bigger the scale, the more important the loss is.
    loss_xy = tf.reduce_sum(tf.square(true_box_xy - pred_box_xy) * true_mask) * 1.0
    loss_wh = tf.reduce_sum(tf.square(true_box_wh - pred_box_wh) * true_mask) * 1.0
    loss_conf = tf.reduce_sum(tf.square(true_box_conf - pred_box_conf)) * 1.2

    loss = loss_xy + loss_wh + loss_conf

    return loss
def _setup(self, x, prev_state, prev_output):
    """Setup the cell.

    :param x: Input tensor.
    :param prev_state: Previous cell state tensor.
    :param prev_output: Previous cell output tensor.
    :return: Tuple of cell state and cell output tensors.
    """
    # Input gate.
    i = tf.nn.sigmoid(tf.matmul(x, self._wi) + tf.matmul(prev_output, self._ui) + self._bi)
    # Forget gate.
    f = tf.nn.sigmoid(tf.matmul(x, self._wf) + tf.matmul(prev_output, self._uf) + self._bf)
    # Output gate.
    o = tf.nn.sigmoid(tf.matmul(x, self._wo) + tf.matmul(prev_output, self._uo) + self._bo)
    # Output and state.
    lin_state = tf.matmul(x, self._wc) + tf.matmul(prev_output, self._uc) + self._bc
    state = self._activation(lin_state) if self._activation is not None else lin_state
    state = f * prev_state + i * state
    output = o * state
    return state, output
def __init__(self, input, name='disc'):
    with tf.variable_scope(name):
        conv1 = conv_layer(input, [3, 3, 3, 64], 1)
        lrelu1 = leaky_relu(conv1)
        ochannels = [64, 128, 128, 256, 256, 512, 512]
        stride = [2, 1]
        block = [lrelu1]
        for i in xrange(7):
            block.append(self.get_block(block[-1], ochannels[i], stride[i % 2]))
        dense1 = tf.layers.dense(block[-1], 1024,
                                 kernel_initializer=tf.truncated_normal_initializer())
        lrelu2 = leaky_relu(dense1)
        self.dense2 = tf.layers.dense(lrelu2, 1,
                                      kernel_initializer=tf.truncated_normal_initializer(),
                                      activation=tf.sigmoid)
def __call__(self, inputs, state, scope=None):
    num_proj = self._num_units if self._num_proj is None else self._num_proj

    c_prev = tf.slice(state, [0, 0], [-1, self._num_units])
    m_prev = tf.slice(state, [0, self._num_units], [-1, num_proj])

    input_size = inputs.get_shape().with_rank(2)[1]
    if input_size.value is None:
        raise ValueError("Could not infer input size from inputs.get_shape()[-1]")

    with tf.variable_scope(type(self).__name__,
                           initializer=self._initializer):  # "LSTMCell"
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        cell_inputs = tf.concat(1, [inputs, m_prev])
        lstm_matrix = tf.nn.bias_add(tf.matmul(cell_inputs, self._concat_w), self._b)

        i, j, f, o = tf.split(1, 4, lstm_matrix)

        c = tf.sigmoid(f + 1.0) * c_prev + tf.sigmoid(i) * tf.tanh(j)
        m = tf.sigmoid(o) * tf.tanh(c)

        if self._num_proj is not None:
            m = tf.matmul(m, self._concat_w_proj)

    new_state = tf.concat(1, [c, m])
    return m, new_state
def _build(self):
    assert(self.d is not None)
    assert(self.lr is not None)
    assert(self.l2_penalty is not None)
    assert(self.loss_function is not None)

    # Get input placeholders and sentence features
    self._create_placeholders()
    sentence_feats, save_kwargs = self._embed_sentences()

    # Define linear model
    s1, s2 = self.seed, (self.seed + 1 if self.seed is not None else None)
    w = tf.Variable(tf.random_normal((self.d, 1), stddev=SD, seed=s1))
    b = tf.Variable(tf.random_normal((1, 1), stddev=SD, seed=s2))
    h = tf.squeeze(tf.matmul(sentence_feats, w) + b)

    # Define training procedure
    self.loss = self._get_loss(h, self.y)
    self.loss += self.l2_penalty * tf.nn.l2_loss(w)
    self.prediction = tf.sigmoid(h)
    self.train_fn = tf.train.AdamOptimizer(self.lr).minimize(self.loss)
    self.save_dict = save_kwargs.update({'w': w, 'b': b})
def __call__(self, inputs, state, scope=None):
    with tf.variable_scope(scope or type(self).__name__):
        lhs, rhs = state
        c0, h0 = lhs
        c1, h1 = rhs
        concat = tf.contrib.layers.linear(
            tf.concat([inputs, h0, h1], 1), 5 * self._num_units)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f0, f1, o = tf.split(value=concat, num_or_size_splits=5, axis=1)

        j = self._activation(j)
        if not isinstance(self._keep_prob, float) or self._keep_prob < 1:
            j = tf.nn.dropout(j, self._keep_prob, seed=self._seed)

        new_c = (c0 * tf.sigmoid(f0 + self._forget_bias) +
                 c1 * tf.sigmoid(f1 + self._forget_bias) +
                 tf.sigmoid(i) * j)
        new_h = self._activation(new_c) * tf.sigmoid(o)

        new_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h)

        return new_h, new_state
def create_discriminator(hr_images_fake, hr_images, cfg):
    n_layers = 3
    layers = []
    input = tf.concat([hr_images_fake, hr_images], axis=3)
    conv = slim.conv2d(input, cfg.ndf, [3, 3], stride=2, activation_fn=lrelu, scope='layers%d' % (0))
    layers.append(conv)
    for i in range(n_layers):
        out_channels = cfg.ndf * min(2 ** (i + 1), 8)
        stride = 1 if i == n_layers - 1 else 2
        conv = slim.conv2d(layers[-1], out_channels, [3, 3], stride=stride,
                           activation_fn=lrelu, scope='layers_%d' % (i + 2))
        layers.append(conv)
    conv = slim.conv2d(layers[-1], 1, [3, 3], stride=1)
    output = tf.sigmoid(conv)
    return output
def mce_loss(positive_scores, negative_scores):
    """
    Minimum Classification Error (MCE) loss [1]:
        loss(p, n) = \sum_i \sigma(- p_i + n_i)
    [1] http://yann.lecun.com/exdb/publis/pdf/lecun-06.pdf

    Args:
        positive_scores: (N,) Tensor containing scores of positive examples.
        negative_scores: (N,) Tensor containing scores of negative examples.

    Returns:
        Loss value.
    """
    mce_losses = tf.sigmoid(- positive_scores + negative_scores)
    loss = tf.reduce_sum(mce_losses)
    return loss
def __call__(self, inputs, initial_state=None, dtype=tf.float32, sequence_length=None, scope=None):
    num_gates = 3 if self._with_residual else 2
    transformed = tf.layers.dense(inputs, num_gates * self._num_units,
                                  bias_initializer=tf.constant_initializer(self._constant_bias))
    gates = tf.split(transformed, num_gates, axis=2)
    forget_gate = tf.sigmoid(gates[1])
    transformed_inputs = (1.0 - forget_gate) * gates[0]
    if self._with_residual:
        residual_gate = tf.sigmoid(gates[2])
        inputs *= (1.0 - residual_gate)
        new_inputs = tf.concat([inputs, transformed_inputs, forget_gate, residual_gate], axis=2)
    else:
        new_inputs = tf.concat([transformed_inputs, forget_gate], axis=2)
    return self._rnn(new_inputs, initial_state, dtype, sequence_length, scope)
def pre(self, inputs, scope=None):
    """Preprocess inputs to be used by the cell. Assumes [N, J, *] [x, u]"""
    is_train = self._is_train
    keep_prob = self._keep_prob
    gate_size = self._gate_size
    with tf.variable_scope(scope or "pre"):
        x, u, _, _ = tf.split(2, 4, tf.slice(inputs, [0, 0, gate_size], [-1, -1, -1]))  # [N, J, d]
        a_raw = linear([x * u], gate_size, True, scope='a_raw', var_on_cpu=self._var_on_cpu,
                       wd=self._wd, initializer=self._initializer)
        a = tf.sigmoid(a_raw - self._forget_bias, name='a')
        if keep_prob < 1.0:
            x = tf.cond(is_train, lambda: tf.nn.dropout(x, keep_prob), lambda: x)
            u = tf.cond(is_train, lambda: tf.nn.dropout(u, keep_prob), lambda: u)
        v_t = tf.nn.tanh(linear([x, u], self._num_units, True,
                                var_on_cpu=self._var_on_cpu, wd=self._wd, scope='v_raw'), name='v')
        new_inputs = tf.concat(2, [a, x, u, v_t])  # [N, J, 3*d + 1]
        return new_inputs
def __call__(self, inputs, state, scope=None):
    gate_size = self._gate_size
    with tf.variable_scope(scope or type(self).__name__):  # "RSMCell"
        with tf.name_scope("Split"):
            # Reset gate and update gate.
            a = tf.slice(inputs, [0, 0], [-1, gate_size])
            x, u, v_t = tf.split(1, 3, tf.slice(inputs, [0, gate_size], [-1, -1]))
            o = tf.slice(state, [0, 0], [-1, 1])
            h, v = tf.split(1, 2, tf.slice(state, [0, gate_size], [-1, -1]))

        with tf.variable_scope("Main"):
            r_raw = linear([x * u], 1, True, scope='r_raw', var_on_cpu=self._var_on_cpu,
                           initializer=self._initializer)
            r = tf.sigmoid(r_raw, name='a')
            new_o = a * r + (1 - a) * o
            new_v = a * v_t + (1 - a) * v
            g = r * v_t
            new_h = a * g + (1 - a) * h

        with tf.name_scope("Concat"):
            new_state = tf.concat(1, [new_o, new_h, new_v])
            outputs = tf.concat(1, [a, r, x, new_h, new_v, g])

        return outputs, new_state
def _cond_prob(self, a, w_dec_i, b_dec_i):
    """Gets the conditional probability for a single dimension.

    Args:
      a: Model's hidden state, sized `[batch_size, num_hidden]`.
      w_dec_i: The decoder weight terms for the dimension, sized
          `[num_hidden, 1]`.
      b_dec_i: The decoder bias terms, sized `[batch_size, 1]`.

    Returns:
      The conditional probability of the dimension, sized `[batch_size, 1]`.
    """
    # Decode hidden units to get conditional probability.
    h = tf.sigmoid(a)
    p_cond_i = tf.sigmoid(b_dec_i + tf.matmul(h, w_dec_i))
    return p_cond_i
def get_activation(activation=None):
    """ Get the activation function according to the parameter 'activation'
    Args:
        activation: str, name of the activation function
    Return:
        the corresponding activation function
    """
    if activation is None:
        return None
    elif activation == 'tanh':
        return tf.nn.tanh
    elif activation == 'relu':
        return tf.nn.relu
    elif activation == 'softmax':
        return tf.nn.softmax
    elif activation == 'sigmoid':
        return tf.sigmoid
    else:
        raise Exception('Unknown activation function: %s' % activation)
def generator(observed, n, n_z, is_training):
    with zs.BayesianNet(observed=observed) as generator:
        z_min = -tf.ones([n, n_z])
        z_max = tf.ones([n, n_z])
        z = zs.Uniform('z', z_min, z_max)
        lx_z = tf.reshape(z, [-1, 1, 1, n_z])
        ngf = 32
        lx_z = tf.layers.conv2d_transpose(lx_z, ngf * 4, 3, use_bias=False)
        lx_z = tf.layers.batch_normalization(lx_z, training=is_training, scale=False)
        lx_z = tf.nn.relu(lx_z)
        lx_z = tf.layers.conv2d_transpose(lx_z, ngf * 2, 5, use_bias=False)
        lx_z = tf.layers.batch_normalization(lx_z, training=is_training, scale=False)
        lx_z = tf.nn.relu(lx_z)
        lx_z = tf.layers.conv2d_transpose(lx_z, ngf, 5, strides=(2, 2),
                                          padding='same', use_bias=False)
        lx_z = tf.layers.batch_normalization(lx_z, training=is_training, scale=False)
        lx_z = tf.nn.relu(lx_z)
        lx_z = tf.layers.conv2d_transpose(
            lx_z, 1, 5, strides=(2, 2), padding='same', activation=tf.sigmoid)
    return generator, lx_z
def generator(observed, n, n_z, is_training):
    with zs.BayesianNet(observed=observed) as generator:
        ngf = 64
        z_min = -tf.ones([n, n_z])
        z_max = tf.ones([n, n_z])
        z = zs.Uniform('z', z_min, z_max)
        lx_z = tf.layers.dense(z, ngf * 8 * 4 * 4, use_bias=False)
        lx_z = tf.layers.batch_normalization(lx_z, training=is_training)
        lx_z = tf.nn.relu(lx_z)
        lx_z = tf.reshape(lx_z, [-1, 4, 4, ngf * 8])
        lx_z = tf.layers.conv2d_transpose(lx_z, ngf * 4, 5, strides=(2, 2),
                                          padding='same', use_bias=False)
        lx_z = tf.layers.batch_normalization(lx_z, training=is_training)
        lx_z = tf.nn.relu(lx_z)
        lx_z = tf.layers.conv2d_transpose(lx_z, ngf * 2, 5, strides=(2, 2),
                                          padding='same', use_bias=False)
        lx_z = tf.layers.batch_normalization(lx_z, training=is_training)
        lx_z = tf.nn.relu(lx_z)
        lx_z = tf.layers.conv2d_transpose(lx_z, 3, 5, strides=(2, 2),
                                          padding='same', activation=tf.sigmoid)
    return generator, lx_z
def _sample(self, n_samples):
    logits, temperature = self.logits, self.temperature
    if not self.is_reparameterized:
        logits = tf.stop_gradient(logits)
        temperature = tf.stop_gradient(temperature)
    shape = tf.concat([[n_samples], self.batch_shape], 0)

    uniform = open_interval_standard_uniform(shape, self.dtype)
    # TODO: add Logistic distribution
    logistic = tf.log(uniform) - tf.log(1 - uniform)
    samples = tf.sigmoid((logits + logistic) / temperature)

    static_n_samples = n_samples if isinstance(n_samples, int) else None
    samples.set_shape(
        tf.TensorShape([static_n_samples]).concatenate(
            self.get_batch_shape()))
    return samples
def __gibbs_sampling(self):
    # Gibbs sampling
    # Sample visible units
    with tf.name_scope('visible') as _:
        signal_back = self.__conv2d(self.hid_state0, self.weights_flipped) + self.cias
        if self.is_continuous:
            # Visible units are continuous
            normal_dist = tf.contrib.distributions.Normal(
                mu=signal_back, sigma=1.)
            self.vis_1 = tf.reshape(
                tf.div(normal_dist.sample_n(1),
                       self.weight_size * self.weight_size),
                self.input_shape, name='vis_1')
        else:
            # Visible units are binary
            vis1_prob = tf.sigmoid(signal_back, name='vis_1')
            self.vis_1 = self.__sample(vis1_prob, 'vis_1')

    # Sample hidden units
    with tf.name_scope('hidden') as _:
        self.hid_prob1 = tf.sigmoid(self.__conv2d(self.vis_1, self.weights) + self.bias,
                                    name='hid_prob_1')
def __init__(self, num_units, input_size=None, activation=tf.tanh,
             inner_activation=tf.sigmoid, bias=True, weights_init=None,
             trainable=True, restore=True, reuse=False):
    if input_size is not None:
        logging.warn("%s: The input_size parameter is deprecated." % self)
    self._num_units = num_units
    if isinstance(activation, str):
        self._activation = activations.get(activation)
    elif hasattr(activation, '__call__'):
        self._activation = activation
    else:
        raise ValueError("Invalid Activation.")
    if isinstance(inner_activation, str):
        self._inner_activation = activations.get(inner_activation)
    elif hasattr(inner_activation, '__call__'):
        self._inner_activation = inner_activation
    else:
        raise ValueError("Invalid Activation.")
    self.bias = bias
    self.weights_init = weights_init
    if isinstance(weights_init, str):
        self.weights_init = initializations.get(weights_init)()
    self.trainable = trainable
    self.restore = restore
    self.reuse = reuse
def _build(self, inputs, state):
    hidden, cell = state
    input_conv = self._convolutions["input"]
    hidden_conv = self._convolutions["hidden"]
    next_hidden = input_conv(inputs) + hidden_conv(hidden)
    gates = tf.split(value=next_hidden, num_or_size_splits=4,
                     axis=self._conv_ndims + 1)

    input_gate, next_input, forget_gate, output_gate = gates
    next_cell = tf.sigmoid(forget_gate + self._forget_bias) * cell
    next_cell += tf.sigmoid(input_gate) * tf.tanh(next_input)
    output = tf.tanh(next_cell) * tf.sigmoid(output_gate)

    if self._skip_connection:
        output = tf.concat([output, inputs], axis=-1)
    return output, (output, next_cell)
def _body(self, x, cumul_out, prev_state, cumul_state, cumul_halting, iteration,
          remainder, halting_linear, x_ones):
    """The `body` of `tf.while_loop`."""
    # Increase iteration count only for those elements that are still running.
    all_ones = tf.constant(1, shape=(self._batch_size, 1), dtype=self._dtype)
    is_iteration_over = tf.equal(cumul_halting, all_ones)
    next_iteration = tf.where(is_iteration_over, iteration, iteration + 1)
    out, next_state = self._core(x, prev_state)
    # Get part of state used to compute halting values.
    halting_input = halting_linear(self._get_state_for_halting(next_state))
    halting = tf.sigmoid(halting_input, name="halting")
    next_cumul_halting_raw = cumul_halting + halting
    over_threshold = next_cumul_halting_raw > self._threshold
    next_cumul_halting = tf.where(over_threshold, all_ones, next_cumul_halting_raw)
    next_remainder = tf.where(over_threshold, remainder,
                              1 - next_cumul_halting_raw)
    p = next_cumul_halting - cumul_halting
    next_cumul_state = _nested_add(cumul_state,
                                   _nested_unary_mul(next_state, p))
    next_cumul_out = cumul_out + p * out

    return (x_ones, next_cumul_out, next_state, next_cumul_state,
            next_cumul_halting, next_iteration, next_remainder)
def dis(self, X, Y):
    with tf.device('/gpu:' + GPU0):
        X = tf.reshape(X, [batch_size, resolution, resolution, resolution, 1])
        Y = tf.reshape(Y, [batch_size, resolution, resolution, resolution, 1])
        layer = tf.concat([X, Y], axis=4)
        c_d = [1, 64, 128, 256, 512]
        s_d = [0, 2, 2, 2, 2]
        layers_d = []
        layers_d.append(layer)
        for i in range(1, 5, 1):
            layer = tools.Ops.conv3d(layers_d[-1], k=4, out_c=c_d[i], str=s_d[i], name='d' + str(i))
            if i != 4:
                layer = tools.Ops.xxlu(layer, name='lrelu')
            layers_d.append(layer)
        y = tf.reshape(layers_d[-1], [batch_size, -1])
    return tf.nn.sigmoid(y)
def __call__(self, inputs, state, scope=None): """Long short-term memory cell (LSTM).""" with tf.variable_scope(scope or type(self).__name__): # "DilatedLSTMCell" # Parameters of gates are concatenated into one multiply for efficiency. c, h = tf.split(state, 2, axis=1) concat = self._linear([inputs, h], 4 * self._num_units, True) # i = input_gate, j = new_input, f = forget_gate, o = output_gate i, j, f, o = tf.split(concat, 4, axis=1) new_c = c * tf.sigmoid(f + self._forget_bias) + tf.sigmoid(i) * tf.tanh(j) new_h = tf.tanh(new_c) * tf.sigmoid(o) # update relevant cores timestep = tf.assign_add(self._timestep, 1) core_to_update = tf.mod(timestep, self._cores) updated_h = self._hold_mask[core_to_update] * h + self._dilated_mask[core_to_update] * new_h return updated_h, tf.concat([new_c, updated_h], axis=1)
def last_conv(input, reuse=False, use_sigmoid=False, name=None):
    """ Last convolutional layer of discriminator network
        (1 filter with size 4x4, stride 1)
    Args:
        input: 4D tensor
        reuse: boolean
        use_sigmoid: boolean (False if use lsgan)
        name: string, e.g. 'C64'
    """
    with tf.variable_scope(name, reuse=reuse):
        weights = _weights("weights", shape=[4, 4, input.get_shape()[3], 1])
        biases = _biases("biases", [1])
        conv = tf.nn.conv2d(input, weights, strides=[1, 1, 1, 1], padding='SAME')
        output = conv + biases
        if use_sigmoid:
            output = tf.sigmoid(output)
        return output

### Helpers
def __call__(self, inputs, state, scope=None):
    current_state = state[0]
    noise_i = state[1]
    noise_h = state[2]
    for i in range(self.depth):
        with tf.variable_scope('h_' + str(i)):
            if i == 0:
                h = tf.tanh(linear([inputs * noise_i, current_state * noise_h], self._num_units, True))
            else:
                h = tf.tanh(linear([current_state * noise_h], self._num_units, True))
        with tf.variable_scope('t_' + str(i)):
            if i == 0:
                t = tf.sigmoid(linear([inputs * noise_i, current_state * noise_h],
                                      self._num_units, True, self.forget_bias))
            else:
                t = tf.sigmoid(linear([current_state * noise_h],
                                      self._num_units, True, self.forget_bias))
        current_state = (h - current_state) * t + current_state

    return current_state, [current_state, noise_i, noise_h]
def _lstm(self, input_h, input_c, input_x, reuse=False):
    with tf.variable_scope('level2_lstm', reuse=reuse):
        w_i2h_ = np.transpose(self.model_load['/core/i2h_1/weight'][:], (1, 0))
        b_i2h_ = self.model_load['/core/i2h_1/bias'][:]
        w_h2h_ = np.transpose(self.model_load['/core/h2h_1/weight'][:], (1, 0))
        b_h2h_ = self.model_load['/core/h2h_1/bias'][:]
        w_i2h = tf.get_variable('w_i2h', initializer=w_i2h_)
        b_i2h = tf.get_variable('b_i2h', initializer=b_i2h_)
        w_h2h = tf.get_variable('w_h2h', initializer=w_h2h_)
        b_h2h = tf.get_variable('b_h2h', initializer=b_h2h_)

        input_x = tf.cast(input_x, tf.float32)
        i2h = tf.matmul(input_x, w_i2h) + b_i2h
        h2h = tf.matmul(input_h, w_h2h) + b_h2h
        all_input_sums = i2h + h2h
        reshaped = tf.reshape(all_input_sums, [-1, 4, self.H])
        n1, n2, n3, n4 = tf.unstack(reshaped, axis=1)
        in_gate = tf.sigmoid(n1)
        forget_gate = tf.sigmoid(n2)
        out_gate = tf.sigmoid(n3)
        in_transform = tf.tanh(n4)
        c = tf.multiply(forget_gate, input_c) + tf.multiply(in_gate, in_transform)
        h = tf.multiply(out_gate, tf.tanh(c))
    return c, h
def top_K_loss_margin(self, sentence, image, K=50, margin=0.2):
    sim_matrix = tf.matmul(sentence, image, transpose_b=True)
    s_square = tf.reduce_sum(tf.square(sentence), axis=1)
    im_square = tf.reduce_sum(tf.square(image), axis=1)
    d = 1 - tf.sigmoid(sim_matrix)
    positive = tf.stack([tf.matrix_diag_part(d)] * K, axis=1)
    length = tf.shape(d)[-1]
    dd = tf.matrix_set_diag(d, 8 * tf.ones([length]))
    flag = 8 - 7 * tf.sign(tf.nn.relu(self.sen_margin - self.sen_similarity))
    sen_loss_K, _ = tf.nn.top_k(-1.0 * dd * flag, K, sorted=False)  # note: this is negative value
    im_loss_K, _ = tf.nn.top_k(-tf.transpose(1.0 * dd * flag), K, sorted=False)  # note: this is negative value
    sentence_center_loss = -tf.log(1 - positive + 1e-12) - tf.log(-sen_loss_K + 1e-12)
    image_center_loss = -tf.log(1 - positive + 1e-12) - tf.log(-im_loss_K + 1e-12)
    self.d_neg = tf.reduce_mean((sen_loss_K + im_loss_K) / -2.0)
    self.d_pos = tf.reduce_mean(positive)
    self.endpoint['debug/im_loss_topK'] = -1.0 * im_loss_K
    self.endpoint['debug/sen_loss_topK'] = -1.0 * sen_loss_K
    self.endpoint['debug/d_Matrix'] = d
    self.endpoint['debug/positive'] = positive
    self.endpoint['debug/s_center_loss'] = sentence_center_loss
    self.endpoint['debug/i_center_loss'] = image_center_loss
    self.endpoint['debug/S'] = sim_matrix
    self.endpoint['debug/sentence_square'] = s_square
    self.endpoint['debug/image_square'] = im_square
    return tf.reduce_sum(sentence_center_loss), tf.reduce_sum(image_center_loss)
def lstm_func(x, h, c, wx, wh, b):
    """
        x: (N, D)
        h: (N, H)
        c: (N, H)
        wx: (D, 4H)
        wh: (H, 4H)
        b: (4H, )
    """
    N, H = tf.shape(h)[0], tf.shape(h)[1]
    a = tf.reshape(tf.matmul(x, wx) + tf.matmul(h, wh) + b, (N, -1, H))
    i, f, o, g = a[:, 0, :], a[:, 1, :], a[:, 2, :], a[:, 3, :]
    i = tf.sigmoid(i)
    f = tf.sigmoid(f)
    o = tf.sigmoid(o)
    g = tf.tanh(g)
    next_c = f * c + i * g
    next_h = o * tf.tanh(next_c)
    return next_h, next_c
def generator_graph(fake_imgs, units_size, out_size, alpha=0.01):
    # Variables of the generator live in their own variable scope
    with tf.variable_scope('generator'):
        # Fully connected hidden layer
        layer = tf.layers.dense(fake_imgs, units_size)
        # Leaky ReLU activation
        relu = tf.maximum(alpha * layer, layer)
        # Dropout to reduce overfitting
        drop = tf.layers.dropout(relu, rate=0.2)
        # logits; out_size matches the size of the real images
        logits = tf.layers.dense(drop, out_size)
        # Use tanh here instead of sigmoid for the output:
        # tanh outputs lie in (-1, 1), while sigmoid outputs lie in [0, 1]
        outputs = tf.tanh(logits)
        return logits, outputs
def make_dcgan_generator(Xk_g, n_lat, n_chan=1):
    n_g_hid1 = 1024  # size of hidden layer in generator layer 1
    n_g_hid2 = 128   # size of hidden layer in generator layer 2

    x = Dense(n_g_hid1)(Xk_g)
    x = BatchNormalization(mode=2)(x)
    x = Activation('relu')(x)

    x = Dense(n_g_hid2 * 7 * 7)(x)
    x = BatchNormalization(mode=2)(x)
    x = Activation('relu')(x)
    x = Reshape((n_g_hid2, 7, 7))(x)

    x = Deconvolution2D(64, 5, 5, output_shape=(128, 64, 14, 14),
                        border_mode='same', activation=None, subsample=(2, 2),
                        init='orthogonal', dim_ordering='th')(x)
    x = BatchNormalization(mode=2, axis=1)(x)
    x = Activation('relu')(x)

    g = Deconvolution2D(n_chan, 5, 5, output_shape=(128, n_chan, 28, 28),
                        border_mode='same', activation='sigmoid', subsample=(2, 2),
                        init='orthogonal', dim_ordering='th')(x)

    return g
def silu(_x):
    return _x * tf.sigmoid(_x)
def create_model(self, model_input, vocab_size, num_frames, **unused_params):
    """Creates a model which uses a logistic classifier over the average of the
    frame-level features.

    This class is intended to be an example for implementors of frame level
    models. If you want to train a model over averaged features it is more
    efficient to average them beforehand rather than on the fly.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
                   input features.
      vocab_size: The number of classes in the dataset.
      num_frames: A vector of length 'batch' which indicates the number of
           frames for each video (before padding).

    Returns:
      A dictionary with a tensor containing the probability predictions of the
      model in the 'predictions' key. The dimensions of the tensor are
      'batch_size' x 'num_classes'.
    """
    num_frames = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
    feature_size = model_input.get_shape().as_list()[2]
    max_frames = model_input.get_shape().as_list()[1]

    denominators = tf.reshape(
        tf.tile(num_frames, [1, feature_size]), [-1, feature_size])
    avg_pooled = tf.reduce_sum(model_input, axis=[1]) / denominators

    output = slim.fully_connected(
        avg_pooled, vocab_size, activation_fn=tf.nn.sigmoid,
        weights_regularizer=slim.l2_regularizer(1e-8))
    return {"predictions": output}
def sub_moe(self, model_input, vocab_size, num_mixtures=None,
            l2_penalty=1e-8, scopename="", **unused_params):
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures

    gate_activations = slim.fully_connected(
        model_input,
        vocab_size * (num_mixtures + 1),
        activation_fn=None,
        biases_initializer=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="gates" + scopename)
    expert_activations = slim.fully_connected(
        model_input,
        vocab_size * num_mixtures,
        activation_fn=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts" + scopename)

    gating_distribution = tf.nn.softmax(tf.reshape(
        gate_activations,
        [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
    expert_distribution = tf.nn.sigmoid(tf.reshape(
        expert_activations,
        [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures

    final_probabilities_by_class_and_batch = tf.reduce_sum(
        gating_distribution[:, :num_mixtures] * expert_distribution, 1)

    final_probabilities = tf.reshape(final_probabilities_by_class_and_batch,
                                     [-1, vocab_size])
    return model_input, final_probabilities
def sub_model(self, model_input, vocab_size, num_mixtures=None,
              l2_penalty=1e-8, sub_scope="", distill_labels=None, **unused_params):
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures
    class_size = 256
    if distill_labels is not None:
        class_input = slim.fully_connected(
            distill_labels,
            class_size,
            activation_fn=tf.nn.relu,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="class_inputs")
        class_input = tf.nn.l2_normalize(class_input, dim=1)
        model_input = tf.concat((model_input, class_input), axis=1)
    gate_activations = slim.fully_connected(
        model_input,
        vocab_size * (num_mixtures + 1),
        activation_fn=None,
        biases_initializer=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="gates-" + sub_scope)
    expert_activations = slim.fully_connected(
        model_input,
        vocab_size * num_mixtures,
        activation_fn=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts-" + sub_scope)

    gating_distribution = tf.nn.softmax(tf.reshape(
        gate_activations,
        [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
    expert_distribution = tf.nn.sigmoid(tf.reshape(
        expert_activations,
        [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures

    final_probabilities_by_class_and_batch = tf.reduce_sum(
        gating_distribution[:, :num_mixtures] * expert_distribution, 1)
    final_probabilities = tf.reshape(final_probabilities_by_class_and_batch,
                                     [-1, vocab_size])
    return final_probabilities
def decoder(z, reuse=False):
    with tf.variable_scope('decoder') as vs:
        if reuse:
            vs.reuse_variables()
        fc1 = fc_relu(z, 1024)
        fc2 = fc_relu(fc1, 7 * 7 * 128)
        fc2 = tf.reshape(fc2, tf.stack([tf.shape(fc2)[0], 7, 7, 128]))
        conv1 = conv2d_t_relu(fc2, 64, 4, 2)
        output = tf.contrib.layers.convolution2d_transpose(conv1, 1, 4, 2, activation_fn=tf.sigmoid)
        return output
def encoder(x, z_dim):
    with tf.variable_scope('encoder'):
        conv1 = conv2d_lrelu(x, 64, 4, 2)       # None x 14 x 14 x 64
        conv2 = conv2d_lrelu(conv1, 128, 4, 2)  # None x 7 x 7 x 128
        conv2 = tf.reshape(conv2, [-1, np.prod(conv2.get_shape().as_list()[1:])])  # None x (7x7x128)
        fc1 = fc_lrelu(conv2, 1024)
        mean = tf.contrib.layers.fully_connected(fc1, z_dim, activation_fn=tf.identity)
        stddev = tf.contrib.layers.fully_connected(fc1, z_dim, activation_fn=tf.sigmoid)
        stddev = tf.maximum(stddev, 0.005)
        return mean, stddev
def decoder(z, reuse=False):
    with tf.variable_scope('decoder') as vs:
        if reuse:
            vs.reuse_variables()
        fc1 = fc_relu(z, 1024)
        fc2 = fc_relu(fc1, 7 * 7 * 128)
        fc2 = tf.reshape(fc2, tf.stack([tf.shape(fc2)[0], 7, 7, 128]))
        conv1 = conv2d_t_relu(fc2, 64, 4, 2)
        mean = tf.contrib.layers.convolution2d_transpose(conv1, 1, 4, 2, activation_fn=tf.sigmoid)
        stddev = tf.contrib.layers.convolution2d_transpose(conv1, 1, 4, 2, activation_fn=tf.sigmoid)
        stddev = tf.maximum(stddev, 0.005)
        return mean, stddev

# Build the computation graph for training
def __call__(self, inputs, state, scope=None):
    with tf.variable_scope(scope or type(self).__name__):
        c_prev, h_prev, update_prob_prev, cum_update_prob_prev = state

        # Parameters of gates are concatenated into one multiply for efficiency.
        concat = rnn_ops.linear([inputs, h_prev], 4 * self._num_units, True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = tf.split(value=concat, num_or_size_splits=4, axis=1)

        if self._layer_norm:
            i = rnn_ops.layer_norm(i, name="i")
            j = rnn_ops.layer_norm(j, name="j")
            f = rnn_ops.layer_norm(f, name="f")
            o = rnn_ops.layer_norm(o, name="o")

        new_c_tilde = (c_prev * tf.sigmoid(f + self._forget_bias) + tf.sigmoid(i) * self._activation(j))
        new_h_tilde = self._activation(new_c_tilde) * tf.sigmoid(o)

        # Compute value for the update prob
        with tf.variable_scope('state_update_prob'):
            new_update_prob_tilde = rnn_ops.linear(new_c_tilde, 1, True, bias_start=self._update_bias)
            new_update_prob_tilde = tf.sigmoid(new_update_prob_tilde)

        # Compute value for the update gate
        cum_update_prob = cum_update_prob_prev + tf.minimum(update_prob_prev, 1. - cum_update_prob_prev)
        update_gate = _binary_round(cum_update_prob)

        # Apply update gate
        new_c = update_gate * new_c_tilde + (1. - update_gate) * c_prev
        new_h = update_gate * new_h_tilde + (1. - update_gate) * h_prev
        new_update_prob = update_gate * new_update_prob_tilde + (1. - update_gate) * update_prob_prev
        new_cum_update_prob = update_gate * 0. + (1. - update_gate) * cum_update_prob

        new_state = SkipLSTMStateTuple(new_c, new_h, new_update_prob, new_cum_update_prob)
        new_output = SkipLSTMOutputTuple(new_h, update_gate)

        return new_output, new_state
def __call__(self, inputs, state, scope=None):
    with tf.variable_scope(scope or type(self).__name__):
        h_prev, update_prob_prev, cum_update_prob_prev = state

        # Parameters of gates are concatenated into one multiply for efficiency.
        with tf.variable_scope("gates"):
            concat = rnn_ops.linear([inputs, h_prev], 2 * self._num_units, bias=True, bias_start=1.0)

        # r = reset_gate, u = update_gate
        r, u = tf.split(value=concat, num_or_size_splits=2, axis=1)

        if self._layer_norm:
            r = rnn_ops.layer_norm(r, name="r")
            u = rnn_ops.layer_norm(u, name="u")

        # Apply non-linearity after layer normalization
        r = tf.sigmoid(r)
        u = tf.sigmoid(u)

        with tf.variable_scope("candidate"):
            new_c_tilde = self._activation(rnn_ops.linear([inputs, r * h_prev], self._num_units, True))
        new_h_tilde = u * h_prev + (1 - u) * new_c_tilde

        # Compute value for the update prob
        with tf.variable_scope('state_update_prob'):
            new_update_prob_tilde = rnn_ops.linear(new_h_tilde, 1, True, bias_start=self._update_bias)
            new_update_prob_tilde = tf.sigmoid(new_update_prob_tilde)

        # Compute value for the update gate
        cum_update_prob = cum_update_prob_prev + tf.minimum(update_prob_prev, 1. - cum_update_prob_prev)
        update_gate = _binary_round(cum_update_prob)

        # Apply update gate
        new_h = update_gate * new_h_tilde + (1. - update_gate) * h_prev
        new_update_prob = update_gate * new_update_prob_tilde + (1. - update_gate) * update_prob_prev
        new_cum_update_prob = update_gate * 0. + (1. - update_gate) * cum_update_prob

        new_state = SkipGRUStateTuple(new_h, new_update_prob, new_cum_update_prob)
        new_output = SkipGRUOutputTuple(new_h, update_gate)

        return new_output, new_state
def __init__(self, logits):
    self.logits = logits
    self.ps = tf.sigmoid(logits)
def _step(self, f, z, o):
    with tf.variable_scope("fo-Pool"):
        # f, z, o is batch_size x size
        f = tf.sigmoid(f)
        z = tf.tanh(z)
        o = tf.sigmoid(o)
        self.c = tf.mul(f, self.c) + tf.mul(1 - f, z)
        self.h = tf.mul(o, self.c)
    # h is size vector
    return self.h
def __discriminator(self, x, scope, reuse, hidden_units):
    with tf.variable_scope(scope, reuse=reuse):
        h1 = tf.layers.dense(x, hidden_units, activation=None)
        h1 = LeakyReLU(h1, self.alpha)

        logits = tf.layers.dense(h1, 1, activation=None)
        out = tf.sigmoid(logits)

        return out, logits

#---------------------------------------------------------------------------