我们从Python开源项目中,提取了以下49个代码示例,用于说明如何使用tensorflow.get_variable()。
def dense(inputs, units, bias_shape, w_i, b_i=None, activation=tf.nn.relu):
    """Fully connected layer built directly on ``tf.get_variable``.

    Non-tensor inputs are converted to a float tensor, and inputs whose rank
    is greater than 2 are flattened before the matrix multiply.

    Args:
        inputs: Tensor, or a value convertible to one, feeding the layer.
        units: Number of output units.
        bias_shape: Shape of the ``'biases'`` variable, or ``None`` to skip
            the bias. Its first entry must equal ``units``.
        w_i: Initializer for the ``'weights'`` variable.
        b_i: Initializer for the ``'biases'`` variable.
        activation: Callable applied to the output, or ``None`` to return the
            linear result unchanged.

    Returns:
        The (optionally activated) output tensor.
    """
    if not isinstance(inputs, ops.Tensor):
        inputs = ops.convert_to_tensor(inputs, dtype='float')
    if len(inputs.shape) > 2:
        inputs = tf.contrib.layers.flatten(inputs)
    in_features = inputs.shape[1]

    kernel = tf.get_variable('weights', shape=[in_features, units], initializer=w_i)
    pre_activation = tf.matmul(inputs, kernel)

    if bias_shape is not None:
        assert bias_shape[0] == units
        bias = tf.get_variable('biases', shape=bias_shape, initializer=b_i)
        pre_activation = pre_activation + bias

    if activation is None:
        return pre_activation
    return activation(pre_activation)
def variable_on_worker_level(name, shape, initializer):
    """Create or fetch a variable on the device selected for this worker.

    When ``FLAGS.ps_hosts`` is empty the variable is placed on
    ``worker_device``; otherwise placement is delegated to
    ``tf.train.replica_device_setter`` built from ``worker_device`` and
    ``cluster``.

    Args:
        name: Variable name passed to ``tf.get_variable``.
        shape: Variable shape.
        initializer: Variable initializer.

    Returns:
        The variable returned by ``tf.get_variable``.
    """
    no_parameter_servers = len(FLAGS.ps_hosts) == 0
    if no_parameter_servers:
        placement = worker_device
    else:
        placement = tf.train.replica_device_setter(
            worker_device=worker_device, cluster=cluster)

    with tf.device(placement):
        return tf.get_variable(name=name, shape=shape, initializer=initializer)
def _variable_on_device(name, shape, initializer, trainable=True):
    """Create or fetch a variable via ``tf.get_variable``.

    Args:
        name: Name of the variable.
        shape: Variable shape. Only used when ``initializer`` is callable;
            otherwise the initializer value itself is handed to
            ``tf.get_variable`` without a shape.
        initializer: Either a callable initializer or a concrete initial value.
        trainable: Whether the variable is trainable.

    Returns:
        The variable returned by ``tf.get_variable``.
    """
    if callable(initializer):
        # TODO(bichen): fix the hard-coded data type below
        return tf.get_variable(
            name, shape, initializer=initializer, dtype=tf.float32,
            trainable=trainable)
    return tf.get_variable(name, initializer=initializer, trainable=trainable)
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    """Combine one or more frame-level models with uniform weights.

    Model class names are read from the comma-separated flag
    ``FLAGS.wide_and_deep_models`` and looked up on ``frame_level_models``.
    Each model's ``"predictions"`` output is stacked on a new last axis and
    the stack is reduced with a constant ``1 / num_models`` weight.

    Args:
        model_input: Input passed unchanged to every sub-model.
        vocab_size: Number of classes; also the first dim of the weight matrix.
        num_frames: Passed unchanged to every sub-model.
        l2_penalty: Passed to every sub-model as ``l2_penalty``.
        **unused_params: Forwarded to every sub-model.

    Returns:
        A dict with the combined tensor under the ``"predictions"`` key.
    """
    model_names = [raw_name.strip() for raw_name in FLAGS.wide_and_deep_models.split(",")]

    stacked = []
    for model_name in model_names:
        # NOTE(review): an unknown name makes getattr return None, so the call
        # below fails with "'NoneType' object is not callable".
        sub_model = getattr(frame_level_models, model_name, None)()
        sub_result = sub_model.create_model(
            model_input, vocab_size, num_frames,
            l2_penalty=l2_penalty, **unused_params)
        stacked.append(tf.expand_dims(sub_result["predictions"], axis=2))

    num_models = len(stacked)
    model_outputs = tf.concat(stacked, axis=2)
    uniform_weights = tf.fill(dims=[vocab_size, num_models], value=1.0 / num_models)
    return {"predictions": tf.einsum("ijk,jk->ij", model_outputs, uniform_weights)}
def get_video_weights(video_id_batch):
    """Look up a per-video sample weight for each id in the batch.

    Ids are mapped to row indexes through a lookup table built from
    ``FLAGS.sample_vocab_file`` (unknown ids map to index 0). The weights live
    in a non-trainable ``"sample_weights"`` variable; a placeholder and an
    assign op for overwriting it are registered in the ``"weights_input"`` and
    ``"weights_assignment"`` collections.

    Args:
        video_id_batch: Tensor of video ids handed to the table lookup.

    Returns:
        Tensor of weights gathered for the batch.
    """
    id_table = tf.contrib.lookup.string_to_index_table_from_file(
        vocabulary_file=FLAGS.sample_vocab_file, default_value=0)
    row_indexes = id_table.lookup(video_id_batch)

    initial_weights, num_weights = get_video_weights_array()
    feed = tf.placeholder(tf.float32, shape=[num_weights], name="sample_weights_input")
    weights_var = tf.get_variable(
        "sample_weights", shape=[num_weights], trainable=False, dtype=tf.float32,
        initializer=tf.constant_initializer(initial_weights))
    overwrite_op = tf.assign(weights_var, feed)

    tf.add_to_collection("weights_input", feed)
    tf.add_to_collection("weights_assignment", overwrite_op)
    return tf.nn.embedding_lookup(weights_var, row_indexes)
def create_model(self, model_input, vocab_size, l2_penalty=1e-8, original_input=None, **unused_params):
    """Create a matrix regression ensemble.

    A per-method weight vector scales a per-(feature, method) weight matrix;
    the product is softmax-normalized over the method axis and used to mix
    the inputs.

    Args:
        model_input: 'batch' x 'num_features' x 'num_methods' tensor of input
            features.
        vocab_size: The number of classes in the dataset (not read here).
        l2_penalty: L2 regularization strength; the 2-D weight uses 10x this.
        original_input: Not read here.

    Returns:
        A dict whose 'predictions' key holds the mixed tensor with one value
        per (batch, feature).
    """
    input_dims = model_input.get_shape().as_list()
    num_features, num_methods = input_dims[-2], input_dims[-1]

    method_weight = tf.get_variable(
        "ensemble_weight1d", shape=[num_methods],
        regularizer=slim.l2_regularizer(l2_penalty))
    feature_method_weight = tf.get_variable(
        "ensemble_weight2d", shape=[num_features, num_methods],
        regularizer=slim.l2_regularizer(10 * l2_penalty))

    scaled = tf.einsum("ij,j->ij", feature_method_weight, method_weight)
    mixing = tf.nn.softmax(scaled, dim=-1)
    return {"predictions": tf.einsum("ijk,jk->ij", model_input, mixing)}
def create_model(self, model_input, vocab_size, l2_penalty=1e-8, original_input=None, **unused_params):
    """Create a linear regression ensemble.

    One weight per method is learned, softmax-normalized, and used to average
    the inputs over the method axis.

    Args:
        model_input: 'batch' x 'num_features' x 'num_methods' tensor of input
            features.
        vocab_size: The number of classes in the dataset (not read here).
        l2_penalty: L2 regularization strength for the weight vector.
        original_input: Not read here.

    Returns:
        A dict whose 'predictions' key holds the averaged tensor with one
        value per (batch, feature).
    """
    num_methods = model_input.get_shape().as_list()[-1]
    raw_weight = tf.get_variable(
        "ensemble_weight", shape=[num_methods],
        regularizer=slim.l2_regularizer(l2_penalty))
    mixing = tf.nn.softmax(raw_weight)
    predictions = tf.einsum("ijk,k->ij", model_input, mixing)
    return {"predictions": predictions}
def create_model(self, model_input, vocab_size, l2_penalty=1e-8, original_input=None, epsilon=1e-5, **unused_params):
    """Create a non-unified matrix regression ensemble.

    The inputs are mapped through ``log((eps + p) / (1 + eps - p))`` with the
    gradient stopped, mixed with softmax-normalized per-(feature, method)
    weights, and passed through a sigmoid.

    Args:
        model_input: 'batch' x 'num_features' x 'num_methods' tensor of input
            features.
        vocab_size: The number of classes in the dataset (not read here).
        l2_penalty: L2 regularization strength for the weight matrix.
        original_input: Not read here.
        epsilon: Smoothing constant used inside the log ratio.

    Returns:
        A dict whose 'predictions' key holds the sigmoid output with one value
        per (batch, feature).
    """
    input_dims = model_input.get_shape().as_list()
    num_features, num_methods = input_dims[-2], input_dims[-1]

    odds = (epsilon + model_input) / (1.0 + epsilon - model_input)
    log_model_input = tf.stop_gradient(tf.log(odds))

    raw_weight = tf.get_variable(
        "ensemble_weight", shape=[num_features, num_methods],
        regularizer=slim.l2_regularizer(l2_penalty))
    mixing = tf.nn.softmax(raw_weight)

    mixed = tf.einsum("ijk,jk->ij", log_model_input, mixing)
    return {"predictions": tf.nn.sigmoid(mixed)}
def trainable_initial_state(self, batch_size):
    """Create a trainable initial state for the LSTM cell.

    Two ``[1, num_units]`` variables (under the ``initial_c`` and
    ``initial_h`` scopes) are created and tiled along the first axis to
    ``batch_size`` rows.

    :param batch_size: number of samples per batch
    :return: ``tf.contrib.rnn.LSTMStateTuple`` of the tiled (c, h) states
    """
    def _tiled_state(rows, width, trainable=True,
                     initializer=tf.random_normal_initializer()):
        # The single-row variable is pinned to the CPU, then repeated per sample.
        with tf.device('/cpu:0'):
            row = tf.get_variable('initial_state', shape=[1, width],
                                  dtype=tf.float32, trainable=trainable,
                                  initializer=initializer)
            tiled = tf.tile(row, tf.stack([rows, 1]))
        return tiled

    states = []
    for scope_name in ('initial_c', 'initial_h'):
        with tf.variable_scope(scope_name):
            states.append(_tiled_state(batch_size, self._num_units))
    return tf.contrib.rnn.LSTMStateTuple(states[0], states[1])
def conv2d(x, num_filters, name, filter_size=(3, 3), stride=(1, 1), pad="SAME", dtype=tf.float32,
           collections=None):
    """2-D convolution with a uniform, fan-based weight initialization.

    Args:
        x: Input tensor; its dimension at index 3 is used as the input
            channel count.
        num_filters: Number of output feature maps.
        name: Variable scope for the ``"W"`` and ``"b"`` variables.
        filter_size: (height, width) of the kernel.
        stride: (vertical, horizontal) stride.
        pad: Padding mode passed to ``tf.nn.conv2d``.
        dtype: Data type of the weight variable.
        collections: Collections the variables are added to.

    Returns:
        The convolution output plus the bias.
    """
    kernel_h, kernel_w = filter_size[0], filter_size[1]
    with tf.variable_scope(name):
        strides = [1, stride[0], stride[1], 1]
        kernel_shape = [kernel_h, kernel_w, int(x.get_shape()[3]), num_filters]

        # fan_in: input feature maps * filter height * filter width
        fan_in = np.prod(kernel_shape[:3])
        # fan_out: output feature maps * filter height * filter width
        fan_out = np.prod(kernel_shape[:2]) * num_filters
        limit = np.sqrt(6. / (fan_in + fan_out))

        kernel = tf.get_variable("W", kernel_shape, dtype,
                                 tf.random_uniform_initializer(-limit, limit),
                                 collections=collections)
        bias = tf.get_variable("b", [1, 1, 1, num_filters],
                               initializer=tf.constant_initializer(0.0),
                               collections=collections)
        return tf.nn.conv2d(x, kernel, strides, pad) + bias
def batchnorm(x, name, phase, updates, gamma=0.96):
    """Batch normalization with running statistics and a learned affine map.

    Args:
        x: Input tensor; its dimension at index 1 sizes every variable.
        name: Prefix for the ``/mean``, ``/var``, ``/scaling`` and
            ``/translation`` variables.
        phase: Selector handed to ``switch`` to pick the batch-statistics
            output or the running-statistics output.
        updates: List that receives the two running-statistics assign ops.
        gamma: Decay applied to the running statistics.

    Returns:
        The selected normalized tensor, scaled and translated.
    """
    k = x.get_shape()[1]

    running_mean = tf.get_variable(name+"/mean", shape=[1, k],
                                   initializer=tf.constant_initializer(0.0), trainable=False)
    running_var = tf.get_variable(name+"/var", shape=[1, k],
                                  initializer=tf.constant_initializer(1e-4), trainable=False)
    test_output = (x - running_mean) / tf.sqrt(running_var)

    batch_mean = mean(x, axis=0, keepdims=True)
    # NOTE(review): this is the mean of x**2 (uncentered second moment), not
    # the variance about batch_mean -- confirm that is intended.
    batch_sq_mean = mean(tf.square(x), axis=0, keepdims=True)
    train_output = (x - batch_mean) / tf.sqrt(batch_sq_mean)

    decay, blend = gamma, 1 - gamma
    updates.extend([
        tf.assign(running_mean, running_mean * decay + batch_mean * blend),
        tf.assign(running_var, running_var * decay + batch_sq_mean * blend),
    ])

    normalized = switch(phase, train_output, test_output)
    scaling = tf.get_variable(name+"/scaling", shape=[1, k],
                              initializer=tf.constant_initializer(1.0), trainable=True)
    translation = tf.get_variable(name+"/translation", shape=[1, k],
                                  initializer=tf.constant_initializer(0.0), trainable=True)
    return normalized * scaling + translation

# ================================================================
# Mathematical utils
# ================================================================
def test_with_dynamic_inputs(self):
    """Decode greedily in INFER mode and check the output shapes.

    The logits must have shape (max_decode_length, batch_size, vocab_size)
    and the predicted ids (max_decode_length, batch_size).
    """
    embedding_matrix = tf.get_variable("W_embed", [self.vocab_size, self.input_depth])
    greedy_helper = decode_helper.GreedyEmbeddingHelper(
        embedding=embedding_matrix,
        start_tokens=[0] * self.batch_size,
        end_token=-1)
    decoder_fn = self.create_decoder(
        helper=greedy_helper, mode=tf.contrib.learn.ModeKeys.INFER)
    zero_state = decoder_fn.cell.zero_state(self.batch_size, dtype=tf.float32)
    decoder_output, _ = decoder_fn(zero_state, greedy_helper)

    #pylint: disable=E1101
    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        evaluated = sess.run(decoder_output)

    expected_ids_shape = [self.max_decode_length, self.batch_size]
    expected_logits_shape = expected_ids_shape + [self.vocab_size]
    np.testing.assert_array_equal(evaluated.logits.shape, expected_logits_shape)
    np.testing.assert_array_equal(evaluated.predicted_ids.shape, expected_ids_shape)
def makeDNN(hidden_layer):
    """Build a ReLU/dropout MLP on top of the module-level input ``X``.

    Args:
        hidden_layer: Number of hidden layers. Each is ``wide`` units; the
            first one takes ``features`` inputs.

    Returns:
        The output tensor of the final linear layer with ``labels`` units.
    """
    make_xavier = tf.contrib.layers.xavier_initializer

    activations = X
    for layer_idx in range(hidden_layer):
        # Only the first layer reads the raw feature width.
        fan_in = features if layer_idx == 0 else wide
        layer_weight = tf.get_variable("W0%d" % layer_idx, shape=[fan_in, wide],
                                       initializer=make_xavier())
        layer_bias = tf.Variable(tf.random_normal([wide]))
        hidden = tf.nn.relu(tf.matmul(activations, layer_weight) + layer_bias)
        activations = tf.nn.dropout(hidden, dropout_rate)

    output_weight = tf.get_variable("Wo", shape=[wide, labels], initializer=make_xavier())
    output_bias = tf.Variable(tf.random_normal([labels]))
    return tf.matmul(activations, output_weight) + output_bias

# tf Graph Input
def _conv2d_impl(self, input_layer, num_channels_in, filters, kernel_size, strides, padding, kernel_initializer):
    """Apply a bias-free 2-D convolution via tf.layers or raw ``tf.nn``.

    Args:
        input_layer: Input tensor.
        num_channels_in: Number of input channels (used for the raw kernel).
        filters: Number of output channels.
        kernel_size: (height, width) of the kernel.
        strides: Two-element list of spatial strides.
        padding: Padding mode.
        kernel_initializer: Initializer for the kernel.

    Returns:
        The convolution output tensor.
    """
    if self.use_tf_layers:
        return conv_layers.conv2d(input_layer, filters, kernel_size, strides,
                                  padding, self.channel_pos,
                                  kernel_initializer=kernel_initializer,
                                  use_bias=False)

    kernel_shape = [kernel_size[0], kernel_size[1], num_channels_in, filters]
    # We use the name 'conv2d/kernel' so the variable has the same name as its
    # tf.layers equivalent. This way, if a checkpoint is written when
    # self.use_tf_layers == True, it can be loaded when
    # self.use_tf_layers == False, and vice versa.
    kernel = self.get_variable('conv2d/kernel', kernel_shape,
                               self.variable_dtype, self.dtype,
                               initializer=kernel_initializer)
    # Expand the two spatial strides to the four-element form of the layout.
    if self.data_format == 'NHWC':
        full_strides = [1] + strides + [1]
    else:
        full_strides = [1, 1] + strides
    return tf.nn.conv2d(input_layer, kernel, full_strides, padding,
                        data_format=self.data_format)
def get_gradients_to_apply(self, device_num, gradient_state):
    """Average device gradients and pair them with shadow variables.

    Args:
        device_num: Not read here.
        gradient_state: Per-device gradients (the second result of
            ``preprocess_device_grads``).

    Returns:
        The aggregated list, with each entry rewritten in place to
        ``(gradient, shadow_variable)``.
    """
    aggregated = variable_mgr_util.aggregate_gradients_using_copy_with_device_selection(
        self.benchmark_cnn, gradient_state, use_mean=True,
        check_inf_nan=self.benchmark_cnn.enable_auto_loss_scale)
    avg_grads, self.grad_has_inf_nan = aggregated

    # Make shadow variable on a parameter server for each original trainable
    # variable.
    for index, (grad, var) in enumerate(avg_grads):
        shadow_name = variable_mgr_util.PS_SHADOW_VAR_PREFIX + '/' + var.name
        if shadow_name.endswith(':0'):
            shadow_name = shadow_name[:-2]
        shadow_var = tf.get_variable(
            shadow_name, dtype=var.dtype.base_dtype,
            initializer=var.initial_value, trainable=True)
        avg_grads[index] = (grad, shadow_var)
    return avg_grads
def fix_variables(self, sess, pretrained_model):
    """Restore three VGG16 kernels from a checkpoint and copy them over.

    Temporary non-trainable variables are restored from ``pretrained_model``
    under the checkpoint names ``<scope>/fc6/weights``,
    ``<scope>/fc7/weights`` and ``<scope>/conv1/conv1_1/weights``. The fc
    kernels are reshaped to the target variables' shapes, and the first conv
    kernel is reversed along axis 2 (RGB to BGR) before assignment.

    Args:
        sess: Session used for the restore and the assign ops.
        pretrained_model: Checkpoint path given to the saver.
    """
    print('Fix VGG16 layers..')
    with tf.variable_scope('Fix_VGG16') as scope:
        with tf.device("/cpu:0"):
            # fix the vgg16 issue from conv weights to fc weights
            # fix RGB to BGR
            fc6_conv = tf.get_variable("fc6_conv", [7, 7, 512, 4096], trainable=False)
            fc7_conv = tf.get_variable("fc7_conv", [1, 1, 4096, 4096], trainable=False)
            conv1_rgb = tf.get_variable("conv1_rgb", [3, 3, 3, 64], trainable=False)

            checkpoint_vars = {
                self._scope + "/fc6/weights": fc6_conv,
                self._scope + "/fc7/weights": fc7_conv,
                self._scope + "/conv1/conv1_1/weights": conv1_rgb,
            }
            restorer_fc = tf.train.Saver(checkpoint_vars)
            restorer_fc.restore(sess, pretrained_model)

            fc6_target = self._variables_to_fix[self._scope + '/fc6/weights:0']
            sess.run(tf.assign(fc6_target, tf.reshape(fc6_conv, fc6_target.get_shape())))

            fc7_target = self._variables_to_fix[self._scope + '/fc7/weights:0']
            sess.run(tf.assign(fc7_target, tf.reshape(fc7_conv, fc7_target.get_shape())))

            conv1_target = self._variables_to_fix[self._scope + '/conv1/conv1_1/weights:0']
            sess.run(tf.assign(conv1_target, tf.reverse(conv1_rgb, [2])))
def _create_model(self, **kwargs) -> None:
    """
    Build the TensorFlow model; subclasses must override this hook.

    An implementation has to define:

    - a loss tensor named according to the given ``loss_name``
    - input placeholders and output tensors named according to the specified
      input and output names

    .. warning::
        To support multi-GPU training, all the variables must be created with
        ``tf.get_variable`` and appropriate variable scopes.

    :param kwargs: model configuration as specified in ``model`` section of
                   the configuration file
    :raise NotImplementedError: always, in this base implementation
    """
    message = '`_create_model` method must be implemented in order to construct a new model.'
    raise NotImplementedError(message)
def conv3d(input_, output_dim, f_size, is_training, scope='conv3d'):
    """Two stacked 3-D conv + bias + batch-norm + ReLU stages.

    Args:
        input_: Input tensor; its last dimension is the input channel count.
        output_dim: Number of output channels of both stages.
        f_size: Cubic kernel edge length.
        is_training: Passed to the batch-norm layers.
        scope: Variable scope holding ``w1/b1/bn1`` and ``w2/b2/bn2``.

    Returns:
        Output tensor of the second ReLU.
    """
    def _conv_bn_relu(tensor, in_channels, suffix):
        # One conv -> bias -> batch-norm -> ReLU stage; variables are w/b/bn + suffix.
        kernel = tf.get_variable(
            'w' + suffix, [f_size, f_size, f_size, in_channels, output_dim],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        convolved = tf.nn.conv3d(tensor, kernel, strides=[1, 1, 1, 1, 1], padding='SAME')
        bias = tf.get_variable('b' + suffix, [output_dim],
                               initializer=tf.constant_initializer(0.0))
        convolved = tf.nn.bias_add(convolved, bias)
        normed = tf.contrib.layers.batch_norm(
            convolved, is_training=is_training, scope='bn' + suffix,
            variables_collections=['bn_collections'])
        return tf.nn.relu(normed)

    with tf.variable_scope(scope):
        # VGG network uses two 3*3 conv layers to effectively increase receptive field
        first_stage = _conv_bn_relu(input_, input_.get_shape()[-1], '1')
        return _conv_bn_relu(first_stage, output_dim, '2')
def deconv2d(input_, output_shape, k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, name="deconv2d", with_w=False):
    """Transposed 2-D convolution with a bias.

    Args:
        input_: Input tensor; its last dimension is the input channel count.
        output_shape: Target output shape; its last entry is the output
            channel count. It is packed with ``tf.stack``.
        k_h, k_w: Kernel height and width.
        d_h, d_w: Vertical and horizontal strides.
        stddev: Standard deviation of the kernel initializer.
        name: Variable scope for the ``'w'`` and ``'biases'`` variables.
        with_w: When true, also return the kernel and the bias.

    Returns:
        The output tensor, or ``(output, w, biases)`` when ``with_w`` is true.
    """
    out_channels = output_shape[-1]
    with tf.variable_scope(name):
        # filter : [height, width, output_channels, in_channels]
        kernel = tf.get_variable(
            'w', [k_h, k_w, out_channels, input_.get_shape()[-1]],
            initializer=tf.random_normal_initializer(stddev=stddev))
        packed_shape = tf.stack(output_shape)
        transposed = tf.nn.conv2d_transpose(
            input_, kernel, output_shape=packed_shape, strides=[1, d_h, d_w, 1])
        bias = tf.get_variable('biases', [out_channels],
                               initializer=tf.constant_initializer(0.0))
        result = tf.reshape(tf.nn.bias_add(transposed, bias), packed_shape)

        if not with_w:
            return result
        return result, kernel, bias
def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False):
    """Affine projection ``input_ @ Matrix + bias``.

    Args:
        input_: Input tensor; its static dimension at index 1 sizes the matrix.
        output_size: Number of output units.
        scope: Variable scope name; ``"Linear"`` when falsy.
        stddev: Standard deviation of the matrix initializer.
        bias_start: Constant initial value of the bias.
        with_w: When true, also return the matrix and the bias.

    Returns:
        The projected tensor, or ``(projected, matrix, bias)`` when
        ``with_w`` is true.
    """
    in_features = input_.get_shape().as_list()[1]
    with tf.variable_scope(scope or "Linear"):
        matrix = tf.get_variable("Matrix", [in_features, output_size], tf.float32,
                                 tf.random_normal_initializer(stddev=stddev))
        bias = tf.get_variable("bias", [output_size],
                               initializer=tf.constant_initializer(bias_start))
        projected = tf.matmul(input_, matrix) + bias
        if with_w:
            return projected, matrix, bias
        return projected

# minibatch method that improves on openai
# because it doesn't fix batchsize:
# TODO: recheck when not sleepy
def _get_weight_variable(self, layer_name, name, shape, L2=1):
    """Create a weight variable, loading a stored value when one exists.

    The variable is initialized from ``self.weights['<layer_name>/<name>:0']``
    when that key is present; otherwise from a truncated normal whose stddev
    comes from ``self._xavi_norm(fanin, fanout)``. Its L2 loss, scaled by
    ``L2``, is added to the regularization-losses collection.

    Args:
        layer_name: Prefix of the expected full variable name.
        name: Variable name passed to ``tf.get_variable``.
        shape: Weight shape; the last two entries are fan-in and fan-out, and
            every earlier entry multiplies both.
        L2: Multiplier applied to the L2 loss.

    Returns:
        The created variable.
    """
    wname = '%s/%s:0' % (layer_name, name)

    fanin, fanout = shape[-2:]
    for dim in shape[:-2]:
        fanin *= float(dim)
        fanout *= float(dim)
    sigma = self._xavi_norm(fanin, fanout)

    has_stored_value = self.weights is not None and wname in self.weights
    if has_stored_value:
        stored = tf.constant_initializer(value=self.weights[wname], dtype=tf.float32)
        w1 = tf.get_variable(name, shape=shape, initializer=stored)
        self.loaded_weights[wname] = 1
    else:
        random_init = tf.truncated_normal(shape=shape, mean=0, stddev=sigma)
        w1 = tf.get_variable(name, initializer=random_init)
        print('{:>23} {:>23}'.format(wname, 'randomly initialize'))

    # The enclosing variable scope must produce exactly the expected name.
    if wname != w1.name:
        print(wname, w1.name)
        assert False

    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w1) * L2)
    return w1
def global_step(device=''):
    """Return the global step variable, creating it on first use.

    Args:
        device: Optional device to place the variable. It can be a string or
            a function that is called to get the device for the variable.

    Returns:
        The first entry of the GLOBAL_STEP collection when one exists;
        otherwise a new non-trainable int64 scalar variable named
        ``'global_step'``.
    """
    existing = tf.get_collection(tf.GraphKeys.GLOBAL_STEP)
    if existing:
        return existing[0]

    target_collections = [
        VARIABLES_TO_RESTORE,
        tf.GraphKeys.VARIABLES,
        tf.GraphKeys.GLOBAL_STEP,
    ]
    # Get the device for the variable.
    with tf.device(variable_device(device, 'global_step')):
        return tf.get_variable('global_step', shape=[], dtype=tf.int64,
                               initializer=tf.zeros_initializer,
                               trainable=False,
                               collections=target_collections)
def __call__(self, x, train=True):
    """Apply batch normalization to ``x``.

    With ``train=True`` the ``beta``/``gamma`` variables are created (sized by
    the last dimension of ``x``), batch moments are taken over axes
    ``[0, 1, 2]``, and ``self.ema`` is updated with them. Otherwise the
    averages stored on the instance are used.

    NOTE(review): the ``train=False`` path reads ``self.ema_mean``,
    ``self.ema_var``, ``self.beta`` and ``self.gamma``, which this method only
    sets in the ``train=True`` path -- presumably it must be called with
    ``train=True`` first; confirm against callers.

    Args:
        x: Input tensor; moments are reduced over its first three axes.
        train: Whether to use (and record) batch statistics.

    Returns:
        The normalized tensor.
    """
    shape = x.get_shape().as_list()
    if train:
        with tf.variable_scope(self.name) as scope:
            self.beta = tf.get_variable("beta", [shape[-1]], initializer=tf.constant_initializer(0.))
            self.gamma = tf.get_variable("gamma", [shape[-1]], initializer=tf.random_normal_initializer(1., 0.02))
            batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
            ema_apply_op = self.ema.apply([batch_mean, batch_var])
            self.ema_mean, self.ema_var = self.ema.average(batch_mean), self.ema.average(batch_var)
            # The identity ops force the moving-average update to run whenever
            # the batch statistics are consumed.
            with tf.control_dependencies([ema_apply_op]):
                mean, var = tf.identity(batch_mean), tf.identity(batch_var)
    else:
        mean, var = self.ema_mean, self.ema_var
    normed = tf.nn.batch_norm_with_global_normalization(
        x, mean, var, self.beta, self.gamma, self.epsilon, scale_after_normalization=True)
    return normed
# standard convolution layer
def Minibatch_Discriminator(input, num_kernels=100, dim_per_kernel=5, init=False, name='MD'):
    """Append minibatch-discrimination features to ``input``.

    Reads the module-level names ``df_dim`` and ``batchsize``. ``init`` is
    accepted but not read.

    NOTE(review): ``tf.mul``, ``tf.sub`` and ``tf.concat(1, [...])`` look like
    the pre-1.0 TensorFlow API -- confirm the TF version before reuse.

    Args:
        input: Tensor reshaped to ``[batchsize, df_dim * 4]``.
        num_kernels: Number of kernels.
        dim_per_kernel: Dimension of each kernel.
        init: Unused.
        name: Prefix for the ``/theta`` and ``/lws`` variable names.

    Returns:
        The reshaped input concatenated with the features along axis 1.
    """
    num_inputs=df_dim*4
    theta = tf.get_variable(name+"/theta",[num_inputs, num_kernels, dim_per_kernel], initializer=tf.random_normal_initializer(stddev=0.05))
    log_weight_scale = tf.get_variable(name+"/lws",[num_kernels, dim_per_kernel], initializer=tf.constant_initializer(0.0))
    # Rescale theta: divide by its L2 norm over axis 0, multiply by exp(lws).
    W = tf.mul(theta, tf.expand_dims(tf.exp(log_weight_scale)/tf.sqrt(tf.reduce_sum(tf.square(theta),0)),0))
    W = tf.reshape(W,[-1,num_kernels*dim_per_kernel])
    x = input
    x=tf.reshape(x, [batchsize,num_inputs])
    activation = tf.matmul(x, W)
    activation = tf.reshape(activation,[-1,num_kernels,dim_per_kernel])
    # Pairwise L1 distance between samples per kernel; the (1 - eye) factor
    # zeroes each sample's distance to itself.
    abs_dif = tf.mul(tf.reduce_sum(tf.abs(tf.sub(tf.expand_dims(activation,3),tf.expand_dims(tf.transpose(activation,[1,2,0]),0))),2), 1-tf.expand_dims(tf.constant(np.eye(batchsize),dtype=np.float32),1))
    # exp(-distance) summed over axis 2, divided by the sum over all entries.
    f = tf.reduce_sum(tf.exp(-abs_dif),2)/tf.reduce_sum(tf.exp(-abs_dif))
    print(f.get_shape())
    print(input.get_shape())
    return tf.concat(1,[x, f])
def instantiate_weights(self):
    """Define all trainable weights of the model.

    Creates the W/U/b parameters of the z, r and h parts of the GRU cell
    under the ``gru_cell`` scope and the embedding matrix under the
    ``embedding_projection`` scope.
    """
    gate_shape = [self.embed_size, self.hidden_size]
    bias_shape = [self.hidden_size]
    init = self.initializer

    with tf.variable_scope("gru_cell"):
        self.W_z = tf.get_variable("W_z", shape=gate_shape, initializer=init)
        self.U_z = tf.get_variable("U_z", shape=gate_shape, initializer=init)
        self.b_z = tf.get_variable("b_z", shape=bias_shape)

        # GRU parameters:reset gate related
        self.W_r = tf.get_variable("W_r", shape=gate_shape, initializer=init)
        self.U_r = tf.get_variable("U_r", shape=gate_shape, initializer=init)
        self.b_r = tf.get_variable("b_r", shape=bias_shape)

        self.W_h = tf.get_variable("W_h", shape=gate_shape, initializer=init)
        self.U_h = tf.get_variable("U_h", shape=gate_shape, initializer=init)
        self.b_h = tf.get_variable("b_h", shape=bias_shape)

    with tf.variable_scope("embedding_projection"):
        # embedding matrix
        self.Embedding = tf.get_variable(
            "Embedding", shape=[self.vocab_size, self.embed_size], initializer=init)

# test: learn to count. weight of query and story is different
# two step to test
# step1. run train function to train the model. it will save checkpoint
# step2. run predict function to make a prediction based on the model restore from the checkpoint.
def sub_layer_multi_head_attention(self ,layer_index ,Q ,K_s,type,mask=None,is_training=None,dropout_keep_prob=None) :# COMMON FUNCTION
    """Multi-head attention as a sub layer.

    ``V_s`` is a learned variable of shape
    ``(batch_size, sequence_length, d_model)`` created in a scope whose name
    is built from ``type`` and ``layer_index``.

    NOTE(review): ``dropout_keep_prob`` defaults to ``None`` but is used in
    ``1.0 - dropout_keep_prob``, so callers have to pass a number.

    :param layer_index: index of layer number
    :param Q: passed to ``MultiHeadAttention`` as the query
    :param K_s: passed to ``MultiHeadAttention`` as the keys
    :param type: string appended to the scope name and forwarded as ``type``
    :param mask: forwarded to ``MultiHeadAttention``
    :param is_training: forwarded to ``MultiHeadAttention``
    :param dropout_keep_prob: keep probability; ``1.0 - dropout_keep_prob`` is
        forwarded as ``dropout_rate``
    :return: result of ``multi_head_attention_fn()``
    """
    scope_name = "base_mode_sub_layer_multi_head_attention_" + type+str(layer_index)
    with tf.variable_scope(scope_name):
        # TODO: for encoder/decoder with different lengths, decoder_sent_length may be needed here.
        length = self.sequence_length
        # 1. V is a learned parameter.
        V_s = tf.get_variable("V_s", shape=(self.batch_size, length, self.d_model),
                              initializer=self.initializer)
        # 2. Delegate the computation to MultiHeadAttention.
        attention = MultiHeadAttention(
            Q, K_s, V_s, self.d_model, self.d_k, self.d_v, self.sequence_length, self.h,
            type=type, is_training=is_training, mask=mask,
            dropout_rate=(1.0-dropout_keep_prob))
        return attention.multi_head_attention_fn()
def inference(self):
    """Build the graph from embedded inputs to logits.

    Steps performed here:
      1. look up input embeddings, scale them by ``sqrt(d_model)`` and add a
         learned ``input_mask`` variable of shape ``[sequence_length, 1]``;
      2. run the result through ``Encoder``;
      3. flatten the encoder output per sample and project it with
         ``W_projection`` / ``b_projection``.

    :return: the logits tensor
    """
    # 1. embedding for encoder input, with a learned position term
    embedded = tf.nn.embedding_lookup(self.Embedding, self.input_x)
    scale = tf.sqrt(tf.cast(self.d_model, dtype=tf.float32))
    embedded = tf.multiply(embedded, scale)
    input_mask = tf.get_variable("input_mask", [self.sequence_length, 1],
                                 initializer=self.initializer)
    embedded = tf.add(embedded, input_mask)

    # 2. encoder
    encoder_class = Encoder(
        self.d_model, self.d_k, self.d_v, self.sequence_length, self.h,
        self.batch_size, self.num_layer, embedded, embedded,
        dropout_keep_prob=self.dropout_keep_prob,
        use_residual_conn=self.use_residual_conn)
    Q_encoded, K_encoded = encoder_class.encoder_fn()

    # 3. flatten to [batch_size, -1] and project
    flat_encoded = tf.reshape(Q_encoded, shape=(self.batch_size, -1))
    with tf.variable_scope("output"):
        logits = tf.matmul(flat_encoded, self.W_projection) + self.b_projection
    print("logits:",logits)
    return logits
def init():
    """Build a small Encoder fixture with hard-coded hyper-parameters.

    Returns:
        Tuple ``(encoder_class, Q, K_s)`` where ``Q`` and ``K_s`` are both the
        embedded ``input_x`` placeholder.
    """
    # 1. hyper-parameters
    vocab_size = 1000
    d_model = 512
    d_k = 64
    d_v = 64
    sequence_length = 5*10
    h = 8
    batch_size = 4*32
    num_layer = 6
    embed_size = d_model
    initializer = tf.random_normal_initializer(stddev=0.1)

    # 2. Q and K_s both come from the same embedding lookup
    Embedding = tf.get_variable("Embedding_E", shape=[vocab_size, embed_size],
                                initializer=initializer)
    input_x = tf.placeholder(tf.int32, [batch_size, sequence_length], name="input_x")
    print("input_x:",input_x)
    embedded_words = tf.nn.embedding_lookup(Embedding, input_x)
    Q = K_s = embedded_words
    mask = get_mask(batch_size, sequence_length)

    # 3. encoder object
    encoder_class = Encoder(d_model, d_k, d_v, sequence_length, h, batch_size,
                            num_layer, Q, K_s, mask=mask)
    return encoder_class, Q, K_s
def weightVariable(shape,std=1.0,name=None):
    """Create a weight variable with a truncated-normal initializer.

    The initializer's stddev is ``std / sqrt(shape[0])``; the variable is
    named ``'weights'`` unless ``name`` is given.
    """
    if name is None:
        name = 'weights'
    scaled_std = std/math.sqrt(shape[0])
    return tf.get_variable(
        name, shape,
        initializer=tf.truncated_normal_initializer(stddev=scaled_std))
def biasVariable(shape,bias=0.1,name=None):
    """Create a bias variable filled with the constant ``bias``.

    The variable is named ``'biases'`` unless ``name`` is given.
    """
    if name is None:
        name = 'biases'
    constant_fill = tf.constant_initializer(bias)
    return tf.get_variable(name, shape, initializer=constant_fill)
def conv(inputs, kernel_shape, bias_shape, strides, w_i, b_i=None, activation=tf.nn.relu):
    """2-D convolution ('SAME' padding) built on ``tf.get_variable``.

    Args:
        inputs: Input tensor.
        kernel_shape: Shape of the ``'weights'`` variable.
        bias_shape: Shape of the ``'biases'`` variable, or ``None`` for no bias.
        strides: Strides passed to ``tf.nn.conv2d``.
        w_i: Initializer for the kernel.
        b_i: Initializer for the bias.
        activation: Callable applied to the output, or ``None`` to return the
            linear result unchanged.

    Returns:
        The (optionally activated) output tensor.
    """
    kernel = tf.get_variable('weights', shape=kernel_shape, initializer=w_i)
    pre_activation = tf.nn.conv2d(inputs, kernel, strides=strides, padding='SAME')

    if bias_shape is not None:
        bias = tf.get_variable('biases', shape=bias_shape, initializer=b_i)
        pre_activation = pre_activation + bias

    if activation is None:
        return pre_activation
    return activation(pre_activation)
def noisy_dense(inputs, units, bias_shape, c_names, w_i, b_i=None, activation=tf.nn.relu,
                noisy_distribution='factorised'):
    """Fully connected layer with learned noise added to weights and biases.

    Non-tensor inputs are converted to a float tensor and inputs of rank
    greater than 2 are flattened. For ``'independent'`` noise every parameter
    gets its own normal sample; for ``'factorised'`` noise one column sample
    and one row sample, each passed through ``sign(e) * sqrt(|e|)``, are
    combined. Any other value adds no noise.

    Args:
        inputs: Tensor, or a value convertible to one.
        units: Number of output units.
        bias_shape: Shape of the ``'biases'`` variable, or ``None`` for no
            bias. Its first entry must equal ``units``.
        c_names: Collections for the ``'w_noise'`` and ``'b_noise'`` variables.
        w_i: Initializer for ``'weights'`` and ``'w_noise'``.
        b_i: Initializer for ``'biases'`` and ``'b_noise'``.
        activation: Callable applied to the output, or ``None``.
        noisy_distribution: ``'independent'`` or ``'factorised'``.

    Returns:
        The (optionally activated) output tensor.
    """
    def _signed_sqrt(noise):
        return tf.multiply(tf.sign(noise), tf.pow(tf.abs(noise), 0.5))

    if not isinstance(inputs, ops.Tensor):
        inputs = ops.convert_to_tensor(inputs, dtype='float')
    if len(inputs.shape) > 2:
        inputs = tf.contrib.layers.flatten(inputs)
    in_features = inputs.shape[1]

    use_independent = noisy_distribution == 'independent'
    use_factorised = noisy_distribution == 'factorised'

    kernel = tf.get_variable('weights', shape=[in_features, units], initializer=w_i)
    kernel_sigma = tf.get_variable('w_noise', [in_features, units], initializer=w_i,
                                   collections=c_names)
    if use_independent:
        kernel += tf.multiply(tf.random_normal(shape=kernel_sigma.shape), kernel_sigma)
    elif use_factorised:
        input_noise = _signed_sqrt(
            tf.random_normal(tf.TensorShape([in_features, 1]), dtype=tf.float32))
        output_noise = _signed_sqrt(
            tf.random_normal(tf.TensorShape([1, units]), dtype=tf.float32))
        kernel += tf.multiply(input_noise * output_noise, kernel_sigma)
    pre_activation = tf.matmul(inputs, kernel)

    if bias_shape is not None:
        assert bias_shape[0] == units
        bias = tf.get_variable('biases', shape=bias_shape, initializer=b_i)
        bias_sigma = tf.get_variable('b_noise', [1, units], initializer=b_i,
                                     collections=c_names)
        if use_independent:
            bias += tf.multiply(tf.random_normal(shape=bias_sigma.shape), bias_sigma)
        elif use_factorised:
            # The bias reuses the output-side noise sampled for the kernel.
            bias += tf.multiply(output_noise, bias_sigma)
        pre_activation = pre_activation + bias

    if activation is None:
        return pre_activation
    return activation(pre_activation)
def make_skipgram_softmax_loss(embeddings_matrix, vocabulary_size, vector_size):
    """Build a skip-gram loss with a full softmax over the vocabulary.

    Column 0 of the ``(None, 2)`` int32 placeholder is looked up as the
    center word and column 1 is the label; the logits are the dot products of
    the center vector with every row of the embedding variable.

    Args:
        embeddings_matrix: Initial value of the ``'vectors'`` variable.
        vocabulary_size: Number of rows of the embedding variable.
        vector_size: Number of columns of the embedding variable.

    Returns:
        Tuple ``(vectors, minibatch, loss)``: the variable, the placeholder
        and the mean cross-entropy.
    """
    vectors = tf.get_variable(
        'vectors', (vocabulary_size, vector_size), dtype=tf.float32,
        initializer=tf.constant_initializer(embeddings_matrix))
    minibatch = tf.placeholder(shape=(None, 2), dtype=tf.int32)

    center_vectors = tf.nn.embedding_lookup(vectors, minibatch[:,0])
    logits = tf.matmul(center_vectors, vectors, transpose_b=True)
    targets = minibatch[:,1]

    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=targets, logits=logits)
    return vectors, minibatch, tf.reduce_mean(per_example)
def encode(self, inputs, _input_length, _parses):
    """Bag-of-words encoder: a per-position tanh projection, summed.

    Args:
        inputs: Tensor whose axis 2 is contracted with the projection matrix.
        _input_length: Not read here.
        _parses: Not read here.

    Returns:
        Tuple ``(enc_output, enc_final_state)``: the projected states after
        dropout, and their sum over axis 1 (wrapped in a one-element tuple of
        ``LSTMStateTuple`` when the cell type is ``'lstm'``).
    """
    with tf.variable_scope('BagOfWordsEncoder'):
        projection = tf.get_variable('W', (self.embed_size, self.output_size))
        offset = tf.get_variable('b', shape=(self.output_size,),
                                 initializer=tf.constant_initializer(0, tf.float32))

        projected = tf.tensordot(inputs, projection, [[2], [0]]) + offset
        enc_hidden_states = tf.tanh(projected)
        enc_final_state = tf.reduce_sum(enc_hidden_states, axis=1)

        if self._cell_type == 'lstm':
            # The summed state serves as both entries of the LSTM state tuple.
            lstm_state = tf.contrib.rnn.LSTMStateTuple(enc_final_state, enc_final_state)
            enc_final_state = (lstm_state,)

        enc_output = tf.nn.dropout(enc_hidden_states, keep_prob=self._dropout, seed=12345)
    return enc_output, enc_final_state
def add_input_op(self, xavier):
    """Build the input embedding lookup and the output embedding matrix.

    Input embeddings are a trainable variable when
    ``config.train_input_embeddings`` is set -- initialized from
    ``config.input_embedding_matrix`` when one is supplied, otherwise from
    ``xavier`` -- and a constant otherwise. Output embeddings follow the same
    trainable/constant split, with ``xavier`` as the only initializer.

    Args:
        xavier: Initializer used when no pretrained matrix initializes the
            variable.

    Returns:
        Tuple ``(inputs, output_embed_matrix)``: the embedded
        ``self.input_placeholder`` and the output embedding matrix.
    """
    config = self.config
    with tf.variable_scope('embed'):
        # first embed the input
        if config.train_input_embeddings:
            pretrained = config.input_embedding_matrix
            # Truth-testing a multi-element numpy array raises ValueError, so
            # test by length; values without a length (None, scalars) keep the
            # plain truthiness test.
            try:
                has_pretrained = len(pretrained) > 0
            except TypeError:
                has_pretrained = bool(pretrained)
            if has_pretrained:
                initializer = tf.constant_initializer(pretrained)
            else:
                initializer = xavier
            input_embed_matrix = tf.get_variable(
                'input_embedding',
                shape=(config.dictionary_size, config.embed_size),
                initializer=initializer)
        else:
            input_embed_matrix = tf.constant(config.input_embedding_matrix)
        # dictionary size x embed_size
        assert input_embed_matrix.get_shape() == (config.dictionary_size, config.embed_size)

        # now embed the output
        if config.train_output_embeddings:
            output_embed_matrix = tf.get_variable(
                'output_embedding',
                shape=(config.output_size, config.output_embed_size),
                initializer=xavier)
        else:
            output_embed_matrix = tf.constant(config.output_embedding_matrix)
        assert output_embed_matrix.get_shape() == (config.output_size, config.output_embed_size)

    inputs = tf.nn.embedding_lookup([input_embed_matrix], self.input_placeholder)
    # batch size x max length x embed_size
    assert inputs.get_shape()[1:] == (config.max_length, config.embed_size)
    return inputs, output_embed_matrix
def add_decoder_op(self, enc_final_state, enc_hidden_states, output_embed_matrix, training):
    """Build the decoder cell stack and run the sequence decoder.

    Args:
        enc_final_state: Final encoder state, repacked to the decoder cell's
            state structure.
        enc_hidden_states: Encoder outputs; the last dimension is the encoder
            hidden size and they feed the attention mechanism when enabled.
        output_embed_matrix: Passed through to ``decoder.decode``.
        training: Passed through to ``decoder.decode``.

    Returns:
        Whatever ``Seq2SeqDecoder.decode`` returns.
    """
    cell_dec = tf.contrib.rnn.MultiRNNCell([self.make_rnn_cell(i, True) for i in range(self.config.rnn_layers)])
    encoder_hidden_size = int(enc_hidden_states.get_shape()[-1])
    decoder_hidden_size = int(cell_dec.output_size)
    # if encoder and decoder have different sizes, add a projection layer
    if encoder_hidden_size != decoder_hidden_size:
        # NOTE(review): this assert always fails, so the projection code below
        # is unreachable unless asserts are disabled. The tensordot further
        # down contracts kernel axis 1 (decoder size) with the encoder axis --
        # looks like it should be axis 0; confirm before enabling this path.
        assert False, (encoder_hidden_size, decoder_hidden_size)
        with tf.variable_scope('hidden_projection'):
            kernel = tf.get_variable('kernel', (encoder_hidden_size, decoder_hidden_size), dtype=tf.float32)
        # apply a relu to the projection for good measure
        enc_final_state = nest.map_structure(lambda x: tf.nn.relu(tf.matmul(x, kernel)), enc_final_state)
        enc_hidden_states = tf.nn.relu(tf.tensordot(enc_hidden_states, kernel, [[2], [1]]))
    else:
        # flatten and repack the state
        enc_final_state = nest.pack_sequence_as(cell_dec.state_size, nest.flatten(enc_final_state))
    # Wrap the cell so the encoder state is fed to it in one of two ways.
    if self.config.connect_output_decoder:
        cell_dec = ParentFeedingCellWrapper(cell_dec, enc_final_state)
    else:
        cell_dec = InputIgnoringCellWrapper(cell_dec, enc_final_state)
    if self.config.apply_attention:
        attention = LuongAttention(self.config.decoder_hidden_size, enc_hidden_states, self.input_length_placeholder,
                                   probability_fn=tf.nn.softmax)
        cell_dec = AttentionWrapper(cell_dec, attention,
                                    cell_input_fn=lambda inputs, _: inputs,
                                    attention_layer_size=self.config.decoder_hidden_size,
                                    initial_cell_state=enc_final_state)
        # With attention the decoder starts from the wrapper's zero state; the
        # encoder state was already handed over as initial_cell_state.
        enc_final_state = cell_dec.zero_state(self.batch_size, dtype=tf.float32)
    decoder = Seq2SeqDecoder(self.config, self.input_placeholder, self.input_length_placeholder,
                             self.output_placeholder, self.output_length_placeholder, self.batch_number_placeholder)
    return decoder.decode(cell_dec, enc_final_state, self.config.grammar.output_size, output_embed_matrix, training)
def center_loss(features, label, alfa, nrof_classes):
    """Center loss based on the paper "A Discriminative Feature Learning
    Approach for Deep Face Recognition"
    (http://ydwen.github.io/papers/WenECCV16.pdf).

    Args:
        features: 2-D tensor of features; its second dimension gives the
            feature size used for the centers variable.
        label: class indices, reshaped to a flat vector before use.
        alfa: scalar; each selected center moves towards its features by
            ``(1 - alfa) * (center - feature)``.
        nrof_classes: number of rows of the non-trainable ``centers`` variable.

    Returns:
        A pair ``(loss, centers)``: the mean squared distance between the
        features and their class centers, and the center update op.
    """
    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features],
                              dtype=tf.float32,
                              initializer=tf.constant_initializer(0),
                              trainable=False)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)
    diff = (1 - alfa) * (centers_batch - features)
    centers = tf.scatter_sub(centers, label, diff)
    # Tie the center update to the loss: without this dependency the
    # scatter_sub only runs if the caller explicitly evaluates the returned
    # op, so a caller that only uses `loss` would never move the centers.
    with tf.control_dependencies([centers]):
        loss = tf.reduce_mean(tf.square(features - centers_batch))
    return loss, centers
def build_decoder(self):
    """Build the decoder p(X|h) as a softmax over the vocabulary.

    Creates variables ``R`` (vocab_size x h_dim) and ``b`` (vocab_size) in
    the "decoder" scope, computes ``e = -h R^T + b`` from ``self.h`` and
    stores the squeezed softmax of ``e`` in ``self.p_x_i``.
    """
    with tf.variable_scope("decoder"):
        R = tf.get_variable("R", [self.reader.vocab_size, self.h_dim])
        b = tf.get_variable("b", [self.reader.vocab_size])

        # The logits used to be multiplied by a dense vocab_size x vocab_size
        # identity matrix (all one-hot word vectors at once). That product
        # leaves the logits unchanged but stores a huge constant in the
        # graph, so the energies are computed directly instead.
        e = -tf.matmul(self.h, R, transpose_b=True) + b
        self.p_x_i = tf.squeeze(tf.nn.softmax(e))
def build_generator(self):
    """Build p(X|h) as a softmax over the vocabulary.

    Creates ``self.R`` (vocab_size x h_dim) and ``self.b`` (vocab_size) in
    the "generator" scope, stores the energies ``-h R^T + b`` in ``self.e``
    and their squeezed softmax in ``self.p_x_i``.
    """
    vocab_size = self.reader.vocab_size
    with tf.variable_scope("generator"):
        self.R = tf.get_variable("R", [vocab_size, self.h_dim])
        self.b = tf.get_variable("b", [vocab_size])

        projected = tf.matmul(self.h, self.R, transpose_b=True)
        self.e = -projected + self.b

        word_probabilities = tf.nn.softmax(self.e)
        self.p_x_i = tf.squeeze(word_probabilities)
def create_model(self, model_input, vocab_size, num_frames, **unused_params):
    """Ensemble three mixture-of-experts heads over max/mean/min pooled frames.

    Args:
        model_input: 3-D tensor; axis 1 is pooled over and axis 2 is the
            feature axis.
        vocab_size: number of output classes.
        num_frames: not used by this model.
        **unused_params: ignored.

    Returns:
        A dict with "prediction_frames" (the per-head probabilities reshaped
        to ``[-1, vocab_size]``) and "predictions" (the per-head
        probabilities combined with learned softmax weights).
    """
    input_dims = model_input.get_shape().as_list()

    # Mask of frames that contain any non-zero feature, shaped [-1, frames, 1].
    frame_energy = tf.reduce_sum(tf.abs(model_input), axis=2)
    ones = tf.ones(tf.shape(frame_energy))
    zeros = tf.zeros(tf.shape(frame_energy))
    frame_mask = tf.reshape(
        tf.where(tf.greater(frame_energy, zeros), ones, zeros),
        [-1, input_dims[1], 1])

    # Three pooled views of the input: max, masked mean and min over axis 1.
    max_pooled = tf.reduce_max(model_input, axis=1)
    mean_pooled = (tf.reduce_sum(model_input * frame_mask, axis=1)
                   / (tf.reduce_sum(frame_mask, axis=1) + 1e-6))
    min_pooled = tf.reduce_min(model_input, axis=1)

    # One sub-model per pooled view, built in the order max, mean, min.
    head_inputs = []
    head_probabilities = []
    for pooled, scope_suffix in ((max_pooled, "_max"),
                                 (mean_pooled, "_mean"),
                                 (min_pooled, "_min")):
        head_input, head_probability = self.sub_moe(pooled, vocab_size,
                                                    scopename=scope_suffix)
        head_inputs.append(head_input)
        head_probabilities.append(head_probability)

    final_probilities = tf.stack(tuple(head_probabilities), axis=1)

    # Per-class mixing weights over the three heads, computed from the head
    # inputs and normalised across the head axis.
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[input_dims[2], 3, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    activations = tf.stack(tuple(head_inputs), axis=2)
    mixing_logits = tf.einsum("aij,ijk->ajk", activations, weight2d)
    weight = tf.nn.softmax(mixing_logits, dim=1)

    return {
        "prediction_frames": tf.reshape(final_probilities, [-1, vocab_size]),
        "predictions": tf.reduce_sum(final_probilities * weight, axis=1),
    }
def __call__(self, x, train=True):
    """Apply batch normalization to ``x``.

    Creates (or reuses) ``beta``, ``gamma``, ``mean`` and ``variance``
    variables shaped like ``x`` without its first axis, inside the variable
    scope named ``self.name``.

    Args:
        x: input tensor; moments are taken over axis 0.
        train: when true, normalize with the current batch moments and update
            the moving averages held by ``self.ema``; otherwise normalize
            with the moving averages.

    Returns:
        The normalized tensor.
    """
    shape = x.get_shape().as_list()

    with tf.variable_scope(self.name):
        self.beta = tf.get_variable(
            "beta", shape[1:], initializer=tf.constant_initializer(0.))
        self.gamma = tf.get_variable(
            "gamma", shape[1:], initializer=tf.random_normal_initializer(1., 0.02))
        self.mean = tf.get_variable(
            "mean", shape[1:], initializer=tf.constant_initializer(0.),
            trainable=False)
        self.variance = tf.get_variable(
            "variance", shape[1:], initializer=tf.constant_initializer(1.),
            trainable=False)

        if train:
            batch_mean, batch_var = tf.nn.moments(x, [0], name='moments')

            # In graph mode `assign` only builds an op. The ops used to be
            # discarded, so the variables kept their initial values and the
            # moving averages tracked constants. Make them prerequisites of
            # the moving-average update so they actually execute.
            update_stats = [self.mean.assign(batch_mean),
                            self.variance.assign(batch_var)]
            with tf.control_dependencies(update_stats):
                ema_apply_op = self.ema.apply([self.mean, self.variance])

            # Evaluating the normalized output triggers the EMA update.
            with tf.control_dependencies([ema_apply_op]):
                mean, var = tf.identity(batch_mean), tf.identity(batch_var)
        else:
            mean, var = self.ema.average(self.mean), self.ema.average(self.variance)

        normed = tf.nn.batch_normalization(
            x, mean, var, self.beta, self.gamma, self.epsilon)
    return normed
def cnn(self, model_input, l2_penalty=1e-8, num_filters = [1024, 1024, 1024], filter_sizes = [1,2,3], sub_scope="", **unused_params):
    """Apply a bank of temporal filters of several lengths, then batch norm.

    Each filter of length ``fs`` sees the current frame concatenated with
    the ``fs - 1`` preceding frames (zero-padded at the start), projected by
    a learned matrix.

    Args:
        model_input: 3-D tensor; axis 1 is the frame axis and axis 2 the
            feature axis.
        l2_penalty: L2 regularization strength for the filter variables.
        num_filters: output width of each filter (defaults are not mutated).
        filter_sizes: temporal length of each filter, paired with
            ``num_filters`` (defaults are not mutated).
        sub_scope: prefix for variable and batch-norm scope names.
        **unused_params: ignored.

    Returns:
        A pair ``(cnn_output, max_frames)``: the batch-normalized
        concatenation of all filter outputs, and the static size of axis 1
        of the input.
    """
    input_dims = model_input.get_shape().as_list()
    max_frames = input_dims[1]
    num_features = input_dims[2]

    # shifted[i] is the input delayed by i frames: pad at the front of the
    # frame axis, then crop back to the original length.
    shifted = [
        model_input if delay == 0
        else tf.pad(model_input,
                    paddings=[[0, 0], [delay, 0], [0, 0]])[:, :max_frames, :]
        for delay in range(max(filter_sizes))
    ]

    filter_outputs = []
    for width, length in zip(num_filters, filter_sizes):
        window = tf.concat(shifted[:length], axis=2)
        kernel = tf.get_variable(
            sub_scope + "cnn-filter-len%d" % length,
            shape=[num_features * length, width],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
            regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
        filter_outputs.append(tf.einsum("ijk,kl->ijl", window, kernel))

    stacked = tf.concat(filter_outputs, axis=2)
    normalized = slim.batch_norm(
        stacked,
        center=True,
        scale=True,
        is_training=FLAGS.train,
        scope=sub_scope + "cluster_bn")
    return normalized, max_frames
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    """Stack CNN -> RNN -> mixture-of-experts stages and ensemble their outputs.

    Each stage runs ``self.cnn`` and ``self.rnn`` on its input, feeds the
    result to ``self.sub_moe``, then max-pools the CNN output over pairs of
    frames to form the next stage's input.

    Args:
        model_input: input tensor for the first stage.
        vocab_size: number of output classes.
        num_frames: frame counts passed to ``self.rnn``; halved (with a
            floor of 1) after every stage.
        l2_penalty: not used by this model.
        **unused_params: ignored.

    Returns:
        A dict with "prediction_frames" (the per-stage probabilities reshaped
        to ``[-1, vocab_size]``) and "predictions" (the per-stage
        probabilities combined with learned softmax weights).
    """
    num_extend = FLAGS.moe_num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)

    stage_input = model_input
    stage_probabilities = []
    stage_features = []

    # Stage ids are 1-based because they are used in the scope names.
    for stage in range(1, num_extend + 1):
        cnn_output, num_t = self.cnn(stage_input,
                                     num_filters=num_filters,
                                     filter_sizes=filter_sizes,
                                     sub_scope="cnn%d" % stage)
        cnn_output = tf.nn.relu(cnn_output)

        rnn_features = self.rnn(cnn_output, lstm_size, num_frames,
                                sub_scope="rnn%d" % stage)
        stage_features.append(rnn_features)
        stage_probabilities.append(
            self.sub_moe(rnn_features, vocab_size, scopename="moe%d" % stage))

        # Drop a trailing odd frame, then max-pool over windows of pool_size.
        pooled_len = num_t // pool_size
        trimmed_len = pool_size * pooled_len
        windows = tf.reshape(cnn_output[:, :trimmed_len, :],
                             [-1, pooled_len, pool_size, features_size])
        stage_input = tf.reduce_max(windows, axis=2)
        num_frames = tf.maximum(num_frames // pool_size, 1)

    final_probilities = tf.stack(stage_probabilities, axis=1)
    moe_inputs = tf.stack(stage_features, axis=1)

    # Per-class mixing weights over the stages, normalised across the stage axis.
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, features_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    mixing_logits = tf.einsum("aij,ijk->aik", moe_inputs, weight2d)
    weight = tf.nn.softmax(mixing_logits, dim=1)

    return {
        "prediction_frames": tf.reshape(final_probilities, [-1, vocab_size]),
        "predictions": tf.reduce_sum(final_probilities * weight, axis=1),
    }