The following 41 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.contrib.layers.xavier_initializer().
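Before the project examples, here is a minimal usage sketch (not taken from any of the projects below) showing the two common patterns: passing the initializer to tf.get_variable and passing it to a layer's kernel_initializer argument. The tensor names and shapes are illustrative only. Note that tf.contrib was removed in TensorFlow 2.x, where tf.keras.initializers.GlorotUniform / GlorotNormal are the closest equivalents.

import tensorflow as tf
from tensorflow.contrib.layers import xavier_initializer

# Illustrative TF 1.x graph-mode sketch; names and shapes are made up.
inputs = tf.placeholder(tf.float32, [None, 128], name='inputs')

# Pattern 1: explicit variable creation with the Xavier (Glorot) initializer.
weights = tf.get_variable('weights', shape=[128, 64],
                          initializer=xavier_initializer(uniform=True))

# Pattern 2: pass the initializer to a layer constructor.
hidden = tf.layers.dense(inputs, 64, activation=tf.nn.relu,
                         kernel_initializer=xavier_initializer())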
def get_weight_variable(shape, name=None, type='xavier_uniform', regularize=True, **kwargs):

    initialise_from_constant = False
    if type == 'xavier_uniform':
        initial = xavier_initializer(uniform=True, dtype=tf.float32)
    elif type == 'xavier_normal':
        initial = xavier_initializer(uniform=False, dtype=tf.float32)
    elif type == 'he_normal':
        initial = variance_scaling_initializer(uniform=False, factor=2.0, mode='FAN_IN', dtype=tf.float32)
    elif type == 'he_uniform':
        initial = variance_scaling_initializer(uniform=True, factor=2.0, mode='FAN_IN', dtype=tf.float32)
    elif type == 'caffe_uniform':
        initial = variance_scaling_initializer(uniform=True, factor=1.0, mode='FAN_IN', dtype=tf.float32)
    elif type == 'simple':
        stddev = kwargs.get('stddev', 0.02)
        initial = tf.truncated_normal(shape, stddev=stddev, dtype=tf.float32)
        initialise_from_constant = True
    elif type == 'bilinear':
        weights = _bilinear_upsample_weights(shape)
        initial = tf.constant(weights, shape=shape, dtype=tf.float32)
        initialise_from_constant = True
    else:
        raise ValueError('Unknown initialisation requested: %s' % type)

    if name is None:  # This keeps the option open to use unnamed Variables
        weight = tf.Variable(initial)
    else:
        if initialise_from_constant:
            weight = tf.get_variable(name, initializer=initial)
        else:
            weight = tf.get_variable(name, shape=shape, initializer=initial)

    if regularize:
        tf.add_to_collection('weight_variables', weight)

    return weight
def cnn_network(self, units, n_layers, filter_width):
    """Assemble Convolutional neural network

    Args:
        units: input units to be convolved with kernels
        n_layers: number of layers
        filter_width: width of the filter (kernel)

    Returns:
        units: output units of the CNN
        auxiliary_outputs: auxiliary outputs from every layer
    """
    n_filters = units.get_shape().as_list()[-1]
    auxiliary_outputs = []
    for n_layer in range(n_layers):
        units = tf.layers.conv1d(units,
                                 n_filters,
                                 filter_width,
                                 padding='same',
                                 name='Layer_' + str(n_layer),
                                 activation=None,
                                 kernel_initializer=xavier_initializer())
        auxiliary_outputs.append(units)
        units = tf.nn.relu(units)
    return units, auxiliary_outputs
def make_fc_layer(
        self, inp_lyr, name_fc_lyr,
        name_w, shp_w, name_b=None, shp_b=None,
        initializer=xavier_init(uniform=False)
):
    """ TODO - regularize batch norm params? """
    W = self.make_wbkernels(name_w, shp_w, initializer=initializer)
    b = self.make_wbkernels(
        name_b, shp_b, initializer=tf.zeros_initializer()
    )
    fc_lyr = tf.nn.bias_add(
        tf.matmul(inp_lyr, W, name=name_fc_lyr + '_matmul'), b,
        data_format=self.data_format, name=name_fc_lyr,
    )
    if self.use_batch_norm:
        fc_lyr = tf.contrib.layers.batch_norm(
            fc_lyr, decay=self.batch_norm_decay, center=True, scale=True,
            data_format=self.data_format, is_training=self.is_training
        )
    return fc_lyr
def _build_net(self, input_BO, scope):
    """ The Actor network.

    Uses ReLUs for all hidden layers, but a tanh on the output to bound the
    action. This follows their 'low-dimensional networks' using 400 and 300
    units for the hidden layers. Set `reuse=False`. I don't use batch
    normalization or their precise weight initialization.
    """
    with tf.variable_scope(scope, reuse=False):
        hidden1 = layers.fully_connected(input_BO,
                num_outputs=400,
                weights_initializer=layers.xavier_initializer(),
                activation_fn=tf.nn.relu)
        hidden2 = layers.fully_connected(hidden1,
                num_outputs=300,
                weights_initializer=layers.xavier_initializer(),
                activation_fn=tf.nn.relu)
        actions_BA = layers.fully_connected(hidden2,
                num_outputs=self.ac_dim,
                weights_initializer=layers.xavier_initializer(),
                activation_fn=tf.nn.tanh)  # Note the tanh!
        # This should broadcast, but haven't tested with ac_dim > 1.
        actions_BA = tf.multiply(actions_BA, self.ac_high)
        return actions_BA
def add_layer(inputs, in_size, out_size, n_layer, activation_function=None):
    # add one more layer and return the output of this layer
    layer_name = 'layer%s' % n_layer
    with tf.variable_scope(layer_name):
        with tf.variable_scope('weights'):
            Weights = tf.get_variable(shape=[in_size, out_size],
                                      name='W',
                                      initializer=xavier_initializer())
            tf.histogram_summary(layer_name + '/weights', Weights)
        with tf.variable_scope('biases'):
            biases = tf.get_variable(shape=[1, out_size],
                                     name='b',
                                     initializer=xavier_initializer())
            tf.histogram_summary(layer_name + '/biases', biases)
        with tf.variable_scope('Wx_plus_b'):
            Wx_plus_b = tf.add(tf.matmul(inputs, Weights), biases)
        if activation_function is None:
            outputs = Wx_plus_b
        else:
            outputs = activation_function(Wx_plus_b)
        tf.histogram_summary(layer_name + '/outputs', outputs)
        return outputs


# Make up some real data
def build_fully_connected_layers_with_batch_norm(the_input, shape, mode, num_previous_fully_connected_layers=0,
                                                 activation_summaries=[]):
    """
    A function to build fully connected layers with batch normalization onto the
    computational graph from the given specifications.

    shape is of the format:
    [num_neurons_layer_1, num_neurons_layer_2, ..., num_neurons_layer_n]
    """
    for index, size in enumerate(shape):
        with tf.variable_scope("FC_" + str(num_previous_fully_connected_layers + index + 1)):
            temp_pre_activation = tf.layers.dense(
                inputs=the_input,
                units=size,
                use_bias=False,
                kernel_initializer=layers.xavier_initializer(),
                name="layer")

            temp_batch_normalized = tf.layers.batch_normalization(temp_pre_activation,
                                                                  training=(mode == tf.estimator.ModeKeys.TRAIN),
                                                                  fused=True)

            temp_layer_output = tf.nn.relu(temp_batch_normalized)

            the_input = temp_layer_output

        activation_summaries.append(layers.summarize_activation(temp_layer_output))

    return the_input, activation_summaries
def generator(z):
    # Because we cannot (yet) take bias_add's higher-order derivative in
    # TensorFlow, a vanilla implementation of the FC layer is used here instead
    # of the one in tensorflow.contrib.layers. The final conv case below is
    # handled the same way for the same reason.
    weights = slim.model_variable(
        'fn_weights', shape=(FLAGS.z_dim, 4 * 4 * 512),
        initializer=ly.xavier_initializer())
    bias = slim.model_variable(
        'fn_bias', shape=(4 * 4 * 512, ),
        initializer=tf.zeros_initializer)
    train = tf.nn.relu(ly.batch_norm(fully_connected(z, weights, bias)))
    train = tf.reshape(train, (-1, 4, 4, 512))
    train = ly.conv2d_transpose(train, 256, 3, stride=2,
                                activation_fn=tf.nn.relu,
                                normalizer_fn=ly.batch_norm, padding='SAME')
    train = ly.conv2d_transpose(train, 128, 3, stride=2,
                                activation_fn=tf.nn.relu,
                                normalizer_fn=ly.batch_norm, padding='SAME')
    train = ly.conv2d_transpose(train, 64, 3, stride=2,
                                activation_fn=tf.nn.relu,
                                normalizer_fn=ly.batch_norm, padding='SAME')
    train = ly.conv2d_transpose(train, 1, 3, stride=1,
                                activation_fn=None, padding='SAME',
                                biases_initializer=None)
    bias = slim.model_variable('bias', shape=(1, ),
                               initializer=tf.zeros_initializer)
    train += bias
    train = tf.nn.tanh(train)
    return train
def generator(z):
    weights = slim.model_variable(
        'fn_weights', shape=(FLAGS.z_dim, 4 * 4 * 512),
        initializer=ly.xavier_initializer())
    bias = slim.model_variable(
        'fn_bias', shape=(4 * 4 * 512, ),
        initializer=tf.zeros_initializer)
    train = tf.nn.relu(fully_connected(z, weights, bias))
    train = tf.reshape(train, (-1, 4, 4, 512))
    train = ly.conv2d_transpose(train, 256, 3, stride=2,
                                activation_fn=tf.nn.relu,
                                normalizer_fn=ly.batch_norm, padding='SAME',
                                weights_initializer=tf.random_normal_initializer(0, 0.02))
    train = ly.conv2d_transpose(train, 128, 3, stride=2,
                                activation_fn=tf.nn.relu,
                                normalizer_fn=ly.batch_norm, padding='SAME',
                                weights_initializer=tf.random_normal_initializer(0, 0.02))
    train = ly.conv2d_transpose(train, 64, 3, stride=2,
                                activation_fn=tf.nn.relu,
                                normalizer_fn=ly.batch_norm, padding='SAME',
                                weights_initializer=tf.random_normal_initializer(0, 0.02))
    train = ly.conv2d_transpose(train, 1, 3, stride=1,
                                activation_fn=None, padding='SAME',
                                weights_initializer=tf.random_normal_initializer(0, 0.02),
                                biases_initializer=None)
    bias = slim.model_variable('bias', shape=(1, ),
                               initializer=tf.zeros_initializer)
    train += bias
    train = tf.nn.tanh(train)
    return train
def __conv(self, input, kernel, strides=[1, 1, 1, 1], nonlinearity=True,
           batch_norm=True, name="conv"):
    with tf.variable_scope(name) as scope:
        kernel = tf.get_variable('weights', shape=kernel,
                                 initializer=xavier_initializer(dtype=tf.float32),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(input, kernel, strides, padding='SAME')

        if batch_norm:
            conv = self.__batch_norm_wrapper(conv)

        if nonlinearity:
            conv = tf.nn.elu(conv, name=scope.name)

        return conv
def __init__(self, num_class=101, keep_prob=0.6, batch_size=3, epoch=40, lr=1e-4):
    self.IMG_WIDTH = 171
    self.IMG_HEIGHT = 128

    self.CROP_WIDTH = 112
    self.CROP_HEIGHT = 112
    self.graph = tf.Graph()
    self.num_class = num_class
    self.epoch = epoch
    self.CLIP_LENGTH = 16
    self.keep_prob = keep_prob
    self.batch_size = batch_size
    decay_epoch = 10  # decay the learning rate every `decay_epoch` epochs
    # train clip: 9537*5 CLIP=5
    # test  clip: 3783*5 CLIP=5
    # train clip: 9537*3 CLIP=3
    # test  clip: 3783*3 CLIP=3
    self.n_step_epoch = int(9537 / batch_size)
    with self.graph.as_default():
        self.inputs = tf.placeholder(tf.float32,
                                     [None, self.CLIP_LENGTH, self.CROP_HEIGHT, self.CROP_WIDTH, 3])
        self.labels = tf.placeholder(tf.int64, [batch_size, ])

        self.initializer = layers.xavier_initializer()
        self.global_step = tf.Variable(0, trainable=False, name="global_step")
        self.lr = tf.train.exponential_decay(lr, self.global_step,
                                             int(decay_epoch * self.n_step_epoch), 1e-1, True)
        tf.add_to_collection(tf.GraphKeys.GLOBAL_STEP, self.global_step)
def make_wbkernels(
        self, name, shp=None, initializer=xavier_init(uniform=False),
):
    """ make weights, biases, kernels """
    return tf.get_variable(
        name, shp, initializer=initializer, regularizer=self.reg
    )
def make_active_fc_layer(
        self, inp_lyr, name_fc_lyr,
        name_w, shp_w, name_b=None, shp_b=None,
        act=tf.nn.relu, initializer=xavier_init(uniform=False)
):
    return act(self.make_fc_layer(
        inp_lyr, name_fc_lyr, name_w, shp_w, name_b, shp_b,
        initializer=initializer
    ), name=name_fc_lyr + '_act')
def _build_net(self, input_BO, acts_BO, scope):
    """ The Critic network.

    Uses ReLUs for all hidden layers. The output consists of one Q-value for
    each batch element. Set `reuse=False`. I don't use batch normalization or
    their precise weight initialization. Unlike the actor, it uses the actions
    here, but they are NOT included in the first hidden layer. In addition, we
    do a tf.reshape to get an output of shape (B,), not (B,1). Seems like
    tf.squeeze doesn't work with `?`.
    """
    with tf.variable_scope(scope, reuse=False):
        hidden1 = layers.fully_connected(input_BO,
                num_outputs=400,
                weights_initializer=layers.xavier_initializer(),
                activation_fn=tf.nn.relu)
        # Insert the concatenation here. This should be fine, I think.
        state_action = tf.concat(axis=1, values=[hidden1, acts_BO])
        hidden2 = layers.fully_connected(state_action,
                num_outputs=300,
                weights_initializer=layers.xavier_initializer(),
                activation_fn=tf.nn.relu)
        qvals_B = layers.fully_connected(hidden2,
                num_outputs=1,
                weights_initializer=layers.xavier_initializer(),
                activation_fn=None)
        return tf.reshape(qvals_B, shape=[-1])
def __init__(self, session, ob_dim=None, n_epochs=20, stepsize=1e-3):
    """
    The network gets constructed upon initialization, so future calls to
    self.fit will remember this. Right now we assume a preprocessing that
    results in ob_dim*2+1 dimensions, and we assume a fixed neural network
    architecture (input-50-50-1, fully connected with tanh nonlinearities),
    which we should probably change.

    The number of outputs is one, so that ypreds_n is the predicted vector of
    state values, to be compared against ytargs_n. Since ytargs_n is of shape
    (n,), we need to apply a "squeeze" on the final predictions, which would
    otherwise be of shape (n,1). Bleh.
    """
    # Value function V(s_t) (or b(s_t)), parameterized as a neural network.
    self.ob_no = tf.placeholder(shape=[None, ob_dim*2+1], name="nnvf_ob", dtype=tf.float32)
    self.h1 = layers.fully_connected(self.ob_no,
            num_outputs=50,
            weights_initializer=layers.xavier_initializer(uniform=True),
            activation_fn=tf.nn.tanh)
    self.h2 = layers.fully_connected(self.h1,
            num_outputs=50,
            weights_initializer=layers.xavier_initializer(uniform=True),
            activation_fn=tf.nn.tanh)
    self.ypreds_n = layers.fully_connected(self.h2,
            num_outputs=1,
            weights_initializer=layers.xavier_initializer(uniform=True),
            activation_fn=None)
    self.ypreds_n = tf.reshape(self.ypreds_n, [-1])  # (?,1) --> (?,). =)

    # Form the loss function, which is the simple (mean) L2 error.
    self.n_epochs = n_epochs
    self.lrate = stepsize
    self.ytargs_n = tf.placeholder(shape=[None], name="nnvf_y", dtype=tf.float32)
    self.l2_error = tf.reduce_mean(tf.square(self.ypreds_n - self.ytargs_n))
    self.fit_op = tf.train.AdamOptimizer(self.lrate).minimize(self.l2_error)
    self.sess = session
def policy_model(data_in, action_dim):
    """
    Create a neural network representing the BC policy. It will be trained
    using standard supervised learning techniques.

    Parameters
    ----------
    data_in: [Tensor]
        The input (a placeholder) to the network, with leading dimension
        representing the batch size.
    action_dim: [int]
        Number of actions, each of which (at least for MuJoCo) is
        continuous-valued.

    Returns
    -------
    out: [Tensor]
        The output tensor which represents the predicted (or desired, if
        testing) action to take for the agent.
    """
    with tf.variable_scope("BCNetwork", reuse=False):
        out = data_in
        out = layers.fully_connected(out,
                num_outputs=100,
                weights_initializer=layers.xavier_initializer(uniform=True),
                activation_fn=tf.nn.tanh)
        out = layers.fully_connected(out,
                num_outputs=100,
                weights_initializer=layers.xavier_initializer(uniform=True),
                activation_fn=tf.nn.tanh)
        out = layers.fully_connected(out,
                num_outputs=action_dim,
                weights_initializer=layers.xavier_initializer(uniform=True),
                activation_fn=None)
        return out
def fully_connected(bottom, n_out, name, reuse=DO_SHARE):
    shape = bottom.get_shape().as_list()
    with tf.variable_scope(name, reuse=reuse):
        # need to flatten the final result; find the dimension to be flattened
        dim = 1
        for d in shape[1:]:
            dim *= d
        # print(dim)
        x = tf.reshape(bottom, [-1, dim])
        weights = tf.get_variable('weights', [dim, n_out], tf.float32, xavier_initializer())
        biases = tf.get_variable('bias', [n_out], tf.float32, tf.constant_initializer(0.0))
        logits = tf.nn.bias_add(tf.matmul(x, weights), biases)
        return tf.nn.relu(logits)
def deconv_layer(bottom, shape, output_shape, name, reuse=DO_SHARE):
    # doubtful about this
    with tf.variable_scope(name, reuse=reuse):
        # shape will be in the following form:
        # [height, width, output_channels, input_channels]
        weights = tf.get_variable('weights', shape, tf.float32, xavier_initializer())
        biases = tf.get_variable('bias', shape[-2], tf.float32, tf.constant_initializer(0.0))
        dconv = tf.nn.conv2d_transpose(bottom, weights, output_shape=output_shape,
                                       strides=[1, 1, 1, 1], padding='VALID')
        activation = tf.nn.relu(tf.nn.bias_add(dconv, biases))
        # print(activation.get_shape())
        return activation
def conv_layer(bottom, shape, name, reuse=DO_SHARE):
    with tf.variable_scope(name, reuse=reuse):
        # print 'hi' + name, reuse
        weights = tf.get_variable('weights', shape, tf.float32, xavier_initializer())
        biases = tf.get_variable('bias', shape[-1], tf.float32, tf.constant_initializer(0.0))
        conv = tf.nn.conv2d(bottom, weights, [1, 1, 1, 1], padding='SAME')
        activation = tf.nn.relu(tf.nn.bias_add(conv, biases))
        return activation
def _init_embedding(self, scope):
    with tf.variable_scope(scope):
        with tf.variable_scope("embedding") as scope:
            self.embedding_matrix = tf.get_variable(
                name="embedding_matrix",
                shape=[self.vocab_size, self.embedding_size],
                initializer=layers.xavier_initializer(),
                dtype=tf.float32)
            self.inputs_embedded = tf.nn.embedding_lookup(
                self.embedding_matrix, self.inputs)
def task_specific_attention(inputs, output_size,
                            initializer=layers.xavier_initializer(),
                            activation_fn=tf.tanh, scope=None):
    """
    Performs task-specific attention reduction, using learned
    attention context vector (constant within task of interest).

    Args:
        inputs: Tensor of shape [batch_size, units, input_size]
            `input_size` must be static (known)
            `units` axis will be attended over (reduced from output)
            `batch_size` will be preserved
        output_size: Size of output's inner (feature) dimension

    Returns:
        outputs: Tensor of shape [batch_size, output_dim].
    """
    assert len(inputs.get_shape()) == 3 and inputs.get_shape()[-1].value is not None

    with tf.variable_scope(scope or 'attention') as scope:
        attention_context_vector = tf.get_variable(name='attention_context_vector',
                                                   shape=[output_size],
                                                   initializer=initializer,
                                                   dtype=tf.float32)
        input_projection = layers.fully_connected(inputs, output_size,
                                                  activation_fn=activation_fn,
                                                  scope=scope)

        vector_attn = tf.reduce_sum(tf.multiply(input_projection, attention_context_vector),
                                    axis=2, keep_dims=True)
        attention_weights = tf.nn.softmax(vector_attn, dim=1)
        weighted_projection = tf.multiply(input_projection, attention_weights)

        outputs = tf.reduce_sum(weighted_projection, axis=1)

        return outputs
def test_ddpg():
    import gym_mix
    env = gym.make('ContinuousCopyRand-v0')
    env = wrappers.TimeLimit(env, max_episode_steps=0)

    @model(optimizer=tf.train.AdamOptimizer(0.0001),
           tracker=tf.train.ExponentialMovingAverage(1 - 0.001))
    def actor(x):
        x = layers.fully_connected(x, 50, biases_initializer=layers.xavier_initializer())
        a = layers.fully_connected(x, env.action_space.shape[0], None,
                                   weights_initializer=tf.random_normal_initializer(0, 1e-4))
        return a

    @model(optimizer=tf.train.AdamOptimizer(.001),
           tracker=tf.train.ExponentialMovingAverage(1 - 0.001))
    def critic(x, a):
        x = layers.fully_connected(x, 300, biases_initializer=layers.xavier_initializer())
        x = tf.concat([x, a], axis=1)
        x = layers.fully_connected(x, 300, biases_initializer=layers.xavier_initializer())
        x = layers.fully_connected(x, 300, biases_initializer=layers.xavier_initializer())
        q = layers.fully_connected(x, 1, None,
                                   weights_initializer=tf.random_normal_initializer(0, 1e-4))
        return tf.squeeze(q, 1)

    agent = DdpgAgent(env, actor, critic)

    for ep in range(10000):
        R, _ = agent.play_episode()

        if ep % 100 == 0:
            print(f'Return after episode {ep} is {R}')
def get_conv_var(self, f_size, in_c, out_c, name):
    if name in self.params.keys():
        w_initializer = tf.constant_initializer(self.params[name][0].transpose((2, 3, 1, 0)))
        b_initializer = tf.constant_initializer(self.params[name][1])
    else:
        b_initializer = w_initializer = xavier_initializer()

    f = tf.get_variable(name + '_f', [f_size, f_size, in_c, out_c],
                        initializer=w_initializer, regularizer=l2_regularizer(self.l2_beta))
    b = tf.get_variable(name + '_b', [out_c], initializer=b_initializer)

    return f, b
def get_fc_var(self, in_size, out_size, name):
    if name in self.params.keys():
        w_initializer = tf.constant_initializer(self.params[name][0].transpose((1, 0)))
        b_initializer = tf.constant_initializer(self.params[name][1])
    else:
        b_initializer = w_initializer = xavier_initializer()

    w = tf.get_variable(name + '_w', [in_size, out_size],
                        initializer=w_initializer, regularizer=l2_regularizer(self.l2_beta))
    b = tf.get_variable(name + '_b', [out_size], initializer=b_initializer)

    return w, b
def add_logits_op(self):
    with tf.variable_scope('lstm'):
        W_i = tf.get_variable('W_i', [self.input_size, self.num_hidden],
                              initializer=xav())
        b_i = tf.get_variable('b_i', [self.num_hidden],
                              initializer=tf.constant_initializer(0.))

        reshaped_features = tf.transpose(self.input_features, [1, 0, 2])
        print('reshaped_features: ', reshaped_features.shape)
        reshaped_features = tf.reshape(reshaped_features, [-1, self.input_size])
        proj_input_features = tf.matmul(reshaped_features, W_i) + b_i
        proj_input_features = tf.split(proj_input_features, 10, 0)

        # define lstm cell
        lstm_fw = tf.contrib.rnn.LSTMCell(self.num_hidden, state_is_tuple=True)

        outputs, final_state = tf.contrib.rnn.static_rnn(lstm_fw,
                                                         inputs=proj_input_features,
                                                         dtype=tf.float32)
        outputs = tf.transpose(outputs, [1, 0, 2])
        outputs = tf.reshape(outputs, [-1, self.num_hidden])

    with tf.variable_scope('output_projection'):
        W_o = tf.get_variable('Wo', [self.num_hidden, self.num_classes],
                              initializer=xav())
        b_o = tf.get_variable('bo', [self.num_classes],
                              initializer=tf.constant_initializer(0.))
        self.logits = tf.matmul(outputs, W_o) + b_o
        self.logits = tf.expand_dims(self.logits, 0)
def task_specific_attention(inputs, output_size,
                            initializer=layers.xavier_initializer(),
                            activation_fn=tf.tanh, scope=None):
    """
    Performs task-specific attention reduction, using learned
    attention context vector (constant within task of interest).

    Args:
        inputs: Tensor of shape [batch_size, units, input_size]
            `input_size` must be static (known)
            `units` axis will be attended over (reduced from output)
            `batch_size` will be preserved
        output_size: Size of output's inner (feature) dimension

    Returns:
        outputs: Tensor of shape [batch_size, output_dim].
    """
    assert len(inputs.get_shape()) == 3 and inputs.get_shape()[-1].value is not None

    with tf.variable_scope(scope or 'attention') as scope:
        attention_context_vector = tf.get_variable(name='attention_context_vector',
                                                   shape=[output_size],
                                                   initializer=initializer,
                                                   dtype=tf.float32)
        input_projection = layers.fully_connected(inputs, output_size,
                                                  activation_fn=activation_fn,
                                                  scope=scope)

        attention_weights = tf.nn.softmax(
            tf.multiply(input_projection, attention_context_vector)
        )
        weighted_projection = tf.multiply(input_projection, attention_weights)

        outputs = tf.reduce_sum(weighted_projection, axis=1)

        return outputs
def gloret(name, shape):
    return tf.get_variable(name, shape=shape, initializer=xavier_initializer())
def build_inception_module_with_batch_norm(the_input, module, mode, activation_summaries=[],
                                           num_previously_built_inception_modules=0, padding='same',
                                           force_no_concat=False):
    """
    NOTE: 1) This comment no longer fully describes the functionality of the function.
    It will be updated in the near future when I have a bit more time to focus on this
    type of stuff.

    Builds an inception module based on the design given to the function. It returns the
    final layer in the module, and the activation summaries generated for the layers
    within the inception module.

    The layers will be named "module_N_path_M/layer_P", where N is the inception module
    number, M is what path number it is on, and P is what number layer it is in that path.

    Module of the format:
    [[[filters1_1, kernel_size1_1], ..., [filters1_M, kernel_size1_M]], ...,
     [[filtersN_1, kernel_sizeN_1], ..., [filtersN_P, kernel_sizeN_P]]]
    """
    path_outputs = [None for _ in range(len(module))]
    to_summarize = []
    cur_input = None
    for j, path in enumerate(module):
        with tf.variable_scope("inception_module_" + str(num_previously_built_inception_modules + 1) +
                               "_path_" + str(j + 1)):
            for i, section in enumerate(path):
                if i == 0:
                    if j != 0:
                        path_outputs[j - 1] = cur_input
                    cur_input = the_input

                kernel_size = [section[1], section[1]] if len(section) == 2 else [section[1], section[2]]
                cur_conv_output = tf.layers.conv2d(
                    inputs=cur_input,
                    filters=section[0],
                    kernel_size=kernel_size,
                    padding=padding,
                    use_bias=False,
                    kernel_initializer=layers.xavier_initializer(),
                    name="layer_" + str(i + 1))

                cur_batch_normalized = tf.layers.batch_normalization(cur_conv_output,
                                                                     training=(mode == tf.estimator.ModeKeys.TRAIN),
                                                                     fused=True)

                cur_input = tf.nn.relu(cur_batch_normalized)

                to_summarize.append(cur_input)

    path_outputs[-1] = cur_input

    activation_summaries = activation_summaries + [layers.summarize_activation(layer) for layer in to_summarize]

    with tf.variable_scope("inception_module_" + str(num_previously_built_inception_modules + 1)):
        for j in range(1, len(path_outputs)):
            if force_no_concat or path_outputs[0].get_shape().as_list()[1:3] != path_outputs[j].get_shape().as_list()[1:3]:
                return [temp_input for temp_input in path_outputs], activation_summaries

        return tf.concat([temp_input for temp_input in path_outputs], 3), activation_summaries
def create_weight_var(shape, initializer=xavier_initializer(seed=1)):
    """Create TF Variable for weights

    :param shape: shape of the variable
    :param initializer: (optional) by default, xavier initializer
    :return: the TF trainable variable
    """
    return tf.get_variable(name='W', shape=shape, initializer=initializer)
def discriminator(img, name, target):
    size = 64
    with tf.variable_scope(name):
        # img = ly.conv2d(img, num_outputs=size, kernel_size=3,
        #                 stride=2, activation_fn=None, biases_initializer=None)
        # bias = slim.model_variable('conv_bias', shape=(
        #     size, ), initializer=tf.zeros_initializer)
        # img += bias
        # img = lrelu(img)
        img = ly.conv2d(img, num_outputs=size, kernel_size=3,
                        stride=2, activation_fn=lrelu, normalizer_fn=ly.batch_norm)
        img = ly.conv2d(img, num_outputs=size * 2, kernel_size=3,
                        stride=2, activation_fn=lrelu, normalizer_fn=ly.batch_norm)
        img = ly.conv2d(img, num_outputs=size * 4, kernel_size=3,
                        stride=2, activation_fn=lrelu, normalizer_fn=ly.batch_norm)
        img = tf.reshape(img, (2 * batch_size, -1))
        weights = slim.model_variable('weights',
                                      shape=[img.get_shape().as_list()[-1], 1],
                                      initializer=ly.xavier_initializer())
        bias = slim.model_variable('bias', shape=(1,),
                                   initializer=tf.zeros_initializer)
        logit = fully_connected(img, weights, bias)
        fake_logit = logit[:FLAGS.batch_size]
        true_logit = logit[FLAGS.batch_size:]
        d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
            fake_logit, tf.zeros_like(fake_logit)))
        d_loss_true = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
            true_logit, tf.ones_like(true_logit)))
        f = tf.reduce_mean(d_loss_fake + d_loss_true)
        return f, logit, d_loss_true, d_loss_fake
def __init__(self, sess, ob_dim, ac_dim):
    super().__init__(sess, ob_dim, ac_dim)

    # Placeholders for our inputs.
    self.ob_no = tf.placeholder(shape=[None, ob_dim], name="obs", dtype=tf.float32)
    self.ac_n = tf.placeholder(shape=[None], name="act", dtype=tf.int32)
    self.adv_n = tf.placeholder(shape=[None], name="adv", dtype=tf.float32)
    self.oldlogits_na = tf.placeholder(shape=[None, ac_dim], name='oldlogits', dtype=tf.float32)

    # Form the policy network and the log probabilities.
    self.hidden1 = layers.fully_connected(self.ob_no,
            num_outputs=50,
            weights_initializer=layers.xavier_initializer(uniform=True),
            activation_fn=tf.nn.tanh)
    self.logits_na = layers.fully_connected(self.hidden1,
            num_outputs=ac_dim,
            weights_initializer=layers.xavier_initializer(uniform=True),
            activation_fn=None)
    self.logp_na = tf.nn.log_softmax(self.logits_na)

    # Log probabilities of the actions in the minibatch, plus sampled action.
    self.nbatch = tf.shape(self.ob_no)[0]
    self.logprob_n = utils.fancy_slice_2d(self.logp_na, tf.range(self.nbatch), self.ac_n)
    self.sampled_ac = utils.categorical_sample_logits(self.logits_na)[0]

    # Policy gradients loss function and training step.
    self.surr_loss = - tf.reduce_mean(self.logprob_n * self.adv_n)
    self.stepsize = tf.placeholder(shape=[], dtype=tf.float32)
    self.update_op = tf.train.AdamOptimizer(self.stepsize).minimize(self.surr_loss)

    # For KL divergence and entropy diagnostic purposes. These are computed
    # as averages across individual KL/entropy w.r.t each minibatch state.
    self.oldlogp_na = tf.nn.log_softmax(self.oldlogits_na)
    self.oldp_na = tf.exp(self.oldlogp_na)
    self.p_na = tf.exp(self.logp_na)
    self.kl_n = tf.reduce_sum(self.oldp_na * (self.oldlogp_na - self.logp_na), axis=1)

    # I'm not sure why the KL divergence can be slightly negative. Each row
    # corresponds to a valid distribution. Must be numerical instability?
    self.assert_op = tf.Assert(tf.reduce_all(self.kl_n >= -1e-4), [self.kl_n])
    with tf.control_dependencies([self.assert_op]):
        self.kl_n = tf.identity(self.kl_n)
    self.kl = tf.reduce_mean(self.kl_n)
    self.ent = tf.reduce_mean(tf.reduce_sum(-self.p_na * self.logp_na, axis=1))
def __init__(self, sess, ob_dim, ac_dim):
    super().__init__(sess, ob_dim, ac_dim)

    # Placeholders for our inputs. Note that actions are floats.
    self.ob_no = tf.placeholder(shape=[None, ob_dim], name="obs", dtype=tf.float32)
    self.ac_na = tf.placeholder(shape=[None, ac_dim], name="act", dtype=tf.float32)
    self.adv_n = tf.placeholder(shape=[None], name="adv", dtype=tf.float32)
    self.n = tf.shape(self.ob_no)[0]

    # Special to the continuous case, the log std vector, it's a parameter.
    # Also, make batch versions so we get shape (n,a) (or (1,a)), not (a,).
    self.logstd_a = tf.get_variable("logstd", [ac_dim], initializer=tf.zeros_initializer())
    self.oldlogstd_a = tf.placeholder(name="oldlogstd", shape=[ac_dim], dtype=tf.float32)
    self.logstd_na = tf.ones(shape=(self.n, ac_dim), dtype=tf.float32) * self.logstd_a
    self.oldlogstd_na = tf.ones(shape=(self.n, ac_dim), dtype=tf.float32) * self.oldlogstd_a

    # The policy network and the logits, which are the mean of a Gaussian.
    # Then don't forget to make an "old" version of that for KL divergences.
    self.hidden1 = layers.fully_connected(self.ob_no,
            num_outputs=32,
            weights_initializer=layers.xavier_initializer(uniform=True),
            activation_fn=tf.nn.relu)
    self.hidden2 = layers.fully_connected(self.hidden1,
            num_outputs=32,
            weights_initializer=layers.xavier_initializer(uniform=True),
            activation_fn=tf.nn.relu)
    self.mean_na = layers.fully_connected(self.hidden2,
            num_outputs=ac_dim,
            weights_initializer=layers.xavier_initializer(uniform=True),
            activation_fn=None)
    self.oldmean_na = tf.placeholder(shape=[None, ac_dim], name='oldmean', dtype=tf.float32)

    # Diagonal Gaussian distribution for sampling actions and log probabilities.
    self.logprob_n = utils.gauss_log_prob(mu=self.mean_na, logstd=self.logstd_na, x=self.ac_na)
    self.sampled_ac = (tf.random_normal(tf.shape(self.mean_na)) * tf.exp(self.logstd_na) + self.mean_na)[0]

    # Loss function that we'll differentiate to get the policy gradient.
    self.surr_loss = - tf.reduce_mean(self.logprob_n * self.adv_n)
    self.stepsize = tf.placeholder(shape=[], dtype=tf.float32)
    self.update_op = tf.train.AdamOptimizer(self.stepsize).minimize(self.surr_loss)

    # KL divergence and entropy among Gaussian(s).
    self.kl = tf.reduce_mean(utils.gauss_KL(self.mean_na, self.logstd_na, self.oldmean_na, self.oldlogstd_na))
    self.ent = 0.5 * ac_dim * tf.log(2. * np.pi * np.e) + 0.5 * tf.reduce_sum(self.logstd_a)
def model(self, input_doc, input_words, output_label, batch_size,
          vocabulary_size=VOCABULARY_SIZE,
          embedding_size=EMBEDDINGS_SIZE,
          context_size=D2V_CONTEXT_SIZE,
          num_negative_samples=D2V_NEGATIVE_NUM_SAMPLES,
          learning_rate_initial=D2V_LEARNING_RATE_INITIAL,
          learning_rate_decay=D2V_LEARNING_RATE_DECAY,
          learning_rate_decay_steps=D2V_LEARNING_RATE_DECAY_STEPS):
    self.global_step = training_util.get_or_create_global_step()

    # inputs/outputs
    input_doc = tf.reshape(input_doc, [batch_size])
    input_words = tf.reshape(input_words, [batch_size, context_size])
    output_label = tf.reshape(output_label, [batch_size, 1])

    # embeddings
    word_embeddings = _load_embeddings(vocabulary_size, embedding_size,
                                       filename_prefix='word_embeddings',
                                       from_dir=DIR_DATA_DOC2VEC)
    self.word_embeddings = tf.constant(value=word_embeddings,
                                       shape=[vocabulary_size, embedding_size],
                                       dtype=tf.float32, name='word_embeddings')
    self.doc_embeddings = tf.get_variable(shape=[self.dataset.num_docs, embedding_size],
                                          initializer=layers.xavier_initializer(),
                                          dtype=tf.float32, name='doc_embeddings')
    words_embed = tf.nn.embedding_lookup(self.word_embeddings, input_words)
    doc_embed = tf.nn.embedding_lookup(self.word_embeddings, input_doc)
    # average the words_embeds
    words_embed_average = tf.reduce_mean(words_embed, axis=1)
    embed = tf.concat([words_embed_average, doc_embed], axis=1)

    # NCE loss
    nce_weights = tf.get_variable(shape=[vocabulary_size, embedding_size * 2],
                                  initializer=layers.xavier_initializer(),
                                  dtype=tf.float32, name='nce_weights')
    nce_biases = tf.get_variable(shape=[vocabulary_size],
                                 initializer=layers.xavier_initializer(),
                                 dtype=tf.float32, name='nce_biases')
    nce_loss = tf.nn.nce_loss(weights=nce_weights, biases=nce_biases,
                              labels=output_label,
                              inputs=embed, num_sampled=num_negative_samples,
                              num_classes=vocabulary_size)
    self.loss = tf.reduce_mean(nce_loss)
    tf.summary.scalar('loss', self.loss)

    # learning rate & optimizer
    self.learning_rate = tf.train.exponential_decay(learning_rate_initial,
                                                    self.global_step,
                                                    learning_rate_decay_steps,
                                                    learning_rate_decay,
                                                    staircase=True, name='learning_rate')
    tf.summary.scalar('learning_rate', self.learning_rate)
    sgd = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.optimizer = sgd.minimize(self.loss, global_step=self.global_step)

    return None
def model(self, input_label, output_word, batch_size,
          vocabulary_size=VOCABULARY_SIZE,
          embedding_size=EMBEDDINGS_SIZE,
          num_negative_samples=W2V_NEGATIVE_NUM_SAMPLES,
          learning_rate_initial=W2V_LEARNING_RATE_INITIAL,
          learning_rate_decay=W2V_LEARNING_RATE_DECAY,
          learning_rate_decay_steps=W2V_LEARNING_RATE_DECAY_STEPS):
    self.global_step = training_util.get_or_create_global_step()

    # inputs/outputs
    input_label_reshaped = tf.reshape(input_label, [batch_size])
    output_word_reshaped = tf.reshape(output_word, [batch_size, 1])

    # embeddings
    matrix_dimension = [vocabulary_size, embedding_size]
    self.embeddings = tf.get_variable(shape=matrix_dimension,
                                      initializer=layers.xavier_initializer(),
                                      dtype=tf.float32, name='embeddings')
    embed = tf.nn.embedding_lookup(self.embeddings, input_label_reshaped)

    # NCE loss
    stddev = 1.0 / math.sqrt(embedding_size)
    nce_weights = tf.Variable(tf.truncated_normal(matrix_dimension, stddev=stddev))
    nce_biases = tf.Variable(tf.zeros([vocabulary_size]))
    nce_loss = tf.nn.nce_loss(weights=nce_weights, biases=nce_biases,
                              labels=output_word_reshaped,
                              inputs=embed, num_sampled=num_negative_samples,
                              num_classes=vocabulary_size)
    self.loss = tf.reduce_mean(nce_loss)
    tf.summary.scalar('loss', self.loss)

    # learning rate & optimizer
    self.learning_rate = tf.train.exponential_decay(learning_rate_initial,
                                                    self.global_step,
                                                    learning_rate_decay_steps,
                                                    learning_rate_decay,
                                                    staircase=True, name='learning_rate')
    tf.summary.scalar('learning_rate', self.learning_rate)
    sgd = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.optimizer = sgd.minimize(self.loss, global_step=self.global_step)

    # saver to save the model
    self.saver = tf.train.Saver()
    # check a nan value in the loss
    self.loss = tf.check_numerics(self.loss, 'loss is nan')

    # embeddings
    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.tensor_name = self.embeddings.name
    filename_tsv = '{}_{}.tsv'.format('word2vec_dataset', vocabulary_size)
    if not os.path.exists(self.log_dir):
        os.makedirs(self.log_dir)
    shutil.copy(os.path.join(DIR_DATA_WORD2VEC, filename_tsv), self.log_dir)
    embedding.metadata_path = filename_tsv
    summary_writer = tf.summary.FileWriter(self.log_dir)
    projector.visualize_embeddings(summary_writer, config)

    # normalize the embeddings to save them
    norm = tf.sqrt(tf.reduce_sum(tf.square(self.embeddings), 1, keep_dims=True))
    self.normalized_embeddings = self.embeddings / norm

    return None
def _convolution(self, value, filter_width, stride, input_channels, out_channels, apply_non_linearity=True):
    """
    Apply a convolutional layer

    Args:
        value: the input tensor to apply the convolution on
        filter_width: the width of the filter (kernel)
        stride: the striding of the filter (kernel)
        input_channels: the number of input channels
        out_channels: the number of output channels
        apply_non_linearity: whether to apply a non-linearity

    Returns:
        the output after convolution, added biases and possible non-linearity applied
    """
    layer_id = self.convolution_count
    self.convolution_count += 1

    with tf.variable_scope('convolution_layer_{}'.format(layer_id)) as layer:
        # Create variables filter and bias
        filters = tf.get_variable('filters',
                                  shape=[filter_width, input_channels, out_channels],
                                  dtype=tf.float32,
                                  initializer=xavier_initializer())
        bias = tf.Variable(tf.constant(0.0, shape=[out_channels]), name='bias')

        # Apply convolution
        convolution_out = tf.nn.conv1d(value, filters, stride, 'SAME',
                                       use_cudnn_on_gpu=True, name='convolution')

        # Create summary
        with tf.name_scope('summaries'):
            # add depth of 1 (=grayscale) leading to shape
            # [filter_width, input_channels, 1, out_channels]
            kernel_with_depth = tf.expand_dims(filters, 2)

            # to tf.image_summary format
            # [batch_size=out_channels, height=filter_width, width=input_channels, channels=1]
            kernel_transposed = tf.transpose(kernel_with_depth, [3, 0, 1, 2])

            # this will display 3 filters from all the output channels
            tf.summary.image(layer.name + 'filters', kernel_transposed, max_outputs=3)
            tf.summary.histogram(layer.name + 'filters', filters)

            tf.summary.image(layer.name + 'bias', tf.reshape(bias, [1, 1, out_channels, 1]))
            tf.summary.histogram(layer.name + 'bias', bias)

        # Add bias
        convolution_out = tf.nn.bias_add(convolution_out, bias)

        if apply_non_linearity:
            # Add non-linearity
            activations = tf.nn.relu(convolution_out, name='activation')
            tf.summary.histogram(layer.name + 'activation', activations)
            return activations, out_channels
        else:
            return convolution_out, out_channels
def __reslayer(self, inputs, in_filters, out_filters, stride=1):
    """ A regular resnet block """
    with tf.variable_scope('sub1'):
        kernel = tf.get_variable('weights', [3, 3, in_filters, out_filters],
                                 initializer=xavier_initializer(dtype=tf.float32),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(inputs, kernel, [1, stride, stride, 1],
                            padding='SAME', name='conv')
        batch_norm = self.__batch_norm_wrapper(conv, decay=0.9999)
        conv = tf.nn.elu(batch_norm, 'elu')

    with tf.variable_scope('sub2'):
        kernel = tf.get_variable('weights', [3, 3, out_filters, out_filters],
                                 initializer=xavier_initializer(dtype=tf.float32),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(conv, kernel, [1, 1, 1, 1],
                            padding='SAME', name='conv1')
        bias = self.__batch_norm_wrapper(conv, decay=0.9999)

    with tf.variable_scope('subadd'):
        if in_filters != out_filters:
            kernel = tf.get_variable('weights', [1, 1, in_filters, out_filters],
                                     initializer=xavier_initializer(dtype=tf.float32),
                                     dtype=tf.float32)
            inputs = tf.nn.conv2d(inputs, kernel, [1, stride, stride, 1], padding='SAME')
        bias += inputs
        conv = tf.nn.elu(bias, 'elu')

    num = np.power(2, np.floor(np.log2(out_filters) / 2))

    grid = self.__put_activations_on_grid(conv, (int(num), int(out_filters / num)))
    tf.summary.image('sub2/activations', grid, max_outputs=1)

    return conv
def __reslayer_bottleneck(self, inputs, in_filters, out_filters, stride=1):
    """ A bottleneck resnet block """
    with tf.variable_scope('sub1'):
        kernel = tf.get_variable('weights', [1, 1, in_filters, out_filters / 4],
                                 initializer=xavier_initializer(dtype=tf.float32),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(inputs, kernel, [1, stride, stride, 1],
                            padding='SAME', name='conv')
        batch_norm = self.__batch_norm_wrapper(conv, decay=0.9999)
        conv = tf.nn.elu(batch_norm, 'elu')

    with tf.variable_scope('sub2'):
        kernel = tf.get_variable('weights', [3, 3, out_filters / 4, out_filters / 4],
                                 initializer=xavier_initializer(dtype=tf.float32),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(conv, kernel, [1, 1, 1, 1],
                            padding='SAME', name='conv1')
        batch_norm = self.__batch_norm_wrapper(conv, decay=0.9999)
        conv = tf.nn.elu(batch_norm, 'elu')

    with tf.variable_scope('sub3'):
        kernel = tf.get_variable('weights', [1, 1, out_filters / 4, out_filters],
                                 initializer=xavier_initializer(dtype=tf.float32),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(conv, kernel, [1, 1, 1, 1],
                            padding='SAME', name='conv')
        batch_norm = self.__batch_norm_wrapper(conv, decay=0.9999)

    with tf.variable_scope('subadd'):
        if in_filters != out_filters:
            kernel = tf.get_variable('weights', [1, 1, in_filters, out_filters],
                                     initializer=xavier_initializer(dtype=tf.float32),
                                     dtype=tf.float32)
            inputs = tf.nn.conv2d(inputs, kernel, [1, stride, stride, 1], padding='SAME')
        batch_norm += inputs
        conv = tf.nn.elu(batch_norm, 'elu')

    num = np.power(2, np.floor(np.log2(out_filters) / 2))

    grid = self.__put_activations_on_grid(conv, (int(num), int(out_filters / num)))
    tf.summary.image('sub3/activations', grid, max_outputs=1)

    return conv
def __reslayer(self, inputs, in_filters, out_filters, stride=1):
    """ A regular resnet block """
    with tf.variable_scope('sub1'):
        kernel = tf.get_variable('weights', [3, 3, in_filters, out_filters],
                                 initializer=xavier_initializer(dtype=tf.float32),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(inputs, kernel, [1, stride, stride, 1],
                            padding='SAME', name='conv')
        batch_norm = self.__batch_norm_wrapper(conv, decay=0.9999, shape=[0, 1, 2])
        conv = tf.nn.elu(batch_norm, 'elu')

    with tf.variable_scope('sub2'):
        kernel = tf.get_variable('weights', [3, 3, out_filters, out_filters],
                                 initializer=xavier_initializer(dtype=tf.float32),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(conv, kernel, [1, 1, 1, 1],
                            padding='SAME', name='conv1')
        bias = self.__batch_norm_wrapper(conv, decay=0.9999, shape=[0, 1, 2])

    with tf.variable_scope('subadd'):
        if in_filters != out_filters:
            kernel = tf.get_variable('weights', [1, 1, in_filters, out_filters],
                                     initializer=xavier_initializer(dtype=tf.float32),
                                     dtype=tf.float32)
            inputs = tf.nn.conv2d(inputs, kernel, [1, stride, stride, 1], padding='SAME')
        bias += inputs
        conv = tf.nn.elu(bias, 'elu')

    num = np.power(2, np.floor(np.log2(out_filters) / 2))

    grid = self.__put_activations_on_grid(conv, (int(num), int(out_filters / num)))
    tf.summary.image('sub2/activations', grid, max_outputs=1)

    return conv
def __reslayer_bottleneck(self, inputs, in_filters, out_filters, stride=1):
    """ A bottleneck resnet block """
    with tf.variable_scope('sub1'):
        kernel = tf.get_variable('weights', [1, 1, in_filters, out_filters / 4],
                                 initializer=xavier_initializer(dtype=tf.float32),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(inputs, kernel, [1, stride, stride, 1],
                            padding='SAME', name='conv')
        batch_norm = self.__batch_norm_wrapper(conv, decay=0.9999, shape=[0, 1, 2])
        conv = tf.nn.elu(batch_norm, 'elu')

    with tf.variable_scope('sub2'):
        kernel = tf.get_variable('weights', [3, 3, out_filters / 4, out_filters / 4],
                                 initializer=xavier_initializer(dtype=tf.float32),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(conv, kernel, [1, 1, 1, 1],
                            padding='SAME', name='conv1')
        batch_norm = self.__batch_norm_wrapper(conv, decay=0.9999, shape=[0, 1, 2])
        conv = tf.nn.elu(batch_norm, 'elu')

    with tf.variable_scope('sub3'):
        kernel = tf.get_variable('weights', [1, 1, out_filters / 4, out_filters],
                                 initializer=xavier_initializer(dtype=tf.float32),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(conv, kernel, [1, 1, 1, 1],
                            padding='SAME', name='conv')
        batch_norm = self.__batch_norm_wrapper(conv, decay=0.9999, shape=[0, 1, 2])

    with tf.variable_scope('subadd'):
        if in_filters != out_filters:
            kernel = tf.get_variable('weights', [1, 1, in_filters, out_filters],
                                     initializer=xavier_initializer(dtype=tf.float32),
                                     dtype=tf.float32)
            inputs = tf.nn.conv2d(inputs, kernel, [1, stride, stride, 1], padding='SAME')
        batch_norm += inputs
        conv = tf.nn.elu(batch_norm, 'elu')

    num = np.power(2, np.floor(np.log2(out_filters) / 2))

    grid = self.__put_activations_on_grid(conv, (int(num), int(out_filters / num)))
    tf.summary.image('sub3/activations', grid, max_outputs=1)

    return conv
def __reslayer(self, inputs, in_filters, out_filters, stride=1):
    """ A regular resnet block """
    with tf.variable_scope('sub1'):
        kernel = tf.get_variable('weights', [3, 3, in_filters, out_filters],
                                 initializer=xavier_initializer(dtype=tf.float32),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(inputs, kernel, [1, stride, stride, 1],
                            padding='SAME', name='conv')
        batch_norm = self.__batch_norm_wrapper(conv, shape=[0, 1, 2, 3])
        conv = tf.nn.elu(batch_norm, 'elu')

    with tf.variable_scope('sub2'):
        kernel = tf.get_variable('weights', [3, 3, out_filters, out_filters],
                                 initializer=xavier_initializer(dtype=tf.float32),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(conv, kernel, [1, 1, 1, 1],
                            padding='SAME', name='conv1')
        bias = self.__batch_norm_wrapper(conv, shape=[0, 1, 2, 3])

    with tf.variable_scope('subadd'):
        if in_filters != out_filters:
            # inputs = tf.nn.avg_pool(inputs, (1, stride, stride, 1),
            #                         (1, stride, stride, 1), 'SAME')
            # inputs = tf.pad(inputs, [[0, 0], [0, 0], [0, 0],
            #                          [(out_filters - in_filters) // 2,
            #                           (out_filters - in_filters) // 2]])
            kernel = tf.get_variable('weights', [1, 1, in_filters, out_filters],
                                     initializer=xavier_initializer(dtype=tf.float32),
                                     dtype=tf.float32)
            inputs = tf.nn.conv2d(inputs, kernel, [1, stride, stride, 1], padding='SAME')
        bias += inputs
        conv = tf.nn.elu(bias, 'elu')

    num = np.power(2, np.floor(np.log2(out_filters) / 2))

    grid = self.__put_activations_on_grid(conv, (int(num), int(out_filters / num)))
    tf.summary.image('sub2/activations', grid, max_outputs=1)

    return conv
def __reslayer_bottleneck(self, inputs, in_filters, out_filters, stride=1):
    """ A bottleneck resnet block """
    with tf.variable_scope('sub1'):
        kernel = tf.get_variable('weights', [1, 1, in_filters, out_filters / 4],
                                 initializer=xavier_initializer(dtype=tf.float32),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(inputs, kernel, [1, stride, stride, 1],
                            padding='SAME', name='conv')
        batch_norm = self.__batch_norm_wrapper(conv, shape=[0, 1, 2, 3])
        conv = tf.nn.elu(batch_norm, 'elu')

    with tf.variable_scope('sub2'):
        kernel = tf.get_variable('weights', [3, 3, out_filters / 4, out_filters / 4],
                                 initializer=xavier_initializer(dtype=tf.float32),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(conv, kernel, [1, 1, 1, 1],
                            padding='SAME', name='conv1')
        batch_norm = self.__batch_norm_wrapper(conv, shape=[0, 1, 2, 3])
        conv = tf.nn.elu(batch_norm, 'elu')

    with tf.variable_scope('sub3'):
        kernel = tf.get_variable('weights', [1, 1, out_filters / 4, out_filters],
                                 initializer=xavier_initializer(dtype=tf.float32),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(conv, kernel, [1, 1, 1, 1],
                            padding='SAME', name='conv')
        batch_norm = self.__batch_norm_wrapper(conv, shape=[0, 1, 2, 3])

    with tf.variable_scope('subadd'):
        if in_filters != out_filters:
            # inputs = tf.nn.avg_pool(inputs, (1, stride, stride, 1),
            #                         (1, stride, stride, 1), 'SAME')
            # inputs = tf.pad(inputs, [[0, 0], [0, 0], [0, 0],
            #                          [(out_filters - in_filters) // 2,
            #                           (out_filters - in_filters) // 2]])
            kernel = tf.get_variable('weights', [1, 1, in_filters, out_filters],
                                     initializer=xavier_initializer(dtype=tf.float32),
                                     dtype=tf.float32)
            inputs = tf.nn.conv2d(inputs, kernel, [1, stride, stride, 1], padding='SAME')
        batch_norm += inputs
        conv = tf.nn.elu(batch_norm, 'elu')

    num = np.power(2, np.floor(np.log2(out_filters) / 2))

    grid = self.__put_activations_on_grid(conv, (int(num), int(out_filters / num)))
    tf.summary.image('sub3/activations', grid, max_outputs=1)

    return conv
def inference(self, inputs):
    # resnet
    with tf.variable_scope('first_layer'):
        kernel = tf.get_variable('weights', [7, 7, 3, 64],
                                 initializer=xavier_initializer(dtype=tf.float32),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(inputs, kernel, [1, 2, 2, 1],
                            padding='SAME', name='conv')
        batch_norm = self.__batch_norm_wrapper(conv, shape=[0, 1, 2, 3])
        conv = tf.nn.elu(batch_norm, 'elu')

        grid = self.__put_kernels_on_grid(kernel, (8, 8))
        tf.summary.image('conv1/features', grid, max_outputs=1)
        grid = self.__put_activations_on_grid(conv, (8, 8))
        tf.summary.image('conv1/activations', grid, max_outputs=1)

        inputs = tf.nn.max_pool(conv, ksize=[1, 2, 2, 1],
                                strides=[1, 2, 2, 1], padding='SAME',
                                name='pool')

    if self.net == NetworkType.RESNET34:
        inputs = self.__resnet34(inputs)
    elif self.net == NetworkType.RESNET50:
        inputs = self.__resnet50(inputs)

    # classify regions and add final region adjustments
    with tf.variable_scope('fully_connected'):
        fc = tf.reduce_mean(inputs, [1, 2])
        class_weights = tf.get_variable('class_weights',
                                        [self.conv_feature_count, self.num_classes],
                                        initializer=xavier_initializer(dtype=tf.float32),
                                        dtype=tf.float32)
        class_bias = tf.get_variable("class_bias", [self.num_classes],
                                     initializer=tf.constant_initializer(0.1),
                                     dtype=tf.float32)
        class_score = tf.matmul(fc, class_weights)
        class_score = tf.nn.bias_add(class_score, class_bias)

    return class_score