def conv2d(x, num_filters, name, filter_size=(3, 3), stride=(1, 1), pad="SAME", dtype=tf.float32,
           collections=None):
    """Convolve ``x`` with a newly created filter bank and add a bias.

    Variables "W" and "b" are created (or fetched) under the variable scope
    ``name``. "W" is drawn uniformly from ``[-limit, limit]`` with
    ``limit = sqrt(6 / (fan_in + fan_out))``; "b" starts at zero.

    Args:
        x: Input tensor; dimension 3 of its static shape is used as the
            number of input channels.
        num_filters: Number of output feature maps.
        name: Variable scope name.
        filter_size: ``(height, width)`` of the kernel.
        stride: Two stride values placed at positions 1 and 2 of the
            stride vector.
        pad: Padding argument forwarded to ``tf.nn.conv2d``.
        dtype: Dtype used for "W".
        collections: Forwarded to ``tf.get_variable`` for both variables.

    Returns:
        The convolution output plus the broadcast bias.
    """
    with tf.variable_scope(name):
        in_channels = int(x.get_shape()[3])
        kernel_shape = [filter_size[0], filter_size[1], in_channels, num_filters]
        strides = [1, stride[0], stride[1], 1]

        # Inputs feeding one output unit: height * width * input channels.
        fan_in = np.prod(kernel_shape[:3])
        # Units that send gradient back to one input:
        # height * width * output channels (no pooling applied here).
        fan_out = np.prod(kernel_shape[:2]) * num_filters
        limit = np.sqrt(6. / (fan_in + fan_out))

        kernel = tf.get_variable(
            "W", kernel_shape, dtype,
            tf.random_uniform_initializer(-limit, limit),
            collections=collections)
        bias = tf.get_variable(
            "b", [1, 1, 1, num_filters],
            initializer=tf.constant_initializer(0.0),
            collections=collections)
        return tf.nn.conv2d(x, kernel, strides, pad) + bias
def _build(self, initial_state, helper):
    """Decode with ``dynamic_decode`` and return the finalized result.

    Args:
        initial_state: Passed to ``self._setup`` when the decoder has no
            initial state yet.
        helper: Passed to ``self._setup`` together with ``initial_state``.

    Returns:
        Whatever ``self.finalize(outputs, final_state)`` returns.
    """
    if not self.initial_state:
        self._setup(initial_state, helper)

    # Variables created in the current scope default to U(-scale, scale).
    init_scale = self.params["init_scale"]
    tf.get_variable_scope().set_initializer(
        tf.random_uniform_initializer(-init_scale, init_scale))

    # Only inference mode gets a hard cap on the number of decode steps.
    if self.mode == tf.contrib.learn.ModeKeys.INFER:
        step_limit = self.params["max_decode_length"]
    else:
        step_limit = None

    decoded, last_state = dynamic_decode(
        decoder=self,
        output_time_major=True,
        impute_finished=False,
        maximum_iterations=step_limit)
    return self.finalize(decoded, last_state)
def encode(self, inputs, sequence_length, **kwargs):
    """Run ``inputs`` through a single-direction dynamic RNN.

    Args:
        inputs: Input tensor forwarded to ``tf.nn.dynamic_rnn``.
        sequence_length: Per-example lengths forwarded to the RNN and
            reported as ``attention_values_length``.
        **kwargs: Extra keyword arguments for ``tf.nn.dynamic_rnn``.

    Returns:
        An ``EncoderOutput`` whose ``outputs`` and ``attention_values`` are
        both the RNN outputs and whose ``final_state`` is the RNN state.
    """
    # Default initializer for variables created in the current scope.
    init_scale = self.params["init_scale"]
    tf.get_variable_scope().set_initializer(
        tf.random_uniform_initializer(-init_scale, init_scale))

    rnn_cell_instance = training_utils.get_rnn_cell(**self.params["rnn_cell"])
    rnn_outputs, rnn_state = tf.nn.dynamic_rnn(
        cell=rnn_cell_instance,
        inputs=inputs,
        sequence_length=sequence_length,
        dtype=tf.float32,
        **kwargs)

    return EncoderOutput(
        outputs=rnn_outputs,
        final_state=rnn_state,
        attention_values=rnn_outputs,
        attention_values_length=sequence_length)
def encode(self, inputs, sequence_length, **kwargs):
    """Run ``inputs`` through a bidirectional dynamic RNN.

    Args:
        inputs: Input tensor forwarded to
            ``tf.nn.bidirectional_dynamic_rnn``.
        sequence_length: Per-example lengths forwarded to the RNN and
            reported as ``attention_values_length``.
        **kwargs: Extra keyword arguments for the RNN call.

    Returns:
        An ``EncoderOutput`` whose ``outputs`` and ``attention_values`` are
        the forward and backward outputs concatenated along axis 2, and
        whose ``final_state`` is the state pair returned by the RNN.
    """
    # Default initializer for variables created in the current scope.
    init_scale = self.params["init_scale"]
    tf.get_variable_scope().set_initializer(
        tf.random_uniform_initializer(-init_scale, init_scale))

    # Two separately constructed cells, forward first.
    cell_params = self.params["rnn_cell"]
    forward_cell = training_utils.get_rnn_cell(**cell_params)
    backward_cell = training_utils.get_rnn_cell(**cell_params)

    direction_outputs, direction_states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=forward_cell,
        cell_bw=backward_cell,
        inputs=inputs,
        sequence_length=sequence_length,
        dtype=tf.float32,
        **kwargs)

    # Join the two directions along axis 2.
    joined = tf.concat(direction_outputs, 2)

    return EncoderOutput(
        outputs=joined,
        final_state=direction_states,
        attention_values=joined,
        attention_values_length=sequence_length)
def __call__(self, obs, reuse=False):
    """Build the network that maps observations to tanh-squashed outputs.

    Args:
        obs: Observation tensor fed to the first dense layer.
        reuse: When true, variables in the scope ``self.name`` are reused.

    Returns:
        A tensor with ``self.nb_actions`` units passed through ``tanh``.
    """
    with tf.variable_scope(self.name) as scope:
        if reuse:
            scope.reuse_variables()

        hidden = obs
        # Two identical hidden blocks: dense(64) -> optional layer norm -> ReLU.
        for _ in range(2):
            hidden = tf.layers.dense(hidden, 64)
            if self.layer_norm:
                hidden = tc.layers.layer_norm(hidden, center=True, scale=True)
            hidden = tf.nn.relu(hidden)

        # Output layer starts with small uniform weights in [-3e-3, 3e-3].
        small_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
        pre_activation = tf.layers.dense(hidden, self.nb_actions,
                                         kernel_initializer=small_init)
        squashed = tf.nn.tanh(pre_activation)
    return squashed
def __call__(self, obs, action, reuse=False):
    """Build the network that maps (observation, action) to one output unit.

    Args:
        obs: Observation tensor fed to the first dense layer.
        action: Action tensor concatenated (last axis) with the first
            hidden layer's activations.
        reuse: When true, variables in the scope ``self.name`` are reused.

    Returns:
        The output of the final single-unit dense layer (no activation).
    """
    with tf.variable_scope(self.name) as scope:
        if reuse:
            scope.reuse_variables()

        # Observation-only block.
        obs_features = tf.layers.dense(obs, 64)
        if self.layer_norm:
            obs_features = tc.layers.layer_norm(obs_features, center=True, scale=True)
        obs_features = tf.nn.relu(obs_features)

        # The action joins after the first hidden layer.
        joint = tf.concat([obs_features, action], axis=-1)
        joint = tf.layers.dense(joint, 64)
        if self.layer_norm:
            joint = tc.layers.layer_norm(joint, center=True, scale=True)
        joint = tf.nn.relu(joint)

        # Output layer starts with small uniform weights in [-3e-3, 3e-3].
        small_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
        output = tf.layers.dense(joint, 1, kernel_initializer=small_init)
    return output
def build_lstm_inner(H, lstm_input):
    '''
    Build the LSTM decoder and unroll it for ``H['rnn_len']`` steps.

    The same ``lstm_input`` is fed at every time step; the state starts at
    zeros and is threaded from one step to the next.

    Args:
        H: Hyper-parameter dict. Reads 'lstm_size', 'num_lstm_layers',
            'batch_size', 'grid_height', 'grid_width' and 'rnn_len'.
        lstm_input: Input tensor passed to the cell at each step.

    Returns:
        List of per-step output tensors, of length ``H['rnn_len']``.
    '''
    def make_cell():
        return rnn_cell.BasicLSTMCell(H['lstm_size'], forget_bias=0.0,
                                      state_is_tuple=False)

    num_layers = H['num_lstm_layers']
    if num_layers > 1:
        # One cell object per layer. Repeating a single instance
        # ([cell] * n) makes every layer the same object, which newer
        # TensorFlow releases reject when the cell is reused under a
        # different variable scope.
        lstm = rnn_cell.MultiRNNCell([make_cell() for _ in range(num_layers)],
                                     state_is_tuple=False)
    else:
        lstm = make_cell()

    # One state row per grid cell of every image in the batch.
    batch_size = H['batch_size'] * H['grid_height'] * H['grid_width']
    state = tf.zeros([batch_size, lstm.state_size])

    outputs = []
    with tf.variable_scope('RNN', initializer=tf.random_uniform_initializer(-0.1, 0.1)):
        for time_step in range(H['rnn_len']):
            # Weights are created on the first step and shared afterwards.
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            output, state = lstm(lstm_input, state)
            outputs.append(output)
    return outputs
def create_net(self, shape):
    """Build the three-layer regression net, its loss and train op.

    Sets ``self.x``, ``self.y``, ``self.net`` and ``self.train``, then runs
    the all-variables initializer in ``self.session``.

    Args:
        shape: Width of the input; used as the second dimension of the
            ``x`` placeholder and as the input size of the first layer.
    """
    hidden_size = 64
    print(shape)

    self.x = tf.placeholder(tf.float32, shape=[None, shape], name="x")
    self.y = tf.placeholder(tf.float32, shape=[None], name="y")

    w_init = tf.random_uniform_initializer(-0.05, 0.05)
    b_init = tf.constant_initializer(0)

    with tf.variable_scope("VF"):
        layer1 = tf.nn.relu(
            fully_connected(self.x, shape, hidden_size, w_init, b_init, "h1"))
        layer2 = tf.nn.relu(
            fully_connected(layer1, hidden_size, hidden_size, w_init, b_init, "h2"))
        # Final layer is linear with a single output unit.
        prediction = fully_connected(layer2, hidden_size, 1, w_init, b_init, "h3")

    # Flatten to one value per example so it lines up with ``self.y``.
    self.net = tf.reshape(prediction, (-1,))
    regression_loss = tf.nn.l2_loss(self.net - self.y)
    self.train = tf.train.AdamOptimizer().minimize(regression_loss)
    self.session.run(tf.initialize_all_variables())
def conv2d(x, num_filters, name, filter_size=(3, 3), stride=(1, 1), pad="SAME", dtype=tf.float32,
           collections=None):
    """2-D convolution with bias; weights uniform in ±sqrt(6/(fan_in+fan_out)).

    Args:
        x: Input tensor; dimension 3 of its static shape is used as the
            number of input channels.
        num_filters: Number of output feature maps.
        name: Variable scope in which "W" and "b" are created.
        filter_size: ``(height, width)`` of the kernel.
        stride: Two stride values placed at positions 1 and 2 of the
            stride vector.
        pad: Padding argument forwarded to ``tf.nn.conv2d``.
        dtype: Dtype used for "W".
        collections: Forwarded to ``tf.get_variable`` for both variables.

    Returns:
        ``tf.nn.conv2d(x, W, strides, pad) + b``.
    """
    with tf.variable_scope(name):
        strides = [1, stride[0], stride[1], 1]
        kernel_shape = [filter_size[0], filter_size[1],
                        int(x.get_shape()[3]), num_filters]

        # fan_in: inputs per hidden unit (height * width * input maps).
        # fan_out: height * width * output maps (no pooling applied here).
        fan_in = np.prod(kernel_shape[:3])
        fan_out = np.prod(kernel_shape[:2]) * num_filters
        limit = np.sqrt(6. / (fan_in + fan_out))

        weights_init = tf.random_uniform_initializer(-limit, limit)
        kernel = tf.get_variable("W", kernel_shape, dtype, weights_init,
                                 collections=collections)
        # NOTE(review): the zeros initializer is passed without being
        # called; confirm that matches the installed TensorFlow version.
        bias = tf.get_variable("b", [1, 1, 1, num_filters],
                               initializer=tf.zeros_initializer,
                               collections=collections)
        return tf.nn.conv2d(x, kernel, strides, pad) + bias
def domain_classifier(self, images, name="G", reuse=False):
    """Classify image features into one of ``self.num_domains`` domains.

    Args:
        images: Image feature tensor multiplied by ``images_W``.
        name: Variable scope holding the existing ``images/images_W``
            variable; that scope is always put into reuse mode here.
        reuse: When true, the "domain" scope is put into reuse mode.

    Returns:
        Tuple ``(predictions, logits, l2_loss)``: the argmax over axis 1 of
        the logits, the logits, and the summed L2 loss of the output
        weights and bias.
    """
    random_uniform_init = tf.random_uniform_initializer(minval=-0.1, maxval=0.1)
    with tf.variable_scope(name):
        # Reuse is switched on unconditionally, so ``images_W`` must
        # already exist under this scope.
        tf.get_variable_scope().reuse_variables()
        with tf.variable_scope("images"):
            # "generator/images"
            images_W = tf.get_variable("images_W", [self.img_dims, self.G_hidden_size], "float32", random_uniform_init)
    images_emb = tf.matmul(images, images_W)  # B,H
    l2_loss = tf.constant(0.0)
    with tf.variable_scope("domain"):
        if reuse:
            tf.get_variable_scope().reuse_variables()
        with tf.variable_scope("output"):
            output_W = tf.get_variable("output_W", [self.G_hidden_size, self.num_domains], "float32", random_uniform_init)
            output_b = tf.get_variable("output_b", [self.num_domains], "float32", random_uniform_init)
            # Both the weights and the bias contribute to the L2 term.
            l2_loss += tf.nn.l2_loss(output_W)
            l2_loss += tf.nn.l2_loss(output_b)
            logits = tf.nn.xw_plus_b(images_emb, output_W, output_b, name="logits")
            predictions = tf.argmax(logits, 1, name="predictions")
    return predictions, logits, l2_loss
def construct_model(config, eval_config, raw_data, opt_method):
    """Build the train, validation and test models over shared variables.

    ``eval_config`` is modified in place: its ``batch_size`` and
    ``num_steps`` are both set to 1.

    Args:
        config: Configuration for the training and validation models; its
            ``init_scale`` bounds the uniform initializer.
        eval_config: Configuration for the test model (mutated, see above).
        raw_data: 4-tuple ``(train, valid, test, _)``.
        opt_method: Forwarded to every ``PTBModel``.

    Returns:
        Tuple ``(m, mvalid, mtest)``.
    """
    train_data, valid_data, test_data, _ = raw_data
    eval_config.batch_size = 1
    eval_config.num_steps = 1
    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)

    def build(split, data, split_config, is_training):
        # Only the training model creates variables; the others reuse them.
        reuse = None if is_training else True
        with tf.name_scope(split):
            split_input = PTBInput(config=split_config, data=data,
                                   name=split + "Input")
            with tf.variable_scope("Model", reuse=reuse, initializer=initializer):
                return PTBModel(is_training=is_training, config=split_config,
                                input_=split_input, opt_method=opt_method)

    m = build("Train", train_data, config, True)
    mvalid = build("Valid", valid_data, config, False)
    mtest = build("Test", test_data, eval_config, False)
    return m, mvalid, mtest
def _init_embeddings(self):
    """Create encoder/decoder embedding tables and look up their inputs.

    Sets ``encoder_embedding_matrix``, ``decoder_embedding_matrix``,
    ``encoder_inputs_embedded`` and ``decoder_train_inputs_embedded``.
    """
    with tf.variable_scope("embedding"):
        # Both tables are initialized uniformly in [-sqrt(3), sqrt(3)].
        bound = math.sqrt(3)
        uniform_init = tf.random_uniform_initializer(-bound, bound)

        self.encoder_embedding_matrix = tf.get_variable(
            name="encoder_embedding_matrix",
            shape=[self.encoder_vocab_size, self.embedding_size],
            initializer=uniform_init,
            dtype=tf.float32)
        self.decoder_embedding_matrix = tf.get_variable(
            name="decoder_embedding_matrix",
            shape=[self.decoder_vocab_size, self.embedding_size],
            initializer=uniform_init,
            dtype=tf.float32)

        # Embedded encoder inputs.
        self.encoder_inputs_embedded = tf.nn.embedding_lookup(
            self.encoder_embedding_matrix, self.encoder_inputs)
        # Embedded decoder training inputs.
        self.decoder_train_inputs_embedded = tf.nn.embedding_lookup(
            self.decoder_embedding_matrix, self.decoder_train_inputs)
def evaluate_mc(data_path, dataset, load_model, mc_steps, seed):
    """Evaluate the model on the given data using MC averaging.

    Restores the checkpoint ``load_model`` into a freshly built graph and
    prints the test perplexity (and its base-2 log) computed by
    ``run_mc_epoch``.

    Args:
        data_path: Forwarded to ``get_data``.
        dataset: Forwarded to ``get_data``.
        load_model: Checkpoint path given to ``saver.restore``.
        mc_steps: Number of MC steps; must be positive.
        seed: Forwarded to ``run_mc_epoch``.
    """
    ex.commands['print_config']()
    print("MC Evaluation of model:", load_model)
    assert mc_steps > 0
    # Only ``test_data`` is used below; the other splits are unpacked but ignored.
    reader, (train_data, valid_data, test_data, _) = get_data(data_path, dataset)
    config = get_config()
    val_config = deepcopy(config)
    test_config = deepcopy(config)
    # The test model runs one token at a time.
    test_config.batch_size = test_config.num_steps = 1
    with tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
        # The training-mode model creates the variables ...
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            _ = Model(is_training=True, config=config)
        # ... and the evaluation-mode models share them.
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            _ = Model(is_training=False, config=val_config)
            mtest = Model(is_training=False, config=test_config)
        # NOTE(review): this op is created but never run; variable values
        # come from the checkpoint restored below.
        tf.initialize_all_variables()
        saver = tf.train.Saver()
        saver.restore(session, load_model)
        print("Testing on non-batched Test ...")
        test_perplexity = run_mc_epoch(seed, session, mtest, test_data, tf.no_op(), test_config, mc_steps, verbose=True)
        print("Full Test Perplexity: %.3f, Bits: %.3f" % (test_perplexity, np.log2(test_perplexity)))
def __init__(self, config, mode):
    """Store the configuration and build the level-1 model.

    Args:
        config: Object providing the ``LEVEL1_*`` hyper-parameters read
            below.
        mode: Mode string; the level-1 model is built with ``train=True``
            only when this equals ``'training'``.
    """
    self.config = config
    self.mode = mode
    # self.train_resnet = (train_resnet & (mode == 'training'))

    self.weight_initializer = tf.contrib.layers.xavier_initializer()
    self.const_initializer = tf.constant_initializer(0.0)
    self.emb_initializer = tf.random_uniform_initializer(minval=-1.0, maxval=1.0)

    # Use a context manager so the vocabulary file is closed right after
    # it is parsed instead of being left for the garbage collector.
    with open('data/train/word2ix_stem.json') as vocab_file:
        self.level1_word2ix = json.load(vocab_file)

    self.level1_model = level1_model.Level1Model(
        word_to_idx=self.level1_word2ix,
        dim_feature=config.LEVEL1_dim_feature,
        dim_embed=config.LEVEL1_dim_embed,
        dim_hidden=config.LEVEL1_dim_hidden,
        alpha_c=config.LEVEL1_alpha,
        dropout=config.LEVEL1_dropout,
        n_time_step=config.LEVEL1_T,
        train=(self.mode == 'training'))
def __init__(self, config, mode):
    """Store the configuration and build the level-1 and level-2 models.

    Args:
        config: Object providing the ``LEVEL1_*`` and ``LEVEL2_*``
            hyper-parameters read below.
        mode: Mode string; the level-1 model is built with ``train=True``
            only when this equals ``'training'``.
    """
    self.config = config
    self.mode = mode

    self.weight_initializer = tf.contrib.layers.xavier_initializer()
    self.const_initializer = tf.constant_initializer(0.0)
    self.emb_initializer = tf.random_uniform_initializer(minval=-1.0, maxval=1.0)

    # Context managers close each vocabulary file right after parsing
    # instead of leaving the handles for the garbage collector.
    with open('data/train/word2ix_stem.json') as stem_file:
        self.level1_word2ix = json.load(stem_file)
    with open('data/train/word2ix_attr.json') as attr_file:
        self.level2_word2ix = json.load(attr_file)

    self.level1_model = level1_model.Level1Model(
        word_to_idx=self.level1_word2ix,
        dim_feature=config.LEVEL1_dim_feature,
        dim_embed=config.LEVEL1_dim_embed,
        dim_hidden=config.LEVEL1_dim_hidden,
        alpha_c=config.LEVEL1_alpha,
        dropout=config.LEVEL1_dropout,
        n_time_step=config.LEVEL1_T,
        train=(self.mode == 'training'))
    self.level2_model = level2_model.Level2Model(
        word_to_idx=self.level2_word2ix,
        dim_feature=config.LEVEL2_dim_feature,
        dim_embed=config.LEVEL2_dim_embed,
        dim_hidden=config.LEVEL2_dim_hidden,
        dropout=config.LEVEL2_dropout,
        n_time_step=config.LEVEL2_T)
def __init__(self, namespace, input_state, action_dim):
    """Build the actor: hidden layers plus a tanh action output.

    Side effect: overwrites the module-level ``opts.hidden_layers`` with
    ``opts.actor_hidden_layers`` before building the hidden layers.

    Args:
        namespace: Passed to the parent constructor and used as the
            variable scope.
        input_state: State tensor fed to ``self.input_state_network``.
        action_dim: Number of action outputs; also sizes the exploration
            noise process.
    """
    super(ActorNetwork, self).__init__(namespace)
    self.input_state = input_state
    self.exploration_noise = util.OrnsteinUhlenbeckNoise(
        action_dim, opts.action_noise_theta, opts.action_noise_sigma)

    with tf.variable_scope(namespace):
        opts.hidden_layers = opts.actor_hidden_layers
        last_hidden = self.input_state_network(self.input_state, opts)

        # Action output. Note: the actor's output is in (-1, 1) and is
        # scaled in the env as required.
        small_uniform = tf.random_uniform_initializer(-0.001, 0.001)
        self.output_action = slim.fully_connected(
            inputs=last_hidden,
            num_outputs=action_dim,
            activation_fn=tf.nn.tanh,
            weights_initializer=small_uniform,
            weights_regularizer=tf.contrib.layers.l2_regularizer(0.01),
            scope='output_action')
def create_critic(self, name, state_input, action_input, reuse=False):
    """Build a critic whose action input joins at the second hidden layer.

    Args:
        name: Variable scope name.
        state_input: State tensor fed to the first hidden layer.
        action_input: Action tensor concatenated (axis 1) with the hidden
            activations just before the layer at index 1.
        reuse: Forwarded to ``tf.variable_scope``.

    Returns:
        Tuple ``(value, weights, weight_phs)``: the value output reshaped
        to rank 1, the list of weights collected from every ``denselayer``
        call, and one float32 placeholder per weight with matching shape.
    """
    activations = state_input
    collected = []
    with tf.variable_scope(name, reuse=reuse):
        for layer_idx, width in enumerate(self.n_hiddens):
            if layer_idx == 1:
                activations = tf.concat([activations, action_input], axis=1)
            activations, layer_vars = denselayer(
                "hidden_critic_{}".format(layer_idx), activations, width,
                self.nonlinearity, tf.truncated_normal_initializer())
            collected.extend(layer_vars)

        # Output layer starts with small uniform weights in [-3e-3, 3e-3].
        value, layer_vars = denselayer(
            "value", activations, 1,
            w_initializer=tf.random_uniform_initializer(-3e-3, 3e-3))
        value = tf.reshape(value, [-1])
        collected.extend(layer_vars)

    placeholders = [tf.placeholder(tf.float32, shape=w.get_shape())
                    for w in collected]
    return value, collected, placeholders
def get_initializer(params):
    """Return the TensorFlow initializer selected by ``params.initializer``.

    Supported names are "uniform", "normal", "normal_unit_scaling" and
    "uniform_unit_scaling"; ``params.initializer_gain`` supplies the range,
    standard deviation or scale respectively.

    Raises:
        ValueError: If ``params.initializer`` is not one of the names above.
    """
    kind = params.initializer

    if kind == "uniform":
        bound = params.initializer_gain
        return tf.random_uniform_initializer(-bound, bound)

    if kind == "normal":
        return tf.random_normal_initializer(0.0, params.initializer_gain)

    # Both "*_unit_scaling" variants differ only in the distribution.
    scaling_distributions = {
        "normal_unit_scaling": "normal",
        "uniform_unit_scaling": "uniform",
    }
    if kind in scaling_distributions:
        return tf.variance_scaling_initializer(
            params.initializer_gain,
            mode="fan_avg",
            distribution=scaling_distributions[kind])

    raise ValueError("Unrecognized initializer: %s" % params.initializer)
def __init__(self, input_sizes, output_size, scope):
    """Create a neural network layer.

    One weight matrix "W_<i>" of shape ``(input_size, output_size)`` is
    created per entry of ``input_sizes``, plus one shared bias "b" of shape
    ``(output_size,)``.

    Args:
        input_sizes: A single input size, or a list/tuple of input sizes.
        output_size: Number of output units.
        scope: Variable scope name; falls back to "Layer" when falsy.
    """
    # A bare size is wrapped; lists (and now tuples) are taken as given.
    if not isinstance(input_sizes, (list, tuple)):
        input_sizes = [input_sizes]

    self.input_sizes = input_sizes
    self.output_size = output_size
    self.scope = scope or "Layer"

    with tf.variable_scope(self.scope):
        self.Ws = []
        for input_idx, input_size in enumerate(input_sizes):
            W_name = "W_%d" % (input_idx,)
            W_initializer = tf.random_uniform_initializer(-0.003, 0.003)
            W_var = tf.get_variable(W_name, (input_size, output_size),
                                    initializer=W_initializer)
            self.Ws.append(W_var)
        self.b = tf.get_variable("b", (output_size,),
                                 initializer=tf.constant_initializer(0))
def conv2d(x, num_filters, name, filter_size=(3, 3), stride=(1, 1), pad="SAME", dtype=tf.float32,
           collections=None):
    """
    2-dimensional convolutional layer.
    Source: https://github.com/openai/universe-starter-agent/blob/a3fdfba297c8c24d62d3c53978fb6fb26f80e76e/model.py

    Creates "W" (uniform in ±sqrt(6 / (fan_in + fan_out))) and "b" (zeros)
    under the variable scope ``name`` and returns
    ``tf.nn.conv2d(x, W, strides, pad) + b``.

    Args:
        x: Input tensor; dimension 3 of its static shape is used as the
            number of input channels.
        num_filters: Number of output feature maps.
        name: Variable scope name.
        filter_size: ``(height, width)`` of the kernel.
        stride: Two stride values placed at positions 1 and 2 of the
            stride vector.
        pad: Padding argument forwarded to ``tf.nn.conv2d``.
        dtype: Dtype used for "W".
        collections: Forwarded to ``tf.get_variable`` for both variables.
    """
    with tf.variable_scope(name):
        channels_in = int(x.get_shape()[3])
        w_shape = [filter_size[0], filter_size[1], channels_in, num_filters]
        strides = [1, stride[0], stride[1], 1]

        # Each hidden unit sees "input maps * filter height * filter width"
        # inputs.
        fan_in = np.prod(w_shape[:3])
        # Each lower-layer unit receives gradient from
        # "output maps * filter height * filter width" units.
        fan_out = np.prod(w_shape[:2]) * num_filters

        bound = np.sqrt(6. / (fan_in + fan_out))
        w = tf.get_variable("W", w_shape, dtype,
                            tf.random_uniform_initializer(-bound, bound),
                            collections=collections)
        b = tf.get_variable("b", [1, 1, 1, num_filters],
                            initializer=tf.constant_initializer(0.0),
                            collections=collections)
        conv = tf.nn.conv2d(x, w, strides, pad)
        return conv + b
def conv2d(x, num_filters, name, filter_size=(3, 3), stride=(1, 1), pad="SAME", dtype=tf.float32,
           collections=None):
    """Convolution layer with bias and no dropout.

    Creates "W" (uniform in ±sqrt(6 / (fan_in + fan_out))) and "b" (zeros)
    under the variable scope ``name``.

    Args:
        x: Input tensor; dimension 3 of its static shape is used as the
            number of input channels.
        num_filters: Number of output feature maps.
        name: Variable scope name.
        filter_size: ``(height, width)`` of the kernel.
        stride: Two stride values placed at positions 1 and 2 of the
            stride vector.
        pad: Padding argument forwarded to ``tf.nn.conv2d``.
        dtype: Dtype used for "W".
        collections: Forwarded to ``tf.get_variable`` for both variables.

    Returns:
        ``tf.nn.conv2d(x, W, strides, pad) + b``.
    """
    with tf.variable_scope(name):
        kernel_h, kernel_w = filter_size[0], filter_size[1]
        w_shape = [kernel_h, kernel_w, int(x.get_shape()[3]), num_filters]
        strides = [1, stride[0], stride[1], 1]

        # fan_in: "input maps * filter height * filter width" inputs per unit.
        fan_in = np.prod(w_shape[:3])
        # fan_out: "output maps * filter height * filter width".
        fan_out = np.prod(w_shape[:2]) * num_filters
        bound = np.sqrt(6. / (fan_in + fan_out))

        w = tf.get_variable(
            "W", w_shape, dtype,
            tf.random_uniform_initializer(-bound, bound),
            collections=collections)
        b = tf.get_variable(
            "b", [1, 1, 1, num_filters],
            initializer=tf.constant_initializer(0.0),
            collections=collections)

        # Dropout (keep_prob=0.7) on the output was tried as a guard
        # against overfitting; it turned out better without it.
        return tf.nn.conv2d(x, w, strides, pad) + b
def get_fc_layer(name, size):
    '''
    Get (or create) the weight and bias of a fully connected layer.

    name - Suffix appended to "W_" and "b_" (anything convertible to str)
    size - [inp_size, out_size]

    tf.get_variable looks the variable name up in the current scope and
    returns it; if it is not found, the initializer is used to create it.
    Both variables are placed on '/cpu:0'. W uses a uniform Xavier
    initializer; b is uniform in +/- model_options['init_scale'].

    Returns the pair (W, b).
    '''
    suffix = str(name)
    in_size, out_size = size[0], size[1]
    with tf.device('/cpu:0'):
        weight_init = tf.contrib.layers.xavier_initializer(
            uniform=True, seed=None, dtype=tf.float32)
        W = tf.get_variable('W_' + suffix, shape=[in_size, out_size],
                            initializer=weight_init)
        bias_init = tf.random_uniform_initializer(
            -model_options['init_scale'], model_options['init_scale'])
        b = tf.get_variable('b_' + suffix, shape=[out_size],
                            initializer=bias_init)
    return W, b
def encode(self, inputs, sequence_length, **kwargs):
    """Encode ``inputs`` with a single-direction dynamic RNN.

    Args:
        inputs: Input tensor forwarded to ``tf.nn.dynamic_rnn``.
        sequence_length: Per-example lengths forwarded to the RNN and
            reported as ``attention_values_length``.
        **kwargs: Extra keyword arguments for ``tf.nn.dynamic_rnn``.

    Returns:
        An ``EncoderOutput`` whose ``outputs`` and ``attention_values`` are
        both the RNN outputs and whose ``final_state`` is the RNN state.
    """
    # Default initializer for variables created in the current scope.
    scale = self.params["init_scale"]
    current_scope = tf.get_variable_scope()
    current_scope.set_initializer(tf.random_uniform_initializer(-scale, scale))

    encoder_cell = _get_rnn_cell(**self.params["rnn_cell"])
    rnn_result = tf.nn.dynamic_rnn(
        cell=encoder_cell,
        inputs=inputs,
        sequence_length=sequence_length,
        dtype=tf.float32,
        **kwargs)
    step_outputs, last_state = rnn_result

    return EncoderOutput(
        outputs=step_outputs,
        final_state=last_state,
        attention_values=step_outputs,
        attention_values_length=sequence_length)
def weight(name, shape, init='he', range=1, stddev=0.33, init_val=None):
    """Get a weight variable and register its L2 loss in collection 'l2'.

    Args:
        name: Variable name.
        shape: Variable shape.
        init: Initialization scheme: 'uniform', 'normal', 'he' or 'xavier';
            any other value selects a truncated normal with ``stddev``.
        range: Half-width of the uniform range for ``init='uniform'``.
        stddev: Standard deviation for 'normal' and the fallback scheme.
        init_val: When not None, a constant initializer with this value
            overrides ``init``.

    Returns:
        The variable.
    """
    if init_val is not None:
        chosen = tf.constant_initializer(init_val)
    elif init == 'he':
        fan_in, _ = _get_dims(shape)
        chosen = tf.random_normal_initializer(stddev=math.sqrt(2.0 / fan_in))
    elif init == 'xavier':
        fan_in, fan_out = _get_dims(shape)
        bound = math.sqrt(6.0 / (fan_in + fan_out))
        chosen = tf.random_uniform_initializer(-bound, bound)
    elif init == 'uniform':
        chosen = tf.random_uniform_initializer(-range, range)
    elif init == 'normal':
        chosen = tf.random_normal_initializer(stddev=stddev)
    else:
        chosen = tf.truncated_normal_initializer(stddev=stddev)

    variable = tf.get_variable(name, shape, initializer=chosen)
    tf.add_to_collection('l2', tf.nn.l2_loss(variable))
    return variable
def _init_embeddings(self):
    """Create the shared embedding table and embed both input sequences.

    Sets ``embedding_matrix``, ``encoder_inputs_embedded`` and
    ``decoder_train_inputs_embedded``; encoder and decoder inputs are
    looked up in the same table.
    """
    with tf.variable_scope("embedding"):
        # Uniform(-sqrt(3), sqrt(3)) has variance=1.
        half_width = math.sqrt(3)
        table_init = tf.random_uniform_initializer(-half_width, half_width)

        self.embedding_matrix = tf.get_variable(
            name="embedding_matrix",
            shape=[self.vocab_size, self.embedding_size],
            initializer=table_init,
            dtype=tf.float32)

        table = self.embedding_matrix
        self.encoder_inputs_embedded = embedding_lookup_unique(
            table, self.encoder_inputs)
        self.decoder_train_inputs_embedded = embedding_lookup_unique(
            table, self.decoder_train_inputs)
def deconv2d(x, out_shape, name, filter_size=(3, 3), stride=(1, 1), pad="SAME", dtype=tf.float32,
             collections=None, prevNumFeat=None):
    """Transposed 2-D convolution; the bias is created but not applied.

    Creates "W" (uniform in ±sqrt(6 / (fan_in + fan_out))) and "b" (zeros)
    under the variable scope ``name``.

    Args:
        x: Input tensor.
        out_shape: Output shape; its last entry is the number of filters.
        name: Variable scope name.
        filter_size: ``(height, width)`` of the kernel.
        stride: Two stride values placed at positions 1 and 2 of the
            stride vector.
        pad: Padding argument forwarded to ``tf.nn.conv2d_transpose``.
        dtype: Dtype used for "W".
        collections: Forwarded to ``tf.get_variable`` for both variables.
        prevNumFeat: Number of input channels; when None it is read from
            dimension 3 of ``x``'s static shape.

    Returns:
        The result of ``tf.nn.conv2d_transpose`` (without the bias).
    """
    with tf.variable_scope(name):
        num_filters = out_shape[-1]
        if prevNumFeat is None:
            in_channels = int(x.get_shape()[3])
        else:
            in_channels = prevNumFeat
        strides = [1, stride[0], stride[1], 1]

        # Transposed-conv filter layout: [height, width, out_channels, in_channels].
        w_shape = [filter_size[0], filter_size[1], num_filters, in_channels]

        # fan_in: "input maps * filter height * filter width".
        fan_in = np.prod(w_shape[:2]) * in_channels
        # fan_out: "output maps * filter height * filter width".
        fan_out = np.prod(w_shape[:3])
        bound = np.sqrt(6. / (fan_in + fan_out))

        w = tf.get_variable("W", w_shape, dtype,
                            tf.random_uniform_initializer(-bound, bound),
                            collections=collections)
        # NOTE(review): "b" is created but never added to the output (the
        # bias_add below is disabled).
        b = tf.get_variable("b", [num_filters],
                            initializer=tf.constant_initializer(0.0),
                            collections=collections)

        transposed = tf.nn.conv2d_transpose(x, w, tf.pack(out_shape), strides, pad)
        # transposed = tf.reshape(tf.nn.bias_add(transposed, b), transposed.get_shape())
        return transposed
def _init_aspect_embeddings(self):
    """Create the non-trainable aspect embedding table and embed the aspect.

    Sets ``input_shape``, ``aspect_embedding_matrix``,
    ``aspect_embedding_placeholder``, ``aspect_embedding_init`` (an assign
    op that loads the placeholder into the table),
    ``input_aspect_embedded`` and ``input_aspect_embedded_final``.
    """
    with tf.variable_scope("AspectEmbedding"):
        self.input_shape = tf.shape(self.inputs)

        # Uniform(-sqrt(3), sqrt(3)) has variance=1.
        # NOTE(review): this initializer is unused now; it belonged to the
        # trainable tf.get_variable("aspect_embedding_matrix", ...) version
        # of the table, which has been replaced by the constant-initialized
        # variable below.
        root3 = tf.sqrt(3.0)
        unused_initializer = tf.random_uniform_initializer(-root3, root3)

        table_shape = [self.aspect_vocab_size, self.aspect_embedding_size]
        # The table starts at zeros and is filled through the assign op.
        self.aspect_embedding_matrix = tf.Variable(
            tf.constant(0.0, shape=table_shape),
            trainable=False,
            name="aspect_embedding_matrix")
        self.aspect_embedding_placeholder = tf.placeholder(
            tf.float32,
            [self.aspect_vocab_size, self.aspect_embedding_size])
        self.aspect_embedding_init = self.aspect_embedding_matrix.assign(
            self.aspect_embedding_placeholder)

        self.input_aspect_embedded = tf.nn.embedding_lookup(
            self.aspect_embedding_matrix, self.input_aspect)  # -> [batch_size, da]

        # Insert a middle axis, then repeat it input_shape[1] times.
        embedded_shape = tf.shape(self.input_aspect_embedded)
        expanded = tf.reshape(self.input_aspect_embedded,
                              (embedded_shape[0], -1, embedded_shape[1]))
        self.input_aspect_embedded_final = tf.tile(
            expanded, (1, self.input_shape[1], 1))  # -> [batch_size, N, da]
def __init__(
        self,
        net_encode_in=None,
        net_decode_in=None,
        cell_fn=None,  # tf.nn.rnn_cell.LSTMCell,
        cell_init_args={'state_is_tuple': True},
        n_hidden=256,
        initializer=tf.random_uniform_initializer(-0.1, 0.1),
        in_sequence_length=None,
        out_sequence_length=None,
        initial_state=None,
        dropout=None,
        n_layer=1,
        # return_last = False,
        return_seq_2d=False,
        name='peeky_seq2seq',
):
    """Initialize the layer and print its configuration.

    Only ``cell_fn``, ``n_hidden``, ``dropout``, ``n_layer`` and ``name``
    are read by this constructor; the remaining parameters are accepted
    but not used here.

    Raises:
        Exception: If ``cell_fn`` is None.
    """
    Layer.__init__(self, name=name)
    if cell_fn is None:
        raise Exception("Please put in cell_fn")
    # self.inputs = layer.outputs
    summary_fields = (self.name, n_hidden, cell_fn.__name__, dropout, n_layer)
    print(" [TL] PeekySeq2seq %s: n_hidden:%d cell_fn:%s dropout:%s n_layer:%d" % summary_fields)
def __init__(
        self,
        net_encode_in=None,
        net_decode_in=None,
        cell_fn=None,  # tf.nn.rnn_cell.LSTMCell,
        cell_init_args={'state_is_tuple': True},
        n_hidden=256,
        initializer=tf.random_uniform_initializer(-0.1, 0.1),
        in_sequence_length=None,
        out_sequence_length=None,
        initial_state=None,
        dropout=None,
        n_layer=1,
        # return_last = False,
        return_seq_2d=False,
        name='attention_seq2seq',
):
    """Initialize the layer and print its configuration.

    Only ``cell_fn``, ``n_hidden``, ``dropout``, ``n_layer`` and ``name``
    are read by this constructor; the remaining parameters are accepted
    but not used here.

    Raises:
        Exception: If ``cell_fn`` is None.
    """
    Layer.__init__(self, name=name)
    if cell_fn is None:
        raise Exception("Please put in cell_fn")
    # self.inputs = layer.outputs
    # The label used to read "PeekySeq2seq" (copied from the sibling
    # layer), which made the log attribute this layer to the wrong class.
    print(" [TL] AttentionSeq2seq %s: n_hidden:%d cell_fn:%s dropout:%s n_layer:%d" %
          (self.name, n_hidden, cell_fn.__name__, dropout, n_layer))
    ## Shape layer
def weight(name, shape, init='he', range=0.1, stddev=0.01, init_val=None, group_id=0):
    """Get a weight variable and register its L2 loss.

    The L2 loss of the variable is added to the collection
    ``'l2_' + str(group_id)``.

    Args:
        name: Variable name.
        shape: Variable shape.
        init: Initialization scheme: 'uniform', 'normal', 'he' or 'xavier';
            any other value selects a truncated normal with ``stddev``.
        range: Half-width of the uniform range for ``init='uniform'``.
        stddev: Standard deviation for 'normal' and the fallback scheme.
        init_val: When not None, a constant initializer with this value
            overrides ``init``.
        group_id: Suffix of the L2 collection name.

    Returns:
        The variable.
    """
    # Identity test, not "!= None": comparing an array-valued init_val
    # with "!=" is element-wise and cannot be used as an ``if`` condition.
    if init_val is not None:
        initializer = tf.constant_initializer(init_val)
    elif init == 'uniform':
        initializer = tf.random_uniform_initializer(-range, range)
    elif init == 'normal':
        initializer = tf.random_normal_initializer(stddev=stddev)
    elif init == 'he':
        fan_in, _ = _get_dims(shape)
        std = math.sqrt(2.0 / fan_in)
        initializer = tf.random_normal_initializer(stddev=std)
    elif init == 'xavier':
        fan_in, fan_out = _get_dims(shape)
        bound = math.sqrt(6.0 / (fan_in + fan_out))
        initializer = tf.random_uniform_initializer(-bound, bound)
    else:
        initializer = tf.truncated_normal_initializer(stddev=stddev)

    var = tf.get_variable(name, shape, initializer=initializer)
    tf.add_to_collection('l2_' + str(group_id), tf.nn.l2_loss(var))
    return var
def main(_):
    """Smoke-test one training step of ``Hier_rnn_model`` on toy data.

    Builds the model under the "rnn_model" scope, feeds a fixed query,
    answer and target for the first bucket, runs one training op and
    prints the shape of the fetched query state.
    """
    with tf.Session() as sess:
        query_tokens = [[1], [2], [3], [4], [5]]
        answer_tokens = [[6], [7], [8], [9], [0], [0], [0], [0], [0], [0]]
        target_label = [1]

        config = Config
        uniform_init = tf.random_uniform_initializer(-1 * config.init_scale,
                                                     1 * config.init_scale)
        with tf.variable_scope(name_or_scope="rnn_model", initializer=uniform_init):
            model = Hier_rnn_model(config, name_scope=config.name_model)
        sess.run(tf.global_variables_initializer())

        # Feed as many query/answer positions as the first bucket defines.
        first_bucket = config.buckets[0]
        feed = {}
        for pos in range(first_bucket[0]):
            feed[model.query[pos].name] = query_tokens[pos]
        for pos in range(first_bucket[1]):
            feed[model.answer[pos].name] = answer_tokens[pos]
        feed[model.target.name] = target_label

        to_fetch = [model.b_train_op[0], model.b_query_state[0],
                    model.b_state[0], model.b_logits[0]]
        _, query_state, _, _ = sess.run(fetches=to_fetch, feed_dict=feed)
        print("query: ", np.shape(query_state))
def conv2d(x, num_filters, name, filter_size=(3, 3), stride=(1, 1), pad="SAME", dtype=tf.float32,
           collections=None):
    """Convolve ``x`` with a newly created filter bank and add a bias.

    Creates "W" (uniform in ±sqrt(6 / (fan_in + fan_out))) and "b" (zeros)
    under the variable scope ``name``.

    Args:
        x: Input tensor; dimension 3 of its static shape is used as the
            number of input channels.
        num_filters: Number of output feature maps.
        name: Variable scope name.
        filter_size: ``(height, width)`` of the kernel.
        stride: Two stride values placed at positions 1 and 2 of the
            stride vector.
        pad: Padding argument forwarded to ``tf.nn.conv2d``.
        dtype: Dtype used for "W".
        collections: Forwarded to ``tf.get_variable`` for both variables.

    Returns:
        ``tf.nn.conv2d(x, W, strides, pad) + b``.
    """
    with tf.variable_scope(name):
        stride_shape = [1, stride[0], stride[1], 1]
        filter_shape = [filter_size[0], filter_size[1], int(x.get_shape()[3]), num_filters]

        # There are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit.
        fan_in = np.prod(filter_shape[:3])
        # Each unit in the lower layer receives a gradient from: "num output
        # feature maps * filter height * filter width" / pooling size.
        fan_out = np.prod(filter_shape[:2]) * num_filters

        # Float numerator on purpose: with an integer 6 the division is an
        # integer division on Python 2, which truncates to 0 and would
        # initialize every weight to exactly zero.
        w_bound = np.sqrt(6. / (fan_in + fan_out))

        w = tf.get_variable("W", filter_shape, dtype,
                            tf.random_uniform_initializer(-w_bound, w_bound),
                            collections=collections)
        b = tf.get_variable("b", [1, 1, 1, num_filters],
                            initializer=tf.constant_initializer(0.0),
                            collections=collections)
        return tf.nn.conv2d(x, w, stride_shape, pad) + b
def v(self):
    """Build the critic: two ``dense`` layers ending in one output unit.

    Returns:
        The output of the second ``dense`` call (no activation).
    """
    with tf.variable_scope('critic'):
        weight_init = tf.random_uniform_initializer(0., 0.1)
        bias_init = tf.zeros_initializer()

        with tf.variable_scope('dense1'):
            # The first layer is built without an explicit bias initializer.
            hidden = dense(self.state_input, 100, [100], weight_init,
                           activation=tf.nn.relu6)
        with tf.variable_scope('dense2'):
            value = dense(hidden, 1, [1], weight_init, bias_init,
                          activation=None)
        return value
# Note: We need 2 return value here: mu & sigma. So it is not suitable to use lazy_property.
def get_mu_sigma(self):
    """Build the actor heads for a Gaussian policy.

    Returns:
        Tuple ``(mu, sigma + 1e-4)``: ``mu`` is the tanh head and the second
        element is the softplus head offset by ``1e-4``.
    """
    with tf.variable_scope('actor'):
        weight_init = tf.random_uniform_initializer(0., 0.1)
        shared = dense(self.state_input, 200, None, weight_init, None,
                       activation=tf.nn.relu6)

        with tf.variable_scope('mu'):
            mean = dense(shared, self.action_dim, None, weight_init, None,
                         activation=tf.nn.tanh)
        with tf.variable_scope('sigma'):
            spread = dense(shared, self.action_dim, None, weight_init, None,
                           activation=tf.nn.softplus)

        # Alternative kept for reference: scale the mean by the action bound,
        # i.e. return mean * self.config.ACTION_BOUND[1], spread + 1e-4
        return mean, spread + 1e-4
def a_prob(self):
    """Build the actor that outputs a softmax over ``self.action_dim`` units.

    Returns:
        The output of the second ``dense`` call (softmax activation).
    """
    with tf.variable_scope('actor'):
        weight_init = tf.random_uniform_initializer(0., 0.1)
        bias_init = tf.zeros_initializer()

        with tf.variable_scope('dense1'):
            hidden = dense(self.state_input, 200, None, weight_init, bias_init,
                           activation=tf.nn.relu6)
        with tf.variable_scope('dense2'):
            probabilities = dense(hidden, self.action_dim, None, weight_init,
                                  bias_init, activation=tf.nn.softmax)
        return probabilities
def conv2d(x, num_filters, name, filter_size=(3, 3), stride=(1, 1), pad="SAME", dtype=tf.float32,
           collections=None, summary_tag=None):
    """Convolve ``x`` with a new filter bank, add a bias, optionally log filters.

    Creates "W" (uniform in ±sqrt(6 / (fan_in + fan_out))) and "b" (zeros)
    under the variable scope ``name``.

    Args:
        x: Input tensor; dimension 3 of its static shape is used as the
            number of input channels.
        num_filters: Number of output feature maps.
        name: Variable scope name.
        filter_size: ``(height, width)`` of the kernel.
        stride: Two stride values placed at positions 1 and 2 of the
            stride vector.
        pad: Padding argument forwarded to ``tf.nn.conv2d``.
        dtype: Dtype used for "W".
        collections: Forwarded to ``tf.get_variable`` for both variables.
        summary_tag: When not None, an image summary of the filters (at
            most 10 images) is registered under this tag.

    Returns:
        ``tf.nn.conv2d(x, W, strides, pad) + b``.
    """
    with tf.variable_scope(name):
        stride_shape = [1, stride[0], stride[1], 1]
        filter_shape = [filter_size[0], filter_size[1], int(x.get_shape()[3]), num_filters]

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = intprod(filter_shape[:3])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        #   pooling size
        fan_out = intprod(filter_shape[:2]) * num_filters
        # initialize weights with random weights
        w_bound = np.sqrt(6. / (fan_in + fan_out))

        w = tf.get_variable("W", filter_shape, dtype,
                            tf.random_uniform_initializer(-w_bound, w_bound),
                            collections=collections)
        b = tf.get_variable("b", [1, 1, 1, num_filters],
                            initializer=tf.zeros_initializer(),
                            collections=collections)

        if summary_tag is not None:
            # Each (input channel, filter) slice becomes one grayscale
            # image of shape [height, width, 1].
            filter_images = tf.transpose(
                tf.reshape(w, [filter_size[0], filter_size[1], -1, 1]),
                [2, 0, 1, 3])
            # tf.summary.image limits the image count via ``max_outputs``;
            # ``max_images`` belonged to the older tf.image_summary API.
            tf.summary.image(summary_tag, filter_images, max_outputs=10)

        return tf.nn.conv2d(x, w, stride_shape, pad) + b
def source_embedding(self): """Returns the embedding used for the source sequence. """ return tf.get_variable( name="W", shape=[self.source_vocab_info.total_size, self.params["embedding.dim"]], initializer=tf.random_uniform_initializer( -self.params["embedding.init_scale"], self.params["embedding.init_scale"]))