def select_present(x, presence, batch_size=1, name='select_present'):
    """Reorder the entries of `x` so that, per batch row, present ones come first.

    The inverted presence mask (0 = present, 1 = absent) is offset by
    ``2 * row_index`` so that every batch row owns two partitions: an even
    one for present entries and an odd one for absent entries.  The
    partitions are concatenated in order and reshaped back to `x`'s shape.

    Args:
        x: tensor to reorder; its first axis is treated as the batch axis.
        presence: mask convertible with `tf.to_int32`; non-zero means present.
        batch_size: fallback batch size, used only when the first static
            dimension of `x` is unknown.
        name: variable scope for the created ops.

    Returns:
        A tensor with the same dynamic shape as `x`.
    """
    with tf.variable_scope(name):
        presence = 1 - tf.to_int32(presence)  # invert mask

        # `Dimension != None` evaluates to None (falsy) in TF 1.x, so the
        # static batch size must be tested through its value instead.
        bs = x.get_shape()[0]
        bs = getattr(bs, 'value', bs)  # Dimension -> int/None; ints pass through
        if bs is not None:
            batch_size = int(bs)

        num_partitions = 2 * batch_size
        r = tf.range(0, num_partitions, 2)
        r.set_shape(tf.TensorShape(batch_size))
        # presumably expands `r` so it can be added to `presence` -- helper
        # is defined elsewhere, confirm.
        r = broadcast_against(r, presence)

        presence += r

        selected = tf.dynamic_partition(x, presence, num_partitions)
        selected = tf.concat(axis=0, values=selected)
        selected = tf.reshape(selected, tf.shape(x))

    return selected
def soft_attn(self, top_recur):
    """Append an attention-weighted average of `top_recur` to itself.

    Two MLP projections of `top_recur` are scored against each other with
    `self.bilinear_classifier`, normalised by `self.softmax`, and used as
    weights in a batched matmul over `top_recur`.  The weighted average is
    concatenated onto `top_recur` along axis 2, and the static last
    dimension is declared to be ``4 * self.recur_size``.

    Variables are reused whenever `self.moving_params` is set.
    """
    reuse = True if self.moving_params is not None else None

    input_size = top_recur.get_shape().as_list()[-1]
    with tf.variable_scope('MLP', reuse=reuse):
        head_mlp, dep_mlp = self.MLP(
            top_recur,
            self.info_mlp_size,
            func=self.info_func,
            keep_prob=self.info_keep_prob,
            n_splits=2)
    with tf.variable_scope('Arcs', reuse=reuse):
        scores = self.bilinear_classifier(dep_mlp, head_mlp, keep_prob=self.info_keep_prob)
        weights = self.softmax(scores)
        attended = tf.batch_matmul(weights, top_recur)
        top_recur = tf.concat(2, [top_recur, attended])
    static_shape = [tf.Dimension(None), tf.Dimension(None), tf.Dimension(4*self.recur_size)]
    top_recur.set_shape(static_shape)
    return top_recur

#=============================================================
def broadcast_add(inputs1, inputs2):
    """Add every row of `inputs1` to every row of `inputs2`, per feature.

    Both arguments are transposed with permutation [0, 2, 1], so they must
    be rank 3; call their shapes [b, n1, d] and [b, n2, d].

    Returns:
        A tensor of shape [b, n1, n2, d] where
        ``out[b, i, j, k] = inputs1[b, i, k] + inputs2[b, j, k]``.  Only the
        last static dimension (taken from `inputs1`) is declared.
    """
    inputs1_shape = tf.shape(inputs1)
    inputs_size = inputs1.get_shape().as_list()[-1]
    inputs2_shape = tf.shape(inputs2)

    # Put the feature axis next to the batch axis so both can be folded
    # into one leading axis.
    inputs1 = tf.transpose(inputs1, [0,2,1])
    inputs2 = tf.transpose(inputs2, [0,2,1])

    # [b*d, n1, 1] + [b*d, 1, n2] broadcasts to [b*d, n1, n2].
    inputs1 = tf.reshape(inputs1, tf.pack([-1, inputs1_shape[1], 1]))
    inputs2 = tf.reshape(inputs2, tf.pack([-1, 1, inputs2_shape[1]]))
    inputs = inputs1 + inputs2

    # Pack the dynamic shape explicitly, as every other reshape here (and
    # in broadcast_mult) does, instead of relying on implicit list packing.
    inputs = tf.reshape(inputs, tf.pack([inputs1_shape[0], inputs1_shape[2], inputs1_shape[1], inputs2_shape[1]]))
    inputs = tf.transpose(inputs, [0,2,3,1])
    inputs.set_shape([tf.Dimension(None)]*3 + [tf.Dimension(inputs_size)])
    return inputs

#===============================================================
def broadcast_mult(inputs1, inputs2):
    """Multiply every row of `inputs1` with every row of `inputs2`, per feature.

    Both arguments are transposed with permutation [0, 2, 1], so they must
    be rank 3; call their shapes [b, n1, d] and [b, n2, d].

    Returns:
        A tensor of shape [b, n1, n2, d] where
        ``out[b, i, j, k] = inputs1[b, i, k] * inputs2[b, j, k]``.  Only the
        last static dimension (taken from `inputs1`) is declared.
    """
    dyn_shape1 = tf.shape(inputs1)
    feature_size = inputs1.get_shape().as_list()[-1]
    dyn_shape2 = tf.shape(inputs2)

    # Feature axis goes next to the batch axis so the two can be merged.
    feat_major1 = tf.transpose(inputs1, [0,2,1])
    feat_major2 = tf.transpose(inputs2, [0,2,1])

    # Column [b*d, n1, 1] times row [b*d, 1, n2] gives [b*d, n1, n2].
    column = tf.reshape(feat_major1, tf.pack([-1, dyn_shape1[1], 1]))
    row = tf.reshape(feat_major2, tf.pack([-1, 1, dyn_shape2[1]]))
    product = column * row

    unfolded_shape = tf.pack([dyn_shape1[0], dyn_shape1[2], dyn_shape1[1], dyn_shape2[1]])
    product = tf.reshape(product, unfolded_shape)
    product = tf.transpose(product, [0,2,3,1])
    product.set_shape([tf.Dimension(None)]*3 + [tf.Dimension(feature_size)])
    return product

#***************************************************************
def weighted_average(self, inputs, moving_params=None):
    """Multiply `inputs` by the trainable embedding matrix.

    `inputs` is flattened to ``[-1, len(self)]``, multiplied by the
    embedding matrix (its moving average when `moving_params` is given)
    and reshaped to ``[batch, bucket, self.embed_size]``, where batch and
    bucket are the first two dynamic dimensions of `inputs`.

    When `moving_params` is None the result is also added to the
    'Weights' collection.
    """
    dyn_shape = tf.shape(inputs)
    n_batch = dyn_shape[0]
    n_bucket = dyn_shape[1]
    vocab_size = len(self)

    embeddings = (self.trainable_embeddings if moving_params is None
                  else moving_params.average(self.trainable_embeddings))

    flat_inputs = tf.reshape(inputs, [-1, vocab_size])
    embedded = tf.matmul(flat_inputs, embeddings)
    embedded = tf.reshape(embedded, tf.pack([n_batch, n_bucket, self.embed_size]))
    embedded.set_shape([tf.Dimension(None), tf.Dimension(None), tf.Dimension(self.embed_size)])

    if moving_params is None:
        tf.add_to_collection('Weights', embedded)
    return embedded

#=============================================================
def flatten(self, keep_prob=1):
    """
    Reshape the 4D `self.input` into a 2D tensor, optionally with dropout.

    The product of static dimensions 1, 2 and 3 becomes the second
    dimension; the first is inferred.  The result replaces `self.input`
    and its static shape is printed.

    :param keep_prob: dropout keep probability; 1 disables dropout
    """
    self.count['flat'] += 1
    scope = 'flat_' + str(self.count['flat'])
    with tf.variable_scope(scope):
        # Collapse everything but the leading axis into one dimension.
        static_shape = self.input.get_shape()
        flat_size = tf.Dimension(static_shape[1] * static_shape[2] * static_shape[3])
        target_shape = tf.stack([-1, flat_size])
        self.input = tf.reshape(self.input, target_shape)

        # Dropout is skipped entirely when keep_prob is exactly 1.
        if keep_prob != 1:
            self.input = tf.nn.dropout(self.input, keep_prob=keep_prob)
    result_shape = str(self.input.get_shape())
    print(scope + ' output: ' + result_shape)
def batch_repeat_unpack(x, repeats=1, name=None):
    """Split the leading axis of `x` into ``(-1, repeats)``.

    `x` is reshaped to ``[-1, repeats] + shape(x)[1:]``.  The static shape
    is set to ``[x.shape[0] // repeats, repeats] + x.shape[1:]``.

    Args:
        x: tensor whose first axis is to be split.
        repeats: size of the new second axis.
        name: optional name scope; defaults to "batch-repeat-unpack".

    Returns:
        The reshaped tensor.
    """
    with tf.name_scope(name, "batch-repeat-unpack", values=[x]):
        dynamic_shape = tf.concat([[-1], [repeats], tf.shape(x)[1:]], axis=0)
        unpacked = tf.reshape(x, shape=dynamic_shape)

        rep = tf.Dimension(repeats)
        static_in = x.get_shape()
        leading = tf.TensorShape([static_in[0] // rep, rep])
        unpacked.set_shape(leading.concatenate(static_in[1:]))
        return unpacked
def count_trainable_parameters(print_model=False):
    """Count the number of trainable parameters in the current graph.

    Args:
        print_model: when true, print each trainable variable as it is
            visited.

    Returns:
        count: the number of trainable parameters
    """
    count = 0
    for var in tf.trainable_variables():
        if print_model:
            print(var)
        # Number of elements = product of the static dimensions.
        n_elements = 1
        for dim in var.get_shape():
            n_elements *= dim.value
        count += n_elements
    return count
def count_number_of_parameters():
    """Return the total element count over all trainable variables."""

    def _num_elements(var):
        # Product of the variable's static dimensions (1 for a scalar).
        size = 1
        for dim in var.get_shape():
            size *= dim.value
        return size

    return sum(_num_elements(var) for var in tf.trainable_variables())
def get_inference_input(inputs, params):
    """Build the decoding input features from a collection of raw strings.

    Each string is split with `tf.string_split`, `params.eos` is appended,
    and the sequences are padded with `params.pad` into batches of
    `params.decode_batch_size`.  Tokens are then mapped to ids through a
    lookup table built from `params.vocabulary["source"]`.

    Returns:
        dict with "source" (token ids) and "source_length" (length of each
        sequence including the appended eos), taken from a one-shot
        iterator.
    """

    def _tokenize(line):
        return tf.string_split([line]).values

    def _append_eos(tokens):
        return tf.concat([tokens, [tf.constant(params.eos)]], axis=0)

    def _to_features(tokens):
        return {"source": tokens, "source_length": tf.shape(tokens)[0]}

    dataset = tf.data.Dataset.from_tensor_slices(tf.constant(inputs))
    for transform in (_tokenize, _append_eos, _to_features):
        dataset = dataset.map(transform, num_parallel_calls=params.num_threads)

    padded_shapes = {"source": [tf.Dimension(None)], "source_length": []}
    padding_values = {"source": params.pad, "source_length": 0}
    dataset = dataset.padded_batch(params.decode_batch_size, padded_shapes, padding_values)

    features = dataset.make_one_shot_iterator().get_next()

    # Out-of-vocabulary tokens map to the id of params.unk.
    unk_id = params.mapping["source"][params.unk]
    src_table = tf.contrib.lookup.index_table_from_tensor(
        tf.constant(params.vocabulary["source"]),
        default_value=unk_id
    )
    features["source"] = src_table.lookup(features["source"])

    return features
def count_trainable_param_number():
    """Return the total number of elements across all trainable variables."""
    grand_total = 0
    for var in tf.trainable_variables():
        # Each entry of the static shape is a tf.Dimension; multiply their values.
        per_variable = 1
        for axis in var.get_shape():
            per_variable = per_variable * axis.value
        grand_total = grand_total + per_variable
    return grand_total
def print_trainable_variables(self):
    """Print each trainable variable's name and shape, then the total size."""
    banner = '*' * 80
    total = tf.Dimension(0)
    print(banner)
    for var in tf.trainable_variables():
        print('{}[{}]'.format(var.name, var.shape))
        # Element count of this variable is the product of its dimensions.
        total += np.prod(var.shape)
    print('TOTAL SIZE: {}\n{}'.format(total, banner))
def setUp(self):
    """Create axes backed by a Dimension, label lists, a range and None."""
    seven = tf.Dimension(7)
    colors = ['red', 'green', 'blue']

    # Two differently named axes sharing the same dimension object.
    self.i_7 = core.Axis('7', seven)
    self.i_7p = core.Axis('7prime', seven)

    self.i_rgb = core.Axis('rgb', colors)
    self.i_range = core.Axis('range', range(7))
    self.i_unknown = core.Axis('unknown', None)
def test_axis_value(self):
    """Axis.value is the Dimension, or a tuple of the labels given."""
    dim_axis_value = self.i_7.value
    self.assertEqual(dim_axis_value, tf.Dimension(7))

    expected_labels = tuple(range(7))
    self.assertTrue(self.i_range.value == expected_labels)
def test_repr(self):
    """repr() of a Dimension-backed Axis shows its name and Dimension."""
    expected = "Axis('7', Dimension(7))"
    actual = repr(self.i_7)
    self.assertEqual(expected, actual)
def setUp(self):
    """Create one Axis and four Axes collections used by the tests."""
    seven = tf.Dimension(7)
    eight = tf.Dimension(8)
    colors = ['red', 'green', 'blue']
    indices = range(7)

    self.i_8 = core.Axis('8', eight)

    # a0 and a1 are built from identical specifications.
    self.a0 = core.Axes([('d7', seven)])
    self.a1 = core.Axes([('d7', seven)])
    self.a2 = core.Axes([('d7', seven), ('rgb', colors)])
    self.a3 = core.Axes([('8', eight), ('range', indices)])
def test_repr(self):
    """repr() of a single-axis Axes lists the (name, Dimension) pair."""
    expected = "Axes([('d7', Dimension(7))])"
    actual = repr(self.a0)
    self.assertEqual(expected, actual)
def setUp(self):
    """Create a 7x3x8x1 LabeledTensor whose axes use four kinds of spec."""
    ones = tf.ones([7, 3, 8, 1])
    axes = [
        ('x', range(7)),                        # range of labels
        ('channel', ['red', 'green', 'blue']),  # explicit label list
        ('y', 8),                               # plain integer size
        ('z', tf.Dimension(1)),                 # Dimension object
    ]
    self.lt = core.LabeledTensor(ones, axes)
def test_repr(self):
    # Checks repr(self.lt) against an expected layout in which every
    # literal '...' is turned into the regex wildcard '.*'.
    # NOTE(review): the line breaks and indentation inside the literal are
    # significant; confirm they match the layout core.LabeledTensor.__repr__
    # actually emits.
    pattern = textwrap.dedent("""\
    <LabeledTensor '...' shape=(7, 3, 8, 1) dtype=float32
     axes=[('x', ...),
           ('channel', ...),
           ('y', Dimension(8)),
           ('z', Dimension(1))]>""")
    # Escape the whole pattern first, then swap the escaped '...' for '.*'.
    regexp = re.escape(pattern).replace(re.escape('...'), '.*')
    self.assertRegexpMatches(repr(self.lt), regexp)
def linear_classifier(self, inputs, n_classes, add_bias=True):
    """Apply a zero-initialised linear layer producing `n_classes` outputs.

    Dropout is applied first when the keep probability is a tensor or
    below 1; the same mask is shared across every axis between the first
    and the last.  With `self.moving_params` set the keep probability is 1;
    otherwise it is `self.mlp_keep_prob`, blended with
    `self.global_sigmoid` when `self.drop_gradually` is true.

    Returns:
        A tensor whose leading dynamic dimensions follow `inputs` and
        whose last static dimension is `n_classes`.
    """
    rank = len(inputs.get_shape().as_list())
    n_batch = tf.shape(inputs)[0]
    n_bucket = tf.shape(inputs)[1]
    in_features = inputs.get_shape().as_list()[-1]
    out_features = n_classes
    dynamic_out_shape = tf.pack([n_batch] + [n_bucket]*(rank-2) + [out_features])

    if self.moving_params is not None:
        keep_prob = 1
    elif self.drop_gradually:
        gate = self.global_sigmoid
        keep_prob = gate + (1-gate)*self.mlp_keep_prob
    else:
        keep_prob = self.mlp_keep_prob

    if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
        mask_shape = tf.pack([n_batch] + [1]*(rank-2) + [in_features])
        inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=mask_shape)

    flat = tf.reshape(inputs, [-1, in_features])
    logits = linalg.linear(flat,
                           out_features,
                           add_bias=add_bias,
                           initializer=tf.zeros_initializer,
                           moving_params=self.moving_params)
    logits = tf.reshape(logits, dynamic_out_shape)
    logits.set_shape([tf.Dimension(None)]*(rank-1) + [tf.Dimension(out_features)])
    return logits

#=============================================================
def assert_shape(variable, shape):
    """Check that `variable`'s static shape is compatible with `shape`.

    Delegates to `assert_is_compatible_with` on the shape returned by
    `variable.get_shape()`.

    Args:
        variable: TF Variable
        shape: a TensorShape, Dimension or tuple
    """
    static_shape = variable.get_shape()
    static_shape.assert_is_compatible_with(shape)
def expand_dims_for_broadcast(low_tensor, high_tensor):
    """Pad the shape of a lower-rank tensor with trailing 1s.

    After padding, `low_tensor` has the same rank as `high_tensor`, so the
    two can take part in broadcast operations.

    Args:
        low_tensor (Tensor): lower-rank Tensor with shape [s_0, ..., s_p]
        high_tensor (Tensor): higher-rank Tensor with shape [s_0, ..., s_p, ..., s_n]

    Note that the shape of low_tensor must be a prefix of the shape of
    high_tensor; `assert_broadcastable` is run as a control dependency of
    the reshape.

    Returns:
        Tensor: the lower-rank tensor, but with shape expanded to be
        [s_0, ..., s_p, 1, 1, ..., 1]
    """
    low_dynamic_shape = tf.shape(low_tensor)
    low_rank = tf.rank(low_tensor)
    high_rank = tf.rank(high_tensor)

    compat_check = assert_broadcastable(low_tensor, high_tensor)

    with tf.control_dependencies([compat_check]):
        # One trailing 1 for every axis that high_tensor has in excess.
        trailing_ones = tf.tile([1], [high_rank - low_rank])
        expanded_shape = tf.concat(0, [low_dynamic_shape, trailing_ones])
        result = tf.reshape(low_tensor, expanded_shape)

    # Mirror the same padding on the static shape.
    low_static = low_tensor.get_shape()
    n_extra = high_tensor.get_shape().ndims - low_static.ndims
    static_dims = list(low_static.dims)
    static_dims.extend([tf.Dimension(1)] * n_extra)
    result.set_shape(tf.TensorShape(static_dims))
    return result
def __init__(self, align='left', seq_length=None, dtype=tf.int32, name='FeedSequenceBatch'):
    """Create a Feedable SequenceBatch.

    Two rank-2 placeholders ('values' and 'mask') are created under the
    name scope `name` and handed to the parent constructor.

    Args:
        align (str): 'left' or 'right'. With 'left', values are
            left-aligned with padding on the right; with 'right', values
            are right-aligned with padding on the left. Default is 'left'.
        seq_length (int): if given, the second static dimension of both
            placeholders is fixed to this value. Default is None, which
            leaves it dynamic.
        dtype: data type of the values placeholder. Defaults to int32.
        name (str): name scope for the placeholders.

    Raises:
        ValueError: if `align` is neither 'left' nor 'right'.
    """
    if align == 'left':
        self._align_right = False
    elif align == 'right':
        self._align_right = True
    else:
        raise ValueError("align must be either 'left' or 'right'.")
    self._seq_length = seq_length

    with tf.name_scope(name):
        # Both placeholders are (batch_size, seq_length).
        values = tf.placeholder(dtype, shape=[None, None], name='values')
        mask = tf.placeholder(tf.float32, shape=[None, None], name='mask')

    if self._seq_length is not None:
        # Keep the batch dimension as is and pin the sequence dimension.
        batch_dim = values.get_shape()[0]
        fixed_shape = tf.TensorShape([batch_dim, tf.Dimension(seq_length)])
        for placeholder in (values, mask):
            placeholder.set_shape(fixed_shape)

    super(FeedSequenceBatch, self).__init__(values, mask)
def RNN(self, inputs):
    """Run the configured recurrent cell over `inputs`.

    Uses `rnn.dynamic_bidirectional_rnn` when `self.recur_bidir` is true
    (the same cell object is passed for both directions), otherwise
    `rnn.dynamic_rnn`.  Both keep probabilities are 1 whenever
    `self.moving_params` is set.

    Returns:
        (top_recur, end_recur).  The last static dimension of `top_recur`
        is declared as `self.recur_size`, doubled in the bidirectional
        case.  In the bidirectional case `end_recur` concatenates, along
        axis 1, the second halves of the forward and backward final
        tensors (presumably the output part of a (cell, output) state --
        confirm against the rnn module).
    """
    input_size = inputs.get_shape().as_list()[-1]
    cell = self.recur_cell(self._config, input_size=input_size, moving_params=self.moving_params)
    lengths = tf.reshape(tf.to_int64(self.sequence_lengths), [-1])

    training = self.moving_params is None
    ff_keep_prob = self.ff_keep_prob if training else 1
    recur_keep_prob = self.recur_keep_prob if training else 1

    if not self.recur_bidir:
        top_recur, end_recur = rnn.dynamic_rnn(cell, inputs, lengths,
                                               ff_keep_prob=ff_keep_prob,
                                               recur_keep_prob=recur_keep_prob,
                                               dtype=tf.float32)
        top_recur.set_shape([tf.Dimension(None), tf.Dimension(None), tf.Dimension(self.recur_size)])
        return top_recur, end_recur

    top_recur, fw_recur, bw_recur = rnn.dynamic_bidirectional_rnn(cell, cell, inputs, lengths,
                                                                  ff_keep_prob=ff_keep_prob,
                                                                  recur_keep_prob=recur_keep_prob,
                                                                  dtype=tf.float32)
    fw_cell, fw_out = tf.split(1, 2, fw_recur)
    bw_cell, bw_out = tf.split(1, 2, bw_recur)
    end_recur = tf.concat(1, [fw_out, bw_out])
    top_recur.set_shape([tf.Dimension(None), tf.Dimension(None), tf.Dimension(2*self.recur_size)])
    return top_recur, end_recur

#=============================================================
def linear(self, inputs, output_size, n_splits=1, add_bias=False):
    """Apply `linalg.linear` to `inputs`, with dropout during training.

    The keep probability is `self.info_keep_prob`, or 1 when
    `self.moving_params` is set.  Dropout shares one mask across every
    axis between the first and the last.

    Returns:
        A single tensor when `n_splits` is 1, otherwise the list returned
        by `linalg.linear`.  Every returned tensor has its last static
        dimension set to `output_size`.
    """
    rank = len(inputs.get_shape().as_list())
    n_batch = tf.shape(inputs)[0]
    n_bucket = tf.shape(inputs)[1]
    in_features = inputs.get_shape().as_list()[-1]
    # Not used below; retained so the set of created graph ops is unchanged.
    output_shape = tf.pack([n_batch] + [n_bucket]*(rank-2) + [output_size])
    static_shape = [tf.Dimension(None)]*(rank-1) + [tf.Dimension(output_size)]

    keep_prob = self.info_keep_prob if self.moving_params is None else 1
    if keep_prob < 1:
        mask_shape = tf.pack([n_batch] + [1]*(rank-2) + [in_features])
        inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=mask_shape)

    projected = linalg.linear(inputs,
                              output_size,
                              n_splits=n_splits,
                              add_bias=add_bias,
                              moving_params=self.moving_params)
    single = (n_splits == 1)
    pieces = [projected] if single else projected
    for piece in pieces:
        piece.set_shape(static_shape)
    return pieces[0] if single else pieces

#=============================================================
def MLP(self, inputs, output_size, func=None, keep_prob=None, n_splits=1):
    """Linear layer followed by the activation `func`, with dropout.

    `func` defaults to `self.mlp_func`.  With `self.moving_params` set the
    keep probability is forced to 1; otherwise a `keep_prob` of None falls
    back to `self.mlp_keep_prob`.

    For activations named 'gated_tanh' or 'gated_identity' twice as many
    splits are requested from `linalg.linear`, and the first half is
    concatenated pairwise with the second half along the last axis before
    `func` is applied.

    Returns:
        One activated tensor when `n_splits` is 1, otherwise a list of
        them; each has its last static dimension set to `output_size`.
    """
    rank = len(inputs.get_shape().as_list())
    n_batch = tf.shape(inputs)[0]
    n_bucket = tf.shape(inputs)[1]
    in_features = inputs.get_shape().as_list()[-1]
    # Not used below; retained so the set of created graph ops is unchanged.
    output_shape = tf.pack([n_batch] + [n_bucket]*(rank-2) + [output_size])
    static_shape = [tf.Dimension(None)]*(rank-1) + [tf.Dimension(output_size)]

    if func is None:
        func = self.mlp_func

    if self.moving_params is not None:
        keep_prob = 1
    elif keep_prob is None:
        keep_prob = self.mlp_keep_prob

    if keep_prob < 1:
        mask_shape = tf.pack([n_batch] + [1]*(rank-2) + [in_features])
        inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=mask_shape)

    gated = func.__name__ in ('gated_tanh', 'gated_identity')
    linear = linalg.linear(inputs,
                           output_size,
                           n_splits=n_splits * (2 if gated else 1),
                           add_bias=True,
                           moving_params=self.moving_params)
    if gated:
        half = len(linear)//2
        linear = [tf.concat(rank-1, [lin1, lin2])
                  for lin1, lin2 in zip(linear[:half], linear[half:])]
    if n_splits == 1:
        linear = [linear]

    for idx in range(len(linear)):
        activated = func(linear[idx])
        activated.set_shape(static_shape)
        linear[idx] = activated

    return linear[0] if n_splits == 1 else linear

#=============================================================
def double_MLP(self, inputs, n_splits=1):
    """Pairwise MLP: activate the broadcast sum of two linear projections.

    `inputs` is projected twice by a single `linalg.linear` call.  The two
    projections are combined so that every position of the first is added
    to every position of the second, `self.mlp_func` is applied, and the
    result is split into `n_splits` tensors along the last axis.

    The keep probability is `self.mlp_keep_prob`, or 1 when
    `self.moving_params` is set.

    Returns:
        One tensor when `n_splits` is 1, otherwise a list; each has rank 4
        with last static dimension `self.attn_mlp_size`.
    """
    n_batch = tf.shape(inputs)[0]
    n_bucket = tf.shape(inputs)[1]
    in_features = inputs.get_shape().as_list()[-1]
    out_features = self.attn_mlp_size
    # Not used below; retained so the set of created graph ops is unchanged.
    output_shape = tf.pack([n_batch, n_bucket, n_bucket, out_features])
    static_shape = [tf.Dimension(None), tf.Dimension(None), tf.Dimension(None), tf.Dimension(out_features)]

    keep_prob = self.mlp_keep_prob if self.moving_params is None else 1
    if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
        mask_shape = tf.pack([n_batch, 1, in_features])
        inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=mask_shape)

    first, second = linalg.linear(inputs,
                                  out_features*n_splits,
                                  n_splits=2,
                                  add_bias=True,
                                  moving_params=self.moving_params)
    # Column [b*f, n, 1] plus row [b*f, 1, n] broadcasts to [b*f, n, n].
    first = tf.reshape(tf.transpose(first, [0, 2, 1]), tf.pack([-1, n_bucket, 1]))
    second = tf.reshape(tf.transpose(second, [0, 2, 1]), tf.pack([-1, 1, n_bucket]))
    pairwise = first + second
    pairwise = tf.reshape(pairwise, tf.pack([n_batch, n_splits*out_features, n_bucket, n_bucket]))
    pairwise = tf.transpose(pairwise, [0,2,3,1])

    top_mlps = tf.split(3, n_splits, self.mlp_func(pairwise))
    for piece in top_mlps:
        piece.set_shape(static_shape)
    return top_mlps[0] if n_splits == 1 else top_mlps

#=============================================================
def conditional_diagonal_bilinear_classifier(self, inputs1, inputs2, n_classes, probs, add_bias1=True, add_bias2=True):
    """Diagonal bilinear scores, plus those scores weighted by `probs`.

    A column of ones is appended to both inputs before they are passed to
    `linalg.diagonal_bilinear`.  Rank-2 `probs` are treated as indices and
    one-hot encoded over the bucket axis; otherwise gradients through
    `probs` are stopped.

    Dropout uses the square root of the keep probability on each input.
    The keep probability is `self.mlp_keep_prob`, or 1 when
    `self.moving_params` is set.

    Returns:
        (weighted_bilin, bilin), where `weighted_bilin` is the batched
        matmul of `bilin` with `probs` expanded on axis 3.
    """
    dyn_shape = tf.shape(inputs1)
    n_batch = dyn_shape[0]
    n_bucket = dyn_shape[1]
    in_features = inputs1.get_shape().as_list()[-1]
    augmented_static_shape = [tf.Dimension(None), tf.Dimension(None), in_features+1]
    # Not used below; retained so the set of created graph ops is unchanged.
    output_shape = tf.pack([n_batch, n_bucket, n_classes, n_bucket])

    if len(probs.get_shape().as_list()) == 2:
        probs = tf.to_float(tf.one_hot(tf.to_int64(probs), n_bucket, 1, 0))
    else:
        probs = tf.stop_gradient(probs)

    keep_prob = self.mlp_keep_prob if self.moving_params is None else 1
    if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
        mask_shape = tf.pack([n_batch, 1, in_features])
        inputs1 = tf.nn.dropout(inputs1, tf.sqrt(keep_prob), noise_shape=mask_shape)
        inputs2 = tf.nn.dropout(inputs2, tf.sqrt(keep_prob), noise_shape=mask_shape)

    # Append a constant 1 feature to each input.
    inputs1 = tf.concat(2, [inputs1, tf.ones(tf.pack([n_batch, n_bucket, 1]))])
    inputs1.set_shape(augmented_static_shape)
    inputs2 = tf.concat(2, [inputs2, tf.ones(tf.pack([n_batch, n_bucket, 1]))])
    inputs2.set_shape(augmented_static_shape)

    bilin = linalg.diagonal_bilinear(inputs1, inputs2,
                                     n_classes,
                                     add_bias1=add_bias1,
                                     add_bias2=add_bias2,
                                     initializer=tf.zeros_initializer,
                                     moving_params=self.moving_params)
    weighted_bilin = tf.batch_matmul(bilin, tf.expand_dims(probs, 3))
    return weighted_bilin, bilin

#=============================================================
def conditional_bilinear_classifier(self, inputs1, inputs2, n_classes, probs, add_bias1=True, add_bias2=True):
    """Bilinear scores, plus those scores weighted by `probs`.

    A column of ones is appended to both inputs before they are passed to
    `linalg.bilinear`.  Rank-2 `probs` are treated as indices and one-hot
    encoded over the bucket axis; otherwise gradients through `probs` are
    stopped.

    The keep probability is `self.mlp_keep_prob`, or 1 when
    `self.moving_params` is set.

    Returns:
        (weighted_bilin, bilin), where `weighted_bilin` is the batched
        matmul of `bilin` with `probs` expanded on axis 3.
    """
    dyn_shape = tf.shape(inputs1)
    n_batch = dyn_shape[0]
    n_bucket = dyn_shape[1]
    in_features = inputs1.get_shape().as_list()[-1]
    augmented_static_shape = [tf.Dimension(None), tf.Dimension(None), in_features+1]
    # Not used below; retained so the set of created graph ops is unchanged.
    output_shape = tf.pack([n_batch, n_bucket, n_classes, n_bucket])

    if len(probs.get_shape().as_list()) == 2:
        probs = tf.to_float(tf.one_hot(tf.to_int64(probs), n_bucket, 1, 0))
    else:
        probs = tf.stop_gradient(probs)

    keep_prob = self.mlp_keep_prob if self.moving_params is None else 1
    if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
        mask_shape = tf.pack([n_batch, 1, in_features])
        inputs1 = tf.nn.dropout(inputs1, keep_prob, noise_shape=mask_shape)
        inputs2 = tf.nn.dropout(inputs2, keep_prob, noise_shape=mask_shape)

    # Append a constant 1 feature to each input.
    inputs1 = tf.concat(2, [inputs1, tf.ones(tf.pack([n_batch, n_bucket, 1]))])
    inputs1.set_shape(augmented_static_shape)
    inputs2 = tf.concat(2, [inputs2, tf.ones(tf.pack([n_batch, n_bucket, 1]))])
    inputs2.set_shape(augmented_static_shape)

    bilin = linalg.bilinear(inputs1, inputs2,
                            n_classes,
                            add_bias1=add_bias1,
                            add_bias2=add_bias2,
                            initializer=tf.zeros_initializer,
                            moving_params=self.moving_params)
    weighted_bilin = tf.batch_matmul(bilin, tf.expand_dims(probs, 3))
    return weighted_bilin, bilin

#=============================================================
def fc(self, output_nodes, keep_prob=1, activation_fn=tf.nn.relu, b_value=0.0, s_value=1.0, bn=True, trainable=True):
    """
    Fully connected layer applied to `self.input` in place.

    A 4D input is first flattened to 2D.  The stages run in this order:
    matmul, batch norm, bias add, scale multiply, activation, dropout.
    Each stage after the matmul can be switched off through its argument.
    The resulting static shape is printed.

    :param output_nodes: int, number of output units
    :param keep_prob: dropout keep probability; 1 disables dropout
    :param activation_fn: tf.nn function, or None for no activation
    :param b_value: initial bias value, or None for no bias
    :param s_value: initial scale value, or None for no scale
    :param bn: bool; batch norm is applied only when this is exactly True
    :param trainable: passed on to the weight, bias and scale variables
    """
    self.count['fc'] += 1
    scope = 'fc_' + str(self.count['fc'])
    with tf.variable_scope(scope):
        # Flatten a 4D input to [-1, dim1*dim2*dim3].
        if len(self.input.get_shape()) == 4:
            static_shape = self.input.get_shape()
            flat_size = tf.Dimension(static_shape[1] * static_shape[2] * static_shape[3])
            flat_shape = tf.stack([-1, flat_size])
            self.input = tf.reshape(self.input, flat_shape)

        # Weight matrix is [input_nodes, output_nodes].
        in_nodes = self.input.get_shape()[1]
        weight_shape = [in_nodes, output_nodes]
        weights = self.weight_variable(name='weights', shape=weight_shape, trainable=trainable)
        self.input = tf.matmul(self.input, weights)

        if bn is True:
            self.input = self.batch_norm(self.input, 'fc')

        if b_value is not None:
            bias = self.const_variable(name='bias', shape=[output_nodes], value=b_value, trainable=trainable)
            self.input = tf.add(self.input, bias)

        if s_value is not None:
            scale = self.const_variable(name='scale', shape=[output_nodes], value=s_value, trainable=trainable)
            self.input = tf.multiply(self.input, scale)

        if activation_fn is not None:
            self.input = activation_fn(self.input)

        if keep_prob != 1:
            self.input = tf.nn.dropout(self.input, keep_prob=keep_prob)
    result_shape = str(self.input.get_shape())
    print(scope + ' output: ' + result_shape)
def dilated_conv1d(inputs, out_channels, filter_width=2, rate=1, padding='VALID', name=None, gain=np.sqrt(2), activation=tf.nn.relu):
    '''Dilated 1-D convolution built from time_to_batch / conv1d / batch_to_time.

    Args:
      inputs: (tensor) rank-3 tensor; its static middle dimension is the width
      out_channels: number of output channels
      filter_width: passed to conv1d
      rate: dilation rate used for the time<->batch rearrangement
      padding: passed to conv1d
      name: variable scope name; must be truthy
      gain: passed to conv1d
      activation: passed to conv1d
    Outputs:
      outputs: (tensor) static shape [None, input width, out_channels]
    '''
    assert name
    with tf.variable_scope(name):
        _, in_width, _ = inputs.get_shape().as_list()

        batched_in = time_to_batch(inputs, rate=rate)
        batched_out = conv1d(batched_in,
                             out_channels=out_channels,
                             filter_width=filter_width,
                             padding=padding,
                             gain=gain,
                             activation=activation)

        _, batched_width, _ = batched_out.get_shape().as_list()
        # Crop whatever the restored width has in excess of the input width.
        crop = batched_width * rate - in_width
        outputs = batch_to_time(batched_out, rate=rate, crop_left=crop)

        # Add additional shape information.
        static_shape = tf.TensorShape([tf.Dimension(None),
                                       tf.Dimension(in_width),
                                       tf.Dimension(out_channels)])
        outputs.set_shape(static_shape)

    return outputs
def log_number_of_params():
    """Log the total element count of all trainable variables at INFO level."""

    def _num_elements(var):
        # Product of the variable's static dimensions (1 for a scalar).
        size = 1
        for dim in var.get_shape():
            size *= dim.value
        return size

    total_parameters = sum(_num_elements(var) for var in tf.trainable_variables())
    tf.logging.info('Total number of parameters: %s', total_parameters)
def get_evaluation_input(inputs, params):
    """Build evaluation features from a source corpus and its references.

    `inputs[0]` is used as the source and `inputs[1:]` as references.
    Every entry is split with `tf.string_split` and gets `params.eos`
    appended; the zipped result is padded with `params.pad` into batches
    of `params.eval_batch_size`.  Source tokens are looked up in the
    source vocabulary and reference tokens in the target vocabulary.
    Everything is placed on "/cpu:0".

    Returns:
        dict with "source", "source_length" and "references" (a tuple with
        one id tensor per reference), taken from a one-shot iterator.
    """

    def _tokenize(line):
        return tf.string_split([line]).values

    def _append_eos(tokens):
        return tf.concat([tokens, [tf.constant(params.eos)]], axis=0)

    def _to_features(*streams):
        return {
            "source": streams[0],
            "source_length": tf.shape(streams[0])[0],
            "references": streams[1:]
        }

    with tf.device("/cpu:0"):
        # One tokenised, eos-terminated dataset per input stream.
        datasets = []
        for data in inputs:
            stream = tf.data.Dataset.from_tensor_slices(data)
            stream = stream.map(_tokenize, num_parallel_calls=params.num_threads)
            stream = stream.map(_append_eos, num_parallel_calls=params.num_threads)
            datasets.append(stream)

        dataset = tf.data.Dataset.zip(tuple(datasets))
        dataset = dataset.map(_to_features, num_parallel_calls=params.num_threads)

        n_refs = len(inputs) - 1
        padded_shapes = {
            "source": [tf.Dimension(None)],
            "source_length": [],
            "references": (tf.Dimension(None),) * n_refs
        }
        padding_values = {
            "source": params.pad,
            "source_length": 0,
            "references": (params.pad,) * n_refs
        }
        dataset = dataset.padded_batch(params.eval_batch_size, padded_shapes, padding_values)

        features = dataset.make_one_shot_iterator().get_next()

        # Out-of-vocabulary tokens map to the id of params.unk.
        src_table = tf.contrib.lookup.index_table_from_tensor(
            tf.constant(params.vocabulary["source"]),
            default_value=params.mapping["source"][params.unk]
        )
        tgt_table = tf.contrib.lookup.index_table_from_tensor(
            tf.constant(params.vocabulary["target"]),
            default_value=params.mapping["target"][params.unk]
        )
        features["source"] = src_table.lookup(features["source"])
        features["references"] = tuple(
            tgt_table.lookup(reference) for reference in features["references"]
        )

    return features
def show_trainable_parameters(verbose=False):
    """Shows the number of trainable parameters in this graph.

    Prints one line per variable group (the part of the variable name
    before the first '/') followed by the overall total.  Prints
    "No model-params found." and returns when the graph has no trainable
    variables.

    Parameters
    ----------
    verbose: Boolean, optional
        Show additional information and list the number of trainable
        variables per variable, not just the total sum.
    """
    total_width = 80
    trainable_vars = tf.trainable_variables()

    if not trainable_vars:
        print("No model-params found.")
        return

    if verbose:
        print("-" * total_width)

    total_parameters = 0
    groups = {}
    for var in trainable_vars:
        # The static shape iterates as tf.Dimension objects.
        var_params = 1
        for dim in var.get_shape():
            var_params *= dim.value
        if verbose:
            print("{:69} | {:8d}".format(var.name, var_params))

        total_parameters += var_params

        group_name = var.name.split('/')[0]
        groups[group_name] = groups.get(group_name, 0) + var_params

    print("-" * total_width)
    # items() exists on Python 2 and 3; iteritems() raised AttributeError on 3.
    for group, count in groups.items():
        print("{:69} | {:8d}".format(group, count))
    print("=" * total_width)
    print("{:69} | {:8d}".format("TOTAL", total_parameters))
    print("-" * total_width)
def MLP(self, inputs, n_splits=1):
    """Linear layer plus `self.mlp_func`, registering regularisation losses.

    With `self.moving_params` set the keep probability is 1; otherwise it
    is `self.mlp_keep_prob`, blended with `self.global_sigmoid` when
    `self.drop_gradually` is true.

    When `self.moving_params` is None, the 'Weights' variable of the
    'Linear' scope is split into `n_splits` blocks W and
    ``l2_loss(W^T W - I)`` is added to 'ortho_losses' for each;
    `self.covar_loss` of every output is added to 'covar_losses'.

    Returns:
        One activated tensor when `n_splits` is 1, otherwise a list; each
        has its last static dimension set to `self.mlp_size`.
    """
    rank = len(inputs.get_shape().as_list())
    n_batch = tf.shape(inputs)[0]
    n_bucket = tf.shape(inputs)[1]
    in_features = inputs.get_shape().as_list()[-1]
    out_features = self.mlp_size
    # Not used below; retained so the set of created graph ops is unchanged.
    output_shape = tf.pack([n_batch] + [n_bucket]*(rank-2) + [out_features])
    static_shape = [tf.Dimension(None)]*(rank-1) + [tf.Dimension(out_features)]

    if self.moving_params is not None:
        keep_prob = 1
    elif self.drop_gradually:
        gate = self.global_sigmoid
        keep_prob = gate + (1-gate)*self.mlp_keep_prob
    else:
        keep_prob = self.mlp_keep_prob

    if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
        mask_shape = tf.pack([n_batch] + [1]*(rank-2) + [in_features])
        inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=mask_shape)

    linear = linalg.linear(inputs,
                           out_features,
                           n_splits=n_splits,
                           add_bias=True,
                           moving_params=self.moving_params)
    if n_splits == 1:
        linear = [linear]
    for idx in range(len(linear)):
        activated = self.mlp_func(linear[idx])
        activated.set_shape(static_shape)
        linear[idx] = activated

    if self.moving_params is None:
        with tf.variable_scope('Linear', reuse=True):
            weights = tf.get_variable('Weights')
            identity = tf.diag(tf.ones([self.mlp_size]))
            for block in tf.split(1, n_splits, weights):
                gram_minus_identity = tf.matmul(block, block, transpose_a=True) - identity
                tf.add_to_collection('ortho_losses', tf.nn.l2_loss(gram_minus_identity))
        for activated in linear:
            tf.add_to_collection('covar_losses', self.covar_loss(activated))

    return linear[0] if n_splits == 1 else linear

#=============================================================
def double_MLP(self, inputs, n_splits=1):
    """Pairwise MLP that also registers regularisation losses.

    `inputs` is projected twice by a single `linalg.linear` call.  Every
    position of the first projection is added to every position of the
    second, `self.mlp_func` is applied, and the result is split into
    `n_splits` tensors along the last axis.

    With `self.moving_params` set the keep probability is 1; otherwise it
    is `self.mlp_keep_prob`, blended with `self.global_sigmoid` when
    `self.drop_gradually` is true.

    When `self.moving_params` is None, the 'Weights' variable of the
    'Linear' scope is split into ``2 * n_splits`` blocks W and
    ``l2_loss(W^T W - I)`` is added to 'ortho_losses' for each;
    `self.covar_loss` of every output is added to 'covar_losses'.

    Returns:
        One tensor when `n_splits` is 1, otherwise a list; each has rank 4
        with last static dimension `self.mlp_size`.
    """
    n_batch = tf.shape(inputs)[0]
    n_bucket = tf.shape(inputs)[1]
    in_features = inputs.get_shape().as_list()[-1]
    out_features = self.mlp_size
    # Not used below; retained so the set of created graph ops is unchanged.
    output_shape = tf.pack([n_batch, n_bucket, n_bucket, out_features])
    static_shape = [tf.Dimension(None), tf.Dimension(None), tf.Dimension(None), tf.Dimension(out_features)]

    if self.moving_params is not None:
        keep_prob = 1
    elif self.drop_gradually:
        gate = self.global_sigmoid
        keep_prob = gate + (1-gate)*self.mlp_keep_prob
    else:
        keep_prob = self.mlp_keep_prob

    if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
        mask_shape = tf.pack([n_batch, 1, in_features])
        inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=mask_shape)

    first, second = linalg.linear(inputs,
                                  out_features*n_splits,
                                  n_splits=2,
                                  add_bias=True,
                                  moving_params=self.moving_params)
    # Column [b*f, n, 1] plus row [b*f, 1, n] broadcasts to [b*f, n, n].
    first = tf.reshape(tf.transpose(first, [0, 2, 1]), tf.pack([-1, n_bucket, 1]))
    second = tf.reshape(tf.transpose(second, [0, 2, 1]), tf.pack([-1, 1, n_bucket]))
    pairwise = first + second
    pairwise = tf.reshape(pairwise, tf.pack([n_batch, n_splits*out_features, n_bucket, n_bucket]))
    pairwise = tf.transpose(pairwise, [0,2,3,1])

    top_mlps = tf.split(3, n_splits, self.mlp_func(pairwise))
    for piece in top_mlps:
        piece.set_shape(static_shape)

    if self.moving_params is None:
        with tf.variable_scope('Linear', reuse=True):
            weights = tf.get_variable('Weights')
            identity = tf.diag(tf.ones([self.mlp_size]))
            for block in tf.split(1, 2*n_splits, weights):
                gram_minus_identity = tf.matmul(block, block, transpose_a=True) - identity
                tf.add_to_collection('ortho_losses', tf.nn.l2_loss(gram_minus_identity))
        for piece in top_mlps:
            tf.add_to_collection('covar_losses', self.covar_loss(piece))

    return top_mlps[0] if n_splits == 1 else top_mlps

#=============================================================
def conditional_diagonal_bilinear_classifier(self, inputs1, inputs2, n_classes, probs, add_bias1=True, add_bias2=True):
    """Diagonal bilinear scores, plus those scores weighted by `probs`.

    A column of ones is appended to both inputs before they are passed to
    `linalg.diagonal_bilinear`.  Rank-2 `probs` are treated as indices and
    one-hot encoded over the bucket axis; otherwise gradients through
    `probs` are stopped.

    Dropout uses the square root of the keep probability on each input.
    With `self.moving_params` set the keep probability is 1; otherwise it
    is `self.mlp_keep_prob`, blended with `self.global_sigmoid` when
    `self.drop_gradually` is true.

    Returns:
        (weighted_bilin, bilin), where `weighted_bilin` is the batched
        matmul of `bilin` with `probs` expanded on axis 3.
    """
    dyn_shape = tf.shape(inputs1)
    n_batch = dyn_shape[0]
    n_bucket = dyn_shape[1]
    in_features = inputs1.get_shape().as_list()[-1]
    augmented_static_shape = [tf.Dimension(None), tf.Dimension(None), in_features+1]
    # Not used below; retained so the set of created graph ops is unchanged.
    output_shape = tf.pack([n_batch, n_bucket, n_classes, n_bucket])

    if len(probs.get_shape().as_list()) == 2:
        probs = tf.to_float(tf.one_hot(tf.to_int64(probs), n_bucket, 1, 0))
    else:
        probs = tf.stop_gradient(probs)

    if self.moving_params is not None:
        keep_prob = 1
    elif self.drop_gradually:
        gate = self.global_sigmoid
        keep_prob = gate + (1-gate)*self.mlp_keep_prob
    else:
        keep_prob = self.mlp_keep_prob

    if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
        mask_shape = tf.pack([n_batch, 1, in_features])
        inputs1 = tf.nn.dropout(inputs1, tf.sqrt(keep_prob), noise_shape=mask_shape)
        inputs2 = tf.nn.dropout(inputs2, tf.sqrt(keep_prob), noise_shape=mask_shape)

    # Append a constant 1 feature to each input.
    inputs1 = tf.concat(2, [inputs1, tf.ones(tf.pack([n_batch, n_bucket, 1]))])
    inputs1.set_shape(augmented_static_shape)
    inputs2 = tf.concat(2, [inputs2, tf.ones(tf.pack([n_batch, n_bucket, 1]))])
    inputs2.set_shape(augmented_static_shape)

    bilin = linalg.diagonal_bilinear(inputs1, inputs2,
                                     n_classes,
                                     add_bias1=add_bias1,
                                     add_bias2=add_bias2,
                                     initializer=tf.zeros_initializer,
                                     moving_params=self.moving_params)
    weighted_bilin = tf.batch_matmul(bilin, tf.expand_dims(probs, 3))
    return weighted_bilin, bilin

#=============================================================
def conditional_bilinear_classifier(self, inputs1, inputs2, n_classes, probs, add_bias1=True, add_bias2=True):
    """Bilinear scores, plus those scores weighted by `probs`.

    A column of ones is appended to both inputs before they are passed to
    `linalg.bilinear`.  Rank-2 `probs` are treated as indices and one-hot
    encoded over the bucket axis; otherwise gradients through `probs` are
    stopped.

    With `self.moving_params` set the keep probability is 1; otherwise it
    is `self.mlp_keep_prob`, blended with `self.global_sigmoid` when
    `self.drop_gradually` is true.

    Returns:
        (weighted_bilin, bilin), where `weighted_bilin` is the batched
        matmul of `bilin` with `probs` expanded on axis 3.
    """
    dyn_shape = tf.shape(inputs1)
    n_batch = dyn_shape[0]
    n_bucket = dyn_shape[1]
    in_features = inputs1.get_shape().as_list()[-1]
    augmented_static_shape = [tf.Dimension(None), tf.Dimension(None), in_features+1]
    # Not used below; retained so the set of created graph ops is unchanged.
    output_shape = tf.pack([n_batch, n_bucket, n_classes, n_bucket])

    if len(probs.get_shape().as_list()) == 2:
        probs = tf.to_float(tf.one_hot(tf.to_int64(probs), n_bucket, 1, 0))
    else:
        probs = tf.stop_gradient(probs)

    if self.moving_params is not None:
        keep_prob = 1
    elif self.drop_gradually:
        gate = self.global_sigmoid
        keep_prob = gate + (1-gate)*self.mlp_keep_prob
    else:
        keep_prob = self.mlp_keep_prob

    if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
        mask_shape = tf.pack([n_batch, 1, in_features])
        inputs1 = tf.nn.dropout(inputs1, keep_prob, noise_shape=mask_shape)
        inputs2 = tf.nn.dropout(inputs2, keep_prob, noise_shape=mask_shape)

    # Append a constant 1 feature to each input.
    inputs1 = tf.concat(2, [inputs1, tf.ones(tf.pack([n_batch, n_bucket, 1]))])
    inputs1.set_shape(augmented_static_shape)
    inputs2 = tf.concat(2, [inputs2, tf.ones(tf.pack([n_batch, n_bucket, 1]))])
    inputs2.set_shape(augmented_static_shape)

    bilin = linalg.bilinear(inputs1, inputs2,
                            n_classes,
                            add_bias1=add_bias1,
                            add_bias2=add_bias2,
                            initializer=tf.zeros_initializer,
                            moving_params=self.moving_params)
    weighted_bilin = tf.batch_matmul(bilin, tf.expand_dims(probs, 3))
    return weighted_bilin, bilin

#=============================================================
def linear(inputs, output_size, add_bias=True, n_splits=1, initializer=None, scope=None, moving_params=None):
    """Affine map over the last axis of one or more tensors.

    All inputs are flattened to 2-D, concatenated along their feature
    axis, multiplied by a 'Weights' variable of shape
    ``[sum of input feature sizes, output_size * n_splits]`` and, when
    `add_bias` is true, offset by a 'Biases' variable.

    Args:
        inputs: a tensor, or a list/tuple of tensors.  The sequence passed
            in is never modified.
        output_size: number of output features per split.
        add_bias: whether to create and add the 'Biases' variable.
        n_splits: number of equally sized pieces to split the output into.
        initializer: weight initializer.  When this and `moving_params`
            are both None, an orthonormal block (from
            `orthonormal_initializer`) tiled `n_splits` times is used.
        scope: variable scope name; defaults to 'Linear'.
        moving_params: if given, its `average(...)` of each variable is
            used instead of the variable itself, and the weight matrix is
            not added to the 'Weights' collection.

    Returns:
        A tensor shaped like the first input except for the last
        dimension, or a list of `n_splits` such tensors split along the
        last axis when ``n_splits > 1``.
    """
    if not isinstance(inputs, (list, tuple)):
        inputs = [inputs]
    output_size *= n_splits

    with tf.variable_scope(scope or 'Linear'):
        # Reformat the input
        shapes = [a.get_shape().as_list() for a in inputs]
        total_input_size = sum(shape[-1] for shape in shapes)
        n_dims = len(shapes[0])

        input_shape = tf.shape(inputs[0])
        # `range` instead of the Python-2-only `xrange`.
        output_shape = [input_shape[i] for i in range(n_dims)]
        output_shape[-1] = output_size
        output_shape = tf.pack(output_shape)

        # Build a new list: assigning into `inputs` would mutate the
        # caller's list and fails outright for a tuple.
        flat_inputs = [tf.reshape(input_, [-1, shape[-1]])
                       for input_, shape in zip(inputs, shapes)]
        concatenation = tf.concat(1, flat_inputs)

        # Get the matrix
        if initializer is None and moving_params is None:
            mat = orthonormal_initializer(total_input_size, output_size//n_splits)
            mat = np.concatenate([mat]*n_splits, axis=1)
            initializer = tf.constant_initializer(mat)
        matrix = tf.get_variable('Weights', [total_input_size, output_size], initializer=initializer)
        if moving_params is not None:
            matrix = moving_params.average(matrix)
        else:
            tf.add_to_collection('Weights', matrix)

        # Get the bias
        if add_bias:
            bias = tf.get_variable('Biases', [output_size], initializer=tf.zeros_initializer)
            if moving_params is not None:
                bias = moving_params.average(bias)
        else:
            bias = 0

        # Do the multiplication
        new = tf.matmul(concatenation, matrix) + bias
        new = tf.reshape(new, output_shape)
        new.set_shape([tf.Dimension(None) for _ in range(n_dims-1)] + [tf.Dimension(output_size)])
        if n_splits > 1:
            return tf.split(len(new.get_shape().as_list())-1, n_splits, new)
        else:
            return new

#===============================================================
def diagonal_bilinear(inputs1, inputs2, output_size, add_bias2=True, add_bias1=True, add_bias=False, initializer=None, scope=None, moving_params=None):
    """Sum three `linear` layers: one on the broadcast product, one per input.

    Both inputs are collapsed to 3-D (all leading axes merged into one), then
    `linear` is applied to `broadcast_mult(inputs1, inputs2)` and to each
    input separately. The three results are added and the last two axes of
    the sum are swapped.

    Args:
        inputs1: first input; its last dimension must be statically known.
        inputs2: second input; its last dimension must equal that of
            `inputs1` (checked with an assert).
        output_size: output width passed to every `linear` call.
        add_bias2: accepted but not read in this function.
        add_bias1: accepted but not read in this function.
        add_bias: forwarded as `add_bias` to the `linear` call on the product
            term only; the two per-input terms never get a bias.
        initializer: forwarded to every `linear` call.
        scope: outer variable scope name (defaults to 'Bilinear'); also
            forwarded as `scope` to every `linear` call.
        moving_params: forwarded to every `linear` call.

    Returns:
        The summed tensor transposed with permutation [0, 1, 3, 2].
    """
    with tf.variable_scope(scope or 'Bilinear'):
        # Reformat the inputs
        ndims = len(inputs1.get_shape().as_list())
        shape1 = tf.shape(inputs1)
        shape2 = tf.shape(inputs2)
        bucket1 = shape1[ndims-2]
        bucket2 = shape2[ndims-2]

        depth1 = inputs1.get_shape().as_list()[-1]
        depth2 = inputs2.get_shape().as_list()[-1]
        assert depth1 == depth2

        # Fold every axis before the last two into a single batch axis.
        leading_dims = []
        batch_size = 1
        for axis in xrange(ndims-2):
            batch_size = batch_size * shape1[axis]
            leading_dims.append(shape1[axis])
        # NOTE(review): output_shape is never read below; it is still built so
        # the set of graph ops created here stays exactly as it was.
        output_shape = tf.pack(leading_dims + [bucket1, output_size, bucket2])

        inputs1 = tf.reshape(inputs1, tf.pack([batch_size, bucket1, depth1]))
        inputs2 = tf.reshape(inputs2, tf.pack([batch_size, bucket2, depth2]))
        inputs1.set_shape([tf.Dimension(None), tf.Dimension(None), tf.Dimension(depth1)])
        inputs2.set_shape([tf.Dimension(None), tf.Dimension(None), tf.Dimension(depth2)])

        product = broadcast_mult(inputs1, inputs2)

        # Keyword arguments common to all three linear layers.
        shared = dict(initializer=initializer, scope=scope, moving_params=moving_params)
        with tf.variable_scope('Bilinear'):
            bilin = linear(product, output_size, add_bias=add_bias, **shared)
        with tf.variable_scope('Linear1'):
            lin1 = linear(inputs1, output_size, add_bias=False, **shared)
            lin1 = tf.expand_dims(lin1, 2)
        with tf.variable_scope('Linear2'):
            lin2 = linear(inputs2, output_size, add_bias=False, **shared)
            lin2 = tf.expand_dims(lin2, 1)

        summed = bilin + lin1 + lin2
        return tf.transpose(summed, [0,1,3,2])

#===============================================================
def set_shape(tensor, shape):
    """Fill in the missing static shape information of a tensor.

    Each dimension of the tensor's current static shape is merged with the
    matching entry of `shape`: a known value on either side is kept, and a
    dimension unknown on both sides stays unknown. If the tensor's rank is
    not known at all, `shape` is applied as given.

    Args:
        tensor: the tensor to update; its static shape is modified in place.
        shape: the desired shape; it is passed through `as_tuple`, and its
            entries may be ints, None or `tf.Dimension` objects.

    Returns:
        The same `tensor` object, with the merged static shape set.

    Raises:
        ValueError: if `tensor` is not a tensor, if the tensor's known rank
            differs from `len(shape)`, or if a dimension known on both sides
            has different values.
    """
    if not is_tensor(tensor):
        raise ValueError('tensor must be instance of `Tensor`.')
    # ====== Test ====== #
    old_shape = tensor.get_shape()
    ndims = old_shape.ndims
    shape = as_tuple(shape)
    if ndims is None:
        # Rank unknown: there is nothing to merge with, and formatting the
        # rank-mismatch message below with a None rank would raise TypeError.
        tensor.set_shape(shape)
        return tensor
    if ndims != len(shape):
        raise ValueError("The tensor has %d dimensions, but the given shape "
                         "has %d dimension." % (ndims, len(shape)))
    # ====== DO it ====== #
    new_shape = []
    for old_value, new in zip(old_shape.as_list(), shape):
        if isinstance(new, tf.Dimension):
            new = new.value
        # matching old and new values
        if old_value is not None and new is not None and old_value != new:
            raise ValueError("Known shape information mismatch, from tensorflow"
                             ":%s, and given shape:%s." %
                             (str(old_shape.as_list()), str(shape)))
        # Prefer the value already known; otherwise take the given one
        # (which may itself be None).
        new_shape.append(new if old_value is None else old_value)
    tensor.set_shape(new_shape)
    return tensor


# ===========================================================================
# VALUE MANIPULATION
# ===========================================================================