def get_weight_variable(shape, name=None, type='xavier_uniform', regularize=True, **kwargs):
    """Create a float32 weight variable using the requested initialisation.

    Args:
        shape: Shape of the variable to create.
        name: Optional variable name. When given, the variable is obtained
            through ``tf.get_variable``; when ``None`` an unnamed
            ``tf.Variable`` is created instead.
        type: Initialisation scheme, one of ``'xavier_uniform'``,
            ``'xavier_normal'``, ``'he_normal'``, ``'he_uniform'``,
            ``'caffe_uniform'``, ``'simple'`` or ``'bilinear'``.
        regularize: When true, the variable is added to the
            ``'weight_variables'`` collection.
        **kwargs: ``stddev`` (default 0.02) is read by the ``'simple'`` scheme.

    Returns:
        The created variable.

    Raises:
        ValueError: If ``type`` is not one of the known schemes.
    """
    # True when ``initial`` is a concrete tensor rather than an initializer
    # function that still has to be given a shape.
    initialise_from_constant = False

    if type == 'xavier_uniform':
        initial = xavier_initializer(uniform=True, dtype=tf.float32)
    elif type == 'xavier_normal':
        initial = xavier_initializer(uniform=False, dtype=tf.float32)
    elif type == 'he_normal':
        initial = variance_scaling_initializer(uniform=False, factor=2.0, mode='FAN_IN', dtype=tf.float32)
    elif type == 'he_uniform':
        initial = variance_scaling_initializer(uniform=True, factor=2.0, mode='FAN_IN', dtype=tf.float32)
    elif type == 'caffe_uniform':
        initial = variance_scaling_initializer(uniform=True, factor=1.0, mode='FAN_IN', dtype=tf.float32)
    elif type == 'simple':
        stddev = kwargs.get('stddev', 0.02)
        initial = tf.truncated_normal(shape, stddev=stddev, dtype=tf.float32)
        initialise_from_constant = True
    elif type == 'bilinear':
        weights = _bilinear_upsample_weights(shape)
        initial = tf.constant(weights, shape=shape, dtype=tf.float32)
        initialise_from_constant = True
    else:
        raise ValueError('Unknown initialisation requested: %s' % type)

    if name is None:
        # This keeps the option open to use unnamed Variables.
        if not initialise_from_constant:
            # tf.Variable needs a tensor (or a zero-argument callable), not an
            # initializer function, so materialise the initial value first.
            initial = initial(shape)
        weight = tf.Variable(initial)
    elif initialise_from_constant:
        # The shape is already carried by the initial tensor.
        weight = tf.get_variable(name, initializer=initial)
    else:
        weight = tf.get_variable(name, shape=shape, initializer=initial)

    if regularize:
        tf.add_to_collection('weight_variables', weight)

    return weight
def calculate_loss_mix2(self, predictions, predictions_class, predictions_encoder, labels, **unused_params):
    """Combine the label loss with an MSE loss against encoded labels.

    The labels are pushed through ``FLAGS.encoder_layers`` affine layers whose
    parameters are read from ``autoencoder_layer<i>.model`` text files under
    ``FLAGS.autoencoder_dir`` (all rows but the last are the weights, the last
    row is the bias). Hidden layers use ReLU; the output of the final layer is
    standardised along axis 1.

    Args:
        predictions: Passed to ``self.calculate_loss`` together with ``labels``.
        predictions_class: Not used by this loss.
        predictions_encoder: Compared against the encoded labels with
            ``self.calculate_mseloss``.
        labels: Label tensor; cast to float32 before encoding.

    Returns:
        Tuple ``(loss, encoded_labels)`` where ``loss`` is the label loss plus
        0.1 times the encoder MSE loss.
    """
    with tf.name_scope("loss_mix2"):
        encoded = tf.cast(labels, tf.float32)
        last_layer = FLAGS.encoder_layers - 1

        for layer in range(FLAGS.encoder_layers):
            params = np.loadtxt(FLAGS.autoencoder_dir + 'autoencoder_layer%d.model' % layer)
            kernel = tf.constant(params[:-1, :], dtype=tf.float32)
            offset = tf.reshape(tf.constant(params[-1, :], dtype=tf.float32), [-1])
            encoded = tf.nn.xw_plus_b(encoded, kernel, offset)

            if layer < last_layer:
                encoded = tf.nn.relu(encoded)
                continue

            # Final layer: zero-mean / unit-variance along axis 1.
            row_mean = tf.reduce_mean(encoded, axis=1, keep_dims=True)
            row_std = tf.sqrt(
                tf.reduce_mean(tf.square(encoded - row_mean), axis=1, keep_dims=True))
            encoded = (encoded - row_mean) / (row_std + 1e-6)

        encoder_term = 0.1 * self.calculate_mseloss(predictions_encoder, encoded)
        label_term = self.calculate_loss(predictions, labels)
        return encoder_term + label_term, encoded
def _conv_layer(self, bottom, filter_size, filter_num, scope_name, bottom_channel=None, padding='SAME'):
    """Stride-1 2-D convolution followed by a bias add.

    Args:
        bottom: Input tensor. When ``bottom_channel`` is not given it must have
            a static rank of 4; the last dimension is used as channel count.
        filter_size: Iterable with the spatial kernel size.
        filter_num: Number of output filters.
        scope_name: Variable scope the layer's variables are created in.
        bottom_channel: Optional number of input channels.
        padding: Padding mode forwarded to ``tf.nn.conv2d``.

    Returns:
        The convolution output with the bias added.
    """
    if not bottom_channel:
        _batch, _height, _width, bottom_channel = bottom.get_shape().as_list()

    kernel_shape = [*filter_size, bottom_channel, filter_num]

    with tf.variable_scope(scope_name):
        # NOTE: the kernel is created as non-trainable; only the bias is trained.
        kernel_init = tf.truncated_normal(kernel_shape, dtype=tf.float32, stddev=1e-1)
        kernel = tf.Variable(kernel_init, trainable=False, name='weights')
        conv = tf.nn.conv2d(bottom, kernel, [1, 1, 1, 1], padding=padding)

        bias_init = tf.constant(0.0, shape=[filter_num], dtype=tf.float32)
        biases = tf.Variable(bias_init, trainable=True, name='bias')
        return tf.nn.bias_add(conv, biases)
def omniglot():
    """Open an interactive TensorFlow session and define ``update_tensor``.

    Only the session creation and the nested helper are present in this
    function body.
    """
    sess = tf.InteractiveSession()

    # Disabled debugging scratch code, kept verbatim as an inert string.
    """ def wrapper(v): return tf.Print(v, [v], message="Printing v") v = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='Matrix') sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) temp = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='temp') temp = wrapper(v) #with tf.control_dependencies([temp]): temp.eval() print 'Hello'"""

    def update_tensor(V, dim2, val):
        """Return a copy of ``V`` where, per row, entry ``dim2[i]`` is ``val[i]``.

        Update tensor V, with index(:,dim2[:]) by val[:].
        """
        val = tf.cast(val, V.dtype)

        def body(_, elems):
            # Tuple parameters in the signature are Python-2-only syntax;
            # unpacking inside the body works on both Python 2 and 3.
            v, d2, chg = elems
            d2_int = tf.cast(d2, tf.int32)
            # Splice the new value in place of element d2_int, then slice back
            # to the original static length.
            return tf.slice(tf.concat_v2([v[:d2_int], [chg], v[d2_int+1:]], axis=0),
                            [0], [v.get_shape().as_list()[0]])

        Z = tf.scan(body, elems=(V, dim2, val),
                    initializer=tf.constant(1, shape=V.get_shape().as_list()[1:], dtype=tf.float32),
                    name="Scan_Update")
        return Z
def bag_of_tokens(config, labels, label_lengths):
    """Sum the output embeddings of every label token along axis 1.

    Args:
        config: Provides ``train_output_embeddings`` and, when that flag is
            false, ``output_embedding_matrix``.
        labels: Label ids; converted to a constant tensor via ``np.array``.
        label_lengths: Currently unused.

    Returns:
        ``tf.reduce_sum`` over axis 1 of the gathered embeddings.
    """
    if not config.train_output_embeddings:
        embedding_table = tf.constant(config.output_embedding_matrix)
    else:
        # Reuse the variable created under the 'embed' scope.
        with tf.variable_scope('embed', reuse=True):
            embedding_table = tf.get_variable('output_embedding')

    label_ids = tf.constant(np.array(labels))
    token_vectors = tf.gather(embedding_table, label_ids)
    print('embedded_output before', token_vectors)

    # NOTE: no length mask is applied, so every position along axis 1
    # contributes to the sum.
    return tf.reduce_sum(token_vectors, axis=1)
def highway(self, input_1, input_2, size_1, size_2, l2_penalty=1e-8, layer_size=1):
    """Gate a projection of ``input_2`` against ``input_1``, highway style.

    Each layer computes ``relu(output @ W + b)`` and mixes it with ``input_1``
    using a sigmoid transform gate computed from ``input_1``.

    Args:
        input_1: Carry input, multiplied by a ``[size_1, size_1]`` matrix.
        input_2: Input to the first output projection (width ``size_2``).
        size_1: Width of ``input_1`` and of the result.
        size_2: Width of ``input_2``.
        l2_penalty: Scale of the L2 terms added to
            ``tf.GraphKeys.REGULARIZATION_LOSSES`` for every W and b.
        layer_size: Number of stacked layers.

    Returns:
        The gated output of the last layer.
    """
    output = input_2
    for idx in range(layer_size):
        # From the second layer on ``output`` is already ``size_1`` wide, so a
        # ``[size_2, size_1]`` matrix would only fit when size_1 == size_2.
        in_size = size_2 if idx == 0 else size_1

        with tf.name_scope('output_lin_%d' % idx):
            W = tf.Variable(tf.truncated_normal([in_size, size_1], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[size_1]), name="b")
            tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty*tf.nn.l2_loss(W))
            tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty*tf.nn.l2_loss(b))
            output = tf.nn.relu(tf.nn.xw_plus_b(output, W, b))

        with tf.name_scope('transform_lin_%d' % idx):
            W = tf.Variable(tf.truncated_normal([size_1, size_1], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[size_1]), name="b")
            tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty*tf.nn.l2_loss(W))
            tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty*tf.nn.l2_loss(b))
            transform_gate = tf.sigmoid(tf.nn.xw_plus_b(input_1, W, b))

        # The carry gate is the complement of the transform gate.
        carry_gate = tf.constant(1.0) - transform_gate
        output = transform_gate * output + carry_gate * input_1
    return output
def conv_block(self, input, out_size, layer, kernalsize=3, l2_penalty=1e-8, shortcut=False):
    """Convolution + bias + batch norm + ReLU, with an optional 1x1 shortcut.

    For ``layer > 0`` the kernel is ``[kernalsize, 1, in_channels, out_size]``
    with SAME padding; otherwise it is ``[kernalsize, in_shape[2], 1, out_size]``
    with VALID padding.

    Args:
        input: Input tensor with a static rank-4 shape.
        out_size: Number of output channels.
        layer: Layer index; selects kernel shape / padding and names the ops.
        kernalsize: Kernel extent along the first spatial axis.
        l2_penalty: Scale of the L2 terms added to
            ``tf.GraphKeys.REGULARIZATION_LOSSES``.
        shortcut: When true, a 1x1 convolution of ``input`` is added to the
            main convolution.

    Returns:
        The ReLU-activated, batch-normalised tensor.
    """
    in_shape = input.get_shape().as_list()
    deeper_layer = layer > 0

    if deeper_layer:
        filter_shape, pad_mode = [kernalsize, 1, in_shape[3], out_size], "SAME"
    else:
        filter_shape, pad_mode = [kernalsize, in_shape[2], 1, out_size], "VALID"

    W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W-%s" % layer)
    b = tf.Variable(tf.constant(0.1, shape=[out_size]), name="b-%s" % layer)
    for param in (W, b):
        tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES,
                             value=l2_penalty*tf.nn.l2_loss(param))

    conv = tf.nn.conv2d(input, W, strides=[1, 1, 1, 1], padding=pad_mode, name="conv-%s" % layer)

    if shortcut:
        shortshape = [1, 1, in_shape[3], out_size]
        Ws = tf.Variable(tf.truncated_normal(shortshape, stddev=0.05), name="Ws-%s" % layer)
        tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES,
                             value=l2_penalty*tf.nn.l2_loss(Ws))
        conv = conv + tf.nn.conv2d(input, Ws, strides=[1, 1, 1, 1], padding="SAME",
                                   name="conv-shortcut-%s" % layer)

    biased = tf.nn.bias_add(conv, b)
    normalised = tf.contrib.layers.batch_norm(biased, center=True, scale=True, epsilon=1e-5, decay=0.9)
    return tf.nn.relu(normalised, name="relu-%s" % layer)
def test_without_residuals(self):
    """ExtendedMultiRNNCell must match MultiRNNCell when used with defaults."""
    inputs = tf.constant(np.random.randn(1, 2))
    state = (tf.constant(np.random.randn(1, 2)),
             tf.constant(np.random.randn(1, 2)))

    def make_cells():
        # Two GRU cells with two units each.
        return [tf.contrib.rnn.GRUCell(2) for _ in range(2)]

    # The constant initializer makes both stacks start from equal weights.
    with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
        standard_cell = tf.contrib.rnn.MultiRNNCell(make_cells(), state_is_tuple=True)
        res_standard = standard_cell(inputs, state, scope="standard")

        test_cell = rnn_cell.ExtendedMultiRNNCell(make_cells())
        res_test = test_cell(inputs, state, scope="test")

    with self.test_session() as sess:
        sess.run([tf.global_variables_initializer()])
        expected, actual = sess.run([res_standard, res_test])

    # Make sure it produces the same results as the standard cell
    self.assertAllClose(expected[0], actual[0])
    self.assertAllClose(expected[1][0], actual[1][0])
    self.assertAllClose(expected[1][1], actual[1][1])
def _test_with_residuals(self, inputs, **kwargs):
    """Build a residual ExtendedMultiRNNCell, run it once and return the result.

    Args:
        inputs: Value convertible to a tensor; fed to the cell as its input.
        **kwargs: Extra keyword arguments forwarded to
            ``rnn_cell.ExtendedMultiRNNCell``.

    Returns:
        The evaluated result of calling the cell on ``inputs`` and a random
        two-part state.
    """
    input_tensor = tf.convert_to_tensor(inputs)
    initial_state = (tf.constant(np.random.randn(1, 2)),
                     tf.constant(np.random.randn(1, 2)))

    with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
        gru_stack = [tf.contrib.rnn.GRUCell(2) for _ in range(2)]
        test_cell = rnn_cell.ExtendedMultiRNNCell(
            gru_stack, residual_connections=True, **kwargs)
        res_test = test_cell(input_tensor, initial_state, scope="test")

    with self.test_session() as sess:
        sess.run([tf.global_variables_initializer()])
        return sess.run(res_test)
def test_eos_masking(self):
    """Rows flagged as finished keep only the EOS entry; others are untouched."""
    probs = tf.constant([[-.2, -.2, -.2, -.2, -.2],
                         [-.3, -.3, -.3, 3, 0],
                         [5, 6, 0, 0, 0]])
    eos_token = 0
    # Only the middle row is marked as previously finished.
    previously_finished = tf.constant([0, 1, 0], dtype=tf.float32)
    masked = beam_search.mask_probs(probs, eos_token, previously_finished)

    with self.test_session() as sess:
        probs_value = sess.run(probs)
        masked_value = sess.run(masked)

    # Unfinished rows pass through unchanged.
    for row in (0, 2):
        np.testing.assert_array_equal(probs_value[row], masked_value[row])

    # Finished row: EOS column is 0, every other column is float32 min.
    np.testing.assert_equal(masked_value[1][0], 0)
    lowest = np.finfo('float32').min
    for col in range(1, 5):
        np.testing.assert_approx_equal(masked_value[1][col], lowest)
def get_loss(pred, label, end_points, reg_weight=0.001):
    """Classification loss plus an orthogonality penalty on the transform.

    Args:
        pred: Logits, B*NUM_CLASSES.
        label: Integer labels, B.
        end_points: Dict holding the BxKxK ``'transform'`` tensor.
        reg_weight: Weight of the orthogonality penalty.

    Returns:
        ``classify_loss + reg_weight * mat_diff_loss``.
    """
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)
    classify_loss = tf.reduce_mean(per_example)
    tf.summary.scalar('classify loss', classify_loss)

    # Enforce the transformation as orthogonal matrix: penalise T * T^T - I.
    transform = end_points['transform']  # BxKxK
    K = transform.get_shape()[1].value
    gram = tf.matmul(transform, tf.transpose(transform, perm=[0, 2, 1]))
    identity = tf.constant(np.eye(K), dtype=tf.float32)
    mat_diff_loss = tf.nn.l2_loss(gram - identity)
    tf.summary.scalar('mat loss', mat_diff_loss)

    return classify_loss + mat_diff_loss * reg_weight
def get_loss(pred, label, end_points, reg_weight=0.001):
    """Per-point classification loss plus an orthogonality penalty.

    Args:
        pred: Logits, BxNxC.
        label: Integer labels, BxN.
        end_points: Dict holding the BxKxK ``'transform'`` tensor.
        reg_weight: Weight of the orthogonality penalty.

    Returns:
        ``classify_loss + reg_weight * mat_diff_loss``.
    """
    per_point = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)
    classify_loss = tf.reduce_mean(per_point)
    tf.scalar_summary('classify loss', classify_loss)

    # Enforce the transformation as orthogonal matrix: penalise T * T^T - I.
    transform = end_points['transform']  # BxKxK
    K = transform.get_shape()[1].value
    gram = tf.matmul(transform, tf.transpose(transform, perm=[0, 2, 1]))
    identity = tf.constant(np.eye(K), dtype=tf.float32)
    mat_diff_loss = tf.nn.l2_loss(gram - identity)
    tf.scalar_summary('mat_loss', mat_diff_loss)

    return classify_loss + mat_diff_loss * reg_weight
def get_loss(l_pred, seg_pred, label, seg, weight, end_points):
    """Weighted sum of label loss, segmentation loss and transform penalty.

    Args:
        l_pred: Logits for the object label.
        seg_pred: Segmentation logits, batch_size x point_num x part_cat_num.
        label: Integer object labels.
        seg: Integer part labels, batch_size x point_num.
        weight: Mixing weight; ``weight`` goes to the segmentation loss and
            ``1 - weight`` to the label loss.
        end_points: Dict holding the BxKxK ``'transform'`` tensor.

    Returns:
        Tuple ``(total_loss, label_loss, per_instance_label_loss, seg_loss,
        per_instance_seg_loss, per_instance_seg_pred_res)``.
    """
    per_instance_label_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=l_pred, labels=label)
    label_loss = tf.reduce_mean(per_instance_label_loss)

    # Average the per-point loss over the points of each instance first.
    per_point_seg_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=seg_pred, labels=seg)
    per_instance_seg_loss = tf.reduce_mean(per_point_seg_loss, axis=1)
    seg_loss = tf.reduce_mean(per_instance_seg_loss)

    per_instance_seg_pred_res = tf.argmax(seg_pred, 2)

    # Enforce the transformation as orthogonal matrix: penalise T * T^T - I.
    transform = end_points['transform']  # BxKxK
    K = transform.get_shape()[1].value
    gram = tf.matmul(transform, tf.transpose(transform, perm=[0, 2, 1]))
    mat_diff = gram - tf.constant(np.eye(K), dtype=tf.float32)
    mat_diff_loss = tf.nn.l2_loss(mat_diff)

    total_loss = weight * seg_loss + (1 - weight) * label_loss + mat_diff_loss * 1e-3

    return (total_loss, label_loss, per_instance_label_loss,
            seg_loss, per_instance_seg_loss, per_instance_seg_pred_res)
def mnist_batcher_in_tanh_vector(batch_size, capacity=256, min_after_dequeue=128):
    """Shuffled batches of flattened MNIST training images scaled to [-1, 1].

    Args:
        batch_size: Number of examples per batch.
        capacity: Queue capacity passed to ``tf.train.shuffle_batch``.
        min_after_dequeue: Minimum queue size after a dequeue.

    Returns:
        The result of ``tf.train.shuffle_batch`` over ``[images, labels]``
        where labels are int64.
    """
    (train_images, train_labels), (_, _) = keras.datasets.mnist.load_data()

    pixels = tf.cast(tf.constant(train_images), tf.float32)
    # Flatten every image and map pixel values from [0, 255] to [-1, 1].
    pixels = keras.layers.Flatten()(pixels) / 127.5 - 1.
    targets = tf.cast(train_labels, tf.int64)

    return tf.train.shuffle_batch(
        [pixels, targets],
        batch_size=batch_size,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue,
        enqueue_many=True,
    )
def _validate(self, machine, n=10):
    """Generate an n-by-n thumbnail grid from ``machine`` for inspection.

    ``n`` latent vectors are drawn and each is repeated on ``n`` consecutive
    rows; the fixed 10-row label table is stacked ``n`` times.

    Args:
        machine: Object exposing ``generate(Z, Y)``.
        n: Grid side length. NOTE(review): the label table has 10 rows, so
            this presumably has to stay at 10 — confirm.

    Returns:
        The output of ``make_png_thumbnail`` for the generated batch.
    """
    total = n * n

    latent = np.random.normal(0., 1., size=[n, self.arch['z_dim']])
    # Side-by-side copies followed by the reshape put the copies of one
    # latent vector on consecutive rows.
    latent = np.concatenate([latent] * n, axis=1)
    latent = np.reshape(latent, [total, -1]).astype(np.float32)

    label_table = np.asarray(
        [[5, 0, 0],
         [9, 0, 0],
         [12, 0, 0],
         [17, 0, 0],
         [19, 0, 0],
         [161, 0, 0],
         [170, 0, 0],
         [170, 16, 0],
         [161, 9, 4],
         [19, 24, 50]],
        dtype=np.int64)
    label_table = np.concatenate([label_table] * n, axis=0)

    Z = tf.constant(latent)
    Y = tf.constant(label_table)
    generated = machine.generate(Z, Y)  # 100, 64, 64, 3
    return make_png_thumbnail(generated, n)
def batch_norm_layer(self, to_be_normalized, is_training):
    """Batch-normalise ``to_be_normalized`` with learnable beta and gamma.

    Moments are taken over axes [0, 1, 2], so the input is assumed to be
    rank 4 with the feature dimension last — TODO confirm against callers.

    Args:
        to_be_normalized: Tensor to normalise; its last dimension sizes
            ``beta`` and ``gamma``.
        is_training: Python truth value. When true the batch moments are used
            and the moving averages are updated; otherwise the moving
            averages are used.

    Returns:
        The normalised tensor.
    """
    # The Python flag is turned into a constant so that it can drive tf.cond.
    if is_training:
        train_phase = tf.constant(1)
    else:
        train_phase = tf.constant(-1)
    # Learnable shift (beta) and scale (gamma), one entry per feature.
    beta = tf.Variable(tf.constant(0.0, shape=[to_be_normalized.shape[-1]]), name='beta', trainable=True)
    gamma = tf.Variable(tf.constant(1.0, shape=[to_be_normalized.shape[-1]]), name='gamma', trainable=True)
    # axises = np.arange(len(to_be_normalized.shape) - 1) # change to apply tensorflow 1.3
    axises = [0,1,2]
    print("start nn.moments")
    print("axises : " + str(axises))
    batch_mean, batch_var = tf.nn.moments(to_be_normalized, axises, name='moments')
    print("nn.moments successful")
    ema = tf.train.ExponentialMovingAverage(decay=0.5)
    def mean_var_with_update():
        # Update the moving averages before handing back the batch moments.
        ema_apply_op = ema.apply([batch_mean, batch_var])
        with tf.control_dependencies([ema_apply_op]):
            return tf.identity(batch_mean), tf.identity(batch_var)
    # NOTE(review): ema.average() looks up averages registered by ema.apply(),
    # which is only called inside mean_var_with_update — this appears to rely
    # on tf.cond building the true branch first; confirm before reordering.
    mean, var = tf.cond(train_phase > 0, mean_var_with_update, lambda: (ema.average(batch_mean), ema.average(batch_var))) # if is training --> update
    normed = tf.nn.batch_normalization(to_be_normalized, mean, var, beta, gamma, 1e-3)
    return normed
def repeat(tensor: tf.Tensor, repeats: int, axis: int) -> tf.Tensor:
    """
    Repeat every element of ``tensor`` ``repeats``-times along ``axis``.

    The requested axis is moved to the front, the repeated entries are
    gathered there, and the original axis order is restored afterwards.

    .. note::
        Chaining of this op may produce TF warnings although the performance seems to be unaffected.

    :param tensor: TF tensor to be repeated (the size of ``axis`` is read from its static shape)
    :param repeats: number of repeats
    :param axis: axis to repeat
    :return: tensor with repeated elements
    """
    static_shape = tensor.get_shape().as_list()
    all_axes = np.arange(len(tensor.shape))

    # Permutation that brings `axis` to position 0, and its inverse.
    to_front = np.hstack(([axis], np.delete(all_axes, axis)))
    to_original = np.hstack((all_axes[1:axis+1], [0], all_axes[axis+1:]))

    # Output position p reads source position floor(p / repeats).
    positions = tf.range(0, static_shape[axis]*repeats)
    source_index = tf.cast(tf.floor(positions/tf.constant(repeats)), 'int32')

    axis_first = tf.transpose(tensor, to_front)
    expanded = tf.gather(axis_first, source_index)
    return tf.transpose(expanded, to_original)
def test_dense_to_sparse(self):
    """ Test if `dense_to_sparse` works properly."""
    with tf.Session().as_default():
        dense = tf.constant([[1., 2., 0.], [0., 0., 3.]], dtype=tf.float32)

        # Without a mask the non-zero entries are expected.
        sparse = dense_to_sparse(dense)
        expected_indices = np.array([[0, 0], [0, 1], [1, 2]])
        expected_values = np.array([1., 2., 3.])
        self.assertTrue(np.array_equal(sparse.indices.eval(), expected_indices))
        self.assertTrue(np.array_equal(sparse.values.eval(), expected_values))

        # With a mask the masked positions are expected, zeros included.
        mask = tf.constant([[0, 1, 0], [1, 0, 0]], dtype=tf.int32)
        masked = dense_to_sparse(dense, mask)
        expected_masked_indices = np.array([[0, 1], [1, 0]])
        expected_masked_values = np.array([2., 0.])
        self.assertTrue(np.array_equal(masked.indices.eval(), expected_masked_indices))
        self.assertTrue(np.array_equal(masked.values.eval(), expected_masked_values))
def test_repeat(self):
    """ Test if `repeat` works the same as np.repeat."""
    dtype_pairs = [(np.int32, tf.int32), (np.float32, tf.float32)]
    repeat_counts = [1, 2, 3, 11]

    with tf.Session().as_default():
        # try different tensor types
        for npdtype, tfdtype in dtype_pairs:
            samples = [np.array([0, 1, 2, 3], dtype=npdtype),
                       np.array([[0, 1], [2, 3], [4, 5]], dtype=npdtype)]
            for init_value in samples:
                # and all their axes
                for axis in range(init_value.ndim):
                    for repeats in repeat_counts:
                        tensor = tf.constant(init_value, dtype=tfdtype)
                        actual = repeat(tensor, repeats=repeats, axis=axis).eval()
                        expected = np.repeat(init_value, repeats=repeats, axis=axis)
                        self.assertTrue(np.all(actual == expected))
def __init__(self, tag, x, summary_fn=tf.summary.scalar, summary_args=(), scope=None):
    """
    Initializes an Average.

    Arguments
        tag: Tag for the summary. When falsy, ``x.name + '_avg'`` is used.
        x: Tensor to be averaged over multiple runs.
        summary_fn: Function used for creating a summary.
        summary_args: Keyword arguments passed to the summary function, as a
            mapping or an iterable of ``(key, value)`` pairs.
        scope: Optional variable scope name; defaults to the class name.
    """
    # The default is an empty tuple, which cannot be **-unpacked directly.
    # dict() accepts both mappings and (empty) sequences of pairs.
    summary_kwargs = dict(summary_args)

    with tf.variable_scope(scope or type(self).__name__):
        counter = tf.Variable(name="counter", initial_value=tf.constant(0),
                              dtype=tf.int32, trainable=False)
        running_sum = tf.Variable(name="running_sum", initial_value=tf.constant(0.),
                                  dtype=tf.float32, trainable=False)

        # NOTE: this is 0 / 0 until the update op has run at least once.
        self._running_average = running_sum / tf.cast(counter, tf.float32)
        self._summary = summary_fn(tag or x.name + '_avg', self._running_average, **summary_kwargs)
        self._update_op = tf.group(counter.assign_add(1), running_sum.assign_add(x))
        self._reset_op = tf.group(counter.assign(0), running_sum.assign(0.))
def simulate_dynamics(initial_pos, initial_vel, stepsize, n_steps, energy_fn):
    """Integrate positions and velocities with a leapfrog scheme.

    A half velocity step and a full position step are taken first, then the
    loop alternates full velocity / position steps while the counter is below
    ``n_steps``, and a final half velocity step closes the trajectory.

    Args:
        initial_pos: Starting position tensor.
        initial_vel: Starting velocity tensor.
        stepsize: Integration step size.
        n_steps: Upper bound for the loop counter, which starts at 0.
        energy_fn: Callable mapping a position to an energy; its sum is
            differentiated with respect to the position.

    Returns:
        Tuple ``(final_pos, final_vel)``.
    """
    def energy_gradient(position):
        # Gradient of the summed energy with respect to the position.
        return tf.gradients(tf.reduce_sum(energy_fn(position)), position)[0]

    def full_step(pos, vel, step, i):
        next_vel = vel - step * energy_gradient(pos)
        next_pos = pos + step * next_vel
        return [next_pos, next_vel, step, tf.add(i, 1)]

    def keep_going(pos, vel, step, i):
        return tf.less(i, n_steps)

    # Opening half step for the velocity, full step for the position.
    vel_half_step = initial_vel - 0.5 * stepsize * energy_gradient(initial_pos)
    pos_full_step = initial_pos + stepsize * vel_half_step

    counter = tf.constant(0)
    final_pos, new_vel, _, _ = tf.while_loop(
        keep_going, full_step, [pos_full_step, vel_half_step, stepsize, counter])

    # Closing half step for the velocity.
    final_vel = new_vel - 0.5 * stepsize * energy_gradient(final_pos)
    return final_pos, final_vel
def smoothing_cross_entropy(self, logits, labels, vocab_size, confidence=0.9):
    """Cross entropy with label smoothing to limit over-confidence.

    confidence = 1.0 - label_smoothing, where label_smoothing = 0.1.
    From http://github.com/tensorflow/tensor2tensor

    Args:
        logits: Unnormalised scores with ``vocab_size`` entries on the last axis.
        labels: Class ids; cast to int32 before one-hot encoding.
        vocab_size: Number of classes.
        confidence: Probability mass assigned to the true label.

    Returns:
        The softmax cross entropy against the smoothed targets, minus the
        best achievable value for those targets.
    """
    # The tensor list belongs in ``values``; passed positionally it would land
    # in ``default_name``.
    with tf.name_scope("smoothing_cross_entropy", values=[logits, labels]):
        # Low confidence is given to all non-true labels, uniformly.
        low_confidence = (1.0 - confidence) / tf.to_float(vocab_size - 1)
        # Normalizing constant is the best cross-entropy value with soft targets.
        # We subtract it just for readability, makes no difference on learning.
        normalizing = -(confidence * tf.log(confidence)
                        + tf.to_float(vocab_size - 1) * low_confidence
                        * tf.log(low_confidence + 1e-20))
        # Soft targets.
        soft_targets = tf.one_hot(
            tf.cast(labels, tf.int32),
            depth=vocab_size,
            on_value=confidence,
            off_value=low_confidence)
        xentropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=soft_targets)
        return xentropy - normalizing
def apply_gradients(self, grads_and_vars, global_step=None):
    """Reset ``self.mini_flag`` to zero, then apply ``grads_and_vars``.

    The gradients are applied through the wrapped optimizer under a control
    dependency on the flag reset. Neither gradient aggregation nor gradient
    zeroing is performed here.

    Args:
        grads_and_vars (list): Gradient/variable pairs forwarded to the
            wrapped optimizer's ``apply_gradients``.
        global_step (None, optional): tensorflow global_step variable.

    Returns:
        (tf.Operation): The op returned by the wrapped optimizer.
    """
    reset_value = tf.constant([0], dtype=tf.float32)
    # NOTE: the attribute is rebound to the output of the assign op.
    self.mini_flag = tf.assign(self.mini_flag, reset_value)

    with tf.control_dependencies([self.mini_flag]):
        return self._optimizer.apply_gradients(grads_and_vars, global_step=global_step)
def init_var(self):
    """Declare beta/gamma and set up the moving-average bookkeeping.

    Missing initial values default to zeros for beta and ones for gamma, each
    of length ``self.n_out``. The batch statistics and the EMA apply op are
    left as ``None``.
    """
    is_trainable = self.trainable
    param_shape = [self.n_out]

    with tf.variable_scope(self.scope):
        if self.init_beta is None:
            self.init_beta = tf.constant(0.0, shape=[self.n_out])
        if self.init_gamma is None:
            self.init_gamma = tf.constant(1.0, shape=[self.n_out])

        self.beta = self.declare_var(
            param_shape, init_val=self.init_beta, name='beta', trainable=is_trainable)
        self.gamma = self.declare_var(
            param_shape, init_val=self.init_gamma, name='gamma', trainable=is_trainable)

        self.ema = tf.train.ExponentialMovingAverage(decay=self.decay)
        self.batch_mean = None
        self.batch_var = None
        self.ema_apply_op = None
def test(self):
    """Pin how the two CTC decoders treat repeated labels."""
    def decode_greedily(beam_search: bool, merge_repeated: bool):
        # Five time steps, batch of one, two classes: the argmax path is
        # class 0, class 0, class 1, class 0, class 0.
        logits = tf.constant(np.array([[[1.0, 0.0]],
                                       [[1.0, 0.0]],
                                       [[0.0, 1.0]],
                                       [[1.0, 0.0]],
                                       [[1.0, 0.0]]], dtype=np.float32))
        sequence_length = tf.constant(np.array([5], dtype=np.int32))

        if beam_search:
            decoded, log_probabilities = tf.nn.ctc_beam_search_decoder(
                inputs=logits,
                sequence_length=sequence_length,
                merge_repeated=merge_repeated,
                beam_width=1)
        else:
            decoded, log_probabilities = tf.nn.ctc_greedy_decoder(
                inputs=logits,
                sequence_length=sequence_length,
                merge_repeated=merge_repeated)

        (decoded_list,) = decoded
        dense_first = tf.sparse_tensor_to_dense(decoded_list)[0]
        return list(tf.Session().run(dense_first))

    self.assertEqual([0], decode_greedily(beam_search=True, merge_repeated=True))
    self.assertEqual([0, 0], decode_greedily(beam_search=True, merge_repeated=False))
    self.assertEqual([0, 0], decode_greedily(beam_search=False, merge_repeated=True))
    self.assertEqual([0, 0, 0, 0], decode_greedily(beam_search=False, merge_repeated=False))
def bag_hinge_loss(config, preds, sent_mask, flip_sent_mask, hete_mask, sent_trgt, sent_num, margin=0.20):
    """Bag-level hinge loss, averaged over the bags selected by ``hete_mask``.

    Per bag the loss is ``max(0, margin - min(sent+) + max(sent-))``, i.e.
    ``max(0, margin - (min(sent+) - max(sent-)))``.

    NOTE(review): the arithmetic below assumes ``sent_trgt`` and the masks are
    0/1 valued and that ``flip_sent_mask`` is the complement of ``sent_mask``
    — confirm against the caller.

    Args:
        config: Provides ``batch_size`` and ``data_type``.
        preds: Sentence scores, [batch_size, sent_num].
        sent_mask: Multiplied into the scores used for the negative maximum.
        flip_sent_mask: Added to the scores used for the positive minimum.
        hete_mask: Per-bag weight, [batch_size]; also the averaging denominator.
        sent_trgt: Sentence targets, [batch_size, sent_num].
        sent_num: Number of sentences per bag.
        margin: Hinge margin.

    Returns:
        Sum of the weighted bag losses divided by ``sum(hete_mask) + 1e-12``.
    """
    flip_sent_trgt = \
        tf.constant(1, shape=[config.batch_size, sent_num], dtype=config.data_type) - \
        sent_trgt

    # Adding the flipped target / mask raises every entry that should not
    # compete for the minimum over positive sentences.
    pos_preds = preds + flip_sent_trgt + flip_sent_mask  # [batch_size, sent_num]
    # Multiplying zeroes every entry that should not compete for the maximum
    # over negative sentences.
    neg_preds = preds * flip_sent_trgt * sent_mask  # [batch_size, sent_num]

    min_pos_pred = tf.reduce_min(pos_preds, 1)
    max_neg_pred = tf.reduce_max(neg_preds, 1)

    zeros = tf.constant(0, shape=[config.batch_size], dtype=config.data_type)
    hinge_loss = hete_mask * tf.reduce_max(tf.pack(
        [zeros, (margin - min_pos_pred + max_neg_pred)], axis=1), 1)  # [batch_size]

    avg_hinge_loss = tf.reduce_sum(hinge_loss) / (tf.reduce_sum(hete_mask) + 1e-12)
    return avg_hinge_loss
def loss(self, img_batch, label_batch):
    """Create the network, run inference on the input batch and compute loss.

    Args:
        img_batch: batch of pre-processed images; cast to float32.
        label_batch: batch of labels, resized to the network output and
            converted by ``self.prepare_label``.

    Returns:
        Mean pixel-wise softmax cross-entropy loss.
    """
    images = tf.cast(img_batch, tf.float32)
    raw_output = self._create_network(images, keep_prob=tf.constant(0.5))
    flat_logits = tf.reshape(raw_output, [-1, n_classes])

    # Need to resize labels and convert using one-hot encoding.
    output_size = tf.stack(raw_output.get_shape()[1:3])
    prepared_labels = self.prepare_label(label_batch, output_size)
    flat_targets = tf.reshape(prepared_labels, [-1, n_classes])

    # Pixel-wise softmax loss.
    pixel_loss = tf.nn.softmax_cross_entropy_with_logits(logits=flat_logits, labels=flat_targets)
    return tf.reduce_mean(pixel_loss)
def layer_norm_all(h, base, num_units, scope):
    """Layer norm (faster version) applied to several stacked blocks at once.

    Performs layer norm on multiple base at once (ie, i, g, j, o for lstm) by
    reshaping ``h`` so that every block of ``num_units`` is normalised in
    parallel.

    Args:
        h: Tensor reshapeable to ``[-1, base, num_units]``.
        base: Number of stacked blocks.
        num_units: Width of every block.
        scope: Variable scope holding the gain and bias variables.

    Returns:
        The normalised tensor of width ``base * num_units``, scaled by the
        learned gain and shifted by the learned bias.
    """
    with tf.variable_scope(scope):
        h_reshape = tf.reshape(h, [-1, base, num_units])
        mean = tf.reduce_mean(h_reshape, [2], keep_dims=True)
        var = tf.reduce_mean(tf.square(h_reshape - mean), [2], keep_dims=True)
        epsilon = tf.constant(1e-3)
        rstd = tf.rsqrt(var + epsilon)
        h_reshape = (h_reshape - mean) * rstd
        # reshape back to original
        h = tf.reshape(h_reshape, [-1, base * num_units])

        # Gain and bias must match the width of ``h``; a fixed 4 * num_units
        # only fits the base == 4 case.
        param_shape = [base * num_units]
        alpha = tf.get_variable('layer_norm_alpha', param_shape,
                                initializer=tf.constant_initializer(1.0), dtype=tf.float32)
        bias = tf.get_variable('layer_norm_bias', param_shape,
                               initializer=tf.constant_initializer(0.0), dtype=tf.float32)
        return (h * alpha) + bias
def random_access_problem(which=1):
    """Build a ``NumpyGenerator`` for one of the two ``raputil`` scenarios.

    Args:
        which: ``1`` selects ``Problem.scenario1()``; any other value selects
            ``Problem.scenario2()``. The mask ``maskX_`` is only attached
            when ``which == 2``.

    Returns:
        The generator with noise variance, validation / initialisation data
        and ``nsubprocs`` attached.
    """
    import raputil as ru

    opts = ru.Problem.scenario1() if which == 1 else ru.Problem.scenario2()
    p = ru.Problem(**opts)

    sample_x = p.genX(1)
    sample_y = p.fwd(sample_x)
    A = p.S
    _n_rows, n_cols = A.shape

    nbatches = int(math.ceil(1000 / sample_x.shape[1]))
    prob = NumpyGenerator(p=p, nbatches=nbatches, A=A, opts=opts, iid=(which == 1))

    if which == 2:
        active = (np.arange(n_cols) % (n_cols // 2) < opts['Nu']).astype(np.float32)
        prob.maskX_ = tf.expand_dims(tf.constant(active), 1)

    _, prob.noise_var = p.add_noise(sample_y)

    # for legacy reasons -- want to compare against a previous run, so one
    # batch is generated and discarded before the ones that are kept.
    p.genYX(nbatches)
    prob.yval, prob.xval = p.genYX(nbatches)
    prob.yinit, prob.xinit = p.genYX(nbatches)

    import multiprocessing as mp
    prob.nsubprocs = mp.cpu_count()
    return prob
def pwlin_grid(r_, rvar_, theta_, dtheta=.75):
    """piecewise linear with noise-adaptive grid spacing.

    returns xhat,dxdr where
        q = |r| * dtheta / sqrt(rvar), clipped to [0, ntheta - 1]
        xhat = r * interp(q, theta)

    all but the last dimensions of theta must broadcast to r_
    e.g. r.shape = (500,1000) is compatible with theta.shape=(500,1,7)
    """
    ntheta = int(theta_.get_shape()[-1])
    grid_scale_ = dtheta / tf.sqrt(rvar_)

    # Position of |r| on the grid, with a trailing axis for the grid points.
    grid_pos_ = tf.clip_by_value(
        tf.expand_dims(tf.abs(r_) * grid_scale_, -1), 0.0, ntheta - 1.0)
    centers_ = tf.constant(np.arange(ntheta), dtype=tf.float32)

    # new dimension for distance to closest bin centers (or center)
    hat_weights_ = tf.maximum(0., 1.0 - tf.abs(grid_pos_ - centers_))
    gain_ = tf.reduce_sum(theta_ * hat_weights_, axis=-1)

    # apply the gain (learnable)
    xhat_ = gain_ * r_
    dxdr_ = tf.gradients(xhat_, r_)[0]
    return (xhat_, dxdr_)
def interp1d_(xin_, xp, yp_):
    """
    Interpolate a uniformly sampled piecewise linear function.

    Mapping elements from xin_ to the result. Input values will be clipped to
    range of xp.

    xin_ : input tensor (real)
    xp   : x grid (constant -- must be a 1d numpy array, uniformly spaced)
    yp_  : tensor of the result values at the gridpoints xp
    """
    import tensorflow as tf

    clipped_ = tf.clip_by_value(xin_, xp.min(), xp.max())
    spacing = xp[1] - xp[0]

    assert len(xp.shape) == 1, 'only 1d interpolation'
    assert xp.shape[0] == int(yp_.get_shape()[0])
    assert abs(np.diff(xp) / spacing - 1.0).max() < 1e-6, 'must be uniformly sampled'

    # Trailing axis so that every input is compared against every grid point.
    expanded_ = tf.expand_dims(clipped_, -1)
    dt = yp_.dtype
    # Hat weights: 1 at the grid point, falling linearly to 0 one step away.
    wt_ = tf.maximum(tf.constant(0., dtype=dt),
                     1 - abs(expanded_ - tf.constant(xp, dtype=dt)) / spacing)
    return tf.reduce_sum(wt_ * yp_, axis=-1)
def __init__(self, config):
    """Set up empty registries and the hyper-parameter variables.

    Args:
        config: Provides ``lr_rates`` (mapping of name to value), ``momentum``,
            ``weight_decay`` and ``lr_rate``. Each value becomes a float32
            variable.
    """
    self.layers = {}
    self.weights = {}
    self.biases = {}
    self.losses = {}
    self.regular_losses = {}
    self.trainable = {}
    self.summaries = {}

    # set parameters
    self.lr_rates = {}
    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` is 2-only.
    for key, val in config.lr_rates.items():
        self.lr_rates[key] = tf.get_variable('lr_rates/'+key, initializer=tf.constant(val), dtype=tf.float32)
    self.momentum = tf.get_variable('momentum', initializer=tf.constant(config.momentum), dtype=tf.float32)
    self.weight_decay = tf.get_variable('weight_decay', initializer=tf.constant(config.weight_decay), dtype=tf.float32)
    self.lr_rate = tf.get_variable('lr_rate', initializer=tf.constant(config.lr_rate), dtype=tf.float32)
def _embed_sentences(self):
    """Tensorflow implementation of Simple but Tough-to-Beat Baseline

    Returns:
        Tuple ``(sentence_embeddings, saved)`` where the embeddings have the
        common component projected out and ``saved`` maps ``'embeddings'``,
        ``'a'``, ``'p'`` and ``'ccx'`` to the corresponding tensors.
    """
    # Get word features
    word_embeddings = self._get_embedding()
    word_feats = tf.nn.embedding_lookup(word_embeddings, self.input)

    # Get marginal estimates and scaling term
    batch_size = tf.shape(word_feats)[0]
    a = tf.pow(10.0, self._get_a_exp())
    p = tf.constant(self.marginals, dtype=tf.float32, name='marginals')
    token_marginals = tf.nn.embedding_lookup(p, self.input)
    token_weights = tf.reshape(a / (a + token_marginals), (batch_size, self.mx_len, 1))

    # Compute initial sentence embedding: weighted sum scaled by 1 / length
    inv_length = tf.reshape(1.0 / tf.to_float(self.input_lengths), (batch_size, 1))
    S = inv_length * tf.reduce_sum(token_weights * word_feats, axis=1)

    # Compute common component
    S_centered = S - tf.reduce_mean(S, axis=0)
    _, _, V = tf.svd(S_centered, full_matrices=False, compute_uv=True)
    self.tf_ccx = tf.stop_gradient(tf.gather(tf.transpose(V), 0))

    # Common component removal
    ccx = tf.reshape(self._get_common_component(), (1, self.d))
    sv = {'embeddings': word_embeddings, 'a': a, 'p': p, 'ccx': ccx}
    # (1, d) times (d, 1) broadcasts to the d x d outer product.
    projector = ccx * tf.transpose(ccx)
    return S - tf.matmul(S, projector), sv
def _get_embedding(self):
    """
    Return the embedding matrix, assembled from these row blocks in order:

    Row 0 is a constant 0 vector for no token
    Row 1 is a randomly initialised variable for UNKNOWN
    The next rows are a variable initialised from self.embeddings_train
    Remaining rows (if any) are a randomly initialised variable
    """
    pad_row = tf.constant(0.0, dtype=tf.float32, shape=(1, self.d))
    seed = self.seed - 1
    unknown_row = tf.Variable(tf.random_normal((1, self.d), stddev=SD, seed=seed))
    pretrained_rows = tf.Variable(self.embeddings_train, dtype=tf.float32)
    blocks = [pad_row, unknown_row, pretrained_rows]

    # Words of the dictionary not covered by the pretrained block.
    n_remaining = self.word_dict.num_words() - len(self.embedding_words_train)
    if n_remaining > 0:
        blocks.append(
            tf.Variable(tf.random_normal((n_remaining, self.d), stddev=SD, seed=seed)))

    self.U = tf.concat(blocks, axis=0, name='embedding_matrix')
    return self.U
def pixel_wise_cross_entropy_loss_weighted(logits, labels, class_weights):
    '''
    Weighted cross entropy loss, with a weight per class

    Every pixel's cross entropy is scaled by the sum of its label entries
    multiplied by the class weights.

    :param logits: Network output before softmax
    :param labels: Ground truth masks
    :param class_weights: A list of the weights for each class
    :return: weighted cross entropy loss
    '''
    n_class = len(class_weights)
    flat_shape = [-1, n_class]

    flat_logits = tf.reshape(logits, flat_shape)
    flat_labels = tf.reshape(labels, flat_shape)

    weights = tf.constant(np.array(class_weights, dtype=np.float32))
    per_pixel_weight = tf.reduce_sum(tf.multiply(flat_labels, weights), axis=1)

    per_pixel_loss = tf.nn.softmax_cross_entropy_with_logits(logits=flat_logits, labels=flat_labels)
    return tf.reduce_mean(tf.multiply(per_pixel_loss, per_pixel_weight))
def image_reading(path: str, resized_size: Tuple[int, int]=None, data_augmentation: bool=False,
                  padding: bool=False) -> Tuple[tf.Tensor, tf.Tensor]:
    """Read, decode and resize (or pad) a single-channel image.

    Args:
        path: Path of the image file. Files whose extension is ``jpg`` are
            decoded as JPEG, everything else as PNG.
        resized_size: Target size forwarded to the resize / padding step.
        data_augmentation: When true, ``augment_data`` is applied.
        padding: When true, ``padding_inputs_width`` is used instead of a
            plain resize.

    Returns:
        Tuple ``(image, img_width)``.
    """
    # Read image
    image_content = tf.read_file(path, name='image_reader')

    # The extension is the last dot-separated component; index 1 would pick
    # the wrong piece for any path containing more than one dot.
    extension = tf.string_split([path], '.').values[-1]
    image = tf.cond(tf.equal(extension, tf.constant('jpg', dtype=tf.string)),
                    true_fn=lambda: tf.image.decode_jpeg(image_content, channels=1,
                                                         try_recover_truncated=True),  # TODO channels = 3 ?
                    false_fn=lambda: tf.image.decode_png(image_content, channels=1),
                    name='image_decoding')

    # Data augmentation
    if data_augmentation:
        image = augment_data(image)

    # Padding
    if padding:
        with tf.name_scope('padding'):
            image, img_width = padding_inputs_width(image, resized_size,
                                                    increment=CONST.DIMENSION_REDUCTION_W_POOLING)
    # Resize
    else:
        image = tf.image.resize_images(image, size=resized_size)
        img_width = tf.shape(image)[1]

    with tf.control_dependencies([tf.assert_equal(image.shape[:2], resized_size)]):
        return image, img_width
def add_input_op(self, xavier):
    """Create the input / output embedding matrices and embed the input.

    Args:
        xavier: Initializer used for trainable embeddings that have no
            pretrained matrix.

    Returns:
        Tuple ``(inputs, output_embed_matrix)`` where ``inputs`` is the
        embedded ``self.input_placeholder``
        (batch size x max length x embed_size).
    """
    with tf.variable_scope('embed'):
        # first the embed the input
        if self.config.train_input_embeddings:
            # Test against None explicitly: the truth value of a multi-element
            # NumPy matrix is ambiguous and raises.
            if self.config.input_embedding_matrix is not None:
                initializer = tf.constant_initializer(self.config.input_embedding_matrix)
            else:
                initializer = xavier
            input_embed_matrix = tf.get_variable(
                'input_embedding',
                shape=(self.config.dictionary_size, self.config.embed_size),
                initializer=initializer)
        else:
            input_embed_matrix = tf.constant(self.config.input_embedding_matrix)

        # dictionary size x embed_size
        assert input_embed_matrix.get_shape() == (self.config.dictionary_size, self.config.embed_size)

        # now embed the output
        if self.config.train_output_embeddings:
            output_embed_matrix = tf.get_variable(
                'output_embedding',
                shape=(self.config.output_size, self.config.output_embed_size),
                initializer=xavier)
        else:
            output_embed_matrix = tf.constant(self.config.output_embedding_matrix)

        assert output_embed_matrix.get_shape() == (self.config.output_size, self.config.output_embed_size)

        inputs = tf.nn.embedding_lookup([input_embed_matrix], self.input_placeholder)
        # batch size x max length x embed_size
        assert inputs.get_shape()[1:] == (self.config.max_length, self.config.embed_size)

        return inputs, output_embed_matrix
def calculate_loss_mix(self, predictions, predictions_class, labels, **unused_params):
    """Label loss plus 0.1 times an auxiliary loss chosen by ``FLAGS.support_type``.

    Auxiliary targets per support type:
        "class":    labels mapped through the one-hot class assignment read
                    from ``FLAGS.class_file``, binarised to 0/1.
        "frequent": the first ``FLAGS.encoder_size`` label columns.
        "encoder":  labels pushed through the stored autoencoder layers (ReLU
                    on hidden layers, sigmoid on the last), compared with
                    ``self.calculate_mseloss``.
        otherwise:  the labels concatenated ``FLAGS.moe_layers`` times along
                    axis 1.

    Args:
        predictions: Main predictions, scored against ``labels``.
        predictions_class: Auxiliary predictions, scored against the targets
            described above.
        labels: Label tensor.

    Returns:
        ``calculate_loss(predictions, labels) + 0.1 * auxiliary_loss``.
    """
    with tf.name_scope("loss_mix"):
        float_labels = tf.cast(labels, tf.float32)
        support = FLAGS.support_type

        if support == "class":
            assignment = np.loadtxt(FLAGS.class_file)
            class_map = tf.one_hot(tf.constant(assignment, dtype=tf.int32), FLAGS.encoder_size)
            class_counts = tf.matmul(float_labels, class_map)
            ones = tf.ones(tf.shape(class_counts))
            zeros = tf.zeros(tf.shape(class_counts))
            # A class is active when at least one of its labels is set.
            targets = tf.where(tf.greater(class_counts, zeros), ones, zeros)
            aux_loss = self.calculate_loss(predictions_class, targets)

        elif support == "frequent":
            targets = float_labels[:, 0:FLAGS.encoder_size]
            aux_loss = self.calculate_loss(predictions_class, targets)

        elif support == "encoder":
            targets = float_labels
            last_layer = FLAGS.encoder_layers - 1
            for layer in range(FLAGS.encoder_layers):
                params = np.loadtxt(FLAGS.autoencoder_dir + 'autoencoder_layer%d.model' % layer)
                # All rows but the last are the weights; the last is the bias.
                kernel = tf.constant(params[:-1, :], dtype=tf.float32)
                offset = tf.reshape(tf.constant(params[-1, :], dtype=tf.float32), [-1])
                targets = tf.nn.xw_plus_b(targets, kernel, offset)
                activation = tf.nn.relu if layer < last_layer else tf.nn.sigmoid
                targets = activation(targets)
            aux_loss = self.calculate_mseloss(predictions_class, targets)

        else:
            targets = float_labels
            for _ in range(FLAGS.moe_layers - 1):
                targets = tf.concat((targets, float_labels), axis=1)
            aux_loss = self.calculate_loss(predictions_class, targets)

        main_loss = self.calculate_loss(predictions, labels)
        return main_loss + 0.1 * aux_loss
def calculate_loss_mix(self, predictions, predictions_class, labels, **unused_params):
    """Main loss plus 0.1 times the summed loss of each class-prediction slice.

    ``predictions_class`` is cut along axis 1 into ``FLAGS.moe_layers``
    consecutive slices, each as wide as ``labels``; every slice is scored
    against ``labels`` with ``self.calculate_loss`` and the results are added.

    Returns:
        ``self.calculate_loss(predictions, labels) + 0.1 * summed_slice_loss``.
    """
    with tf.name_scope("loss_softmax_mix"):
        vocab_size = labels.get_shape().as_list()[1]
        aux_loss = tf.constant(0.0)
        for block in range(FLAGS.moe_layers):
            start = block * vocab_size
            block_predictions = predictions_class[:, start:start + vocab_size]
            aux_loss = aux_loss + self.calculate_loss(block_predictions, labels)
        main_loss = self.calculate_loss(predictions, labels)
        return main_loss + 0.1*aux_loss
def calculate_loss(self, predictions, labels, **unused_params):
    """Masked binary cross-entropy over ``FLAGS.top_k`` copies of each label row.

    Each label row is repeated ``FLAGS.top_k`` times and compared with
    ``predictions`` (presumably one prediction row per copy -- confirm against
    the caller). For every example only the first ``min(#labels, top_k)``
    copies contribute; the result is the masked sum divided by the number of
    contributing rows.

    Returns:
        Scalar loss tensor.
    """
    with tf.name_scope("loss_xent"):
        epsilon = 10e-6  # keeps log() and the final division finite
        top_k = FLAGS.top_k
        origin_labels = tf.cast(labels, tf.float32)
        vocab_size = origin_labels.get_shape().as_list()[1]

        # Repeat every label row top_k times, then flatten back to 2-D.
        tiled = tf.tile(tf.reshape(origin_labels, [-1, 1, vocab_size]), [1, top_k, 1])
        float_labels = tf.reshape(tiled, [-1, vocab_size])

        positive_term = float_labels * tf.log(predictions + epsilon)
        negative_term = (1 - float_labels) * tf.log(1 - predictions + epsilon)
        element_loss = tf.negative(positive_term + negative_term)

        # Row j of an example is active while j < min(number of labels, top_k).
        top_k_float = tf.constant(top_k, dtype=tf.float32)
        num_labels = tf.minimum(tf.reduce_sum(origin_labels, axis=1), top_k_float)
        mask = tf.reshape(
            tf.sequence_mask(num_labels, top_k_float, dtype=tf.float32), [-1])

        row_loss = tf.reduce_sum(element_loss, 1)
        return tf.reduce_sum(row_loss*mask)/(tf.reduce_sum(mask)+epsilon)
def threshold_from_predictions(y, y_pred, false_positive_margin=0, recall=1):
    """Determine a score threshold for classifying examples as positive.

    Args:
        y: labels; any array-like whose non-zero / True entries mark the
            positive examples (boolean or 0/1 integer labels both work).
        y_pred: scores from the classifier, same length as ``y``.
        false_positive_margin: the threshold is first set to achieve the
            desired recall and is then lowered to include an additional
            fraction of negative labelled examples equal to this value.
            (This allows adding a buffer to the threshold while maintaining
            a constant "cost".)
        recall: the threshold is set to classify at least this fraction of
            positive labelled examples as positive.

    Returns:
        The selected threshold, taken from ``y_pred``. When there are no
        positive examples the maximum score is returned.
    """
    # Normalise the inputs once: a boolean mask is required for y_pred[y]
    # (integer labels would be treated as positions), and ndarray inputs are
    # required for the argsort-based reordering below.
    y = np.asarray(y).astype(bool)
    y_pred = np.asarray(y_pred)

    n_positive = np.count_nonzero(y)
    n_negative = len(y) - n_positive

    if n_positive == 0:
        return np.max(y_pred)
    if false_positive_margin == 0 and recall == 1:
        # Full recall without margin: the lowest-scoring positive example.
        return np.min(y_pred[y])

    order = np.argsort(y_pred)
    y_pred_sorted = y_pred[order]
    y_sorted = y[order]

    # Walk down from the highest score until enough positives are covered.
    required_positives = int(np.floor(recall * n_positive))
    positives_seen = 0
    j = 0
    for i in reversed(range(len(y_sorted))):
        if y_sorted[i]:
            positives_seen += 1
        if positives_seen >= required_positives:
            j = i
            break

    if false_positive_margin == 0:
        return y_pred_sorted[j]

    # Keep walking below j until the extra share of negatives is included;
    # if there are not enough negatives left, fall back to the lowest score.
    required_negatives = false_positive_margin * n_negative
    negatives_seen = 0
    k = 0
    for i in reversed(range(j)):
        if not y_sorted[i]:
            negatives_seen += 1
        if negatives_seen >= required_negatives:
            k = i
            break
    return y_pred_sorted[k]
def normalized_columns_initializer(std=1.0):
    """Return an initializer that draws Gaussian noise with fixed column norms.

    The returned callable samples a standard-normal float32 array of the
    requested ``shape`` and rescales it so that the L2 norm taken along
    axis 0 equals ``std``. Its ``dtype`` and ``partition_info`` arguments are
    accepted but not used.

    Args:
        std: target norm along axis 0.

    Returns:
        A function ``(shape, dtype=None, partition_info=None)`` producing a
        ``tf.constant`` holding the sampled values.
    """
    def _initializer(shape, dtype=None, partition_info=None):
        sample = np.random.randn(*shape).astype(np.float32)
        column_norms = np.sqrt(np.square(sample).sum(axis=0, keepdims=True))
        sample *= std / column_norms
        return tf.constant(sample)

    return _initializer
def gauss_log_prob(mu, logstd, x):
    """Log-density of ``x`` under independent Gaussians, summed over axis 1.

    Each element contributes
    ``-(x - mu)^2 / (2 * exp(2 * logstd)) - 0.5 * log(2 * pi) - logstd``.

    Args:
        mu: tensor of means.
        logstd: tensor of log standard deviations.
        x: tensor of points to evaluate.

    Returns:
        Tensor of element-wise log-densities reduced with a sum over axis 1.
    """
    variance = tf.exp(2*logstd)
    quadratic_term = -tf.square(x - mu)/(2*variance)
    half_log_two_pi = .5*tf.log(tf.constant(2*np.pi))
    elementwise = quadratic_term - half_log_two_pi - logstd
    return tf.reduce_sum(elementwise, [1])
def gauss_ent(mu, logstd):
    """Sum of ``logstd + 0.5 * log(2 * pi * e)`` over every element of ``logstd``.

    Args:
        mu: accepted for signature symmetry; not used in the computation.
        logstd: tensor of log standard deviations.

    Returns:
        Scalar tensor; the reduction runs over all axes, not per row.
    """
    entropy_offset = tf.constant(0.5*np.log(2*np.pi*np.e), tf.float32)
    return tf.reduce_sum(logstd + entropy_offset)