The following code examples, extracted from open-source Python projects, illustrate how to use tensorflow.stack().
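Before the project examples, a minimal self-contained sketch of the op itself (TF 1.x; the tensor values are toy data chosen for illustration): tf.stack() packs a list of same-shaped tensors into a single tensor along a new axis.

import tensorflow as tf

x = tf.constant([1, 2])
y = tf.constant([3, 4])
z = tf.constant([5, 6])

s0 = tf.stack([x, y, z])          # new leading axis: shape (3, 2)
s1 = tf.stack([x, y, z], axis=1)  # new trailing axis: shape (2, 3)

with tf.Session() as sess:
    print(sess.run(s0))  # [[1 2] [3 4] [5 6]]
    print(sess.run(s1))  # [[1 3 5] [2 4 6]]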
def SampleRandomFrames(model_input, num_frames, num_samples):
    """Samples a random set of frames of size num_samples.

    Args:
        model_input: A tensor of size batch_size x max_frames x feature_size
        num_frames: A tensor of size batch_size x 1
        num_samples: A scalar

    Returns:
        `model_input`: A tensor of size batch_size x num_samples x feature_size
    """
    batch_size = tf.shape(model_input)[0]
    frame_index = tf.cast(
        tf.multiply(
            tf.random_uniform([batch_size, num_samples]),
            tf.tile(tf.cast(num_frames, tf.float32), [1, num_samples])),
        tf.int32)
    batch_index = tf.tile(
        tf.expand_dims(tf.range(batch_size), 1), [1, num_samples])
    index = tf.stack([batch_index, frame_index], 2)
    return tf.gather_nd(model_input, index)
def feed_network(self, data, keep_prob, chunk_size, n_chunks, dynamic):
    # This code is copied from tflearn
    sequence_lengths = None
    if dynamic:
        sequence_lengths = net.calc_seqlenth(data if isinstance(data, tf.Tensor) else tf.stack(data))
    batch_size = tf.shape(data)[0]
    weight_dropout = tf.nn.dropout(self._layer_weights, keep_prob)
    rnn_dropout = rnn.core_rnn_cell.DropoutWrapper(self._gru_cell, output_keep_prob=keep_prob)

    # Calculation Begin
    input_shape = data.get_shape().as_list()
    ndim = len(input_shape)
    axis = [1, 0] + list(range(2, ndim))
    data = tf.transpose(data, (axis))
    sequence = tf.unstack(data)
    outputs, states = rnn.static_rnn(rnn_dropout, sequence, dtype=tf.float32,
                                     sequence_length=sequence_lengths)
    if dynamic:
        outputs = tf.transpose(tf.stack(outputs), [1, 0, 2])
        output = net.advanced_indexing_op(outputs, sequence_lengths)
    else:
        output = outputs[-1]
    output = tf.add(tf.matmul(output, weight_dropout), self._layer_biases)
    return output
def value_transition(self, curr_state, next_symbols, batch_size):
    first_value_token = self.num_functions + self.num_begin_tokens + self.num_control_tokens
    num_value_tokens = self.output_size - first_value_token
    with tf.name_scope('grammar_transition'):
        adjusted_next_symbols = tf.where(next_symbols >= self.num_control_tokens,
                                         next_symbols + (first_value_token - self.num_control_tokens),
                                         next_symbols)

        assert1 = tf.Assert(
            tf.reduce_all(tf.logical_and(next_symbols < num_value_tokens, next_symbols >= 0)),
            [curr_state, next_symbols])
        with tf.control_dependencies([assert1]):
            transitions = tf.gather(tf.constant(self.transition_matrix), curr_state)
        assert transitions.get_shape()[1:] == (self.output_size,)

        indices = tf.stack((tf.range(0, batch_size), adjusted_next_symbols), axis=1)
        next_state = tf.gather_nd(transitions, indices)

        assert2 = tf.Assert(tf.reduce_all(next_state >= 0),
                            [curr_state, adjusted_next_symbols, next_state])
        with tf.control_dependencies([assert2]):
            return tf.identity(next_state)
def create_model(self, model_input, vocab_size, num_mixtures=None,
                 l2_penalty=1e-8, sub_scope="ddcc", original_input=None,
                 dropout=False, keep_prob=None, noise_level=None,
                 num_frames=None, **unused_params):
    num_supports = FLAGS.num_supports
    num_models = FLAGS.divergence_model_count

    support_predictions = []
    for i in range(num_models):  # range instead of Python 2's xrange
        sub_prediction = self.sub_model(model_input, vocab_size, num_mixtures,
                                        l2_penalty, sub_scope + "%d" % i,
                                        dropout, keep_prob, noise_level)
        support_predictions.append(sub_prediction)
    support_predictions = tf.stack(support_predictions, axis=1)
    main_predictions = tf.reduce_mean(support_predictions, axis=1)
    return {"predictions": main_predictions, "support_predictions": support_predictions}
def resize_axis(tensor, axis, new_size, fill_value=0):
    tensor = tf.convert_to_tensor(tensor)
    shape = tf.unstack(tf.shape(tensor))

    pad_shape = shape[:]
    pad_shape[axis] = tf.maximum(0, new_size - shape[axis])

    shape[axis] = tf.minimum(shape[axis], new_size)
    shape = tf.stack(shape)

    resized = tf.concat([
        tf.slice(tensor, tf.zeros_like(shape), shape),
        tf.fill(tf.stack(pad_shape), tf.cast(fill_value, tensor.dtype))
    ], axis)

    # Update shape.
    new_shape = tensor.get_shape().as_list()  # A copy is being made.
    new_shape[axis] = new_size
    resized.set_shape(new_shape)
    return resized
def trainable_initial_state(self, batch_size):
    """
    Create a trainable initial state for the BasicLSTMCell
    :param batch_size: number of samples per batch
    :return: LSTMStateTuple
    """
    def _create_initial_state(batch_size, state_size, trainable=True,
                              initializer=tf.random_normal_initializer()):
        with tf.device('/cpu:0'):
            s = tf.get_variable('initial_state', shape=[1, state_size],
                                dtype=tf.float32, trainable=trainable,
                                initializer=initializer)
            state = tf.tile(s, tf.stack([batch_size] + [1]))
        return state

    with tf.variable_scope('initial_c'):
        initial_c = _create_initial_state(batch_size, self._num_units)
    with tf.variable_scope('initial_h'):
        initial_h = _create_initial_state(batch_size, self._num_units)
    return tf.contrib.rnn.LSTMStateTuple(initial_c, initial_h)
def __call__(self, inputs, steps):
    def fn(zv, x):
        """
        Transition for training, without Metropolis-Hastings.
        `z` is the input state. `v` is created as a dummy variable to allow
        output of v_, for training p(v).

        :param x: variable only for specifying the number of steps
        :return: next state `z_`, and the corresponding auxiliary variable `v_`.
        """
        z, v = zv
        v = tf.random_normal(shape=tf.stack([tf.shape(z)[0], self.network.v_dim]))
        z_, v_ = self.network.forward([z, v])
        return z_, v_

    elems = tf.zeros([steps])
    return tf.scan(fn, elems, inputs, back_prop=True)
def bilateral_slice(grid, guide, name=None):
    """Slices into a bilateral grid using the guide map.

    Args:
        grid: (Tensor) [batch_size, grid_h, grid_w, depth, n_outputs]
            grid to slice from.
        guide: (Tensor) [batch_size, h, w] guide map to slice along.
        name: (string) name for the operation.
    Returns:
        sliced: (Tensor) [batch_size, h, w, n_outputs] sliced output.
    """
    with tf.name_scope(name):
        gridshape = grid.get_shape().as_list()
        if len(gridshape) == 6:
            _, _, _, _, n_out, n_in = gridshape
            grid = tf.concat(tf.unstack(grid, None, axis=5), 4)

        sliced = hdrnet_ops.bilateral_slice(grid, guide)

        if len(gridshape) == 6:
            sliced = tf.stack(tf.split(sliced, n_in, axis=3), axis=4)
        return sliced
# pylint: enable=redefined-builtin
def discriminate(self, image, Y):
    print("Initializing the discriminator")
    print("Y shape", Y.get_shape())
    yb = tf.reshape(Y, tf.stack([self.batch_size, 1, 1, self.dim_y]))
    print("image shape", image.get_shape())
    print("yb shape", yb.get_shape())
    X = tf.concat([image, yb * tf.ones([self.batch_size, 24, 24, self.dim_y])], 3)
    print("X shape", X.get_shape())
    h1 = lrelu(tf.nn.conv2d(X, self.discrim_W1, strides=[1, 2, 2, 1], padding='SAME'))
    print("h1 shape", h1.get_shape())
    h1 = tf.concat([h1, yb * tf.ones([self.batch_size, 12, 12, self.dim_y])], 3)
    print("h1 shape", h1.get_shape())
    h2 = lrelu(batchnormalize(tf.nn.conv2d(h1, self.discrim_W2, strides=[1, 2, 2, 1], padding='SAME')))
    print("h2 shape", h2.get_shape())
    h2 = tf.reshape(h2, [self.batch_size, -1])
    h2 = tf.concat([h2, Y], 1)
    discri = tf.matmul(h2, self.discrim_W3)
    print("discri shape", discri.get_shape())
    h3 = lrelu(batchnormalize(discri))
    return h3
def get_image_summary(img, idx=0):
    """
    Make an image summary for 4d tensor image with index idx
    """
    V = tf.slice(img, (0, 0, 0, idx), (1, -1, -1, 1))
    V -= tf.reduce_min(V)
    V /= tf.reduce_max(V)
    V *= 255

    img_w = tf.shape(img)[1]
    img_h = tf.shape(img)[2]
    V = tf.reshape(V, tf.stack((img_w, img_h, 1)))
    V = tf.transpose(V, (2, 0, 1))
    V = tf.reshape(V, tf.stack((-1, img_w, img_h, 1)))
    return V
def deconv_2d_drop_bn_relu(inp, inp_chan, out_chan, kernel, stride=1, prob=1.0,
                           name="", is_train=True):
    weights = tf.Variable(tf.truncated_normal(
        shape=[kernel, kernel, out_chan, inp_chan], mean=0.0, stddev=0.3),
        name=name + "_weights")
    bias = tf.Variable(tf.constant(shape=[out_chan], value=0.0), name=name + "_bias")
    inp_shape = tf.shape(inp)
    deconv = tf.nn.conv2d_transpose(
        value=inp,
        filter=weights,
        output_shape=tf.stack([inp_shape[0], inp_shape[1] * stride, inp_shape[2] * stride, out_chan]),
        strides=[1, stride, stride, 1],
        padding='VALID',
        name=name + "_deconv")
    drop = tf.nn.dropout(deconv, prob, name=name + "_drop")
    out = tf.nn.relu(tf.contrib.layers.batch_norm(drop + bias, is_training=is_train))
    return out, weights, bias
def rotate_points(orig_points, angle, w, h):
    """Return rotated points

    Args:
        orig_points: 'Tensor' with shape [N, 2], each entry is point (x, y)
        angle: rotate radians

    Returns:
        'Tensor' with shape [N, 2], with rotated points
    """
    # rotation
    rotate_mat = tf.stack([[tf.cos(angle) / w, tf.sin(angle) / h],
                           [-tf.sin(angle) / w, tf.cos(angle) / h]])

    # shift coord
    orig_points = tf.subtract(orig_points, 0.5)

    orig_points = tf.stack([orig_points[:, 0] * w,
                            orig_points[:, 1] * h], axis=1)
    print(orig_points)
    rotated_points = tf.matmul(orig_points, rotate_mat) + 0.5
    return rotated_points
def conv_cond_concat(x, y):
    """Concatenate conditioning vector on feature map axis."""
    #print('input x:', x.get_shape().as_list())
    #print('input y:', y.get_shape().as_list())
    xshape = x.get_shape()
    # tile by [1, 64, 64, 1]
    tile_shape = tf.stack([1, xshape[1], xshape[2], 1])
    tile_y = tf.tile(y, tile_shape)
    #print('tile y:', tile_y.get_shape().as_list())
    return tf.concat([x, tile_y], axis=3)

    #x_shapes = x.get_shape()
    #y_shapes = y.get_shape()
    #return tf.concat([
    #    x, y*tf.ones([x_shapes[0], x_shapes[1], x_shapes[2], y_shapes[3]])], 3)
def deconv2d(input_, output_shape,
             k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02,
             name="deconv2d", with_w=False):
    with tf.variable_scope(name):
        # filter : [height, width, output_channels, in_channels]
        w = tf.get_variable('w', [k_h, k_w, output_shape[-1], input_.get_shape()[-1]],
                            initializer=tf.random_normal_initializer(stddev=stddev))
        tf_output_shape = tf.stack(output_shape)
        deconv = tf.nn.conv2d_transpose(input_, w, output_shape=tf_output_shape,
                                        strides=[1, d_h, d_w, 1])

        biases = tf.get_variable('biases', [output_shape[-1]],
                                 initializer=tf.constant_initializer(0.0))
        #deconv = tf.reshape(tf.nn.bias_add(deconv, biases), deconv.get_shape())
        deconv = tf.reshape(tf.nn.bias_add(deconv, biases), tf_output_shape)

        if with_w:
            return deconv, w, biases
        else:
            return deconv
def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False):
    shape = input_.get_shape().as_list()
    #mat_shape = tf.stack([tf.shape(input_)[1], output_size])
    mat_shape = [shape[1], output_size]
    with tf.variable_scope(scope or "Linear"):
        #matrix = tf.get_variable("Matrix", [shape[1], output_size], tf.float32,
        matrix = tf.get_variable("Matrix", mat_shape, tf.float32,
                                 tf.random_normal_initializer(stddev=stddev))
        bias = tf.get_variable("bias", [output_size],
                               initializer=tf.constant_initializer(bias_start))
        if with_w:
            return tf.matmul(input_, matrix) + bias, matrix, bias
        else:
            return tf.matmul(input_, matrix) + bias

#minibatch method that improves on openai
#because it doesn't fix batchsize:
#TODO: recheck when not sleepy
def answer_module(self):
    """Answer Module: generate an answer from the final memory vector.
    Input:
        hidden state from episodic memory module: [batch_size, hidden_size]
        question: [batch_size, embedding_size]
    """
    # decoder for a list of tokens with sequence, e.g. "x1 x2 x3 x4..."
    steps = self.sequence_length if self.decode_with_sequences else 1
    a = self.m_T  # init hidden state
    # TODO usually we will init this as a special token '<GO>'; you can change this
    # line by passing the embedding of '<GO>' from outside.
    y_pred = tf.zeros((self.batch_size, self.hidden_size))
    logits_list = []
    logits_return = None
    for i in range(steps):
        cell = rnn.GRUCell(self.hidden_size)
        y_previous_q = tf.concat([y_pred, self.query_embedding], axis=1)  # [batch_size, hidden_size*2]
        _, a = cell(y_previous_q, a)
        logits = tf.layers.dense(a, units=self.num_classes)  # [batch_size, vocab_size]
        logits_list.append(logits)
    if self.decode_with_sequences:  # need to get sequences.
        logits_return = tf.stack(logits_list, axis=1)  # [batch_size, sequence_length, num_classes]
    else:  # only need to get an answer, not sequences
        logits_return = logits_list[0]  # [batch_size, num_classes]
    return logits_return
def _rnn_attention_decoder(self, decoder_cell, training_wheels):
    loop_fn = self._custom_rnn_loop_fn(decoder_cell.output_size, training_wheels=training_wheels)
    decoder_outputs, _, (context_vectors_array, attention_logits_array, pointer_probability_array) = \
        tf.nn.raw_rnn(decoder_cell, loop_fn, swap_memory=True)
    decoder_outputs = decoder_outputs.stack()
    decoder_outputs = tf.transpose(decoder_outputs, [1, 0, 2])

    attention_logits = attention_logits_array.gather(tf.range(0, attention_logits_array.size() - 1))
    attention_logits = tf.transpose(attention_logits, [1, 0, 2])

    context_vectors = context_vectors_array.gather(tf.range(0, context_vectors_array.size() - 1))
    context_vectors = tf.transpose(context_vectors, [1, 0, 2])

    pointer_probabilities = pointer_probability_array.gather(tf.range(0, pointer_probability_array.size() - 1))
    pointer_probabilities = tf.transpose(pointer_probabilities, [1, 0])

    return decoder_outputs, context_vectors, attention_logits, pointer_probabilities
def kSparse(self, x, topk):
    print('run regular k-sparse')
    dim = int(x.get_shape()[1])
    if topk > dim:
        warnings.warn('Warning: topk should not be larger than dim: %s, found: %s, using %s' % (dim, topk, dim))
        topk = dim

    k = dim - topk
    # indices will be [[0, 1], [2, 1]], values will be [[6., 2.], [5., 4.]]
    values, indices = tf.nn.top_k(-x, k)

    # We need to create full indices like [[0, 0], [0, 1], [1, 2], [1, 1]]
    my_range = tf.expand_dims(tf.range(0, tf.shape(indices)[0]), 1)  # will be [[0], [1]]
    my_range_repeated = tf.tile(my_range, [1, k])  # will be [[0, 0], [1, 1]]

    # change shapes to [N, k, 1] and [N, k, 1], to concatenate into [N, k, 2]
    full_indices = tf.stack([my_range_repeated, indices], axis=2)
    full_indices = tf.reshape(full_indices, [-1, 2])

    to_reset = tf.sparse_to_dense(full_indices, tf.shape(x), tf.reshape(values, [-1]),
                                  default_value=0., validate_indices=False)

    res = tf.add(x, to_reset)
    return res
def loss(self, img_batch, label_batch):
    """Create the network, run inference on the input batch and compute loss.

    Args:
        input_batch: batch of pre-processed images.

    Returns:
        Pixel-wise softmax loss.
    """
    raw_output = self._create_network(tf.cast(img_batch, tf.float32), keep_prob=tf.constant(0.5))
    prediction = tf.reshape(raw_output, [-1, n_classes])

    # Need to resize labels and convert using one-hot encoding.
    label_batch = self.prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]))
    gt = tf.reshape(label_batch, [-1, n_classes])

    # Pixel-wise softmax loss.
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
    reduced_loss = tf.reduce_mean(loss)

    return reduced_loss
def feed_network(self, data, keep_prob, chunk_size, n_chunks, dynamic):
    # This code is copied from tflearn
    sequence_lengths = None
    if dynamic:
        sequence_lengths = net.calc_seqlenth(data if isinstance(data, tf.Tensor) else tf.stack(data))
    batch_size = tf.shape(data)[0]
    weight_dropout = tf.nn.dropout(self._layer_weights, keep_prob)
    rnn_dropout = rnn.core_rnn_cell.DropoutWrapper(self._lstm_cell, output_keep_prob=keep_prob)

    # Calculation Begin
    input_shape = data.get_shape().as_list()
    ndim = len(input_shape)
    axis = [1, 0] + list(range(2, ndim))
    data = tf.transpose(data, (axis))
    sequence = tf.unstack(data)
    outputs, states = rnn.static_rnn(rnn_dropout, sequence, dtype=tf.float32,
                                     sequence_length=sequence_lengths)
    if dynamic:
        outputs = tf.transpose(tf.stack(outputs), [1, 0, 2])
        output = net.advanced_indexing_op(outputs, sequence_lengths)
    else:
        output = outputs[-1]
    output = tf.add(tf.matmul(output, weight_dropout), self._layer_biases)
    return output
def combine_gradients(tower_grads):
    """Calculate the combined gradient for each shared variable across all towers.

    Note that this function provides a synchronization point across all towers.

    Args:
        tower_grads: List of lists of (gradient, variable) tuples. The outer list
            is over individual gradients. The inner list is over the gradient
            calculation for each tower.
    Returns:
        List of pairs of (gradient, variable) where the gradient has been summed
        across all towers.
    """
    filtered_grads = [[x for x in grad_list if x[0] is not None] for grad_list in tower_grads]
    final_grads = []
    for i in range(len(filtered_grads[0])):  # range instead of Python 2's xrange
        grads = [filtered_grads[t][i] for t in range(len(filtered_grads))]
        grad = tf.stack([x[0] for x in grads], 0)
        grad = tf.reduce_sum(grad, 0)
        final_grads.append((grad, filtered_grads[0][i][1],))

    return final_grads
def generate_mask(img_mask_list, h, w, l):
    img_masks, loss_masks = [], []

    for i in range(l):
        # generate image mask
        img_mask = img_mask_list[i]
        img_mask = tf.cast(tf.image.decode_png(img_mask), tf.float32)
        img_mask = tf.reshape(img_mask, (h, w))
        img_masks.append(img_mask)

        # generate loss mask
        s_total = h * w
        s_mask = tf.reduce_sum(img_mask)
        def f1():
            return img_mask * ((s_total - s_mask) / s_mask - 1) + 1
        def f2():
            return tf.zeros_like(img_mask)
        def f3():
            return tf.ones_like(img_mask)
        loss_mask = tf.case([(tf.equal(s_mask, 0), f2),
                             (tf.less(s_mask, s_total / 2), f1)],
                            default=f3)
        loss_masks.append(loss_mask)

    return tf.stack(img_masks), tf.stack(loss_masks)
def calculate_allocation_weighting(self, usage_vector):
    """
    :param usage_vector: tensor of shape [batch_size, memory_size]
    :return: allocation tensor of shape [batch_size, memory_size]
    """
    usage_vector = Memory.epsilon + (1 - Memory.epsilon) * usage_vector

    # We're sorting "-self.usage_vector" because top_k returns the highest values and we need the lowest
    highest_usage, inverse_indices = tf.nn.top_k(-usage_vector, k=self.memory_size)
    lowest_usage = -highest_usage

    allocation_scrambled = (1 - lowest_usage) * tf.cumprod(lowest_usage, axis=1, exclusive=True)

    # allocation is not in the correct order: allocation[i] contains the sorted[i] value,
    # so we reverse the already inverted indices for each batch
    indices = tf.stack([tf.invert_permutation(batch_indices)
                        for batch_indices in tf.unstack(inverse_indices)])
    allocation = tf.stack([tf.gather(mem, ind)
                           for mem, ind in zip(tf.unstack(allocation_scrambled), tf.unstack(indices))])

    return allocation
def read_and_decode(filename_queue, batch_size):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    feature = features()
    feature = tf.parse_single_example(
        serialized_example,
        features=feature,
    )
    hr_image = tf.decode_raw(feature['hr_image'], tf.uint8)
    height = tf.cast(feature['height'], tf.int32)
    width = tf.cast(feature['width'], tf.int32)
    print(height)
    image_shape = tf.stack([128, 128, 3])
    hr_image = tf.reshape(hr_image, image_shape)

    hr_image = tf.image.random_flip_left_right(hr_image)
    hr_image = tf.image.random_contrast(hr_image, 0.5, 1.3)
    hr_images = tf.train.shuffle_batch([hr_image], batch_size=batch_size,
                                       capacity=30,
                                       num_threads=2,
                                       min_after_dequeue=10)
    return hr_images
def _get_top_k(scores1, scores2, k, max_span_size, support2question):
    max_support_length = tf.shape(scores1)[1]
    doc_idx, pointer1, topk_scores1 = segment_top_k(scores1, support2question, k)

    # [num_questions * beam_size]
    doc_idx_flat = tf.reshape(doc_idx, [-1])
    pointer_flat1 = tf.reshape(pointer1, [-1])

    # [num_questions * beam_size, support_length]
    scores_gathered2 = tf.gather(scores2, doc_idx_flat)
    if max_span_size < 0:
        pointer_flat1, max_span_size = pointer_flat1 + max_span_size + 1, -max_span_size
    left_mask = misc.mask_for_lengths(tf.cast(pointer_flat1, tf.int32),
                                      max_support_length, mask_right=False)
    right_mask = misc.mask_for_lengths(tf.cast(pointer_flat1 + max_span_size, tf.int32),
                                       max_support_length)
    scores_gathered2 = scores_gathered2 + left_mask + right_mask

    pointer2 = tf.argmax(scores_gathered2, axis=1, output_type=tf.int32)

    topk_score2 = tf.gather_nd(scores2, tf.stack([doc_idx_flat, pointer2], 1))

    return doc_idx, pointer1, tf.reshape(pointer2, [-1, k]), topk_scores1 + tf.reshape(topk_score2, [-1, k])
def distance_biases(time_steps, window_size=10, reuse=False):
    """
    Return a 2-d tensor with the values of the distance biases to be applied
    on the intra-attention matrix of size sentence_size

    Args:
        time_steps: tensor scalar
        window_size: window size
        reuse: reuse variables

    Returns:
        2-d tensor (time_steps, time_steps)
    """
    with tf.variable_scope('distance-bias', reuse=reuse):
        # this is d_{i-j}
        distance_bias = tf.get_variable('dist_bias', [window_size],
                                        initializer=tf.zeros_initializer())
        r = tf.range(0, time_steps)
        r_matrix = tf.tile(tf.reshape(r, [1, -1]), tf.stack([time_steps, 1]))
        raw_idxs = r_matrix - tf.reshape(r, [-1, 1])
        clipped_idxs = tf.clip_by_value(raw_idxs, 0, window_size - 1)
        values = tf.nn.embedding_lookup(distance_bias, clipped_idxs)
    return values
def apply_time_pooling(inputs, sequence_length, stride, pooling_avg=False):
    shape = [tf.shape(inputs)[0], tf.shape(inputs)[1], inputs.get_shape()[2].value]

    if pooling_avg:
        inputs_ = [inputs[:, i::stride, :] for i in range(stride)]
        max_len = tf.shape(inputs_[0])[1]

        for k in range(1, stride):
            len_ = tf.shape(inputs_[k])[1]
            paddings = tf.stack([[0, 0], [0, max_len - len_], [0, 0]])
            inputs_[k] = tf.pad(inputs_[k], paddings=paddings)

        inputs = tf.reduce_sum(inputs_, axis=0) / len(inputs_)
    else:
        inputs = inputs[:, ::stride, :]

    inputs = tf.reshape(inputs, tf.stack([shape[0], tf.shape(inputs)[1], shape[2]]))
    sequence_length = (sequence_length + stride - 1) // stride  # rounding up

    return inputs, sequence_length
def reinforce_baseline(decoder_states, reward):
    """
    Center the reward by computing a baseline reward over decoder states.

    :param decoder_states: internal states of the decoder, tensor of shape (batch_size, time_steps, state_size)
    :param reward: reward for each time step, tensor of shape (batch_size, time_steps)
    :return: reward - computed baseline, tensor of shape (batch_size, time_steps)
    """
    # batch_size = tf.shape(decoder_states)[0]
    # time_steps = tf.shape(decoder_states)[1]
    # state_size = decoder_states.get_shape()[2]
    # states = tf.reshape(decoder_states, shape=tf.stack([batch_size * time_steps, state_size]))

    baseline = dense(tf.stop_gradient(decoder_states), units=1, activation=None,
                     name='reward_baseline',
                     kernel_initializer=tf.constant_initializer(0.01))
    baseline = tf.squeeze(baseline, axis=2)

    # baseline = tf.reshape(baseline, shape=tf.stack([batch_size, time_steps]))
    return reward - baseline
def zoomout(image, gt_bboxes, params):
    X_out = tf.random_uniform([], 1.05, params['X_out'])
    h, w, _ = tf.unstack(tf.to_float(tf.shape(image)))
    zoomout_color = params['zoomout_color'] + [0]
    bg_color = tf.constant(zoomout_color, dtype=tf.float32)

    x_shift = tf.random_uniform([], 0, (X_out - 1) * w)
    y_shift = tf.random_uniform([], 0, (X_out - 1) * h)
    x2_shift = (X_out - 1) * w - x_shift
    y2_shift = (X_out - 1) * h - y_shift

    # somewhat hacky solution to pad with MEAN_COLOR
    # tf.pad does not support custom constant padding unlike numpy
    image -= bg_color
    image = tf.pad(image, tf.to_int32([[y_shift, y2_shift], [x_shift, x2_shift], [0, 0]]))
    image += bg_color

    gt_x, gt_y, gt_w, gt_h = tf.unstack(gt_bboxes, axis=1)
    gt_bboxes = tf.stack([gt_x + x_shift / w,
                          gt_y + y_shift / h,
                          gt_w, gt_h], axis=1) / X_out
    return image, gt_bboxes
def encode_bboxes_tf(proposals, gt, config):
    """Encode bbox coordinates in a format used for computing the loss"""
    prop_x = proposals[..., 0]
    prop_y = proposals[..., 1]
    prop_w = proposals[..., 2]
    prop_h = proposals[..., 3]
    gt_x = gt[..., 0]
    gt_y = gt[..., 1]
    gt_w = gt[..., 2]
    gt_h = gt[..., 3]

    diff_x = (gt_x + 0.5 * gt_w - prop_x - 0.5 * prop_w) / prop_w
    diff_y = (gt_y + 0.5 * gt_h - prop_y - 0.5 * prop_h) / prop_h
    diff_w = tf.log(gt_w / prop_w)
    diff_h = tf.log(gt_h / prop_h)

    var_x, var_y, var_w, var_h = config['prior_variance']
    x = tf.stack([diff_x / var_x, diff_y / var_y,
                  diff_w / var_w, diff_h / var_h], -1)
    return x
def decode_bboxes(tcoords, anchors):
    var_x, var_y, var_w, var_h = config['prior_variance']
    t_x = tcoords[:, 0] * var_x
    t_y = tcoords[:, 1] * var_y
    t_w = tcoords[:, 2] * var_w
    t_h = tcoords[:, 3] * var_h

    a_w = anchors[:, 2]
    a_h = anchors[:, 3]
    a_x = anchors[:, 0] + a_w / 2
    a_y = anchors[:, 1] + a_h / 2

    x = t_x * a_w + a_x
    y = t_y * a_h + a_y
    w = tf.exp(t_w) * a_w
    h = tf.exp(t_h) * a_h

    x1 = tf.maximum(0., x - w / 2)
    y1 = tf.maximum(0., y - h / 2)
    x2 = tf.minimum(1., w + x1)
    y2 = tf.minimum(1., h + y1)
    return tf.stack([y1, x1, y2, x2], axis=1)
def _flat_reconstruction_loss(self, flat_x_target, flat_rnn_output):
    split_x_target = tf.split(flat_x_target, self._output_depths, axis=-1)
    split_rnn_output = tf.split(
        flat_rnn_output, self._output_depths, axis=-1)

    losses = []
    truths = []
    predictions = []
    metric_map = {}
    for i in range(len(self._output_depths)):
        l, m, t, p = (
            super(MultiOutCategoricalLstmDecoder, self)._flat_reconstruction_loss(
                split_x_target[i], split_rnn_output[i]))
        losses.append(l)
        truths.append(t)
        predictions.append(p)
        for k, v in m.items():
            metric_map['%s_%d' % (k, i)] = v

    return (tf.reduce_sum(losses, axis=0),
            metric_map,
            tf.stack(truths),
            tf.stack(predictions))
def loss_wrapper(y, y_, loss_function, transitions=None, nums_tags=None,
                 batch_size=None, weights=None, average_cross_steps=True):
    assert len(y) == len(y_)
    total_loss = []
    if loss_function is crf_loss:
        #print len(y), len(transitions), len(nums_tags)
        assert len(y) == len(transitions) and len(transitions) == len(nums_tags) and batch_size is not None
        for sy, sy_, stranstion, snums_tags in zip(y, y_, transitions, nums_tags):
            total_loss.append(loss_function(sy, sy_, stranstion, snums_tags, batch_size))
    elif loss_function is cross_entropy:
        assert len(y) == len(nums_tags)
        for sy, sy_, snums_tags in zip(y, y_, nums_tags):
            total_loss.append(loss_function(sy, sy_, snums_tags))
    elif loss_function is sparse_cross_entropy:
        for sy, sy_ in zip(y, y_):
            total_loss.append(loss_function(sy, sy_))
    elif loss_function is sparse_cross_entropy_with_weights:
        assert len(y) == len(nums_tags)
        for sy, sy_ in zip(y, y_):
            total_loss.append(tf.reshape(loss_function(sy, sy_, weights=weights,
                                                       average_cross_steps=average_cross_steps), [-1]))
    else:
        for sy, sy_ in zip(y, y_):
            total_loss.append(tf.reshape(loss_function(sy, sy_), [-1]))
    return tf.stack(total_loss)
def combine_gradients(tower_grads):
    """Calculate the combined gradient for each shared variable across all towers.

    Note that this function provides a synchronization point across all towers.

    Args:
        tower_grads: List of lists of (gradient, variable) tuples. The outer list
            is over individual gradients. The inner list is over the gradient
            calculation for each tower.
    Returns:
        List of pairs of (gradient, variable) where the gradient has been summed
        across all towers.
    """
    filtered_grads = [[x for x in grad_list if x[0] is not None] for grad_list in tower_grads]
    final_grads = []
    for i in range(len(filtered_grads[0])):
        grads = [filtered_grads[t][i] for t in range(len(filtered_grads))]
        grad = tf.stack([x[0] for x in grads], 0)
        grad = tf.reduce_sum(grad, 0)
        final_grads.append((grad, filtered_grads[0][i][1],))

    return final_grads
def __call__(self, input_layer, output_size, scope=None, in_dim=None, stddev=0.02, bias_start=0.0):
    shape = input_layer.shape
    input_ = input_layer.tensor
    if True:  # try:
        if len(shape) == 4:
            input_ = tf.reshape(input_, tf.stack([tf.shape(input_)[0], np.prod(shape[1:])]))
            input_.set_shape([None, np.prod(shape[1:])])
            shape = input_.get_shape().as_list()

        with tf.variable_scope(scope or "Linear"):
            matrix = self.variable("Matrix", [in_dim or shape[1], output_size], dt=tf.float32,
                                   init=tf.random_normal_initializer(stddev=stddev))
            bias = self.variable("bias", [output_size], init=tf.constant_initializer(bias_start))
            return input_layer.with_tensor(tf.matmul(input_, matrix) + bias, parameters=self.vars)
    #except Exception:
    #    import ipdb; ipdb.set_trace()
def transition(self, curr_state, next_symbols, batch_size):
    with tf.name_scope('grammar_transition'):
        transitions = tf.gather(tf.constant(self.transition_matrix), curr_state)
        assert transitions.get_shape()[1:] == (self.output_size,)

        indices = tf.stack((tf.range(0, batch_size), next_symbols), axis=1)
        next_state = tf.gather_nd(transitions, indices)
        return next_state
def ctc_label_dense_to_sparse(labels, label_lengths, batch_size):
    # The second dimension of labels must be equal to the longest label length in the batch
    correct_shape_assert = tf.assert_equal(tf.shape(labels)[1], tf.reduce_max(label_lengths))
    with tf.control_dependencies([correct_shape_assert]):
        labels = tf.identity(labels)

    label_shape = tf.shape(labels)
    num_batches_tns = tf.stack([label_shape[0]])
    max_num_labels_tns = tf.stack([label_shape[1]])

    def range_less_than(previous_state, current_input):
        return tf.expand_dims(tf.range(label_shape[1]), 0) < current_input

    init = tf.cast(tf.fill(max_num_labels_tns, 0), tf.bool)
    init = tf.expand_dims(init, 0)
    dense_mask = tf.scan(range_less_than, label_lengths, initializer=init, parallel_iterations=1)
    dense_mask = dense_mask[:, 0, :]

    label_array = tf.reshape(tf.tile(tf.range(0, label_shape[1]), num_batches_tns), label_shape)
    label_ind = tf.boolean_mask(label_array, dense_mask)

    batch_array = tf.transpose(tf.reshape(tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),
                                          tf.reverse(label_shape, [0])))
    batch_ind = tf.boolean_mask(batch_array, dense_mask)

    indices = tf.transpose(tf.reshape(tf.concat([batch_ind, label_ind], 0), [2, -1]))
    shape = [batch_size, tf.reduce_max(label_lengths)]
    vals_sparse = gather_nd(labels, indices, shape)

    return tf.SparseTensor(tf.to_int64(indices), vals_sparse, tf.to_int64(label_shape))

# Validate and normalize transcriptions. Returns a cleaned version of the label
# or None if it's invalid.
def create_model(self, model_input, vocab_size, num_frames, **unused_params):
    shape = model_input.get_shape().as_list()
    frames_sum = tf.reduce_sum(tf.abs(model_input), axis=2)
    frames_true = tf.ones(tf.shape(frames_sum))
    frames_false = tf.zeros(tf.shape(frames_sum))
    frames_bool = tf.reshape(
        tf.where(tf.greater(frames_sum, frames_false), frames_true, frames_false),
        [-1, shape[1], 1])

    activation_1 = tf.reduce_max(model_input, axis=1)
    activation_2 = tf.reduce_sum(model_input * frames_bool, axis=1) / (tf.reduce_sum(frames_bool, axis=1) + 1e-6)
    activation_3 = tf.reduce_min(model_input, axis=1)

    model_input_1, final_probilities_1 = self.sub_moe(activation_1, vocab_size, scopename="_max")
    model_input_2, final_probilities_2 = self.sub_moe(activation_2, vocab_size, scopename="_mean")
    model_input_3, final_probilities_3 = self.sub_moe(activation_3, vocab_size, scopename="_min")
    final_probilities = tf.stack((final_probilities_1, final_probilities_2, final_probilities_3), axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[shape[2], 3, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    activations = tf.stack((model_input_1, model_input_2, model_input_3), axis=2)
    weight = tf.nn.softmax(tf.einsum("aij,ijk->ajk", activations, weight2d), dim=1)
    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_sum(final_probilities * weight, axis=1)
    return result
def rnn(self, model_input, lstm_size, num_frames, sub_scope="", **unused_params):
    """Creates a model which uses a stack of LSTMs to represent the video.

    Args:
        model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
            input features.
        vocab_size: The number of classes in the dataset.
        num_frames: A vector of length 'batch' which indicates the number of
            frames for each video (before padding).

    Returns:
        A dictionary with a tensor containing the probability predictions of the
        model in the 'predictions' key. The dimensions of the tensor are
        'batch_size' x 'num_classes'.
    """
    ## Batch normalize the input
    stacked_lstm = tf.contrib.rnn.MultiRNNCell(
        [
            tf.contrib.rnn.BasicLSTMCell(
                lstm_size, forget_bias=1.0, state_is_tuple=True)
            for _ in range(1)
        ],
        state_is_tuple=True)

    with tf.variable_scope("RNN-" + sub_scope):
        outputs, state = tf.nn.dynamic_rnn(stacked_lstm, model_input,
                                           sequence_length=num_frames,
                                           swap_memory=True,
                                           dtype=tf.float32)

    # wrap map in list() so tf.concat gets a list under Python 3
    state_out = tf.concat(list(map(lambda x: x.c, state)), axis=1)

    return state_out
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters,
                                     filter_sizes=filter_sizes,
                                     sub_scope="cnn%d" % (layer + 1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = self.rnn(cnn_output, lstm_size, num_frames,
                                  sub_scope="rnn%d" % (layer + 1))
        moe_inputs.append(cnn_multiscale)
        final_probility = self.sub_moe(cnn_multiscale, vocab_size,
                                       scopename="moe%d" % (layer + 1))
        final_probilities.append(final_probility)
        num_t = pool_size * (num_t // pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :],
                                [-1, num_t // pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames // pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, features_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_sum(final_probilities * weight, axis=1)
    return result