The following 50 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.range().
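Before the examples, a minimal sketch of the op itself may help (assuming TensorFlow 1.x imported as tf, which is the API the examples below target; the variable names are illustrative only). tf.range() follows the same (limit), (start, limit) and (start, limit, delta) conventions as Python's built-in range() and also accepts floating-point arguments.

import tensorflow as tf

# tf.range mirrors Python's range(): (limit), (start, limit) or (start, limit, delta).
a = tf.range(5)               # [0, 1, 2, 3, 4]
b = tf.range(3, 18, 3)        # [3, 6, 9, 12, 15]
c = tf.range(0.0, 1.0, 0.25)  # float steps are allowed: [0.0, 0.25, 0.5, 0.75]

with tf.Session() as sess:
    print(sess.run([a, b, c]))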
def sparse_tuple_from(sequences, dtype=np.int32):
    r"""Creates a sparse representation of ``sequences``.
    Args:
        * sequences: a list of lists of type dtype where each element is a sequence
    Returns a tuple with (indices, values, shape)
    """
    indices = []
    values = []

    for n, seq in enumerate(sequences):
        indices.extend(zip([n]*len(seq), range(len(seq))))
        values.extend(seq)

    indices = np.asarray(indices, dtype=np.int64)
    values = np.asarray(values, dtype=dtype)
    shape = np.asarray([len(sequences), indices.max(0)[1]+1], dtype=np.int64)

    return tf.SparseTensor(indices=indices, values=values, shape=shape)
def value_transition(self, curr_state, next_symbols, batch_size):
    first_value_token = self.num_functions + self.num_begin_tokens + self.num_control_tokens
    num_value_tokens = self.output_size - first_value_token
    with tf.name_scope('grammar_transition'):
        adjusted_next_symbols = tf.where(next_symbols >= self.num_control_tokens,
                                         next_symbols + (first_value_token - self.num_control_tokens),
                                         next_symbols)

        assert1 = tf.Assert(tf.reduce_all(tf.logical_and(next_symbols < num_value_tokens, next_symbols >= 0)),
                            [curr_state, next_symbols])
        with tf.control_dependencies([assert1]):
            transitions = tf.gather(tf.constant(self.transition_matrix), curr_state)
        assert transitions.get_shape()[1:] == (self.output_size,)

        indices = tf.stack((tf.range(0, batch_size), adjusted_next_symbols), axis=1)
        next_state = tf.gather_nd(transitions, indices)

        assert2 = tf.Assert(tf.reduce_all(next_state >= 0), [curr_state, adjusted_next_symbols, next_state])
        with tf.control_dependencies([assert2]):
            return tf.identity(next_state)
def wers(originals, results):
    count = len(originals)
    rates = []
    mean = 0.0
    assert count == len(results)
    for i in range(count):
        rate = wer(originals[i], results[i])
        mean = mean + rate
        rates.append(rate)
    return rates, mean / float(count)

# The following code is from: http://hetland.org/coding/python/levenshtein.py
# This is a straightforward implementation of a well-known algorithm, and thus
# probably shouldn't be covered by copyright to begin with. But in case it is,
# the author (Magnus Lie Hetland) has, to the extent possible under law,
# dedicated all copyright and related and neighboring rights to this software
# to the public domain worldwide, by distributing it under the CC0 license,
# version 1.0. This software is distributed without any warranty. For more
# information, see <http://creativecommons.org/publicdomain/zero/1.0>
def levenshtein(a, b):
    "Calculates the Levenshtein distance between a and b."
    n, m = len(a), len(b)
    if n > m:
        # Make sure n <= m, to use O(min(n,m)) space
        a, b = b, a
        n, m = m, n

    current = list(range(n+1))
    for i in range(1, m+1):
        previous, current = current, [i]+[0]*n
        for j in range(1, n+1):
            add, delete = previous[j]+1, current[j-1]+1
            change = previous[j-1]
            if a[j-1] != b[i-1]:
                change = change + 1
            current[j] = min(add, delete, change)

    return current[n]

# gather_nd is taken from https://github.com/tensorflow/tensorflow/issues/206#issuecomment-229678962
#
# Unfortunately we can't just use tf.gather_nd because it does not have gradients
# implemented yet, so we need this workaround.
#
def highway(self, input_1, input_2, size_1, size_2, l2_penalty=1e-8, layer_size=1):
    output = input_2
    for idx in range(layer_size):
        with tf.name_scope('output_lin_%d' % idx):
            W = tf.Variable(tf.truncated_normal([size_2, size_1], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[size_1]), name="b")
            tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty*tf.nn.l2_loss(W))
            tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty*tf.nn.l2_loss(b))
            output = tf.nn.relu(tf.nn.xw_plus_b(output, W, b))
        with tf.name_scope('transform_lin_%d' % idx):
            W = tf.Variable(tf.truncated_normal([size_1, size_1], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[size_1]), name="b")
            tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty*tf.nn.l2_loss(W))
            tf.add_to_collection(name=tf.GraphKeys.REGULARIZATION_LOSSES, value=l2_penalty*tf.nn.l2_loss(b))
            transform_gate = tf.sigmoid(tf.nn.xw_plus_b(input_1, W, b))
        carry_gate = tf.constant(1.0) - transform_gate
        output = transform_gate * output + carry_gate * input_1
    return output
def q_value(q_dist, num_atoms, num_actions, V_max, delta_z):
    V_min = -V_max
    start = V_min
    end = V_max + delta_z
    delta = delta_z
    z = tf.range(start, end, delta)
    q_as = []
    for action in range(num_actions):
        dist = q_dist[:, num_atoms*action: num_atoms*(action+1)]
        q_a = tf.reduce_sum(tf.multiply(dist, z), axis=1, keep_dims=True)
        q_as.append(q_a)
    q_values = tf.concat(q_as, axis=1)
    return q_values
def _validate(self, machine, n=10):
    N = n * n

    # same row same z
    z = tf.random_normal(shape=[n, self.arch['z_dim']])
    z = tf.tile(z, [1, n])
    z = tf.reshape(z, [N, -1])
    z = tf.Variable(z, trainable=False, dtype=tf.float32)

    # same column same y
    y = tf.range(0, 10, 1, dtype=tf.int64)
    y = tf.reshape(y, [-1, 1])
    y = tf.tile(y, [n, 1])

    Xh = machine.generate(z, y)  # 100, 64, 64, 3
    # Xh = gray2jet(Xh)
    # Xh = make_png_thumbnail(Xh, n)
    Xh = make_png_jet_thumbnail(Xh, n)
    return Xh
def repeat(tensor: tf.Tensor, repeats: int, axis: int) -> tf.Tensor:
    """
    Repeat elements of the input tensor in the specified axis ``repeats``-times.

    .. note::
        Chaining of this op may produce TF warnings although the performance seems to be unaffected.

    :param tensor: TF tensor to be repeated
    :param repeats: number of repeats
    :param axis: axis to repeat
    :return: tensor with repeated elements
    """
    shape = tensor.get_shape().as_list()

    dims = np.arange(len(tensor.shape))
    prepare_perm = np.hstack(([axis], np.delete(dims, axis)))
    restore_perm = np.hstack((dims[1:axis+1], [0], dims[axis+1:]))

    indices = tf.cast(tf.floor(tf.range(0, shape[axis]*repeats)/tf.constant(repeats)), 'int32')

    shuffled = tf.transpose(tensor, prepare_perm)
    repeated = tf.gather(shuffled, indices)
    return tf.transpose(repeated, restore_perm)
def batch_transformer(U, thetas, out_size, name='BatchSpatialTransformer'):
    """Batch Spatial Transformer Layer

    Parameters
    ----------
    U : float
        tensor of inputs [num_batch,height,width,num_channels]
    thetas : float
        a set of transformations for each input [num_batch,num_transforms,6]
    out_size : int
        the size of the output [out_height,out_width]

    Returns: float
        Tensor of size [num_batch*num_transforms,out_height,out_width,num_channels]
    """
    with tf.variable_scope(name):
        num_batch, num_transforms = map(int, thetas.get_shape().as_list()[:2])
        indices = [[i]*num_transforms for i in range(num_batch)]
        input_repeated = tf.gather(U, tf.reshape(indices, [-1]))
        return transformer(input_repeated, thetas, out_size)
def _rnn_attention_decoder(self, decoder_cell, training_wheels):
    loop_fn = self._custom_rnn_loop_fn(decoder_cell.output_size, training_wheels=training_wheels)
    decoder_outputs, _, (context_vectors_array, attention_logits_array, pointer_probability_array) = \
        tf.nn.raw_rnn(decoder_cell, loop_fn, swap_memory=True)
    decoder_outputs = decoder_outputs.stack()
    decoder_outputs = tf.transpose(decoder_outputs, [1, 0, 2])

    attention_logits = attention_logits_array.gather(tf.range(0, attention_logits_array.size() - 1))
    attention_logits = tf.transpose(attention_logits, [1, 0, 2])

    context_vectors = context_vectors_array.gather(tf.range(0, context_vectors_array.size() - 1))
    context_vectors = tf.transpose(context_vectors, [1, 0, 2])

    pointer_probabilities = pointer_probability_array.gather(tf.range(0, pointer_probability_array.size() - 1))
    pointer_probabilities = tf.transpose(pointer_probabilities, [1, 0])

    return decoder_outputs, context_vectors, attention_logits, pointer_probabilities
def setup_reader(self, image_paths, image_shape, num_concurrent, batch_size):
    # Path queue is list of image paths which will further be processed by another queue
    num_images = len(image_paths)
    indices = tf.range(0, num_images, 1)

    self.path_queue = tf.FIFOQueue(capacity=num_images, dtypes=[tf.int32, tf.string], name='path_queue')
    self.enqueue_path = self.path_queue.enqueue_many([indices, image_paths])
    self.close_path = self.path_queue.close()

    processed_queue = tf.FIFOQueue(capacity=num_images,
                                   dtypes=[tf.int32, tf.float32],
                                   shapes=[(), image_shape],
                                   name='processed_queue')

    (idx, processed_image) = self.process()
    enqueue_process = processed_queue.enqueue([idx, processed_image])
    self.dequeue_batch = processed_queue.dequeue_many(batch_size)

    self.queue_runner = tf.train.QueueRunner(processed_queue, [enqueue_process] * num_concurrent)
def distance_biases(time_steps, window_size=10, reuse=False):
    """
    Return a 2-d tensor with the values of the distance biases to be applied
    on the intra-attention matrix of size sentence_size

    Args:
        time_steps: tensor scalar
        window_size: window size
        reuse: reuse variables

    Returns:
        2-d tensor (time_steps, time_steps)
    """
    with tf.variable_scope('distance-bias', reuse=reuse):
        # this is d_{i-j}
        distance_bias = tf.get_variable('dist_bias', [window_size], initializer=tf.zeros_initializer())
        r = tf.range(0, time_steps)
        r_matrix = tf.tile(tf.reshape(r, [1, -1]), tf.stack([time_steps, 1]))
        raw_idxs = r_matrix - tf.reshape(r, [-1, 1])
        clipped_idxs = tf.clip_by_value(raw_idxs, 0, window_size - 1)
        values = tf.nn.embedding_lookup(distance_bias, clipped_idxs)
    return values
def dot(x, y):
    '''Multiplies 2 tensors.

    When attempting to multiply a ND tensor with a ND tensor,
    reproduces the Theano behavior
    (e.g. (2, 3).(4, 3, 5) = (2, 4, 5))
    '''
    if ndim(x) is not None and (ndim(x) > 2 or ndim(y) > 2):
        x_shape = (-1,) + int_shape(x)[1:]
        y_shape = int_shape(y)
        y_permute_dim = list(range(ndim(y)))
        y_permute_dim = [y_permute_dim.pop(-2)] + y_permute_dim
        xt = tf.reshape(x, [-1, x_shape[-1]])
        yt = tf.reshape(tf.transpose(y, perm=y_permute_dim), [y_shape[-2], -1])
        return tf.reshape(tf.matmul(xt, yt), x_shape[:-1] + y_shape[:-2] + y_shape[-1:])
    if is_sparse(x):
        out = tf.sparse_tensor_dense_matmul(x, y)
    else:
        out = tf.matmul(x, y)
    return out
def batch_gather(tensor, indices):
    """Gather in batch from a tensor of arbitrary size.

    In pseudocode this module will produce the following:
    output[i] = tf.gather(tensor[i], indices[i])

    Args:
        tensor: Tensor of arbitrary size.
        indices: Vector of indices.

    Returns:
        output: A tensor of gathered values.
    """
    shape = get_shape(tensor)
    flat_first = tf.reshape(tensor, [shape[0] * shape[1]] + shape[2:])
    indices = tf.convert_to_tensor(indices)
    offset_shape = [shape[0]] + [1] * (indices.shape.ndims - 1)
    offset = tf.reshape(tf.range(shape[0]) * shape[1], offset_shape)
    output = tf.gather(flat_first, indices + offset)
    return output
def batch_gather(reference, indices):
    '''Batchwise gathering of row indices.

    The numpy equivalent is reference[np.arange(batch_size), indices].

    # Arguments
        reference: tensor with ndim >= 2 of shape
            (batch_size, dim1, dim2, ..., dimN)
        indices: 1d integer tensor of shape (batch_size) satisfying
            0 <= i < dim2 for each element i.

    # Returns
        A tensor with shape (batch_size, dim2, ..., dimN)
        equal to reference[1:batch_size, indices]
    '''
    batch_size = K.shape(reference)[0]
    indices = tf.pack([tf.range(batch_size), indices], axis=1)
    return tf.gather_nd(reference, indices)
def rnn(self, sequence, sequence_length, max_length, dropout, batch_size, training,
        num_hidden=TC_MODEL_HIDDEN, num_layers=TC_MODEL_LAYERS):
    # Recurrent network.
    cells = []
    for _ in range(num_layers):
        cell = tf.nn.rnn_cell.GRUCell(num_hidden)
        if training:
            cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=dropout)
        cells.append(cell)
    network = tf.nn.rnn_cell.MultiRNNCell(cells)
    type = sequence.dtype

    sequence_output, _ = tf.nn.dynamic_rnn(network, sequence, dtype=tf.float32,
                                           sequence_length=sequence_length,
                                           initial_state=network.zero_state(batch_size, type))

    # get last output of the dynamic_rnn
    sequence_output = tf.reshape(sequence_output, [batch_size * max_length, num_hidden])
    indexes = tf.range(batch_size) * max_length + (sequence_length - 1)
    output = tf.gather(sequence_output, indexes)
    return output
def rnn(self, sequence, sequence_length, max_length, dropout, batch_size, training,
        num_hidden=TC_MODEL_HIDDEN, num_layers=TC_MODEL_LAYERS):
    # Recurrent network.
    cell_fw = tf.nn.rnn_cell.GRUCell(num_hidden)
    cell_bw = tf.nn.rnn_cell.GRUCell(num_hidden)
    type = sequence.dtype

    (fw_outputs, bw_outputs), _ = \
        tf.nn.bidirectional_dynamic_rnn(cell_fw=cell_fw,
                                        cell_bw=cell_bw,
                                        initial_state_fw=cell_fw.zero_state(batch_size, type),
                                        initial_state_bw=cell_bw.zero_state(batch_size, type),
                                        inputs=sequence,
                                        dtype=tf.float32,
                                        swap_memory=True,
                                        sequence_length=sequence_length)
    sequence_output = tf.concat((fw_outputs, bw_outputs), 2)

    # get last output of the dynamic_rnn
    sequence_output = tf.reshape(sequence_output, [batch_size * max_length, num_hidden * 2])
    indexes = tf.range(batch_size) * max_length + (sequence_length - 1)
    output = tf.gather(sequence_output, indexes)
    return output
def segment_indices(segment_ids, name=None):
    """Returns a `Tensor` of indices within each segment.

    segment_ids should be a sequence of non-decreasing non-negative integers that
    define a set of segments, e.g. [0, 0, 1, 2, 2, 2] defines 3 segments of length
    2, 1 and 3. The return value is a `Tensor` containing the indices within each
    segment.

    Example input: [0, 0, 1, 2, 2, 2]
    Example output: [0, 1, 0, 0, 1, 2]

    Args:
        segment_ids: A 1-d `Tensor` containing an non-decreasing sequence of
            non-negative integers with type `tf.int32` or `tf.int64`.
        name: (Optional) A name for this operation.

    Returns:
        A `Tensor` containing the indices within each segment.
    """
    with tf.name_scope(name, 'segment_indices'):
        segment_lengths = tf.segment_sum(tf.ones_like(segment_ids), segment_ids)
        segment_starts = tf.gather(tf.concat([[0], tf.cumsum(segment_lengths)], 0),
                                   segment_ids)
        return (tf.range(tf.size(segment_ids, out_type=segment_ids.dtype)) -
                segment_starts)
def sample_rp_sequence(self, sequence_size=1):
    from_zero = True
    if np.random.randint(2) == 1 or len(self._zero_reward_indices) == 0:
        from_zero = False
    if len(self._non_zero_reward_indices) == 0:
        from_zero = True

    if from_zero:
        start_pos = np.random.randint(0, len(self._zero_reward_indices) - sequence_size + 1)
    if not from_zero:
        start_pos = np.random.randint(0, len(self._non_zero_reward_indices) - sequence_size + 1)

    sampled_frames = []
    for i in range(sequence_size):
        if from_zero:
            frame = self._zero_reward_indices[start_pos+i]
        if not from_zero:
            frame = self._non_zero_reward_indices[start_pos+i]
        sampled_frames.append(frame)
        if frame.done:
            break

    return sampled_frames
def train(self):
    self.train_op = self.optim.minimize(self.loss, global_step=self.global_step)
    self.writer = tf.train.SummaryWriter("./logs/D_pretrained", self.sess.graph)
    self.summary_op = tf.merge_all_summaries()
    tf.initialize_all_variables().run()
    self.saver = tf.train.Saver(var_list=self.D_params_dict, max_to_keep=self.max_to_keep)
    count = 0
    for idx in range(self.max_iter//3000):
        self.save(self.checkpoint_dir, count)
        self.evaluate('test', count)
        self.evaluate('train', count)
        for k in tqdm(range(3000)):
            right_images, right_text, _ = self.dataset.sequential_sample(self.batch_size)
            right_length = np.sum((right_text!=self.NOT)+0, 1)
            fake_images, fake_text, _ = self.negative_dataset.sequential_sample(self.batch_size)
            fake_length = np.sum((fake_text!=self.NOT)+0, 1)
            wrong_text = self.dataset.get_wrong_text(self.batch_size)
            wrong_length = np.sum((wrong_text!=self.NOT)+0, 1)
            feed_dict = {self.right_images: right_images, self.right_text: right_text, self.right_length: right_length,
                         self.fake_images: fake_images, self.fake_text: fake_text, self.fake_length: fake_length,
                         self.wrong_images: right_images, self.wrong_text: wrong_text, self.wrong_length: wrong_length}
            _, loss, summary_str = self.sess.run([self.train_op, self.loss, self.summary_op], feed_dict)
            self.writer.add_summary(summary_str, count)
            count += 1
def pos_loss_pred(self, i, pos_embeddings, pos_logit, NUM_POS, gold_pos, pos_trainables):
    if self.args.no_pos:
        pos_emb = tf.nn.embedding_lookup(pos_embeddings, gold_pos[i])
        if self.train:
            return 0, pos_emb
        else:
            return tf.gather(gold_pos[i], tf.range(1, self.sent_length)), pos_emb
    else:
        pos_logit = pos_logit[1:]
        log_partition = tf.reduce_logsumexp(pos_logit, [1])

        pos_pred = tf.exp(pos_logit - tf.reshape(log_partition, (-1, 1)))
        pos_emb = tf.concat([tf.reshape(tf.nn.embedding_lookup(pos_embeddings, NUM_POS), (1, -1)),
                             tf.matmul(pos_pred, pos_trainables)], 0)
        if self.train:
            loss = tf.reduce_sum(tf.gather(log_partition, tf.range(self.sent_lengths[i]-1))
                                 - tf.gather(tf.reshape(pos_logit, [-1]),
                                             tf.range(self.sent_lengths[i]-1) * NUM_POS
                                             + tf.gather(gold_pos[i], tf.range(1, self.sent_lengths[i]))))
            return loss, pos_emb
        else:
            return tf.cast(tf.argmax(pos_pred, 1), tf.int32), pos_emb
def _max_pool_grad_grad(dy, x, y, ksize, strides, padding, argmax=None):
    """Gradients of MaxPoolGrad."""
    if argmax is None:
        _, argmax = tf.nn.max_pool_with_argmax(x, ksize, strides, padding)
    grad = dy
    grad_flat = tf.reshape(grad, [-1])
    argmax_flat = tf.reshape(argmax, [-1])

    x_shape = tf.cast(tf.shape(x), argmax.dtype)
    batch_dim = tf.reshape(
        tf.range(x_shape[0], dtype=argmax.dtype), [-1, 1, 1, 1])
    nelem = tf.reduce_prod(x_shape[1:])
    batch_dim *= nelem

    y_zero = tf.zeros_like(y, dtype=argmax.dtype)
    batch_dim += y_zero
    batch_dim = tf.reshape(batch_dim, [-1])

    argmax_flat += batch_dim
    grad_input = tf.gather(grad_flat, argmax_flat)
    grad_input = tf.reshape(grad_input, tf.shape(y))
    return grad_input
def ternary_encoder(input_data):
    """Encoding and compressing the signs """
    a = tf.sign(input_data)  # -1, 0, 1
    a = tf.add(a, 1)  # shift -1,0,1 to 0,1,2 (2'b00,2'b01,2'b10)
    a = tf.reshape(a, [-1])
    pad_size = 4 - tf.mod(tf.size(a), 4)
    pad = tf.range(0.0, pad_size)
    a = tf.concat([a, pad], 0)
    a_split1, a_split2, a_split3, a_split4 = tf.split(a, 4)  # assume the size is dividable by 4

    # encode 4 grads into 1 Byte
    sum_1 = tf.add(a_split1, a_split2*4)
    sum_2 = tf.add(a_split3*16, a_split4*64)
    sum_all = tf.add(sum_1, sum_2)
    encoded = tf.cast(sum_all, tf.uint8)
    return encoded
def SampleRandomFrames(model_input, num_frames, num_samples):
    """Samples a random set of frames of size num_samples.

    Args:
        model_input: A tensor of size batch_size x max_frames x feature_size
        num_frames: A tensor of size batch_size x 1
        num_samples: A scalar

    Returns:
        `model_input`: A tensor of size batch_size x num_samples x feature_size
    """
    batch_size = tf.shape(model_input)[0]
    frame_index = tf.cast(
        tf.multiply(
            tf.random_uniform([batch_size, num_samples]),
            tf.tile(tf.cast(num_frames, tf.float32), [1, num_samples])), tf.int32)
    batch_index = tf.tile(
        tf.expand_dims(tf.range(batch_size), 1), [1, num_samples])
    index = tf.stack([batch_index, frame_index], 2)
    return tf.gather_nd(model_input, index)

## A function to sample evenly spaced frames
def _process_entities(self, entities):
    for entity in entities:
        if entity['is_well_known'] == 1:
            continue
        self.entities.add(entity['type'])

        for j in range(MAX_ARG_VALUES):
            token = 'GENERIC_ENTITY_' + entity['type'] + "_" + str(j)
            self._token_canonicals[token] = ' '.join(tokenize(entity['name'])).strip()
def transition(self, curr_state, next_symbols, batch_size):
    with tf.name_scope('grammar_transition'):
        transitions = tf.gather(tf.constant(self.transition_matrix), curr_state)
        assert transitions.get_shape()[1:] == (self.output_size,)

        indices = tf.stack((tf.range(0, batch_size), next_symbols), axis=1)
        next_state = tf.gather_nd(transitions, indices)
        return next_state
def initialize(self):
    """Initialize the decoder.

    Args:
        name: Name scope for any created operations.

    Returns:
        `(finished, start_inputs, initial_state)`.
    """
    start_inputs = self._embedding_fn(self._tiled_start_tokens)
    print('start_inputs', start_inputs)
    finished = tf.zeros((self.batch_size, self._beam_width), dtype=tf.bool)

    self._initial_num_available_beams = tf.ones((self._batch_size,), dtype=tf.int32)
    self._full_num_available_beams = tf.fill((self._batch_size,), self._beam_width)

    with tf.name_scope('first_beam_mask'):
        self._first_beam_mask = self._make_beam_mask(self._initial_num_available_beams)
    with tf.name_scope('full_beam_mask'):
        self._full_beam_mask = self._make_beam_mask(self._full_num_available_beams)
    with tf.name_scope('minus_inifinity_scores'):
        self._minus_inifinity_scores = tf.fill((self.batch_size, self._beam_width, self._output_size), -1e+8)

    self._batch_size_range = tf.range(self.batch_size)
    initial_state = BeamSearchOptimizationDecoderState(
        cell_state=self._tiled_initial_cell_state,
        previous_logits=tf.zeros([self.batch_size, self._beam_width, self._output_size], dtype=tf.float32),
        previous_score=tf.zeros([self.batch_size, self._beam_width], dtype=tf.float32),
        # During the first time step we only consider the initial beam
        num_available_beams=self._initial_num_available_beams,
        gold_beam_id=tf.zeros([self.batch_size], dtype=tf.int32),
        finished=finished)

    return (finished, start_inputs, initial_state)
def _maybe_tensor_gather_helper(gather_indices, gather_from, batch_size,
                                range_size, gather_shape):
    """Maybe applies _tensor_gather_helper.

    This applies _tensor_gather_helper when the gather_from dims is at least as
    big as the length of gather_shape. This is used in conjunction with nest so
    that we don't apply _tensor_gather_helper to inapplicable values like scalars.

    Args:
        gather_indices: The tensor indices that we use to gather.
        gather_from: The tensor that we are gathering from.
        batch_size: The batch size.
        range_size: The number of values in each range. Likely equal to beam_width.
        gather_shape: What we should reshape gather_from to in order to preserve the
            correct values. An example is when gather_from is the attention from an
            AttentionWrapperState with shape [batch_size, beam_width, attention_size].
            There, we want to preserve the attention_size elements, so gather_shape is
            [batch_size * beam_width, -1]. Then, upon reshape, we still have the
            attention_size as desired.

    Returns:
        output: Gathered tensor of shape tf.shape(gather_from)[:1+len(gather_shape)]
            or the original tensor if its dimensions are too small.
    """
    if gather_from.shape.ndims >= len(gather_shape):
        return _tensor_gather_helper(
            gather_indices=gather_indices,
            gather_from=gather_from,
            batch_size=batch_size,
            range_size=range_size,
            gather_shape=gather_shape)
    else:
        return gather_from
def sparse_tuple_to_texts(tuple):
    indices = tuple[0]
    values = tuple[1]
    results = [''] * tuple[2][0]
    for i in range(len(indices)):
        index = indices[i][0]
        c = values[i]
        c = ' ' if c == SPACE_INDEX else chr(c + FIRST_INDEX)
        results[index] = results[index] + c
    # List of strings
    return results
def ndarray_to_text(value):
    results = ''
    for i in range(len(value)):
        results += chr(value[i] + FIRST_INDEX)
    return results.replace('`', ' ')
def ctc_label_dense_to_sparse(labels, label_lengths, batch_size):
    # The second dimension of labels must be equal to the longest label length in the batch
    correct_shape_assert = tf.assert_equal(tf.shape(labels)[1], tf.reduce_max(label_lengths))
    with tf.control_dependencies([correct_shape_assert]):
        labels = tf.identity(labels)

    label_shape = tf.shape(labels)
    num_batches_tns = tf.stack([label_shape[0]])
    max_num_labels_tns = tf.stack([label_shape[1]])

    def range_less_than(previous_state, current_input):
        return tf.expand_dims(tf.range(label_shape[1]), 0) < current_input

    init = tf.cast(tf.fill(max_num_labels_tns, 0), tf.bool)
    init = tf.expand_dims(init, 0)
    dense_mask = tf.scan(range_less_than, label_lengths, initializer=init, parallel_iterations=1)
    dense_mask = dense_mask[:, 0, :]

    label_array = tf.reshape(tf.tile(tf.range(0, label_shape[1]), num_batches_tns), label_shape)
    label_ind = tf.boolean_mask(label_array, dense_mask)

    batch_array = tf.transpose(tf.reshape(tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),
                                          tf.reverse(label_shape, [0])))
    batch_ind = tf.boolean_mask(batch_array, dense_mask)

    indices = tf.transpose(tf.reshape(tf.concat([batch_ind, label_ind], 0), [2, -1]))
    shape = [batch_size, tf.reduce_max(label_lengths)]
    vals_sparse = gather_nd(labels, indices, shape)

    return tf.SparseTensor(tf.to_int64(indices), vals_sparse, tf.to_int64(label_shape))

# Validate and normalize transcriptions. Returns a cleaned version of the label
# or None if it's invalid.
def sparse_tuple_to_texts(tuple):
    indices = tuple[0]
    values = tuple[1]
    results = [''] * tuple[2][0]
    for i in range(len(indices)):
        index = indices[i][0]
        c = values[i]
        # c = ' ' if c == SPACE_INDEX else chr(c + FIRST_INDEX)
        c = ' ' if c == SPACE_INDEX else (chr(c + FIRST_INDEX) if c <= 26 else str(c-27))
        results[index] = results[index] + c
    # List of strings
    return results
def ndarray_to_text(value):
    results = ''
    for i in range(len(value)):
        results += (chr(value[i] + FIRST_INDEX) if value[i] <= 26 else str(value[i]-27))
        # results += chr(value[i] + FIRST_INDEX)
    return results.replace('`', ' ')
def gather_nd(params, indices, shape):
    rank = len(shape)
    flat_params = tf.reshape(params, [-1])
    multipliers = [reduce(lambda x, y: x*y, shape[i+1:], 1) for i in range(0, rank)]
    indices_unpacked = tf.unstack(tf.transpose(indices, [rank - 1] + list(range(0, rank - 1))))
    flat_indices = sum([a*b for a, b in zip(multipliers, indices_unpacked)])
    return tf.gather(flat_params, flat_indices)

# ctc_label_dense_to_sparse is taken from https://github.com/tensorflow/tensorflow/issues/1742#issuecomment-205291527
#
# The CTC implementation in TensorFlow needs labels in a sparse representation,
# but sparse data and queues don't mix well, so we store padded tensors in the
# queue and convert to a sparse representation after dequeuing a batch.
#
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    """Creates a matrix regression model.

    Args:
        model_input: 'batch' x 'num_features' x 'num_methods' matrix of input features.
        vocab_size: The number of classes in the dataset.

    Returns:
        A dictionary with a tensor containing the probability predictions of the
        model in the 'predictions' key. The dimensions of the tensor are
        batch_size x num_classes."""

    lstm_size = FLAGS.lstm_cells
    number_of_layers = FLAGS.lstm_layers

    hidden_outputs = model_input
    state_outputs = []
    for i in range(number_of_layers):
        state_output, hidden_outputs = self.rnn_gate(hidden_outputs, lstm_size, num_frames,
                                                     sub_scope="lstm_lsyer%d" % i)
        state_outputs.append(state_output)
    state_outputs = tf.concat(state_outputs, axis=1)

    aggregated_model = getattr(video_level_models,
                               FLAGS.video_level_classifier_model)
    return aggregated_model().create_model(
        model_input=state_outputs,
        vocab_size=vocab_size,
        **unused_params)
def create_model(self, model_input, vocab_size, num_frames, **unused_params):
    """Creates a model which uses a stack of LSTMs to represent the video.

    Args:
        model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
            input features.
        vocab_size: The number of classes in the dataset.
        num_frames: A vector of length 'batch' which indicates the number of
            frames for each video (before padding).

    Returns:
        A dictionary with a tensor containing the probability predictions of the
        model in the 'predictions' key. The dimensions of the tensor are
        'batch_size' x 'num_classes'.
    """
    lstm_size = FLAGS.lstm_cells
    number_of_layers = FLAGS.lstm_layers

    ## Batch normalize the input
    stacked_lstm = tf.contrib.rnn.MultiRNNCell(
        [
            tf.contrib.rnn.BasicLSTMCell(
                lstm_size, forget_bias=1.0, state_is_tuple=True)
            for _ in range(number_of_layers)
        ],
        state_is_tuple=True)

    with tf.variable_scope("RNN"):
        outputs, state = tf.nn.dynamic_rnn(stacked_lstm, model_input,
                                           sequence_length=num_frames,
                                           swap_memory=True,
                                           dtype=tf.float32)

    state_c = tf.concat(map(lambda x: x.c, state), axis=1)
    aggregated_model = getattr(video_level_models,
                               FLAGS.video_level_classifier_model)
    return aggregated_model().create_model(
        model_input=state_c,
        vocab_size=vocab_size,
        **unused_params)
def cnn(self, model_input, l2_penalty=1e-8, num_filters=[1024, 1024, 1024], filter_sizes=[1, 2, 3],
        sub_scope="", **unused_params):
    max_frames = model_input.get_shape().as_list()[1]
    num_features = model_input.get_shape().as_list()[2]

    shift_inputs = []
    for i in range(max(filter_sizes)):
        if i == 0:
            shift_inputs.append(model_input)
        else:
            shift_inputs.append(tf.pad(model_input, paddings=[[0,0],[i,0],[0,0]])[:,:max_frames,:])

    cnn_outputs = []
    for nf, fs in zip(num_filters, filter_sizes):
        sub_input = tf.concat(shift_inputs[:fs], axis=2)
        sub_filter = tf.get_variable(sub_scope+"cnn-filter-len%d"%fs,
                                     shape=[num_features*fs, nf], dtype=tf.float32,
                                     initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
                                     regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
        cnn_outputs.append(tf.einsum("ijk,kl->ijl", sub_input, sub_filter))

    cnn_output = tf.concat(cnn_outputs, axis=2)
    cnn_output = slim.batch_norm(
        cnn_output,
        center=True,
        scale=True,
        is_training=FLAGS.train,
        scope=sub_scope+"cluster_bn")
    return cnn_output, max_frames
def rnn(self, model_input, lstm_size, num_frames, sub_scope="", **unused_params):
    """Creates a model which uses a stack of LSTMs to represent the video.

    Args:
        model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
            input features.
        vocab_size: The number of classes in the dataset.
        num_frames: A vector of length 'batch' which indicates the number of
            frames for each video (before padding).

    Returns:
        A dictionary with a tensor containing the probability predictions of the
        model in the 'predictions' key. The dimensions of the tensor are
        'batch_size' x 'num_classes'.
    """
    ## Batch normalize the input
    stacked_lstm = tf.contrib.rnn.MultiRNNCell(
        [
            tf.contrib.rnn.BasicLSTMCell(
                lstm_size, forget_bias=1.0, state_is_tuple=True)
            for _ in range(1)
        ],
        state_is_tuple=True)

    with tf.variable_scope("RNN-"+sub_scope):
        outputs, state = tf.nn.dynamic_rnn(stacked_lstm, model_input,
                                           sequence_length=num_frames,
                                           swap_memory=True,
                                           dtype=tf.float32)
    state_out = tf.concat(map(lambda x: x.c, state), axis=1)

    return state_out
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes,
                                     sub_scope="cnn%d"%(layer+1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = self.rnn(cnn_output, lstm_size, num_frames, sub_scope="rnn%d"%(layer+1))
        moe_inputs.append(cnn_multiscale)
        final_probility = self.sub_moe(cnn_multiscale, vocab_size, scopename="moe%d"%(layer+1))
        final_probilities.append(final_probility)
        num_t = pool_size*(num_t//pool_size)
        cnn_output = tf.reshape(cnn_output[:,:num_t,:], [-1, num_t//pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames//pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, features_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_sum(final_probilities*weight, axis=1)
    return result
def create_model(self, model_input, vocab_size, num_frames, distill_labels=None, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    cnn_size = FLAGS.cnn_cells
    num_filters = [cnn_size, cnn_size, cnn_size*2]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes,
                                     sub_scope="cnn%d"%(layer+1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = self.rnn(cnn_output, lstm_size, num_frames, sub_scope="rnn%d"%(layer+1))
        moe_inputs.append(cnn_multiscale)
        final_probility = self.sub_moe(cnn_multiscale, vocab_size, distill_labels=distill_labels,
                                       scopename="moe%d"%(layer+1))
        final_probilities.append(final_probility)
        num_t = pool_size*(num_t//pool_size)
        cnn_output = tf.reshape(cnn_output[:,:num_t,:], [-1, num_t//pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames//pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, lstm_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_sum(final_probilities*weight, axis=1)
    return result
def cnn(self, model_input, l2_penalty=1e-8, num_filters=[1024, 1024, 1024], filter_sizes=[1, 2, 3],
        sub_scope="", **unused_params):
    max_frames = model_input.get_shape().as_list()[1]
    num_features = model_input.get_shape().as_list()[2]

    shift_inputs = []
    for i in range(max(filter_sizes)):
        if i == 0:
            shift_inputs.append(model_input)
        else:
            shift_inputs.append(tf.pad(model_input, paddings=[[0,0],[i,0],[0,0]])[:,:max_frames,:])

    cnn_outputs = []
    for nf, fs in zip(num_filters, filter_sizes):
        sub_input = tf.concat(shift_inputs[:fs], axis=2)
        sub_filter = tf.get_variable(sub_scope+"cnn-filter-len%d"%fs,
                                     shape=[num_features*fs, nf], dtype=tf.float32,
                                     initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
                                     regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
        cnn_outputs.append(tf.einsum("ijk,kl->ijl", sub_input, sub_filter))

    cnn_output = tf.concat(cnn_outputs, axis=2)
    cnn_output = slim.batch_norm(
        cnn_output,
        center=True,
        scale=True,
        is_training=FLAGS.train,
        scope=sub_scope+"cluster_bn")
    return cnn_output, max_frames
def rnn_standard(self, model_input, lstm_size, num_frames, sub_scope="", **unused_params):
    """Creates a model which uses a stack of LSTMs to represent the video.

    Args:
        model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
            input features.
        vocab_size: The number of classes in the dataset.
        num_frames: A vector of length 'batch' which indicates the number of
            frames for each video (before padding).

    Returns:
        A dictionary with a tensor containing the probability predictions of the
        model in the 'predictions' key. The dimensions of the tensor are
        'batch_size' x 'num_classes'.
    """
    ## Batch normalize the input
    stacked_lstm = tf.contrib.rnn.MultiRNNCell(
        [
            tf.contrib.rnn.BasicLSTMCell(
                lstm_size, forget_bias=1.0, state_is_tuple=True)
            for _ in range(1)
        ],
        state_is_tuple=True)

    with tf.variable_scope("RNN-"+sub_scope):
        outputs, state = tf.nn.dynamic_rnn(stacked_lstm, model_input,
                                           sequence_length=num_frames,
                                           swap_memory=True,
                                           dtype=tf.float32)
    state_out = tf.concat(map(lambda x: x.c, state), axis=1)

    return state_out