The following 48 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.parse_single_sequence_example().
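For reference, tf.parse_single_sequence_example takes a single serialized tf.train.SequenceExample and returns a pair of dicts (context, sequence): the context holds per-example features, the sequence holds per-timestep feature lists. Below is a minimal sketch of the call, assuming the TF 1.x API used throughout these examples; the feature names "length" and "tokens" are illustrative only, not taken from any example below.

import tensorflow as tf

def parse_fn(serialized):
    # context: one value per example; sequence: one value per time step.
    context, sequence = tf.parse_single_sequence_example(
        serialized,
        context_features={"length": tf.FixedLenFeature([], dtype=tf.int64)},
        sequence_features={"tokens": tf.FixedLenSequenceFeature([], dtype=tf.int64)})
    return context["length"], sequence["tokens"]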
def _read_sequence_example(filename_queue, n_labels=50, n_samples=59049, n_segments=10):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    context, sequence = tf.parse_single_sequence_example(
        serialized_example,
        context_features={
            'raw_labels': tf.FixedLenFeature([], dtype=tf.string)
        },
        sequence_features={
            'raw_segments': tf.FixedLenSequenceFeature([], dtype=tf.string)
        })

    segments = tf.decode_raw(sequence['raw_segments'], tf.float32)
    segments.set_shape([n_segments, n_samples])

    labels = tf.decode_raw(context['raw_labels'], tf.uint8)
    labels.set_shape([n_labels])
    labels = tf.cast(labels, tf.float32)

    return segments, labels
def read_tfrecord(filename_queue):
    reader = tf.TFRecordReader()
    _, examples = reader.read(filename_queue)
    context_features = {
        "length": tf.FixedLenFeature([], dtype=tf.int64)
    }
    sequence_features = {
        "tokens": tf.FixedLenSequenceFeature([], dtype=tf.int64),
        "labels": tf.FixedLenSequenceFeature([], dtype=tf.int64)
    }
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=examples,
        context_features=context_features,
        sequence_features=sequence_features
    )
    return context_parsed, sequence_parsed
def example_parser(self, filename_queue):
    reader = tf.TFRecordReader()
    key, record_string = reader.read(filename_queue)
    features = {
        'labels': tf.FixedLenSequenceFeature([], tf.int64),
        'tokens': tf.FixedLenSequenceFeature([], tf.int64),
        'shapes': tf.FixedLenSequenceFeature([], tf.int64),
        'chars': tf.FixedLenSequenceFeature([], tf.int64),
        'seq_len': tf.FixedLenSequenceFeature([], tf.int64),
        'tok_len': tf.FixedLenSequenceFeature([], tf.int64),
    }
    _, example = tf.parse_single_sequence_example(serialized=record_string,
                                                  sequence_features=features)
    labels = example['labels']
    tokens = example['tokens']
    shapes = example['shapes']
    chars = example['chars']
    seq_len = example['seq_len']
    tok_len = example['tok_len']
    # context = c['context']
    return labels, tokens, shapes, chars, seq_len, tok_len
    # return labels, tokens, labels, labels, labels
def read_and_decode_single_example(filenames, shuffle=False, num_epochs=None):
    # First construct a queue containing a list of filenames.
    # This lets a user split their dataset into multiple files to keep the size down.
    # filename_queue = tf.train.string_input_producer([filename], num_epochs=10)
    filename_queue = tf.train.string_input_producer(filenames, shuffle=shuffle,
                                                    num_epochs=num_epochs)
    reader = tf.TFRecordReader()
    # One can read a single serialized example from a filename;
    # serialized_example is a Tensor of type string.
    _, serialized_ex = reader.read(filename_queue)

    context, sequences = tf.parse_single_sequence_example(
        serialized_ex,
        context_features={
            "seq_length": tf.FixedLenFeature([], dtype=tf.int64)
        },
        sequence_features={
            "seq_feature": tf.VarLenFeature(dtype=tf.int64),
            "label": tf.VarLenFeature(dtype=tf.int64)
        })
    return context, sequences
def ReadInput(self, num_epochs=None, val=False, test=False):
    if val:
        filenames = tf.gfile.Glob(self.data_dir + '/surfing_val.tfrecords')
    elif test:
        filenames = tf.gfile.Glob(self.data_dir + '/surfing_test.tfrecords')
    else:
        filenames = tf.gfile.Glob(self.data_dir + '/surfing.tfrecords')
    filename_queue = tf.train.string_input_producer(filenames, num_epochs=num_epochs,
                                                    shuffle=True)
    reader = tf.TFRecordReader()
    _, example = reader.read(filename_queue)
    feature_spec = {
        self.features: tf.FixedLenSequenceFeature(
            shape=[self.image_width * self.image_width * self.c_dim],
            dtype=tf.float32)}
    _, features = tf.parse_single_sequence_example(
        example, sequence_features=feature_spec)
    moving_objs = tf.reshape(
        features[self.features],
        [self.video_len, self.image_width, self.image_width, self.c_dim])
    examples = tf.train.shuffle_batch(
        [moving_objs],
        batch_size=self.batch_size,
        num_threads=self.batch_size,
        capacity=self.batch_size * 100,
        min_after_dequeue=self.batch_size * 4)
    return examples
def prepare_reader(self, filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    contexts, features = tf.parse_single_sequence_example(
        serialized_example,
        context_features={
            "video_id": tf.FixedLenFeature([], tf.string),
            "labels": tf.VarLenFeature(tf.int64)},
        sequence_features={
            "rgb": tf.FixedLenSequenceFeature([], dtype=tf.string),
            "audio": tf.FixedLenSequenceFeature([], dtype=tf.string),
        })

    # read ground truth labels
    labels = (tf.cast(
        tf.sparse_to_dense(contexts["labels"].values, (self.num_classes,), 1,
                           validate_indices=False),
        tf.bool))

    rgbs, num_frames = self.get_video_matrix(features["rgb"], 1024, self.max_frames)
    audios, num_frames = self.get_video_matrix(features["audio"], 1024, self.max_frames)

    batch_video_ids = tf.expand_dims(contexts["video_id"], 0)
    batch_rgbs = tf.expand_dims(rgbs, 0)
    batch_audios = tf.expand_dims(audios, 0)
    batch_labels = tf.expand_dims(labels, 0)
    batch_frames = tf.expand_dims(num_frames, 0)

    return batch_video_ids, batch_rgbs, batch_audios, batch_labels, batch_frames
def frame_example_2_np(seq_example_bytes,
                       max_quantized_value=2,
                       min_quantized_value=-2):
    feature_names = ['rgb', 'audio']
    feature_sizes = [1024, 128]
    with tf.Graph().as_default():
        contexts, features = tf.parse_single_sequence_example(
            seq_example_bytes,
            context_features={"video_id": tf.FixedLenFeature([], tf.string),
                              "labels": tf.VarLenFeature(tf.int64)},
            sequence_features={
                feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string)
                for feature_name in feature_names
            })

        decoded_features = {
            name: tf.reshape(
                tf.cast(tf.decode_raw(features[name], tf.uint8), tf.float32),
                [-1, size])
            for name, size in zip(feature_names, feature_sizes)
        }
        feature_matrices = {
            name: utils.Dequantize(decoded_features[name],
                                   max_quantized_value,
                                   min_quantized_value)
            for name in feature_names}

        with tf.Session() as sess:
            vid = sess.run(contexts['video_id'])
            labs = sess.run(contexts['labels'].values)
            rgb = sess.run(feature_matrices['rgb'])
            audio = sess.run(feature_matrices['audio'])

    return vid, labs, rgb, audio


#%% Split frame-level file into three video-level files: all, 1st half, 2nd half.
def build_graph():
    feature_names = ['rgb', 'audio']
    feature_sizes = [1024, 128]
    max_quantized_value = 2
    min_quantized_value = -2

    seq_example_bytes = tf.placeholder(tf.string)
    contexts, features = tf.parse_single_sequence_example(
        seq_example_bytes,
        context_features={"video_id": tf.FixedLenFeature([], tf.string),
                          "labels": tf.VarLenFeature(tf.int64)},
        sequence_features={
            feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string)
            for feature_name in feature_names
        })

    decoded_features = {
        name: tf.reshape(
            tf.cast(tf.decode_raw(features[name], tf.uint8), tf.float32),
            [-1, size])
        for name, size in zip(feature_names, feature_sizes)
    }
    feature_matrices = {
        name: utils.Dequantize(decoded_features[name],
                               max_quantized_value,
                               min_quantized_value)
        for name in feature_names}

    tf.add_to_collection("vid_tsr", contexts['video_id'])
    tf.add_to_collection("labs_tsr", contexts['labels'].values)
    tf.add_to_collection("rgb_tsr", feature_matrices['rgb'])
    tf.add_to_collection("audio_tsr", feature_matrices['audio'])
    tf.add_to_collection("seq_example_bytes", seq_example_bytes)

#    with tf.Session() as sess:
#        writer = tf.summary.FileWriter('./graphs', sess.graph)
def parse_record_fn(self):
    def _parse(proto):
        spec = self.record_spec()
        ctx, seq = tf.parse_single_sequence_example(proto, **spec)
        ctx.update(seq)
        result = []
        for field in self.FIELDS:
            result.append(ctx[field])
        return result
    return _parse
def parse_sequence_example(serialized, image_feature, caption_feature):
    """Parses a tensorflow.SequenceExample into an image and caption.

    Args:
      serialized: A scalar string Tensor; a single serialized SequenceExample.
      image_feature: Name of SequenceExample context feature containing image data.
      caption_feature: Name of SequenceExample feature list containing integer captions.

    Returns:
      encoded_image: A scalar string Tensor containing a JPEG encoded image.
      caption: A 1-D int64 Tensor with dynamically specified length.
    """
    context, sequence = tf.parse_single_sequence_example(
        serialized,
        context_features={
            image_feature: tf.FixedLenFeature([], dtype=tf.string)
        },
        sequence_features={
            caption_feature: tf.FixedLenSequenceFeature([], dtype=tf.int64),
        })

    encoded_image = context[image_feature]
    caption = sequence[caption_feature]
    return encoded_image, caption
def make_example(seq_len, spec_feat, labels):
    '''Creates a SequenceExample for a single utterance.

    This function makes a SequenceExample given the sequence length,
    mfcc features and corresponding transcript.
    These sequence examples are read using tf.parse_single_sequence_example
    during training.

    Note: Some of the tf modules used in this function (such as
    tf.train.Feature) do not have comprehensive documentation in v0.12.
    This function was put together using the test routines in the
    tensorflow repo.
    See: https://github.com/tensorflow/tensorflow/
    blob/246a3724f5406b357aefcad561407720f5ccb5dc/
    tensorflow/python/kernel_tests/parsing_ops_test.py

    Args:
        seq_len: integer representing the sequence length in time frames.
        spec_feat: [TxF] matrix of mfcc features.
        labels: list of ints representing the encoded transcript.

    Returns:
        Serialized sequence example.
    '''
    # Feature lists for the sequential features of the example
    feats_list = [tf.train.Feature(float_list=tf.train.FloatList(value=frame))
                  for frame in spec_feat]
    feat_dict = {"feats": tf.train.FeatureList(feature=feats_list)}
    sequence_feats = tf.train.FeatureLists(feature_list=feat_dict)

    # Context features for the entire sequence
    len_feat = tf.train.Feature(int64_list=tf.train.Int64List(value=[seq_len]))
    label_feat = tf.train.Feature(int64_list=tf.train.Int64List(value=labels))
    context_feats = tf.train.Features(feature={"seq_len": len_feat,
                                               "labels": label_feat})

    ex = tf.train.SequenceExample(context=context_feats,
                                  feature_lists=sequence_feats)

    return ex.SerializeToString()
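A record serialized by make_example above can be read back with a parse spec that mirrors the same feature names, as the mfcc reader later in this section does. A minimal sketch, assuming the mfcc frame dimension n_mfcc (a hypothetical parameter, not part of the original):

def parse_utterance(serialized, n_mfcc=13):
    # "seq_len" and "labels" live in the context; "feats" is the per-frame
    # feature list written by make_example. n_mfcc is assumed, not from the source.
    context, sequence = tf.parse_single_sequence_example(
        serialized,
        context_features={
            "seq_len": tf.FixedLenFeature([], dtype=tf.int64),
            "labels": tf.VarLenFeature(dtype=tf.int64)
        },
        sequence_features={
            "feats": tf.FixedLenSequenceFeature([n_mfcc], dtype=tf.float32)
        })
    return sequence["feats"], context["labels"], context["seq_len"]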
def prepare_reader(self, filename_queue, max_quantized_value=2, min_quantized_value=-2):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    context_features, sequence_features = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64)}, None
    if self.sequence_data:
        sequence_features = {
            self.feature_name[0]: tf.FixedLenSequenceFeature([], dtype=tf.string),
            self.feature_name[1]: tf.FixedLenSequenceFeature([], dtype=tf.string),
        }
    else:
        context_features[self.feature_name[0]] = tf.FixedLenFeature(self.feature_size[0], tf.float32)
        context_features[self.feature_name[1]] = tf.FixedLenFeature(self.feature_size[1], tf.float32)

    contexts, features = tf.parse_single_sequence_example(
        serialized_example,
        context_features=context_features,
        sequence_features=sequence_features)

    labels = tf.cast(contexts["labels"].values, tf.int64)

    if self.sequence_data:
        decoded_features = tf.reshape(
            tf.cast(tf.decode_raw(features[self.feature_name[0]], tf.uint8), tf.float32),
            [-1, self.feature_size[0]])
        video_matrix = Dequantize(decoded_features, max_quantized_value, min_quantized_value)

        decoded_features = tf.reshape(
            tf.cast(tf.decode_raw(features[self.feature_name[1]], tf.uint8), tf.float32),
            [-1, self.feature_size[1]])
        audio_matrix = Dequantize(decoded_features, max_quantized_value, min_quantized_value)

        num_frames = tf.minimum(tf.shape(decoded_features)[0], self.max_frames)
    else:
        video_matrix = contexts[self.feature_name[0]]
        audio_matrix = contexts[self.feature_name[1]]
        num_frames = tf.constant(-1)

    # Pad or truncate to 'max_frames' frames.
    # video_matrix = resize_axis(video_matrix, 0, self.max_frames)
    return contexts["video_id"], video_matrix, audio_matrix, labels, num_frames
def parse_example_queue(example_queue, config):
    """Read one example.

    This function reads one example and returns the context sequence and
    tag sequence correspondingly.

    Args:
        example_queue: A serialized example, e.g. as returned by a reader.
        config: ModelConfig. Provides length_name, context_feature_name and
            tag_feature_name, the feature names in the TFRecord.

    Returns:
        input_seq: An int32 Tensor with variable length.
        tag_seq: An int32 Tensor with variable length.
        length: A scalar Tensor holding the sequence length.
    """
    # Parse one example
    context, features = tf.parse_single_sequence_example(
        example_queue,
        context_features={
            config.length_name: tf.FixedLenFeature([], dtype=tf.int64)
        },
        sequence_features={
            config.context_feature_name: tf.FixedLenSequenceFeature([], dtype=tf.int64),
            config.tag_feature_name: tf.FixedLenSequenceFeature([], dtype=tf.int64)
        })

    return (features[config.context_feature_name],
            features[config.tag_feature_name],
            context[config.length_name])
def decode(self, serialized_example, items=None):
    """Decodes the given serialized TF-example.

    Args:
      serialized_example: a serialized TF-example tensor.
      items: the list of items to decode. These must be a subset of the
        item keys in self._items_to_handlers. If `items` is left as None,
        then all of the items in self._items_to_handlers are decoded.

    Returns:
      the decoded items, a list of tensors.
    """
    context, sequence = tf.parse_single_sequence_example(
        serialized_example,
        self._context_keys_to_features,
        self._sequence_keys_to_features)

    # Merge context and sequence features
    example = {}
    example.update(context)
    example.update(sequence)

    all_features = {}
    all_features.update(self._context_keys_to_features)
    all_features.update(self._sequence_keys_to_features)

    # Reshape non-sparse elements just once:
    for k, value in all_features.items():
        if isinstance(value, tf.FixedLenFeature):
            example[k] = tf.reshape(example[k], value.shape)

    if not items:
        items = self._items_to_handlers.keys()

    outputs = []
    for item in items:
        handler = self._items_to_handlers[item]
        keys_to_tensors = {key: example[key] for key in handler.keys}
        outputs.append(handler.tensors_to_item(keys_to_tensors))

    return outputs
def parse_sequence_example(serialized_example):
    context, sequence = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features={
            "length": tf.FixedLenFeature([], dtype=tf.int64)
        },
        sequence_features={
            "source": tf.FixedLenSequenceFeature([], dtype=tf.int64),
            "target": tf.FixedLenSequenceFeature([], dtype=tf.int64)
        }
    )
    return (context['length'], sequence['source'], sequence['target'])
def decode(self, data, items=None):
    """Decodes the given serialized TF-example.

    Args:
      data: a serialized TF-example tensor.
      items: the list of items to decode. These must be a subset of the
        item keys in self._items_to_handlers. If `items` is left as None,
        then all of the items in self._items_to_handlers are decoded.

    Returns:
      the decoded items, a list of tensors.
    """
    context, sequence = tf.parse_single_sequence_example(
        data,
        self._context_keys_to_features,
        self._sequence_keys_to_features)

    # Merge context and sequence features
    example = {}
    example.update(context)
    example.update(sequence)

    all_features = {}
    all_features.update(self._context_keys_to_features)
    all_features.update(self._sequence_keys_to_features)

    # Reshape non-sparse elements just once:
    for k, value in all_features.items():
        if isinstance(value, tf.FixedLenFeature):
            example[k] = tf.reshape(example[k], value.shape)

    if not items:
        items = self._items_to_handlers.keys()

    outputs = []
    for item in items:
        handler = self._items_to_handlers[item]
        keys_to_tensors = {key: example[key] for key in handler.keys}
        outputs.append(handler.tensors_to_item(keys_to_tensors))

    return outputs
def single_feature_file_reader(filename_queue, num_features):
    """Read and interpret data from a set of TFRecord files.

    Args:
      filename_queue: a queue of filenames to read through.
      num_features: the depth of the features.

    Returns:
      A pair of tuples:
        1. a context dictionary for the feature
        2. the vessel movement features, tensor of dimension
           [width, num_features].
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    # The serialized example is converted back to actual values.
    context_features, sequence_features = tf.parse_single_sequence_example(
        serialized_example,
        # Defaults are not specified since both keys are required.
        context_features={
            'mmsi': tf.FixedLenFeature([], tf.int64),
        },
        sequence_features={
            'movement_features': tf.FixedLenSequenceFeature(
                shape=(num_features,), dtype=tf.float32)
        })

    return context_features, sequence_features
def prepare_serialized_examples(self, serialized_example,
                                max_quantized_value=2, min_quantized_value=-2):
    contexts, features = tf.parse_single_sequence_example(
        serialized_example,
        context_features={"video_id": tf.FixedLenFeature([], tf.string),
                          "labels": tf.VarLenFeature(tf.int64)},
        sequence_features={
            feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string)
            for feature_name in self.feature_names
        })

    # read ground truth labels
    labels = (tf.cast(
        tf.sparse_to_dense(contexts["labels"].values, (self.num_classes,), 1,
                           validate_indices=False),
        tf.bool))

    # loads (potentially) different types of features and concatenates them
    num_features = len(self.feature_names)
    assert num_features > 0, "No feature selected: feature_names is empty!"

    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    num_frames = -1  # the number of frames in the video
    feature_matrices = [None] * num_features  # an array of different features
    for feature_index in range(num_features):
        feature_matrix, num_frames_in_this_feature = self.get_video_matrix(
            features[self.feature_names[feature_index]],
            self.feature_sizes[feature_index],
            self.max_frames,
            max_quantized_value,
            min_quantized_value)
        if num_frames == -1:
            num_frames = num_frames_in_this_feature
        else:
            tf.assert_equal(num_frames, num_frames_in_this_feature)

        feature_matrices[feature_index] = feature_matrix

    # cap the number of frames at self.max_frames
    num_frames = tf.minimum(num_frames, self.max_frames)

    # concatenate different features
    video_matrix = tf.concat(feature_matrices, 1)

    # convert to batch format.
    # TODO: Do proper batch reads to remove the IO bottleneck.
    batch_video_ids = tf.expand_dims(contexts["video_id"], 0)
    batch_video_matrix = tf.expand_dims(video_matrix, 0)
    batch_labels = tf.expand_dims(labels, 0)
    batch_frames = tf.expand_dims(num_frames, 0)

    return batch_video_ids, batch_video_matrix, batch_labels, batch_frames
def read_my_file_format_dis(filename_queue, is_training):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    context_features = {
        "height": tf.FixedLenFeature([], dtype=tf.int64),
        "width": tf.FixedLenFeature([], dtype=tf.int64),
        "sequence_length": tf.FixedLenFeature([], dtype=tf.int64),
        "text": tf.FixedLenFeature([], dtype=tf.string),
        "label": tf.FixedLenFeature([], dtype=tf.int64)
    }
    sequence_features = {
        "frames": tf.FixedLenSequenceFeature([], dtype=tf.string),
        "masks": tf.FixedLenSequenceFeature([], dtype=tf.string)
    }
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features=context_features,
        sequence_features=sequence_features
    )

    height = 128  # context_parsed['height'].eval()
    width = 128  # context_parsed['width'].eval()
    sequence_length = 32  # context_parsed['sequence_length'].eval()

    clip = decode_frames(sequence_parsed['frames'], height, width, sequence_length)

    # generate one-hot vector
    label = context_parsed['label']
    label = tf.one_hot(label - 1, FLAGS.num_class)
    text = context_parsed['text']

    # randomly sample clips of 16 frames
    if is_training:
        idx = tf.squeeze(tf.random_uniform([1], 0,
                                           sequence_length - FLAGS.seq_length + 1,
                                           dtype=tf.int32))
    else:
        idx = 8
    clip = clip[idx:idx + FLAGS.seq_length] / 255.0 * 2 - 1

    if is_training:
        # randomly reverse data
        reverse = tf.squeeze(tf.random_uniform([1], 0, 2, dtype=tf.int32))
        clip = tf.cond(tf.equal(reverse, 0), lambda: clip, lambda: clip[::-1])
        # randomly horizontally flip data
        flip = tf.squeeze(tf.random_uniform([1], 0, 2, dtype=tf.int32))
        clip = tf.cond(tf.equal(flip, 0), lambda: clip,
                       lambda: tf.map_fn(lambda img: tf.image.flip_left_right(img), clip))

    clip.set_shape([FLAGS.seq_length, height, width, 3])
    return clip, label, text
def read_record(self, record):
    """Parse a TFRecord into a set of values, names and types that can be
    queued and then read.

    Returns:
        - queue_values: Dict with tensor values.
        - queue_dtypes: Types for each tensor.
        - queue_names: Names for each tensor.
    """
    # We parse variable length features (bboxes in an image) as sequence
    # features
    context_example, sequence_example = tf.parse_single_sequence_example(
        record,
        context_features=self.CONTEXT_FEATURES,
        sequence_features=self.SEQUENCE_FEATURES
    )

    # Decode image
    image_raw = tf.image.decode_image(
        context_example['image_raw'], channels=3
    )
    image = tf.cast(image_raw, tf.float32)

    height = tf.cast(context_example['height'], tf.int32)
    width = tf.cast(context_example['width'], tf.int32)
    image_shape = tf.stack([height, width, 3])
    image = tf.reshape(image, image_shape)

    label = self._sparse_to_tensor(sequence_example['label'])
    xmin = self._sparse_to_tensor(sequence_example['xmin'])
    xmax = self._sparse_to_tensor(sequence_example['xmax'])
    ymin = self._sparse_to_tensor(sequence_example['ymin'])
    ymax = self._sparse_to_tensor(sequence_example['ymax'])

    # Stack parsed tensors to define bounding boxes of shape (num_boxes, 5)
    bboxes = tf.stack([xmin, ymin, xmax, ymax, label], axis=1)

    image, bboxes, preprocessing_details = self.preprocess(image, bboxes)

    filename = tf.cast(context_example['filename'], tf.string)

    # TODO: Send additional metadata through the queue (scale_factor,
    # applied_augmentations)
    queue_dtypes = [tf.float32, tf.int32, tf.string]
    queue_names = ['image', 'bboxes', 'filename']
    queue_values = {
        'image': image,
        'bboxes': bboxes,
        'filename': filename,
    }

    return queue_values, queue_dtypes, queue_names
def prepare_serialized_examples(self, serialized_example,
                                max_quantized_value=2, min_quantized_value=-2):
    contexts, features = tf.parse_single_sequence_example(
        serialized_example,
        context_features={"video_id": tf.FixedLenFeature([], tf.string),
                          "labels": tf.VarLenFeature(tf.int64)},
        sequence_features={
            feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string)
            for feature_name in self.feature_names
        })

    # read ground truth labels
    labels = (tf.cast(
        tf.sparse_to_dense(contexts["labels"].values, (4716,), 1,
                           validate_indices=False),
        tf.bool))

    # loads (potentially) different types of features and concatenates them
    num_features = len(self.feature_names)
    assert num_features > 0, "No feature selected: feature_names is empty!"

    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    num_frames = -1  # the number of frames in the video
    feature_matrices = [None] * num_features  # an array of different features
    for feature_index in range(num_features):
        feature_matrix, num_frames_in_this_feature = self.get_video_matrix(
            features[self.feature_names[feature_index]],
            self.feature_sizes[feature_index],
            self.max_frames,
            max_quantized_value,
            min_quantized_value)
        if num_frames == -1:
            num_frames = num_frames_in_this_feature
        else:
            tf.assert_equal(num_frames, num_frames_in_this_feature)

        feature_matrices[feature_index] = feature_matrix

    # cap the number of frames at self.max_frames
    num_frames = tf.minimum(num_frames, self.max_frames)

    # concatenate different features
    video_matrix = tf.concat(feature_matrices, 1)

    # convert to batch format.
    # TODO: Do proper batch reads to remove the IO bottleneck.
    batch_video_ids = tf.expand_dims(contexts["video_id"], 0)
    batch_video_matrix = tf.expand_dims(video_matrix, 0)
    batch_labels = tf.expand_dims(labels, 0)
    batch_frames = tf.expand_dims(num_frames, 0)

    return batch_video_ids, batch_video_matrix, batch_labels, batch_frames
def prepare_serialized_examples(self, serialized_example,
                                max_quantized_value=2, min_quantized_value=-2):
    contexts, features = tf.parse_single_sequence_example(
        serialized_example,
        context_features={"video_id": tf.FixedLenFeature([], tf.string),
                          "labels": tf.VarLenFeature(tf.int64)},
        sequence_features={
            feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string)
            for feature_name in self.feature_names
        })

    # read ground truth labels
    labels = (tf.cast(
        tf.sparse_to_dense(contexts["labels"].values, (4716,), 1,
                           validate_indices=False),
        tf.bool))

    # loads (potentially) different types of features and concatenates them
    num_features = len(self.feature_names)
    assert num_features > 0, "No feature selected: feature_names is empty!"

    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    num_frames = -1  # the number of frames in the video
    feature_matrices = [None] * num_features  # an array of different features
    for feature_index in range(num_features):
        feature_matrix, num_frames_in_this_feature = self.get_video_matrix(
            features[self.feature_names[feature_index]],
            self.feature_sizes[feature_index],
            self.max_frames,
            max_quantized_value,
            min_quantized_value)
        if num_frames == -1:
            num_frames = num_frames_in_this_feature
        else:
            tf.assert_equal(num_frames, num_frames_in_this_feature)

        feature_matrices[feature_index] = feature_matrix

    # cap the number of frames at self.max_frames
    num_frames = tf.minimum(num_frames, self.max_frames)

    # concatenate different features
    video_matrix = tf.concat(feature_matrices, 1)

    # convert to batch format.
    # TODO: Do proper batch reads to remove the IO bottleneck.
    batch_video_ids = tf.expand_dims(contexts["video_id"], 0)
    batch_video_matrix = tf.expand_dims(video_matrix, 0)
    batch_labels = tf.expand_dims(labels, 0)
    batch_frames = tf.expand_dims(num_frames, 0)

    return batch_video_ids, batch_video_matrix, batch_labels, batch_frames
def get_padded_batch(file_list, batch_size, input_size, output_size,
                     num_enqueuing_threads=4, num_epochs=1, shuffle=True):
    """Reads batches of SequenceExamples from TFRecords and pads them.

    Can deal with variable length SequenceExamples by padding each batch to the
    length of the longest sequence with zeros.

    Args:
      file_list: A list of paths to TFRecord files containing SequenceExamples.
      batch_size: The number of SequenceExamples to include in each batch.
      input_size: The size of each input vector. The returned batch of inputs
          will have a shape [batch_size, num_steps, input_size].
      num_enqueuing_threads: The number of threads to use for enqueuing
          SequenceExamples.

    Returns:
      inputs: A tensor of shape [batch_size, num_steps, input_size] of float32s.
      labels: A tensor of shape [batch_size, num_steps] of float32s.
      lengths: A tensor of shape [batch_size] of int32s. The lengths of each
          SequenceExample before padding.
    """
    file_queue = tf.train.string_input_producer(
        file_list, num_epochs=num_epochs, shuffle=shuffle)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(file_queue)

    sequence_features = {
        'inputs': tf.FixedLenSequenceFeature(shape=[input_size], dtype=tf.float32),
        'inputs_cmvn': tf.FixedLenSequenceFeature(shape=[input_size], dtype=tf.float32),
        'labels1': tf.FixedLenSequenceFeature(shape=[output_size], dtype=tf.float32),
        'labels2': tf.FixedLenSequenceFeature(shape=[output_size], dtype=tf.float32)}

    _, sequence = tf.parse_single_sequence_example(
        serialized_example, sequence_features=sequence_features)

    length = tf.shape(sequence['inputs'])[0]
    capacity = 1000 + (num_enqueuing_threads + 1) * batch_size
    queue = tf.PaddingFIFOQueue(
        capacity=capacity,
        dtypes=[tf.float32, tf.float32, tf.float32, tf.float32, tf.int32],
        shapes=[(None, input_size), (None, input_size),
                (None, output_size), (None, output_size), ()])

    enqueue_ops = [queue.enqueue([sequence['inputs'],
                                  sequence['inputs_cmvn'],
                                  sequence['labels1'],
                                  sequence['labels2'],
                                  length])] * num_enqueuing_threads
    tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops))
    return queue.dequeue_many(batch_size)
def read_and_decode(filename_queue, feature_columns):
    """
    Read and decode one example from a TFRecords file
    :param feature_columns: list of feature columns
    :param filename_queue: filename queue containing the TFRecords filenames
    :return: list of tensors representing one example
    """
    with tf.device('/cpu:0'):
        # New TFRecord file
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)

        # Contextual TFRecords features
        context_features = {
            "x_length": tf.FixedLenFeature([], dtype=tf.int64),
            "x_id": tf.FixedLenFeature([], dtype=tf.string)
        }

        # Sequential TFRecords features
        sequence_features = {
            "x_tokens": tf.FixedLenSequenceFeature([], dtype=tf.int64),
            "x_chars": tf.FixedLenSequenceFeature([], dtype=tf.int64),
            "x_chars_len": tf.FixedLenSequenceFeature([], dtype=tf.int64),
            "y": tf.FixedLenSequenceFeature([], dtype=tf.int64),
        }

        for col in feature_columns:
            sequence_features["x_att_{}".format(col)] = \
                tf.FixedLenSequenceFeature([], dtype=tf.int64)

        # Parsing contextual and sequential features
        context_parsed, sequence_parsed = tf.parse_single_sequence_example(
            serialized=serialized_example,
            context_features=context_features,
            sequence_features=sequence_features
        )

        sequence_length = tf.cast(context_parsed["x_length"], tf.int32)
        chars = tf.reshape(sequence_parsed["x_chars"], tf.stack([sequence_length, -1]))

        # Preparing tensor list, casting values to 32 bits when necessary
        tensor_list = [
            context_parsed["x_id"],
            tf.cast(context_parsed["x_length"], tf.int32),
            tf.cast(sequence_parsed["x_tokens"], dtype=tf.int32),
            tf.cast(chars, dtype=tf.int32),
            tf.cast(sequence_parsed["x_chars_len"], dtype=tf.int32),
            tf.cast(sequence_parsed["y"], dtype=tf.int32)
        ]

        for col in feature_columns:
            tensor_list.append(tf.cast(sequence_parsed["x_att_{}".format(col)],
                                       dtype=tf.int32))

        return tensor_list
def read_and_decode_test(filename_queue, feature_columns):
    """
    Read and decode one example from a TFRecords file
    :param feature_columns: list of feature columns
    :param filename_queue: filename queue containing the TFRecords filenames
    :return: list of tensors representing one example
    """
    with tf.device('/cpu:0'):
        # New TFRecord file
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)

        # Contextual TFRecords features
        context_features = {
            "x_length": tf.FixedLenFeature([], dtype=tf.int64),
            "x_id": tf.FixedLenFeature([], dtype=tf.string)
        }

        # Sequential TFRecords features
        sequence_features = {
            "x_tokens": tf.FixedLenSequenceFeature([], dtype=tf.int64),
            "x_chars": tf.FixedLenSequenceFeature([], dtype=tf.int64),
            "x_chars_len": tf.FixedLenSequenceFeature([], dtype=tf.int64),
        }

        for col in feature_columns:
            sequence_features["x_att_{}".format(col)] = \
                tf.FixedLenSequenceFeature([], dtype=tf.int64)

        # Parsing contextual and sequential features
        context_parsed, sequence_parsed = tf.parse_single_sequence_example(
            serialized=serialized_example,
            context_features=context_features,
            sequence_features=sequence_features
        )

        sequence_length = tf.cast(context_parsed["x_length"], tf.int32)
        chars = tf.reshape(sequence_parsed["x_chars"], tf.stack([sequence_length, -1]))

        # Preparing tensor list, casting values to 32 bits when necessary
        tensor_list = [
            context_parsed["x_id"],
            tf.cast(context_parsed["x_length"], tf.int32),
            tf.cast(sequence_parsed["x_tokens"], dtype=tf.int32),
            tf.cast(chars, dtype=tf.int32),
            tf.cast(sequence_parsed["x_chars_len"], dtype=tf.int32),
        ]

        for col in feature_columns:
            tensor_list.append(tf.cast(sequence_parsed["x_att_{}".format(col)],
                                       dtype=tf.int32))

        return tensor_list
def _generate_feats_and_label_batch(filename_queue, batch_size):
    """Construct a queued batch of spectral features and transcriptions.

    Args:
      filename_queue: queue of filenames to read data from.
      batch_size: Number of utterances per batch.

    Returns:
      feats: mfccs. 3D tensor of [batch_size, max_time, 13] size.
      labels: transcripts. List of length batch_size.
      seq_lens: Sequence Lengths. List of length batch_size.
    """
    # Define how to parse the example
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    context_features = {
        "seq_len": tf.FixedLenFeature([], dtype=tf.int64),
        "labels": tf.VarLenFeature(dtype=tf.int64)
    }
    sequence_features = {
        # mfcc features are 13 dimensional
        "feats": tf.FixedLenSequenceFeature([13, ], dtype=tf.float32)
    }

    # Parse the example (returns a dictionary of tensors)
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features=context_features,
        sequence_features=sequence_features
    )

    # Generate a batch worth of examples after bucketing
    seq_len, (feats, labels) = tf.contrib.training.bucket_by_sequence_length(
        input_length=tf.cast(context_parsed['seq_len'], tf.int32),
        tensors=[sequence_parsed['feats'], context_parsed['labels']],
        batch_size=batch_size,
        bucket_boundaries=list(range(100, 1900, 100)),
        allow_smaller_final_batch=True,
        num_threads=16,
        dynamic_pad=True)

    return feats, tf.cast(labels, tf.int32), seq_len
def get_padded_batch(file_list, batch_size, input_size, output_size,
                     num_enqueuing_threads=4, num_epochs=1, shuffle=True):
    """Reads batches of SequenceExamples from TFRecords and pads them.

    Can deal with variable length SequenceExamples by padding each batch to the
    length of the longest sequence with zeros.

    Args:
      file_list: A list of paths to TFRecord files containing SequenceExamples.
      batch_size: The number of SequenceExamples to include in each batch.
      input_size: The size of each input vector. The returned batch of inputs
          will have a shape [batch_size, num_steps, input_size].
      num_enqueuing_threads: The number of threads to use for enqueuing
          SequenceExamples.

    Returns:
      inputs: A tensor of shape [batch_size, num_steps, input_size] of float32s.
      labels: A tensor of shape [batch_size, num_steps] of float32s.
      lengths: A tensor of shape [batch_size] of int32s. The lengths of each
          SequenceExample before padding.
    """
    file_queue = tf.train.string_input_producer(
        file_list, num_epochs=num_epochs, shuffle=shuffle)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(file_queue)

    sequence_features = {
        'inputs': tf.FixedLenSequenceFeature(shape=[input_size], dtype=tf.float32),
        'labels': tf.FixedLenSequenceFeature(shape=[output_size], dtype=tf.float32),
        'genders': tf.FixedLenSequenceFeature(shape=[2], dtype=tf.float32)}

    _, sequence = tf.parse_single_sequence_example(
        serialized_example, sequence_features=sequence_features)

    length = tf.shape(sequence['inputs'])[0]
    capacity = 1000 + (num_enqueuing_threads + 1) * batch_size
    queue = tf.PaddingFIFOQueue(
        capacity=capacity,
        dtypes=[tf.float32, tf.float32, tf.float32, tf.int32],
        shapes=[(None, input_size), (None, output_size), (1, 2), ()])

    enqueue_ops = [queue.enqueue([sequence['inputs'],
                                  sequence['labels'],
                                  sequence['genders'],
                                  length])] * num_enqueuing_threads
    tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops))
    return queue.dequeue_many(batch_size)
def get_padded_batch_v2(file_list, batch_size, input_size, output_size,
                        num_enqueuing_threads=4, num_epochs=1, shuffle=True):
    """Reads batches of SequenceExamples from TFRecords and pads them.

    Can deal with variable length SequenceExamples by padding each batch to the
    length of the longest sequence with zeros.

    Args:
      file_list: A list of paths to TFRecord files containing SequenceExamples.
      batch_size: The number of SequenceExamples to include in each batch.
      input_size: The size of each input vector. The returned batch of inputs
          will have a shape [batch_size, num_steps, input_size].
      num_enqueuing_threads: The number of threads to use for enqueuing
          SequenceExamples.

    Returns:
      inputs: A tensor of shape [batch_size, num_steps, input_size] of float32s.
      labels: A tensor of shape [batch_size, num_steps] of float32s.
      lengths: A tensor of shape [batch_size] of int32s. The lengths of each
          SequenceExample before padding.
    """
    file_queue = tf.train.string_input_producer(
        file_list, num_epochs=num_epochs, shuffle=shuffle)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(file_queue)

    sequence_features = {
        'inputs': tf.FixedLenSequenceFeature(shape=[input_size], dtype=tf.float32),
        'inputs_cmvn': tf.FixedLenSequenceFeature(shape=[input_size], dtype=tf.float32),
        'labels1': tf.FixedLenSequenceFeature(shape=[output_size], dtype=tf.float32),
        'labels2': tf.FixedLenSequenceFeature(shape=[output_size], dtype=tf.float32),
    }

    _, sequence = tf.parse_single_sequence_example(
        serialized_example, sequence_features=sequence_features)

    length = tf.shape(sequence['inputs'])[0]
    capacity = 1000 + (num_enqueuing_threads + 1) * batch_size
    queue = tf.PaddingFIFOQueue(
        capacity=capacity,
        dtypes=[tf.float32, tf.float32, tf.float32, tf.float32, tf.int32],
        shapes=[(None, input_size), (None, input_size),
                (None, output_size), (None, output_size), ()])

    enqueue_ops = [queue.enqueue([sequence['inputs'],
                                  sequence['inputs_cmvn'],
                                  sequence['labels1'],
                                  sequence['labels2'],
                                  length])] * num_enqueuing_threads
    tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops))
    return queue.dequeue_many(batch_size)