我们从 Python 开源项目中，提取了以下 50 个代码示例，用于说明如何使用 tensorflow.TFRecordReader()。
def read_example(self, filename_queue):
    """Read one serialized example from the queue and decode its image.

    Args:
        filename_queue: queue of TFRecord file names to read from.

    Returns:
        A `(decoded_image, label)` pair. The image is JPEG-decoded with
        `self.image_channels` channels when `self.image_format` is 'jpeg';
        otherwise its bytes are decoded as raw uint8 values.
    """
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(filename_queue)

    # Every record carries a scalar int64 label and the image bytes.
    feature_spec = {
        'label': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(record, features=feature_spec)
    raw_image = parsed['image_raw']

    if self.image_format != 'jpeg':
        pixels = tf.decode_raw(raw_image, tf.uint8)
    else:
        pixels = tf.image.decode_jpeg(raw_image, channels=self.image_channels)
    return pixels, parsed['label']
def prepare_reader(self, filename_queue, batch_size=1024):
    """Read up to `batch_size` records and parse them into feature tensors.

    Args:
        filename_queue: queue of TFRecord file names to read from.
        batch_size: upper bound on the number of records read per call.

    Returns:
        A 4-tuple: the "video_id" strings, the features named in
        `self.feature_names` concatenated along axis 1, the labels as a
        `[None, self.num_classes]` indicator tensor, and a vector of ones
        with one entry per record read.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(filename_queue, batch_size)

    # Sanity-check the configured feature lists before building the schema.
    names = self.feature_names
    assert len(names) > 0, "self.feature_names is empty!"
    assert len(names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(names), len(self.feature_sizes))

    feature_map = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for index, feature_name in enumerate(names):
        feature_map[feature_name] = tf.FixedLenFeature(
            [self.feature_sizes[index]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=feature_map)

    # Sparse label ids -> dense indicator matrix with a known class dimension.
    labels = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])

    concatenated_features = tf.concat([parsed[n] for n in names], 1)
    ones_per_record = tf.ones([tf.shape(serialized_examples)[0]])
    return parsed["video_id"], concatenated_features, labels, ones_per_record
def prepare_reader(self, filename_queue, max_quantized_value=2, min_quantized_value=-2):
    """Read one serialized record and pass it to `prepare_serialized_examples`.

    Args:
      filename_queue: A tensorflow queue of filename locations.
      max_quantized_value: the maximum of the quantized value.
      min_quantized_value: the minimum of the quantized value.

    Returns:
      Whatever `self.prepare_serialized_examples` returns for the record;
      the original documentation describes it as a tuple of video indexes,
      video features, labels, and padding data.
    """
    record_reader = tf.TFRecordReader()
    # read() yields (key, value); only the serialized value is needed.
    _, record = record_reader.read(filename_queue)
    return self.prepare_serialized_examples(
        record, max_quantized_value, min_quantized_value)
def _tfrecord_to_graph_ops(self, num_epochs):
    """Build the graph ops that read one TFRecord and parse it.

    Args:
        num_epochs: forwarded to `tf.train.string_input_producer`.

    Returns:
        The `(features, targets)` pair produced by `parse_mnist_tfrec`.
    """
    with tf.variable_scope('tfrec_to_graph'):
        file_queue = tf.train.string_input_producer(
            self.filenames_list,
            name=self.name + '_file_queue',
            num_epochs=num_epochs,
        )

        # The reader must use the same compression the files were written with.
        reader_options = tf.python_io.TFRecordOptions(
            compression_type=self.compression)
        record_reader = tf.TFRecordReader(
            options=reader_options, name=self.name + '_tfrec_reader')

        _, tfrecord = record_reader.read(file_queue)
        features, targets = parse_mnist_tfrec(
            tfrecord, self.name, self.features_shape)
    return features, targets
def read_and_decode(filename_queue, batch_size):
    """Read one record, decode and augment its image, and return shuffled batches.

    Args:
        filename_queue: queue of TFRecord file names to read from.
        batch_size: number of images per batch.

    Returns:
        The batched tensor produced by `tf.train.shuffle_batch`; each image is
        reshaped to 128x128x3 before batching.
    """
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(filename_queue)

    # `features()` is defined elsewhere and supplies the parsing schema.
    schema = features()
    parsed = tf.parse_single_example(record, features=schema)

    hr_image = tf.decode_raw(parsed['hr_image'], tf.uint8)
    height = tf.cast(parsed['height'], tf.int32)
    width = tf.cast(parsed['width'], tf.int32)
    print(height)

    # The stored height/width are not used; a fixed 128x128x3 shape is applied.
    image_shape = tf.stack([128, 128, 3])
    hr_image = tf.reshape(hr_image, image_shape)

    # Random augmentation: horizontal flip, then contrast jitter.
    hr_image = tf.image.random_flip_left_right(hr_image)
    hr_image = tf.image.random_contrast(hr_image, 0.5, 1.3)

    return tf.train.shuffle_batch(
        [hr_image],
        batch_size=batch_size,
        capacity=30,
        num_threads=2,
        min_after_dequeue=10)
def read(self, shuffle=True, num_epochs=None):
    """Build batched `(inputs_seq, output)` tensors from `self.filename`.

    Args:
        shuffle: when true, batch with `tf.train.shuffle_batch`; otherwise
            with `tf.train.batch`.
        num_epochs: forwarded to `tf.train.string_input_producer`.

    Returns:
        A `(inputs_seqs, outputs)` pair of batched tensors.
    """
    with tf.name_scope('input'):
        record_reader = tf.TFRecordReader()
        filename_queue = tf.train.string_input_producer(
            [self.filename], num_epochs=num_epochs)
        _, serialized_input = record_reader.read(filename_queue)

        feature_spec = {
            'inputs_seq': tf.FixedLenFeature([self.seq_len * 2 + 3], tf.int64),
            'output': tf.FixedLenFeature([1], tf.int64),
        }
        parsed = tf.parse_single_example(serialized_input, features=feature_spec)
        tensors = [parsed['inputs_seq'], parsed['output']]

        min_after_dequeue = 100
        if not shuffle:
            inputs_seqs, outputs = tf.train.batch(
                tensors, batch_size=self.batch_size)
        else:
            inputs_seqs, outputs = tf.train.shuffle_batch(
                tensors,
                batch_size=self.batch_size,
                num_threads=2,
                capacity=min_after_dequeue + 3 * self.batch_size,
                min_after_dequeue=min_after_dequeue)
        return inputs_seqs, outputs
def read_and_decode(filename_queue):
    """Read one record and return its image and label as float32 tensors.

    Args:
        filename_queue: queue of TFRecord file names to read from.

    Returns:
        `(image, label)`: the image shaped `[IMAGE_HEIGHT, IMAGE_WIDTH, 1]`
        after scaling by 1/255 and subtracting 0.5, and the label shaped
        `[CHARS_NUM, CLASSES_NUM]`.
    """
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(filename_queue)

    feature_spec = {
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label_raw': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(record, features=feature_spec)

    # Image bytes are interpreted as int16 values, then normalised.
    flat_image = tf.decode_raw(parsed['image_raw'], tf.int16)
    flat_image.set_shape([IMAGE_HEIGHT * IMAGE_WIDTH])
    flat_image = tf.cast(flat_image, tf.float32) * (1. / 255) - 0.5
    image_2d = tf.reshape(flat_image, [IMAGE_HEIGHT, IMAGE_WIDTH, 1])

    # Label bytes are interpreted as uint8 and laid out per character.
    flat_label = tf.decode_raw(parsed['label_raw'], tf.uint8)
    flat_label.set_shape([CHARS_NUM * CLASSES_NUM])
    label_2d = tf.reshape(flat_label, [CHARS_NUM, CLASSES_NUM])

    image_out = tf.cast(image_2d, tf.float32)
    label_out = tf.cast(label_2d, tf.float32)
    return image_out, label_out
def read_and_decode_embedding(filename_queue):
    """Read one record and return its int32 sequence and int32 label.

    Args:
        filename_queue: queue of TFRecord file names to read from.

    Returns:
        `(sequence, label)`: the sequence has static shape
        `[FLAGS.embed_length]`; the label is the stored int64 cast to int32.
    """
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(filename_queue)

    # No defaults are given, so both keys must be present in every record.
    feature_spec = {
        'label': tf.FixedLenFeature([], tf.int64),
        'sequence_raw': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(record, feature_spec)

    # The sequence bytes are interpreted as int32 values of known length.
    s_decode = tf.decode_raw(parsed['sequence_raw'], tf.int32)
    s_decode.set_shape([FLAGS.embed_length])

    return s_decode, tf.cast(parsed['label'], tf.int32)
def read_and_decode(filename_queue):
    """Read one record and build the input and target spectrograms.

    Args:
        filename_queue: queue of TFRecord file names to read from.

    Returns:
        `(input_spec, target_spec)`: the input is `stack_spectrograms` applied
        to the mixed spectrogram; the target is the song and voice
        spectrograms concatenated along axis 1.
    """
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(filename_queue)

    spec_keys = ('song_spec', 'voice_spec', 'mixed_spec')
    feature_spec = {key: tf.FixedLenFeature([], tf.string) for key in spec_keys}
    parsed = tf.parse_single_example(record, features=feature_spec)

    song_spec = transform_spec_from_raw(parsed['song_spec'])
    voice_spec = transform_spec_from_raw(parsed['voice_spec'])
    mixed_spec = transform_spec_from_raw(parsed['mixed_spec'])

    # The model input is the (stacked) mixture; the target is song + voice.
    input_spec = stack_spectrograms(mixed_spec)
    target_spec = tf.concat([song_spec, voice_spec], axis=1)
    return input_spec, target_spec
def __init__(self, tfrecords_file, image_size=256, min_queue_examples=1000, batch_size=1, num_threads=8, name=''):
    """Store the reader configuration and create a `tf.TFRecordReader`.

    Args:
      tfrecords_file: string, tfrecords file path
      image_size: stored as `self.image_size`; not otherwise used here
      min_queue_examples: integer, minimum number of samples to retain
        in the queue that provides of batches of examples
      batch_size: integer, number of images per batch
      num_threads: integer, number of preprocess threads
      name: stored as `self.name`; not otherwise used here
    """
    self.name = name
    self.tfrecords_file = tfrecords_file
    self.image_size = image_size

    # Batching/queueing parameters consumed by other methods of the class.
    self.batch_size = batch_size
    self.num_threads = num_threads
    self.min_queue_examples = min_queue_examples

    self.reader = tf.TFRecordReader()
def prepare_reader(self, filename_queue, batch_size=1024):
    """Read up to `batch_size` records, optionally ZLIB-compressed.

    Args:
      filename_queue: A tensorflow queue of filename locations.
      batch_size: upper bound on the number of records read per call.

    Returns:
      The result of `self.prepare_serialized_examples` on the records read;
      the original documentation describes it as a tuple of video indexes,
      features, labels, and padding data.
    """
    opts = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)
    # Only pass the ZLIB options when the data is flagged as compressed.
    record_reader = (
        tf.TFRecordReader(options=opts) if self.decode_zlib else tf.TFRecordReader())

    _, serialized_examples = record_reader.read_up_to(filename_queue, batch_size)

    # Register the raw records in a graph collection under a fixed key.
    tf.add_to_collection("serialized_examples", serialized_examples)
    return self.prepare_serialized_examples(serialized_examples)
def decode_from_tfrecords(filename, num_epoch=None):
    """Read one record from `filename` and return its image and label.

    Args:
        filename: path of the TFRecord file to read.
        num_epoch: forwarded to `tf.train.string_input_producer` as
            `num_epochs`.

    Returns:
        `(image, label)`: the image is the uint8 payload reshaped to the
        height/width/nchannel stored in the record; the label is cast to int32.
    """
    # Queue that feeds the file name to the reader.
    filename_queue = tf.train.string_input_producer([filename], num_epochs=num_epoch)
    reader = tf.TFRecordReader()
    _, serialized = reader.read(filename_queue)

    example = tf.parse_single_example(serialized, features={
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'nchannel': tf.FixedLenFeature([], tf.int64),
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    })

    label = tf.cast(example['label'], tf.int32)
    image = tf.decode_raw(example['image'], tf.uint8)

    # tf.pack was renamed to tf.stack and removed in TensorFlow 1.0.
    image_shape = tf.stack([
        tf.cast(example['height'], tf.int32),
        tf.cast(example['width'], tf.int32),
        tf.cast(example['nchannel'], tf.int32),
    ])
    image = tf.reshape(image, image_shape)
    return image, label
def read_and_decode(filename, batch_size):
    """Read records from `filename` and return batched images and labels.

    Args:
        filename: path of the TFRecord file to read.
        batch_size: number of examples per batch.

    Returns:
        `(image_batch, label_batch)`: images are 512x144x3 float32 values
        scaled by 1/255 minus 0.5; labels are int32 reshaped to `[batch_size]`.
    """
    # Queue that feeds the file name to the reader.
    filename_queue = tf.train.string_input_producer([filename])
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(filename_queue)

    # Parse the serialized example into its label and raw image bytes.
    feature_spec = {
        'label': tf.FixedLenFeature([], tf.int64),
        'img_raw': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(record, features=feature_spec)

    img = tf.decode_raw(parsed['img_raw'], tf.uint8)
    print('xxxx: ', img.get_shape())
    img = tf.reshape(img, [512, 144, 3])
    img = tf.cast(img, tf.float32) * (1. / 255) - 0.5
    label = tf.cast(parsed['label'], tf.int32)

    image_batch, label_batch = tf.train.batch(
        [img, label],
        batch_size=batch_size,
        num_threads=64,
        capacity=2000)
    flat_labels = tf.reshape(label_batch, [batch_size])
    return image_batch, flat_labels
def read_and_decode(filename, img_size=128, depth=1):
    """Read one record from a `.tfrecords` file; return image and one-hot label.

    Args:
        filename: path of the file to read; must end with '.tfrecords'.
        img_size: side length used to reshape the decoded image.
        depth: number of channels used to reshape the decoded image.

    Returns:
        `(img, label_onehot)`: the image as float32 scaled by 1/255 minus 0.5
        with shape `[img_size, img_size, depth]`, and the label one-hot
        encoded over `n_classes` (defined elsewhere). For a file name without
        the '.tfrecords' suffix, a message is printed and `([], [])` returned.
    """
    if not filename.endswith('.tfrecords'):
        # Call form of print: same output on Python 2, and valid on Python 3.
        print("Invalid file \"{:s}\"".format(filename))
        return [], []

    data_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(data_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'img_raw': tf.FixedLenFeature([], tf.string),
        })

    img = tf.decode_raw(features['img_raw'], tf.uint8)
    img = tf.reshape(img, [img_size, img_size, depth])
    # Normalize the image
    img = tf.cast(img, tf.float32) * (1. / 255) - 0.5

    label = tf.cast(features['label'], tf.int32)
    label_onehot = tf.stack(tf.one_hot(label, n_classes))
    return img, label_onehot
#read_and_decode('test.tfrecords')
def read_and_decode(filename, img_size=128, depth=1):
    """Read one record from a `.tfrecords` file; return image and one-hot label.

    Args:
        filename: path of the file to read; must end with '.tfrecords'.
        img_size: side length used to reshape the decoded image.
        depth: number of channels used to reshape the decoded image.

    Returns:
        `(img, label_onehot)`: the image as float32 scaled by 1/255 minus 0.5
        with shape `[img_size, img_size, depth]`, and the label one-hot
        encoded over `n_classes` (defined elsewhere). For a file name without
        the '.tfrecords' suffix, a message is printed and `([], [])` returned.
    """
    if not filename.endswith('.tfrecords'):
        # Call form of print: same output on Python 2, and valid on Python 3.
        print("Invalid file \"{:s}\"".format(filename))
        return [], []

    data_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(data_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'img_raw': tf.FixedLenFeature([], tf.string),
        })

    img = tf.decode_raw(features['img_raw'], tf.uint8)
    img = tf.reshape(img, [img_size, img_size, depth])
    # Normalize the image
    img = tf.cast(img, tf.float32) * (1. / 255) - 0.5

    label = tf.cast(features['label'], tf.int32)
    label_onehot = tf.stack(tf.one_hot(label, n_classes))
    return img, label_onehot
def test_batch_randomized(self):
    """Check the ops `read_batch_examples` creates with randomized input.

    Verifies the output tensor name, the presence and types of the expected
    graph nodes, the file names held by the input constant, and the capacity
    attribute of the shuffle queue.
    """
    name = "my_batch"
    batch_size = 17
    queue_capacity = 1234

    with tf.Graph().as_default() as g, self.test_session(graph=g) as sess:
        inputs = tf.contrib.learn.io.read_batch_examples(
            _VALID_FILE_PATTERN,
            batch_size,
            reader=tf.TFRecordReader,
            randomize_input=True,
            queue_capacity=queue_capacity,
            name=name)
        self.assertEqual("%s:1" % name, inputs.name)

        # Names of the nodes expected under the `name` scope.
        file_name_queue_name = "%s/file_name_queue" % name
        file_names_name = "%s/input" % file_name_queue_name
        example_queue_name = "%s/random_shuffle_queue" % name
        reader_name = "%s/read/TFRecordReader" % name

        expected_ops = {
            file_names_name: "Const",
            file_name_queue_name: "FIFOQueue",
            reader_name: "TFRecordReader",
            example_queue_name: "RandomShuffleQueue",
            name: "QueueDequeueMany",
        }
        op_nodes = test_util.assert_ops_in_graph(expected_ops, g)

        fetched_names = sess.run(["%s:0" % file_names_name])[0]
        self.assertEqual(set(_FILE_NAMES), set(fetched_names))

        actual_capacity = op_nodes[example_queue_name].attr["capacity"].i
        self.assertEqual(queue_capacity, actual_capacity)
def read_my_file_format(filename_queue, resize_shape=None):
    """Read one record, JPEG-decode its image, and run `preprocessing` on it.

    :param filename_queue: tf.train.string_input_producer object
    :param resize_shape: 2 element list defining the shape to resize images to;
        passed through to `preprocessing`.
    :return: the value returned by `preprocessing` for the decoded image.
    """
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(filename_queue)

    # height/channels/width are parsed but only the encoded bytes are used.
    feature_spec = {
        'image/encoded': tf.FixedLenFeature([], tf.string),
        'image/height': tf.FixedLenFeature([], tf.int64),
        'image/channels': tf.FixedLenFeature([], tf.int64),
        'image/width': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(record, features=feature_spec)

    decoded = tf.image.decode_jpeg(parsed['image/encoded'], 3)
    return preprocessing(decoded, resize_shape)
def decode_image_objects(paths):
    """Read one record, decode its object annotations, and load its image.

    Args:
        paths: TFRecord file names passed to `tf.train.string_input_producer`.

    Returns:
        `(image, imageshape, objects_class, objects_coord)`: the JPEG image
        loaded from the path stored in the record, the stored 3-element shape,
        the int64 class ids, and the float32 coordinates reshaped to `[-1, 4]`.
    """
    # inspect.stack()[0][3] is this function's own name; it names the scope.
    with tf.name_scope(inspect.stack()[0][3]):
        with tf.name_scope('parse_example'):
            record_reader = tf.TFRecordReader()
            filename_queue = tf.train.string_input_producer(paths)
            _, serialized = record_reader.read(filename_queue)
            feature_spec = {
                'imagepath': tf.FixedLenFeature([], tf.string),
                'imageshape': tf.FixedLenFeature([3], tf.int64),
                'objects': tf.FixedLenFeature([2], tf.string),
            }
            example = tf.parse_single_example(serialized, features=feature_spec)
            imagepath = example['imagepath']
            objects = example['objects']

        with tf.name_scope('decode_objects'):
            # objects[0] holds the class ids, objects[1] the box coordinates.
            objects_class = tf.decode_raw(objects[0], tf.int64, name='objects_class')
            flat_coord = tf.decode_raw(objects[1], tf.float32)
            objects_coord = tf.reshape(flat_coord, [-1, 4], name='objects_coord')

        with tf.name_scope('load_image'):
            imagefile = tf.read_file(imagepath)
            image = tf.image.decode_jpeg(imagefile, channels=3)

    return image, example['imageshape'], objects_class, objects_coord
def get_dataset(dataset_name, dataset_dir, image_count, class_count, split_name):
    """Build a `slim.dataset.Dataset` over the TFRecord shards of one split.

    Args:
        dataset_name: prefix of the shard file names.
        dataset_dir: directory holding the shards and the label file.
        image_count: passed as `num_samples`.
        class_count: passed as `num_classes`.
        split_name: split component of the shard file names.

    Returns:
        A `slim.dataset.Dataset` reading files that match
        `<dataset_name>_<split_name>_*.tfrecord` in `dataset_dir`.
    """
    slim = tf.contrib.slim

    items_to_descriptions = {
        'image': 'A color image.',
        'label': 'An integer in range(0, class_count)',
    }
    shard_glob = '{}_{}_*.tfrecord'.format(dataset_name, split_name)
    file_pattern = os.path.join(dataset_dir, shard_glob)

    # The reader class (not an instance) is handed to slim.
    reader = tf.TFRecordReader

    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),
        'image/class/label': tf.FixedLenFeature(
            [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
    }
    items_to_handlers = {
        'image': slim.tfexample_decoder.Image(),
        'label': slim.tfexample_decoder.Tensor('image/class/label'),
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(
        keys_to_features, items_to_handlers)

    labels_to_names = read_label_file(dataset_dir)

    dataset = slim.dataset.Dataset(
        data_sources=file_pattern,
        reader=reader,
        decoder=decoder,
        num_samples=image_count,
        items_to_descriptions=items_to_descriptions,
        num_classes=class_count,
        labels_to_names=labels_to_names,
        shuffle=True)
    return dataset
def decode_record(filename_queue, patch_size, channel_num=3):
    """Read one record and return its normalised image patch and label.

    Args:
        filename_queue: queue of TFRecord file names to read from.
        patch_size: side length used to reshape the decoded image.
        channel_num: number of channels used to reshape the decoded image.

    Returns:
        `(img, label)`: float32 image scaled by 1/255 minus 0.5 with shape
        `[patch_size, patch_size, channel_num]`, and the label cast to int32.
    """
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(filename_queue)

    feature_spec = {
        'label': tf.FixedLenFeature([], tf.int64),
        'image': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(record, features=feature_spec)

    pixels = tf.decode_raw(parsed['image'], tf.uint8)
    patch = tf.reshape(pixels, [patch_size, patch_size, channel_num])
    patch = tf.cast(patch, tf.float32) * (1. / 255) - 0.5

    return patch, tf.cast(parsed['label'], tf.int32)
def read(filename_queue, feature_num=2, dtypes=[list, int]):
    """Read one record and parse `feature_num` scalar features from it.

    Features are keyed 'feature1', 'feature2', ... Entry `i` of `dtypes`
    picks the parsed type: `int` gives tf.int64, anything else tf.string.

    Args:
        filename_queue: queue of TFRecord file names to read from.
        feature_num: number of features to parse.
        dtypes: Python types indexed by feature position.

    Returns:
        The dict of parsed tensors from `tf.parse_single_example`.
    """
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(filename_queue)

    feature_dict = {}
    for position in range(feature_num):
        # here, only three data types are allowed: tf.float32, tf.int64, tf.string
        tf_dtype = tf.int64 if dtypes[position] is int else tf.string
        feature_dict['feature' + str(position + 1)] = tf.FixedLenFeature([], tf_dtype)

    return tf.parse_single_example(record, features=feature_dict)
#======================================================================================
## test code
def read_and_decode(filename_queue):
    """Read one record and return its left and right images as flat uint8 tensors.

    Args:
        filename_queue: queue of TFRecord file names to read from.

    Returns:
        `(image_left, image_right)`, each with static shape `[960 * 540 * 4]`.
    """
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(filename_queue)

    # Defaults are not specified since both keys are required.
    feature_spec = {
        'image_left': tf.FixedLenFeature([], tf.string),
        'image_right': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(record, features=feature_spec)

    image_left = tf.decode_raw(parsed['image_left'], tf.uint8)
    image_right = tf.decode_raw(parsed['image_right'], tf.uint8)

    # Both images share the same fixed geometry.
    width, height, depth = 960, 540, 4
    flat_size = width * height * depth
    image_left.set_shape([flat_size])
    image_right.set_shape([flat_size])
    return image_left, image_right
def read_instances(self, count, shuffle, epochs):
    """Reads the data represented by this DataSource using a TensorFlow reader.

    Arguments:
      count: The maximum number of instances to read per call.
      shuffle: Whether the file name queue shuffles its inputs.
      epochs: The number of epochs or passes over the data to perform.
    Returns:
      A tensor containing instances that are read.
    """
    # A falsy epoch count (0 or None) means "unlimited" for the queue.
    num_epochs = epochs if epochs else None

    # GZIP options are only supplied for compressed sources.
    reader_options = (
        tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.GZIP)
        if self._compressed else None)

    files = tf.train.match_filenames_once(self._path, name='files')
    queue = tf.train.string_input_producer(
        files, num_epochs=num_epochs, shuffle=shuffle, name='queue')
    record_reader = tf.TFRecordReader(options=reader_options, name='reader')

    _, instances = record_reader.read_up_to(queue, count, name='read')
    return instances
def read_and_decode(filename_queue):
    """Read one record and return its image scaled to [-1, 1].

    Args:
        filename_queue: queue of TFRecord file names to read from.

    Returns:
        A float32 tensor of shape `[128, 128, 3]`: uint8 pixels multiplied by
        2/255 and shifted by -1.
    """
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(filename_queue)

    feature_spec = {'image_raw': tf.FixedLenFeature([], tf.string)}
    parsed = tf.parse_single_example(record, features=feature_spec)

    pixels = tf.decode_raw(parsed['image_raw'], tf.uint8)
    pixels.set_shape(128 * 128 * 3)
    picture = tf.reshape(pixels, [128, 128, 3])
    return tf.cast(picture, tf.float32) * (2. / 255) - 1.
def read_and_decode_with_labels(filename_queue):
    """Read one record and return its image scaled to [-1, 1] plus its label.

    Args:
        filename_queue: queue of TFRecord file names to read from.

    Returns:
        `(image, label)`: a float32 `[128, 128, 3]` image (uint8 pixels times
        2/255 minus 1) and the label cast to int32.
    """
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(filename_queue)

    feature_spec = {
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(record, features=feature_spec)

    pixels = tf.decode_raw(parsed['image_raw'], tf.uint8)
    pixels.set_shape(128 * 128 * 3)
    picture = tf.reshape(pixels, [128, 128, 3])
    picture = tf.cast(picture, tf.float32) * (2. / 255) - 1.

    return picture, tf.cast(parsed['label'], tf.int32)
def batches(data_file_path, max_number_length, batch_size, size, num_preprocess_threads=1, is_training=True, channels=1):
    """Build batches of (image, one-hot length, one-hot label) from a TFRecord file.

    Args:
        data_file_path: path of the TFRecord file to read.
        max_number_length: length of the stored label vector and depth of the
            one-hot length encoding.
        batch_size: number of examples per batch.
        size: passed to `resize_image`.
        num_preprocess_threads: number of preprocessing pipelines handed to
            `tf.train.batch_join`.
        is_training: passed to `resize_image`.
        channels: number of channels for PNG decoding and `resize_image`.

    Returns:
        The batched tensors returned by `tf.train.batch_join`.
    """
    filename_queue = tf.train.string_input_producer([data_file_path])
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(filename_queue)

    feature_spec = {
        'image_png': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([max_number_length], tf.int64),
        'length': tf.FixedLenFeature([1], tf.int64),
        'bbox': tf.FixedLenFeature([4], tf.int64),
    }
    parsed = tf.parse_single_example(record, features=feature_spec)

    image = parsed['image_png']
    label = parsed['label']
    length = parsed['length']
    bbox = tf.cast(parsed['bbox'], tf.int32)

    # One independent decode/resize pipeline per preprocessing thread.
    dequeued_data = []
    for _ in range(num_preprocess_threads):
        picture = tf.image.decode_png(image, channels)
        picture = resize_image(picture, bbox, is_training, size, channels)
        length_onehot = tf.one_hot(length - 1, max_number_length)[0]
        label_onehot = tf.one_hot(label, 11)
        dequeued_data.append([picture, length_onehot, label_onehot])

    return tf.train.batch_join(
        dequeued_data, batch_size=batch_size, capacity=batch_size * 3)
def read_and_decode(filename_queue):
    """Read one record and return its volume and the matching label volume.

    Args:
        filename_queue: queue of TFRecord file names to read from.

    Returns:
        `(image, labels)`: the image as float32 with shape
        `[IMG_DIM, IMG_DIM, IMG_DIM, 1]` and the labels as int16 with shape
        `[IMG_DIM, IMG_DIM, IMG_DIM]`.
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label_raw': tf.FixedLenFeature([], tf.string),
        })

    # Both payloads are stored as int16; only the image is converted to float.
    image = tf.cast(tf.decode_raw(features['image_raw'], tf.int16), tf.float32)
    labels = tf.decode_raw(features['label_raw'], tf.int16)

    # PW 2017/03/03: Zero-center data here?
    image.set_shape([IMG_DIM * IMG_DIM * IMG_DIM])
    image = tf.reshape(image, [IMG_DIM, IMG_DIM, IMG_DIM, 1])

    labels.set_shape([IMG_DIM * IMG_DIM * IMG_DIM])
    # Reshape the decoded labels; the original reshaped `image` here, which
    # returned the image data in place of the labels.
    labels = tf.reshape(labels, [IMG_DIM, IMG_DIM, IMG_DIM])

    # Dimensions (X, Y, Z, channels)
    return image, labels
def read_examples(input_files, shuffle, num_epochs=None):
    """Creates readers and queues for reading example protos.

    Args:
        input_files: iterable of strings, each a comma-separated list of file
            patterns.
        shuffle: passed to `tf.train.string_input_producer`.
        num_epochs: number of passes over the files; 0 or None means no limit.

    Returns:
        The `(example_id, encoded_example)` pair produced by a GZIP
        `tf.TFRecordReader` reading from the file name queue.
    """
    patterns = [pattern for spec in input_files for pattern in spec.split(',')]
    files = []
    for pattern in patterns:
        files += file_io.get_matching_files(pattern)
    files.sort()

    # Convert num_epochs == 0 -> num_epochs is None, if necessary
    num_epochs = num_epochs if num_epochs else None

    # Build a queue of the filenames to be read.
    filename_queue = tf.train.string_input_producer(files, num_epochs, shuffle)

    gzip_options = tf.python_io.TFRecordOptions(
        compression_type=tf.python_io.TFRecordCompressionType.GZIP)
    record_reader = tf.TFRecordReader(options=gzip_options)
    example_id, encoded_example = record_reader.read(filename_queue)
    return example_id, encoded_example
def read_and_decode(filename_queue):
    """Read one record and return its label and combined feature vector.

    Args:
        filename_queue: queue of TFRecord file names to read from.

    Returns:
        `(label, features)`: the float32 label, and the continuous features
        followed by the categorical features (hashed into `BUCKET_SIZE`
        buckets and cast to float32), concatenated along axis 0.
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            "label": tf.FixedLenFeature([], tf.float32),
            "categorical_features": tf.FixedLenFeature(
                [CATEGORICAL_FEATURES_SIZE], tf.string),
            "continuous_features": tf.FixedLenFeature(
                [CONTINUOUS_FEATURES_SIZE], tf.float32),
        })

    label = features["label"]
    continuous_features = features["continuous_features"]
    # Strings are hashed to bucket ids so they can sit next to the floats.
    categorical_features = tf.cast(
        tf.string_to_hash_bucket(features["categorical_features"], BUCKET_SIZE),
        tf.float32)

    # TensorFlow 1.0 changed tf.concat to take (values, axis); the original
    # passed the axis first.
    return label, tf.concat([continuous_features, categorical_features], 0)
# Read serialized examples from filename queue
def prepare_reader(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Args:
      filename_queue: A tensorflow queue of filename locations.
      batch_size: upper bound on the number of records read per call.

    Returns:
      A tuple of video indexes, features, labels, and padding data.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(filename_queue, batch_size)

    # The configured feature names and sizes must be non-empty and aligned.
    names = self.feature_names
    assert len(names) > 0, "self.feature_names is empty!"
    assert len(names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(names), len(self.feature_sizes))

    # Map each proto field to its parsed data type.
    feature_map = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for index, feature_name in enumerate(names):
        feature_map[feature_name] = tf.FixedLenFeature(
            [self.feature_sizes[index]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=feature_map)

    labels = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])

    concatenated_features = tf.concat([parsed[n] for n in names], 1)
    padding = tf.ones([tf.shape(serialized_examples)[0]])
    return parsed["video_id"], concatenated_features, labels, padding
def prepare_reader(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Args:
      filename_queue: A tensorflow queue of filename locations.
      batch_size: upper bound on the number of records read per call.

    Returns:
      A tuple of video indexes, features, labels, padding data, and the
      stored "predictions" feature.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(filename_queue, batch_size)

    # The configured feature names and sizes must be non-empty and aligned.
    names = self.feature_names
    assert len(names) > 0, "self.feature_names is empty!"
    assert len(names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(names), len(self.feature_sizes))

    # Map each proto field to its parsed data type.
    feature_map = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "predictions": tf.FixedLenFeature([self.num_classes], tf.float32),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for index, feature_name in enumerate(names):
        feature_map[feature_name] = tf.FixedLenFeature(
            [self.feature_sizes[index]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=feature_map)

    labels = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])

    concatenated_features = tf.concat([parsed[n] for n in names], 1)
    padding = tf.ones([tf.shape(serialized_examples)[0]])
    return (parsed["video_id"], concatenated_features, labels, padding,
            parsed["predictions"])
def prepare_reader(self, filename_queue, batch_size=1024):
    """Read up to `batch_size` records and return their stored predictions.

    Args:
      filename_queue: A tensorflow queue of filename locations.
      batch_size: upper bound on the number of records read per call.

    Returns:
      The parsed "predictions" feature, `self.num_classes` floats per record.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(filename_queue, batch_size)

    # The feature lists are validated even though only fixed keys are parsed.
    names = self.feature_names
    assert len(names) > 0, "self.feature_names is empty!"
    assert len(names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(names), len(self.feature_sizes))

    feature_map = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "predictions": tf.FixedLenFeature([self.num_classes], tf.float32),
        "labels": tf.VarLenFeature(tf.int64),
    }
    parsed = tf.parse_example(serialized_examples, features=feature_map)
    return parsed["predictions"]
def prepare_writer(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Despite its name, this method only reads and parses records.

    Args:
      filename_queue: A tensorflow queue of filename locations.
      batch_size: upper bound on the number of records read per call.

    Returns:
      A tuple of video indexes, features, labels, and padding data.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(filename_queue, batch_size)

    # The configured feature names and sizes must be non-empty and aligned.
    names = self.feature_names
    assert len(names) > 0, "self.feature_names is empty!"
    assert len(names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(names), len(self.feature_sizes))

    # Map each proto field to its parsed data type.
    feature_map = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for index, feature_name in enumerate(names):
        feature_map[feature_name] = tf.FixedLenFeature(
            [self.feature_sizes[index]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=feature_map)

    labels = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])

    concatenated_features = tf.concat([parsed[n] for n in names], 1)
    padding = tf.ones([tf.shape(serialized_examples)[0]])
    return parsed["video_id"], concatenated_features, labels, padding
def prepare_reader(self, filename_queue):
    """Read one SequenceExample and return its fields as size-1 batches.

    Args:
        filename_queue: queue of TFRecord file names to read from.

    Returns:
        `(video_ids, rgbs, audios, labels, frames)`, each with a leading batch
        dimension of 1 added. `frames` is the frame count reported by
        `get_video_matrix` for the audio feature.
    """
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(filename_queue)

    context_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    sequence_spec = {
        "rgb": tf.FixedLenSequenceFeature([], dtype=tf.string),
        "audio": tf.FixedLenSequenceFeature([], dtype=tf.string),
    }
    contexts, features = tf.parse_single_sequence_example(
        record,
        context_features=context_spec,
        sequence_features=sequence_spec)

    # read ground truth labels
    dense_labels = tf.sparse_to_dense(
        contexts["labels"].values, (self.num_classes,), 1,
        validate_indices=False)
    labels = tf.cast(dense_labels, tf.bool)

    # The second call overwrites num_frames, so the audio count is kept.
    rgbs, num_frames = self.get_video_matrix(features["rgb"], 1024, self.max_frames)
    audios, num_frames = self.get_video_matrix(features["audio"], 1024, self.max_frames)

    # Add a leading batch dimension of 1 to every output.
    batch_video_ids = tf.expand_dims(contexts["video_id"], 0)
    batch_rgbs = tf.expand_dims(rgbs, 0)
    batch_audios = tf.expand_dims(audios, 0)
    batch_labels = tf.expand_dims(labels, 0)
    batch_frames = tf.expand_dims(num_frames, 0)
    return batch_video_ids, batch_rgbs, batch_audios, batch_labels, batch_frames
def reader(self):
    """Return a newly constructed `tf.TFRecordReader`."""
    record_reader = tf.TFRecordReader()
    return record_reader
def read_and_decode(filename):
    """Read one record from `filename` and return its image and label.

    Args:
        filename: path of the TFRecord file to read.

    Returns:
        `(img, label)`: a float32 `[224, 224, 3]` image scaled by 1/255 minus
        0.5, and the label cast to int32.
    """
    # Queue that feeds the file name to the reader.
    filename_queue = tf.train.string_input_producer([filename])
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(filename_queue)

    # Parse the serialized example into its label and raw image bytes.
    feature_spec = {
        'label': tf.FixedLenFeature([], tf.int64),
        'img_raw': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(record, features=feature_spec)

    pixels = tf.decode_raw(parsed['img_raw'], tf.uint8)
    picture = tf.reshape(pixels, [224, 224, 3])
    picture = tf.cast(picture, tf.float32) * (1. / 255) - 0.5

    return picture, tf.cast(parsed['label'], tf.int32)
def __init__(self, fnames, shuffle=True, num_epochs=None):
    """Init from a list of filenames to enqueue.

    Besides creating the file name queue and reader, this builds a throwaway
    graph and session that reads from the files to populate `self.shapes`
    and `self.dtypes` for every key in `self.FEATURES`.

    Args:
      fnames: list of .tfrecords filenames to enqueue.
      shuffle: if true, shuffle the list at each epoch
      num_epochs: forwarded to `tf.train.string_input_producer`.
    """
    self._fnames = fnames
    self._fname_queue = tf.train.string_input_producer(
        self._fnames,
        capacity=1000,
        shuffle=shuffle,
        num_epochs=num_epochs,
        shared_name='input_files')
    self._reader = tf.TFRecordReader()

    # Read first record to initialize the shape parameters
    with tf.Graph().as_default():
        fname_queue = tf.train.string_input_producer(self._fnames)
        reader = tf.TFRecordReader()
        _, serialized = reader.read(fname_queue)
        shapes = self._parse_shape(serialized)
        dtypes = self._parse_dtype(serialized)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                # Shapes and dtypes are fetched in two separate runs, as in
                # the original code.
                raw_shapes = sess.run(shapes)
                self.shapes = {
                    k: raw_shapes[k + '_sz'].tolist() for k in self.FEATURES}
                raw_dtypes = sess.run(dtypes)
                self.dtypes = {
                    k: REVERSE_TYPEMAP[raw_dtypes[k + '_dtype'][0]]
                    for k in self.FEATURES}
            finally:
                # Always stop the queue-runner threads, even if a run fails,
                # so they do not outlive the temporary session.
                coord.request_stop()
                coord.join(threads)
def _parse(self, filename_queue):
    """Read one record and return its parsed features and int32 label.

    Args:
        filename_queue: queue of TFRecord file names to read from.

    Returns:
        `(features, label)`: the full dict of parsed tensors (keys 'image'
        and 'label') and the label cast to int32.
    """
    with tf.name_scope("parsing"):
        record_reader = tf.TFRecordReader()
        _, record = record_reader.read(filename_queue)

        feature_spec = {
            'image': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
        }
        parsed = tf.parse_single_example(record, features=feature_spec)
        label = tf.cast(parsed['label'], tf.int32)
        return parsed, label
def get_input_op(self, fq, parsers):
    """Read up to `self.batch_size` records from `fq` and parse them.

    Args:
        fq: queue of TFRecord file names to read from.
        parsers: feature specification passed to `tf.parse_example`.

    Returns:
        The dict of parsed tensors returned by `tf.parse_example`.
    """
    record_reader = tf.TFRecordReader()
    _, records = record_reader.read_up_to(fq, self.batch_size)
    parsed = tf.parse_example(records, parsers)
    return parsed
def create_input_fn(mode, input_files, batch_size, num_epochs):
    """Return an `input_fn` that reads batched features from TFRecord files.

    Args:
        mode: a `tf.contrib.learn.ModeKeys` value; selects the feature columns
            and how the target is produced.
        input_files: file pattern passed to `read_batch_features`.
        batch_size: number of examples per batch.
        num_epochs: number of passes over the input files.

    Returns:
        A zero-argument function returning `(feature_map, target)`.
    """
    def input_fn():
        is_training = mode == tf.contrib.learn.ModeKeys.TRAIN

        feature_spec = tf.contrib.layers.create_feature_spec_for_parsing(
            get_feature_columns(mode))

        feature_map = tf.contrib.learn.io.read_batch_features(
            file_pattern=input_files,
            batch_size=batch_size,
            features=feature_spec,
            reader=tf.TFRecordReader,
            randomize_input=True,
            num_epochs=num_epochs,
            queue_capacity=200000 + batch_size * 10,
            name="read_batch_features_{}".format(mode))

        # This is an ugly hack because of a current bug in tf.learn
        # During evaluation TF tries to restore the epoch variable which isn't defined during training
        # So we define the variable manually here
        if is_training:
            tf.get_variable(
                "read_batch_features_eval/file_name_queue/limit_epochs/epochs",
                initializer=tf.constant(0, dtype=tf.int64))

        if not is_training:
            # In evaluation we have 10 classes (utterances).
            # The first one (index 0) is always the correct one
            target = tf.zeros([batch_size, 1], dtype=tf.int64)
        else:
            target = feature_map.pop("label")
        return feature_map, target

    return input_fn
def decode(filename_queue):
    """Read one record and return its id, padded vector matrix, and label vector.

    Args:
        filename_queue: queue of TFRecord file names to read from.

    Returns:
        `(video_id, vector, label)`: the id string, a float32 `[40, 300]`
        matrix zero-padded along the first axis, and a float32 indicator
        vector of length 4716.
    """
    # Create TFRecords reader
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(filename_queue)

    # Feature keys in TFRecords example
    feature_spec = {
        'id': tf.FixedLenFeature([], tf.string),
        'vector': tf.FixedLenFeature([], tf.string),
        'label': tf.VarLenFeature(tf.int64),
    }
    parsed = tf.parse_single_example(record, features=feature_spec)
    video_id = parsed['id']

    # Decode vector and pad to fixed size
    rows = tf.decode_raw(parsed['vector'], tf.float32)
    rows = tf.reshape(rows, [-1, 300])
    missing_rows = 40 - tf.shape(rows)[0]
    rows = tf.pad(rows, [[0, missing_rows], [0, 0]])
    rows.set_shape([40, 300])

    # Get label index
    indicator = tf.sparse_to_indicator(parsed['label'], 4716)
    indicator.set_shape([4716])
    indicator = tf.cast(indicator, tf.float32)

    return video_id, rows, indicator
# Creates input pipeline for tensorflow networks