我们从Python开源项目中，提取了以下50个代码示例，用于说明如何使用tensorflow.parse_example()。
def prepare_reader(self, filename_queue, batch_size=1024):
    """Read and parse up to `batch_size` serialized examples from a queue.

    Args:
        filename_queue: queue handed to `TFRecordReader.read_up_to`.
        batch_size: maximum number of records to read in one call.

    Returns:
        A tuple `(video_id, concatenated_features, labels, ones)` where
        `concatenated_features` joins every feature in `self.feature_names`
        along axis 1, `labels` is the `tf.sparse_to_indicator` form of the
        "labels" feature with width `self.num_classes`, and `ones` is a
        vector of ones with one entry per serialized example.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(filename_queue, batch_size)

    # Describe how each proto field maps onto a tensor type.
    names = self.feature_names
    assert len(names) > 0, "self.feature_names is empty!"
    assert len(names) == len(self.feature_sizes), (
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes)))

    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, name in enumerate(names):
        parse_spec[name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parse_spec)

    indicator_labels = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    indicator_labels.set_shape([None, self.num_classes])

    per_feature = [parsed[name] for name in self.feature_names]
    joined = tf.concat(per_feature, 1)
    ones = tf.ones([tf.shape(serialized_examples)[0]])
    return parsed["video_id"], joined, indicator_labels, ones
def prepare_serialized_examples(self, serialized_examples):
    """Parse a batch of serialized examples into ids, features and labels.

    Args:
        serialized_examples: string tensor of serialized `tf.Example` protos.

    Returns:
        A tuple `(video_id, concatenated_features, labels, ones)`; the
        features listed in `self.feature_names` are concatenated along
        axis 1, labels are produced by `tf.sparse_to_indicator` with
        `self.num_classes` columns, and `ones` has one entry per example.
    """
    feature_count = len(self.feature_names)
    assert feature_count > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), (
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes)))

    # Fixed entries first, then one float feature per configured name.
    spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    spec.update(
        (name, tf.FixedLenFeature([self.feature_sizes[idx]], tf.float32))
        for idx, name in enumerate(self.feature_names[:feature_count]))

    parsed = tf.parse_example(serialized_examples, features=spec)

    label_matrix = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    label_matrix.set_shape([None, self.num_classes])

    merged = tf.concat([parsed[name] for name in self.feature_names], 1)
    example_count = tf.shape(serialized_examples)[0]
    return parsed["video_id"], merged, label_matrix, tf.ones([example_count])
def example_serving_input_fn(default_batch_size=None):
    """Build the serving inputs from serialized tf.Example protos.

    Args:
        default_batch_size (int): Batch size for the tf.placeholder shape.

    Returns:
        A pair `(features, inputs)`: the parsed feature dict and a dict
        mapping 'example' to the string placeholder that feeds the parser.
    """
    # Continuous columns are parsed as scalar int64, categorical as strings.
    spec = {
        name: tf.FixedLenFeature(shape=[], dtype=tf.int64)
        for name in CONTINUOUS_COLS
    }
    spec.update({
        name: tf.FixedLenFeature(shape=[], dtype=tf.string)
        for name, _ in CATEGORICAL_COLS
    })

    serialized = tf.placeholder(shape=[default_batch_size], dtype=tf.string)
    parsed = tf.parse_example(serialized, spec)
    return parsed, {'example': serialized}
def example_serving_input_fn():
    """Build the serving inputs.

    Creates a string placeholder for a batch of serialized tf.Example
    protos and parses it with the spec derived from INPUT_COLUMNS.

    Returns:
        A `tf.estimator.export.ServingInputReceiver` whose features are the
        parsed tensors and whose receiver tensor is keyed 'example_proto'.
    """
    example_bytestring = tf.placeholder(
        shape=[None],
        dtype=tf.string,
    )
    # The parse result must be bound to the name that is passed on below;
    # previously it was stored as `feature_scalars` while the undefined name
    # `features` was returned, raising NameError at export time.
    features = tf.parse_example(
        example_bytestring,
        tf.feature_column.make_parse_example_spec(INPUT_COLUMNS)
    )
    return tf.estimator.export.ServingInputReceiver(
        features, {'example_proto': example_bytestring}
    )
# [START serving-function]
def serving_input_receiver_fn():
    """Input processing used when serving the model.

    Parses serialized tf.Example protos carrying a single string feature
    'str', flattens it, and runs it through
    `full_onehot_process_line_as_2d_input`.

    NOTE: per the original author this should still work, but it has not
    been re-tested since roughly TensorFlow 1.2.1.

    Returns:
        A `tf.estimator.export.ServingInputReceiver` built from the
        processed data and the 'example' placeholder.
    """
    spec = {'str': tf.FixedLenFeature([1], tf.string)}
    example_placeholder = tf.placeholder(
        dtype=tf.string, name='input_example_tensor')
    receivers = {'example': example_placeholder}

    parsed = tf.parse_example(example_placeholder, spec)
    # Flatten the parsed strings to a 1-D tensor; this could arguably be
    # handled more cleanly inside the graph itself.
    flat_strings = tf.reshape(parsed['str'], [-1])
    processed = full_onehot_process_line_as_2d_input(flat_strings)
    return tf.estimator.export.ServingInputReceiver(processed, receivers)
def example_serving_input_fn(default_batch_size=None):
    """Build the serving inputs.

    Args:
        default_batch_size (int): Batch size for the tf.placeholder shape

    Returns:
        A pair `(features, inputs)`: `features` maps each column name to its
        parsed tensor with a trailing dimension of size 1 added, and
        `inputs` maps 'example' to the serialized-example placeholder.
    """
    feature_spec = {}
    for feat in CONTINUOUS_COLS:
        feature_spec[feat] = tf.FixedLenFeature(shape=[], dtype=tf.float32)

    for feat, _ in CATEGORICAL_COLS:
        feature_spec[feat] = tf.FixedLenFeature(shape=[], dtype=tf.string)

    example_bytestring = tf.placeholder(
        shape=[default_batch_size],
        dtype=tf.string,
    )
    feature_scalars = tf.parse_example(example_bytestring, feature_spec)
    # `.items()` instead of the Python-2-only `.iteritems()` so this also
    # runs on Python 3; the resulting dict is the same.
    features = {
        key: tf.expand_dims(tensor, -1)
        for key, tensor in feature_scalars.items()
    }
    return features, {'example': example_bytestring}
def get_placeholder_input_fn(config, model_type, vocab_sizes, use_crosses):
    """Return an input function that parses examples fed via a placeholder.

    The arguments are forwarded unchanged to `feature_columns` when the
    returned function is called.
    """

    def get_input_features():
        """Parse placeholder-fed examples; returns `(features, None)`."""
        parsing_spec = tf.contrib.layers.create_feature_spec_for_parsing(
            feature_columns(config, model_type, vocab_sizes, use_crosses))

        # Dense key feature; defaults to '' when absent from the proto.
        parsing_spec[KEY_FEATURE_COLUMN] = tf.FixedLenFeature(
            [1], dtype=tf.string, default_value='')

        # Placeholder receiving the serialized tf.Example protos.
        serialized = tf.placeholder(tf.string, shape=(None,))
        parsed = tf.parse_example(serialized, parsing_spec)

        # Expose the input tensor itself so it can be used for export.
        parsed[EXAMPLES_PLACEHOLDER_KEY] = serialized
        return parsed, None

    # Hand back the closure that feeds features into the model.
    return get_input_features
def read_and_decode_batch(filename_queue, batch_size, capacity,
                          min_after_dequeue):
    """Dequeue a shuffled batch of records from a TFRecord and parse it.

    Args:
        filename_queue: Filename queue of the TFRecord.
        batch_size: How many records are dequeued each time.
        capacity: The capacity of the shuffle queue.
        min_after_dequeue: Minimum number of elements kept in the queue
            after a dequeue, as passed to `tf.train.shuffle_batch`.

    Returns:
        The tuple `(batch_label, batch_ids, batch_values)` taken from the
        parsed "label", "ids" and "values" features.
    """
    record_reader = tf.TFRecordReader()
    _, one_record = record_reader.read(filename_queue)
    shuffled_records = tf.train.shuffle_batch(
        [one_record],
        batch_size=batch_size,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue)

    # This spec has to stay consistent with the LibSVM -> TFRecord
    # conversion that produced the files.
    spec = {
        "label": tf.FixedLenFeature([], tf.float32),
        "ids": tf.VarLenFeature(tf.int64),
        "values": tf.VarLenFeature(tf.float32)
    }
    parsed = tf.parse_example(shuffled_records, features=spec)
    return parsed["label"], parsed["ids"], parsed["values"]
def prepare_serialized_examples(self, serialized_examples):
    """Parse serialized examples into scaled images and one-hot labels.

    Args:
        serialized_examples: string tensor of serialized `tf.Example` protos
            carrying 'image_raw' (784 int64 values) and 'label' (int64).

    Returns:
        `(images, labels)`: images cast to float32 and multiplied by
        1/255; labels one-hot encoded over 10 classes as int32 and
        reshaped to `[-1, 10]`.
    """
    num_classes = 10
    spec = {
        'image_raw': tf.FixedLenFeature([784], tf.int64),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_example(serialized_examples, features=spec)

    images = tf.cast(parsed["image_raw"], tf.float32) * (1. / 255)
    int_labels = tf.cast(parsed['label'], tf.int32)

    def encode_one(label):
        # One-hot encode a single label and cast the encoding to int32.
        return tf.cast(slim.one_hot_encoding(label, num_classes), tf.int32)

    encoded = tf.map_fn(encode_one, int_labels)
    one_hot_labels = tf.reshape(encoded, [-1, num_classes])
    return images, one_hot_labels
def _predict_input_fn():
    """Supply prediction input to the model.

    Returns:
        An `input_fn_utils.InputFnOps` whose features hold the dense tensor
        under `DENSE_KEY`, whose labels are `None`, and whose default input
        is the serialized-example placeholder under `EXAMPLE_KEY`.
    """
    # Placeholder receiving the serialized tf.Example protos.
    serialized = tf.placeholder(tf.string, shape=(None,), name="examples")
    parsed = tf.parse_example(serialized, _get_feature_columns())
    dense_tensor = _raw_features_to_dense_tensor(parsed)

    return input_fn_utils.InputFnOps(
        features={DENSE_KEY: dense_tensor},
        labels=None,
        default_inputs={EXAMPLE_KEY: serialized})
def prepare_serialized_examples(self, serialized_examples):
    """Parse serialized examples, producing both fine and coarse labels.

    Args:
        serialized_examples: string tensor of serialized `tf.Example` protos.

    Returns:
        A tuple `(video_id, concatenated_features, labels, coarse_labels,
        ones)`. `coarse_labels` is obtained by looking each label value up
        in `self.label_belongs` and converting the result with
        `tf.sparse_to_indicator` over `self.num_coarse_classes` columns.
    """
    names = self.feature_names
    assert len(names) > 0, "self.feature_names is empty!"
    assert len(names) == len(self.feature_sizes), (
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes)))

    # Mapping from proto fields to tensor types.
    spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for idx, name in enumerate(names):
        spec[name] = tf.FixedLenFeature([self.feature_sizes[idx]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=spec)

    fine_labels = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    fine_labels.set_shape([None, self.num_classes])

    # Coarse labels: replace every sparse label value by its entry in
    # self.label_belongs, keeping the sparse indices and shape.
    sparse_labels = parsed["labels"]
    membership = tf.constant(self.label_belongs, dtype=tf.int64)
    looked_up = tf.gather(membership, sparse_labels.values)
    coarse_values = tf.reshape(looked_up, [-1])
    sparse_coarse = tf.SparseTensor(
        indices=sparse_labels.indices,
        values=coarse_values,
        dense_shape=sparse_labels.dense_shape)
    coarse_labels = tf.sparse_to_indicator(
        sparse_coarse, self.num_coarse_classes, name='coarse_transfer')
    coarse_labels.set_shape([None, self.num_coarse_classes])

    merged = tf.concat([parsed[name] for name in self.feature_names], 1)
    ones = tf.ones([tf.shape(serialized_examples)[0]])
    return parsed["video_id"], merged, fine_labels, coarse_labels, ones
def _predict_input_fn():
    """Supply prediction input to the model.

    Returns:
        An `input_fn_utils.InputFnOps` with the parsed features (including
        the id field copied under `PREDICTION_KEY`), `None` labels, and the
        serialized-example placeholder under `PREDICTION_EXAMPLES`.
    """
    columns = _get_feature_columns(include_target_column=False)
    spec = tf.contrib.layers.create_feature_spec_for_parsing(
        feature_columns=columns)

    # The id field and the string form of the target are parsed as scalars.
    target_string_field = FLAGS.target_field + "_string"
    spec[FLAGS.id_field] = tf.FixedLenFeature([], dtype=tf.string)
    spec[target_string_field] = tf.FixedLenFeature([], dtype=tf.string)

    # Placeholder receiving the serialized tf.Example protos.
    serialized = tf.placeholder(tf.string, shape=(None,), name="examples")
    parsed = tf.parse_example(serialized, spec)
    parsed[PREDICTION_KEY] = parsed[FLAGS.id_field]

    return input_fn_utils.InputFnOps(
        features=parsed,
        labels=None,
        default_inputs={PREDICTION_EXAMPLES: serialized})
def prepare_reader(self, filename_queue, batch_size=1024):
    """Create a reader for pre-aggregated YouTube 8M examples.

    Args:
        filename_queue: A tensorflow queue of filename locations.
        batch_size: maximum number of records read per call.

    Returns:
        A tuple of video ids, concatenated features, indicator labels, and
        a vector of ones with one entry per example read.
    """
    tfrecord_reader = tf.TFRecordReader()
    _, serialized_examples = tfrecord_reader.read_up_to(
        filename_queue, batch_size)

    # Validate the configuration before building the parsing spec.
    n_features = len(self.feature_names)
    assert n_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), (
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes)))

    parsing_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for i in range(n_features):
        name = self.feature_names[i]
        size = self.feature_sizes[i]
        parsing_spec[name] = tf.FixedLenFeature([size], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parsing_spec)

    labels = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])

    feature_tensors = []
    for name in self.feature_names:
        feature_tensors.append(parsed[name])
    all_features = tf.concat(feature_tensors, 1)

    num_examples = tf.shape(serialized_examples)[0]
    return parsed["video_id"], all_features, labels, tf.ones([num_examples])
def prepare_reader(self, filename_queue, batch_size=1024):
    """Create a reader for pre-aggregated examples that carry predictions.

    Args:
        filename_queue: A tensorflow queue of filename locations.
        batch_size: maximum number of records read per call.

    Returns:
        A 5-tuple: video ids, concatenated features, indicator labels, a
        vector of ones with one entry per example, and the parsed
        "predictions" feature (`self.num_classes` floats per example).
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(filename_queue, batch_size)

    names = self.feature_names
    assert len(names) > 0, "self.feature_names is empty!"
    assert len(names) == len(self.feature_sizes), (
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes)))

    # Mapping from proto fields to tensor types.
    spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "predictions": tf.FixedLenFeature([self.num_classes], tf.float32),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for idx, name in enumerate(names):
        spec[name] = tf.FixedLenFeature([self.feature_sizes[idx]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=spec)

    indicator = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    indicator.set_shape([None, self.num_classes])

    joined = tf.concat([parsed[name] for name in self.feature_names], 1)
    ones = tf.ones([tf.shape(serialized_examples)[0]])
    return parsed["video_id"], joined, indicator, ones, parsed["predictions"]
def prepare_reader(self, filename_queue, batch_size=1024):
    """Create a reader that yields only the stored predictions.

    Args:
        filename_queue: A tensorflow queue of filename locations.
        batch_size: maximum number of records read per call.

    Returns:
        The parsed "predictions" feature (`self.num_classes` floats per
        example); the other parsed fields are discarded.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(filename_queue, batch_size)

    # The feature configuration is still validated even though only the
    # predictions are returned.
    feature_count = len(self.feature_names)
    assert feature_count > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), (
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes)))

    spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "predictions": tf.FixedLenFeature([self.num_classes], tf.float32),
        "labels": tf.VarLenFeature(tf.int64),
    }
    parsed = tf.parse_example(serialized_examples, features=spec)
    return parsed["predictions"]
def prepare_writer(self, filename_queue, batch_size=1024):
    """Read and parse pre-aggregated YouTube 8M examples from a queue.

    Despite its name, this method builds a `TFRecordReader` and parses the
    records it reads.

    Args:
        filename_queue: A tensorflow queue of filename locations.
        batch_size: maximum number of records read per call.

    Returns:
        A tuple of video ids, concatenated features, indicator labels, and
        a vector of ones with one entry per example read.
    """
    source = tf.TFRecordReader()
    _, serialized_examples = source.read_up_to(filename_queue, batch_size)

    total = len(self.feature_names)
    assert total > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), (
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes)))

    # Mapping from proto fields to tensor types.
    field_types = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    index = 0
    while index < total:
        field_types[self.feature_names[index]] = tf.FixedLenFeature(
            [self.feature_sizes[index]], tf.float32)
        index += 1

    parsed = tf.parse_example(serialized_examples, features=field_types)

    labels = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])

    stacked = tf.concat([parsed[key] for key in self.feature_names], 1)
    padding = tf.ones([tf.shape(serialized_examples)[0]])
    return parsed["video_id"], stacked, labels, padding
def get_input_op(self, fq, parsers):
    """Read up to `self.batch_size` records from `fq` and parse them.

    Args:
        fq: queue passed to `TFRecordReader.read_up_to`.
        parsers: feature spec handed to `tf.parse_example`.

    Returns:
        The result of `tf.parse_example` on the records that were read.
    """
    record_reader = tf.TFRecordReader()
    read_result = record_reader.read_up_to(fq, self.batch_size)
    # read_up_to yields (keys, values); only the values are parsed.
    _, serialized_values = read_result
    parsed = tf.parse_example(serialized_values, parsers)
    return parsed
def example_serving_input_fn():
    """Build the serving inputs from serialized tf.Example protos.

    Returns:
        A `tf.estimator.export.ServingInputReceiver` pairing the features
        parsed according to INPUT_COLUMNS with the 'example_proto'
        placeholder that receives the serialized protos.
    """
    serialized = tf.placeholder(shape=[None], dtype=tf.string)
    parse_spec = tf.feature_column.make_parse_example_spec(INPUT_COLUMNS)
    parsed = tf.parse_example(serialized, parse_spec)
    receivers = {'example_proto': serialized}
    return tf.estimator.export.ServingInputReceiver(parsed, receivers)
def build_prediction_graph(self):
    """Build the prediction graph and register its input/output endpoints.

    Tensor names are stored as JSON strings in the 'inputs' and 'outputs'
    graph collections.
    """
    serialized = tf.placeholder(tf.string, shape=(None,))
    spec = {
        'image': tf.FixedLenFeature(shape=[IMAGE_PIXELS], dtype=tf.float32),
        'key': tf.FixedLenFeature(shape=[], dtype=tf.string),
    }
    parsed = tf.parse_example(serialized, spec)
    image_batch, key_batch = parsed['image'], parsed['key']

    # Compute predictions from the inference model.
    scores = tf.nn.softmax(inference(image_batch, self.hidden1, self.hidden2))
    predicted_class = tf.argmax(scores, 1)

    # Mark the inputs and outputs. The "_bytes" suffix on the input alias
    # signals that the tensor value is raw bytes which are base64 encoded
    # over HTTP. Likewise, any output tensor whose alias ends in "_bytes"
    # is base64 encoded in the HTTP response and must be decoded to obtain
    # the binary value.
    input_aliases = {'examples_bytes': serialized.name}
    output_aliases = {
        'key': key_batch.name,
        'prediction': predicted_class.name,
        'scores': scores.name
    }
    tf.add_to_collection('inputs', json.dumps(input_aliases))
    tf.add_to_collection('outputs', json.dumps(output_aliases))
def parse_examples(examples):
    """Parse serialized examples into 'labels' and 'images' tensors.

    'labels' is a scalar int64 feature with default value [-1]; 'images'
    is a float32 feature of length IMAGE_PIXELS.
    """
    label_spec = tf.FixedLenFeature(
        shape=[], dtype=tf.int64, default_value=[-1])
    image_spec = tf.FixedLenFeature(shape=[IMAGE_PIXELS], dtype=tf.float32)
    spec = {'labels': label_spec, 'images': image_spec}
    return tf.parse_example(examples, features=spec)
def prepare_serialized_examples(self, serialized_examples, width=50, height=50):
    """Decode PNG images and one-hot labels from serialized examples.

    Args:
        serialized_examples: string tensor of serialized `tf.Example` protos
            with an 'image' (PNG bytes) and a 'label' (int64) feature.
        width: target image width passed to the bilinear resize.
        height: target image height passed to the bilinear resize.

    Returns:
        `(images, labels)`: float32 images after `convert_image_dtype`,
        shifted by -0.5 and scaled by 2.0, and int32 one-hot labels over
        10 classes reshaped to `[-1, 10]`.
    """
    # Mapping from proto fields to tensor types.
    spec = {
        'image': tf.FixedLenFeature((), tf.string, default_value=''),
        'label': tf.FixedLenFeature([], tf.int64)
    }
    parsed = tf.parse_example(serialized_examples, features=spec)

    def decode_and_resize(png_bytes):
        """Decode one PNG string, resize it, and return a uint8 tensor."""
        # Grayscale output (channels=1).
        decoded = tf.image.decode_png(png_bytes, channels=1)
        # resize_bilinear wants a batch dimension that map_fn strips, so
        # add it, resize (float32 in [0, uint8_max]), then drop it again.
        batched = tf.expand_dims(decoded, 0)
        resized = tf.image.resize_bilinear(
            batched, [height, width], align_corners=False)
        unbatched = tf.squeeze(resized, squeeze_dims=[0])
        return tf.cast(unbatched, dtype=tf.uint8)

    uint8_images = tf.map_fn(
        decode_and_resize, parsed["image"], back_prop=False, dtype=tf.uint8)
    float_images = tf.image.convert_image_dtype(uint8_images, dtype=tf.float32)
    images = tf.multiply(tf.subtract(float_images, 0.5), 2.0)

    def dense_to_one_hot(label_batch, num_classes):
        # One-hot encode each label as int32, then flatten to 2-D.
        encoded = tf.map_fn(
            lambda x: tf.cast(slim.one_hot_encoding(x, num_classes), tf.int32),
            label_batch)
        return tf.reshape(encoded, [-1, num_classes])

    int_labels = tf.cast(parsed['label'], tf.int32)
    return images, dense_to_one_hot(int_labels, 10)
def prepare_serialized_examples(self, serialized_examples, width=50, height=50):
    """Decode PNG images and their ids from serialized examples.

    Args:
        serialized_examples: string tensor of serialized `tf.Example` protos
            with 'image' (PNG bytes) and 'image_id' string features.
        width: target image width passed to the bilinear resize.
        height: target image height passed to the bilinear resize.

    Returns:
        `(image_id, images)`: the parsed ids and the float32 images after
        `convert_image_dtype`, shifted by -0.5 and scaled by 2.0.
    """
    # Mapping from proto fields to tensor types.
    string_scalar = lambda: tf.FixedLenFeature((), tf.string, default_value='')
    spec = {'image': string_scalar(), 'image_id': string_scalar()}
    parsed = tf.parse_example(serialized_examples, features=spec)

    def decode_and_resize(encoded_png):
        """Decode one PNG string, resize it, and return a uint8 tensor."""
        # Grayscale output (channels=1). resize_bilinear expects a batch
        # dimension that map_fn removes, hence the expand/squeeze pair;
        # the resize itself returns float32 in [0, uint8_max].
        pixels = tf.image.decode_png(encoded_png, channels=1)
        pixels = tf.image.resize_bilinear(
            tf.expand_dims(pixels, 0), [height, width], align_corners=False)
        pixels = tf.squeeze(pixels, squeeze_dims=[0])
        return tf.cast(pixels, dtype=tf.uint8)

    decoded = tf.map_fn(
        decode_and_resize, parsed["image"], back_prop=False, dtype=tf.uint8)
    as_float = tf.image.convert_image_dtype(decoded, dtype=tf.float32)
    centered = tf.subtract(as_float, 0.5)
    images = tf.multiply(centered, 2.0)

    return parsed["image_id"], images
def prepare_serialized_examples(self, serialized_examples, width=32, height=32, channels=3):
    """Decode JPEG images and their filenames from serialized examples.

    Args:
        serialized_examples: string tensor of serialized `tf.Example` protos
            with 'image/encoded' and 'image/filename' string features.
        width: target image width passed to the bilinear resize.
        height: target image height passed to the bilinear resize.
        channels: number of channels requested from the JPEG decoder.

    Returns:
        `(image_ids, images)`: the parsed filenames and the float32 images
        after `convert_image_dtype`, shifted by -0.5 and scaled by 2.0.
    """
    # Mapping from proto fields to tensor types.
    spec = {}
    spec['image/encoded'] = tf.FixedLenFeature((), tf.string, default_value='')
    spec['image/filename'] = tf.FixedLenFeature((), tf.string, default_value='')
    parsed = tf.parse_example(serialized_examples, features=spec)

    def decode_and_resize(jpeg_bytes):
        """Decode one JPEG string, resize it, and return a uint8 tensor."""
        decoded = tf.image.decode_jpeg(jpeg_bytes, channels=channels)
        # resize_bilinear needs a leading batch axis, which map_fn strips;
        # add it, resize (float32 in [0, uint8_max]), then remove it.
        with_batch = tf.expand_dims(decoded, 0)
        resized = tf.image.resize_bilinear(
            with_batch, [height, width], align_corners=False)
        without_batch = tf.squeeze(resized, squeeze_dims=[0])
        return tf.cast(without_batch, dtype=tf.uint8)

    raw_images = tf.map_fn(
        decode_and_resize,
        parsed["image/encoded"],
        back_prop=False,
        dtype=tf.uint8)
    scaled = tf.image.convert_image_dtype(raw_images, dtype=tf.float32)
    images = tf.multiply(tf.subtract(scaled, 0.5), 2.0)

    return parsed['image/filename'], images
def from_feature_spec(feature_spec):
    """Convert a feature_spec to a Schema.

    Args:
        feature_spec: a features specification in the format expected by
            tf.parse_example(), i.e.
            `{name: FixedLenFeature(...), name: VarLenFeature(...), ...}`

    Returns:
        A Schema whose entries are `_from_parse_feature` applied to each
        value of `feature_spec`, keyed by the same names.
    """
    converted = {}
    for name, parse_feature in six.iteritems(feature_spec):
        converted[name] = _from_parse_feature(parse_feature)
    return Schema(converted)
def build_prediction_graph(self, export_dir):
    """Build the prediction graph and register its input/output endpoints.

    Args:
        export_dir: destination reported in the log message; it is not
            otherwise used inside this method.
    """
    logging.info('Exporting prediction graph to %s', export_dir)

    example_input = tf.placeholder(tf.string, shape=(None,))
    parse_spec = {
        'image': tf.FixedLenFeature(shape=[IMAGE_PIXELS], dtype=tf.float32),
        'key': tf.FixedLenFeature(shape=[], dtype=tf.string),
    }
    parsed = tf.parse_example(example_input, parse_spec)
    images = parsed['image']
    keys = parsed['key']

    # Predictions from the inference model.
    logits = inference(images, self.hidden1, self.hidden2)
    probabilities = tf.nn.softmax(logits)
    best_class = tf.argmax(probabilities, 1)

    # Register endpoints. An alias ending in "_bytes" marks a tensor whose
    # value is raw bytes: inputs arrive base64 encoded over HTTP, and any
    # output with such an alias is base64 encoded in the response and has
    # to be decoded to recover the binary value.
    inputs_json = json.dumps({'examples_bytes': example_input.name})
    tf.add_to_collection('inputs', inputs_json)

    outputs_json = json.dumps({
        'key': keys.name,
        'prediction': best_class.name,
        'scores': probabilities.name
    })
    tf.add_to_collection('outputs', outputs_json)
def parse_instances(self, instances, prediction=False):
    """Parse input instances according to the associated schema.

    Arguments:
        instances: The tensor containing input strings.
        prediction: Whether the instances are being parsed for producing
            predictions or not (not consulted by this implementation).

    Returns:
        A dictionary of tensors keyed by field names.
    """
    # Translate each schema field into the equivalent Example feature.
    spec = {}
    for field in self.schema:
        if field.type == SchemaFieldType.integer:
            dtype, default_value = tf.int64, [0]
        elif field.type == SchemaFieldType.real:
            dtype, default_value = tf.float32, [0.0]
        else:
            # Discrete fields are parsed as strings.
            dtype, default_value = tf.string, ['']

        if field.length == 0:
            # Length 0 denotes a variable-length field.
            spec[field.name] = tf.VarLenFeature(dtype=dtype)
            continue

        if field.length != 1:
            # One default entry per element of the fixed-length field.
            default_value = default_value * field.length
        spec[field.name] = tf.FixedLenFeature(
            shape=[field.length], dtype=dtype, default_value=default_value)

    return tf.parse_example(instances, spec, name='examples')
def import_images(tfrecord_file_names, max_reads=100, batch_size=50):
    """Read GZIP-compressed TFRecords and return shuffled image batches.

    Args:
        tfrecord_file_names: file names given to the string input producer.
        max_reads: maximum number of records read per reader call.
        batch_size: size of the shuffled batches; also used as
            `min_after_dequeue`.

    Returns:
        `(image_batch, type_batch)` from `tf.train.shuffle_batch`; images
        are decoded from raw uint8 bytes, reshaped to (-1, 180, 320, 3) and
        converted to float32.
    """
    with tf.variable_scope('import'):
        filename_queue = tf.train.string_input_producer(
            tfrecord_file_names, num_epochs=None)
        gzip_options = tf.python_io.TFRecordOptions(
            tf.python_io.TFRecordCompressionType.GZIP)
        record_reader = tf.TFRecordReader(options=gzip_options)
        _, serialized = record_reader.read_up_to(filename_queue, max_reads)

        spec = {
            'raw': tf.FixedLenFeature([], tf.string),
            'type': tf.FixedLenFeature([], tf.int64)
        }
        parsed = tf.parse_example(serialized, features=spec)
        types = parsed['type']

        raw_pixels = tf.decode_raw(parsed['raw'], tf.uint8)
        shaped = tf.reshape(raw_pixels, shape=(-1, 180, 320, 3))
        images = tf.image.convert_image_dtype(shaped, dtype=tf.float32)

        # enqueue_many: every read contributes many examples at once.
        shuffled = tf.train.shuffle_batch(
            [images, types],
            enqueue_many=True,
            batch_size=batch_size,
            min_after_dequeue=batch_size,
            allow_smaller_final_batch=True,
            capacity=2000,
            name='shuffle_batch')
        image_batch, type_batch = shuffled
        return image_batch, type_batch
def parse_serialized_examples_batch(serialized_examples_batch, batch_size):
    """Parse a batch of serialized OCR examples into a record object.

    Args:
        serialized_examples_batch: string tensor of serialized examples.
        batch_size: static batch size used for every reshape/set_shape.

    Returns:
        An `ocrRecord` instance with attributes `heights`, `widths`,
        `depth`, `uint8images`, `label_lengths`, `labels`, and
        `uint8image` (the images with axes 1 and 2 swapped).
    """
    spec = {
        'image': tf.FixedLenFeature([], tf.string),
        'height': tf.FixedLenFeature([1], tf.int64),
        'width': tf.FixedLenFeature([1], tf.int64),
        'label': tf.VarLenFeature(tf.int64),
        'label_length': tf.FixedLenFeature([1], tf.int64)
    }
    parsed = tf.parse_example(serialized_examples_batch, spec)

    class ocrRecord(object):
        pass

    record = ocrRecord()
    record.heights = tf.cast(parsed['height'], tf.int32)
    record.widths = tf.cast(parsed['width'], tf.int32)
    record.depth = 1

    # The flat image size comes from the module constants rather than from
    # the per-example height/width features.
    flat_size = IMAGE_HEIGHT * IMAGE_WIDTH * IMAGE_DEPTH
    image_shape = [batch_size, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH]

    def decode_image_string(string):
        return tf.cast(tf.decode_raw(string, tf.uint8), tf.uint8)

    flat_images = tf.map_fn(
        decode_image_string,
        parsed['image'],
        dtype=tf.uint8,
        back_prop=False,
        parallel_iterations=15)
    flat_images = tf.reshape(flat_images, [batch_size, flat_size])
    flat_images.set_shape([batch_size, flat_size])

    record.uint8images = tf.reshape(flat_images, image_shape)
    record.uint8images.set_shape(image_shape)

    lengths = tf.cast(parsed['label_length'], tf.int32)
    lengths = tf.reshape(lengths, [batch_size])
    lengths.set_shape([batch_size])
    record.label_lengths = lengths

    record.labels = tf.cast(parsed['label'], tf.int32)

    # Swap axes 1 and 2 for timestep input.
    record.uint8image = tf.transpose(record.uint8images, [0, 2, 1, 3])
    return record
def prepare_serialized_examples(self, serialized_examples, width=32, height=32, channels=3):
    """Decode JPEG images, filenames and class labels from serialized examples.

    Args:
        serialized_examples: string tensor of serialized `tf.Example` protos
            with 'image/encoded', 'image/filename' and 'image/class/label'.
        width: target image width passed to the bilinear resize.
        height: target image height passed to the bilinear resize.
        channels: number of channels requested from the JPEG decoder.

    Returns:
        `(image_ids, images, labels)`: the parsed filenames, the float32
        images after `convert_image_dtype` shifted by -0.5 and scaled by
        2.0, and the int32 labels reshaped to `[-1, 1]`.
    """
    # set the mapping from the fields to data types in the proto
    feature_map = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/filename': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/class/label': tf.FixedLenFeature([], tf.int64)
    }
    features = tf.parse_example(serialized_examples, features=feature_map)

    def decode_and_resize(image_str_tensor):
        """Decodes jpeg string, resizes it and returns a uint8 tensor."""
        image = tf.image.decode_jpeg(image_str_tensor, channels=channels)
        # Note resize expects a batch_size, but tf_map suppresses that index,
        # thus we have to expand then squeeze. Resize returns float32 in the
        # range [0, uint8_max]
        image = tf.expand_dims(image, 0)
        image = tf.image.resize_bilinear(
            image, [height, width], align_corners=False)
        image = tf.squeeze(image, squeeze_dims=[0])
        image = tf.cast(image, dtype=tf.uint8)
        return image

    images_str_tensor = features["image/encoded"]
    images = tf.map_fn(
        decode_and_resize, images_str_tensor, back_prop=False, dtype=tf.uint8)
    images = tf.image.convert_image_dtype(images, dtype=tf.float32)
    images = tf.subtract(images, 0.5)
    images = tf.multiply(images, 2.0)

    # The nested `dense_to_one_hot` helper that used to be defined here was
    # never called (labels are returned as a column, not one-hot), so it
    # has been removed as dead code.
    labels = tf.cast(features['image/class/label'], tf.int32)
    labels = tf.reshape(labels, [-1, 1])

    image_ids = features['image/filename']
    return image_ids, images, labels