The following 50 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.VarLenFeature().
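For orientation before the examples: tf.VarLenFeature(dtype) declares a feature whose length differs from example to example, and parsing it yields a tf.SparseTensor rather than a dense Tensor. A minimal sketch of this behavior (TF 1.x API; the feature key "labels" and the toy record are purely illustrative):

import tensorflow as tf

# Build a toy serialized Example with a variable-length int64 feature.
example = tf.train.Example(features=tf.train.Features(feature={
    "labels": tf.train.Feature(int64_list=tf.train.Int64List(value=[3, 7, 11]))
}))

parsed = tf.parse_single_example(
    example.SerializeToString(),
    features={"labels": tf.VarLenFeature(tf.int64)})

labels_sparse = parsed["labels"]  # a tf.SparseTensor
labels_dense = tf.sparse_tensor_to_dense(labels_sparse, default_value=0)

with tf.Session() as sess:
    print(sess.run(labels_dense))  # [ 3  7 11]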
def prepare_reader(self, filename_queue, batch_size=1024):
    reader = tf.TFRecordReader()
    _, serialized_examples = reader.read_up_to(filename_queue, batch_size)

    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
        feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
            [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return (features["video_id"], concatenated_features, labels,
            tf.ones([tf.shape(serialized_examples)[0]]))
def prepare_serialized_examples(self, serialized_examples):
    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
        feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
            [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return (features["video_id"], concatenated_features, labels,
            tf.ones([tf.shape(serialized_examples)[0]]))
def _make_schema(columns, types, default_values):
    """Input schema definition.

    Args:
      columns: column names for fields appearing in input.
      types: column types for fields appearing in input.
      default_values: default values for fields appearing in input.

    Returns:
      feature_set dictionary of string to *Feature.
    """
    result = {}
    assert len(columns) == len(types)
    assert len(columns) == len(default_values)
    for c, t, v in zip(columns, types, default_values):
        if isinstance(t, list):
            result[c] = tf.VarLenFeature(dtype=t[0])
        else:
            result[c] = tf.FixedLenFeature(shape=[], dtype=t, default_value=v)
    return dataset_schema.from_feature_spec(result)
def _deserialize_image_record(cls, record):
    feature_map = {
        'image/encoded': tf.FixedLenFeature([], tf.string, ''),
        'image/class/label': tf.FixedLenFeature([1], tf.int64, -1),
        'image/class/text': tf.FixedLenFeature([], tf.string, ''),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32)
    }
    with tf.name_scope('deserialize_image_record'):
        obj = tf.parse_single_example(record, feature_map)
        imgdata = obj['image/encoded']
        label = tf.cast(obj['image/class/label'], tf.int32)
        bbox = tf.stack([obj['image/object/bbox/%s' % x].values
                         for x in ['ymin', 'xmin', 'ymax', 'xmax']])
        bbox = tf.transpose(tf.expand_dims(bbox, 0), [0, 2, 1])
        text = obj['image/class/text']
        return imgdata, label, bbox, text
def testWeightedSparseColumnDtypes(self):
    ids = tf.contrib.layers.sparse_column_with_keys(
        "ids", ["marlo", "omar", "stringer"])
    weighted_ids = tf.contrib.layers.weighted_sparse_column(ids, "weights")
    self.assertDictEqual(
        {"ids": tf.VarLenFeature(tf.string),
         "weights": tf.VarLenFeature(tf.float32)},
        weighted_ids.config)

    weighted_ids = tf.contrib.layers.weighted_sparse_column(ids, "weights",
                                                            dtype=tf.int32)
    self.assertDictEqual(
        {"ids": tf.VarLenFeature(tf.string),
         "weights": tf.VarLenFeature(tf.int32)},
        weighted_ids.config)

    with self.assertRaisesRegexp(ValueError,
                                 "dtype is not convertible to float"):
        weighted_ids = tf.contrib.layers.weighted_sparse_column(
            ids, "weights", dtype=tf.string)
def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.VarLenFeature(tf.int64),
        })
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image = tf.reshape(image, [730, 38])
    image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
    label = tf.cast(features['label'], tf.int32)
    return image, label
def read_and_decode_batch(filename_queue, batch_size, capacity, min_after_dequeue):
    """Dequeue a batch of data from the TFRecord.

    Args:
      filename_queue: Filename Queue of the TFRecord.
      batch_size: How many records are dequeued each time.
      capacity: The capacity of the queue.
      min_after_dequeue: Ensures a minimum amount of shuffling of examples.

    Returns:
      List of the dequeued (batch_label, batch_ids, batch_values).
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    batch_serialized_example = tf.train.shuffle_batch(
        [serialized_example],
        batch_size=batch_size,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue)
    # The feature definition here must be consistent with the
    # LibSVM-to-TFRecord conversion process.
    features = tf.parse_example(batch_serialized_example,
                                features={
                                    "label": tf.FixedLenFeature([], tf.float32),
                                    "ids": tf.VarLenFeature(tf.int64),
                                    "values": tf.VarLenFeature(tf.float32)
                                })
    batch_label = features["label"]
    batch_ids = features["ids"]
    batch_values = features["values"]
    return batch_label, batch_ids, batch_values
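The comment in the reader above stresses that the parse spec must mirror the writer side. As a hedged sketch (not from the original project; the helper name is hypothetical), a LibSVM line such as "1 3:0.5 7:1.2" could be converted so that "label", "ids", and "values" line up with the spec:

def libsvm_line_to_example(line):
    # Hypothetical writer-side helper: turns "label id:value id:value ..."
    # into a tf.train.Example whose keys match the parse spec above.
    tokens = line.strip().split()
    label = float(tokens[0])
    ids = [int(t.split(":")[0]) for t in tokens[1:]]
    values = [float(t.split(":")[1]) for t in tokens[1:]]
    return tf.train.Example(features=tf.train.Features(feature={
        "label": tf.train.Feature(
            float_list=tf.train.FloatList(value=[label])),
        "ids": tf.train.Feature(int64_list=tf.train.Int64List(value=ids)),
        "values": tf.train.Feature(
            float_list=tf.train.FloatList(value=values)),
    }))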
def read_and_decode_single_example(filenames, shuffle=False, num_epochs=None):
    # First construct a queue containing a list of filenames.
    # This lets a user split up their dataset into multiple files to keep
    # the size down.
    # filename_queue = tf.train.string_input_producer([filename], num_epochs=10)
    filename_queue = tf.train.string_input_producer(filenames,
                                                    shuffle=shuffle,
                                                    num_epochs=num_epochs)
    reader = tf.TFRecordReader()
    # One can read a single serialized example from a filename.
    # serialized_ex is a Tensor of type string.
    _, serialized_ex = reader.read(filename_queue)
    context, sequences = tf.parse_single_sequence_example(
        serialized_ex,
        context_features={
            "seq_length": tf.FixedLenFeature([], dtype=tf.int64)
        },
        sequence_features={
            "seq_feature": tf.VarLenFeature(dtype=tf.int64),
            "label": tf.VarLenFeature(dtype=tf.int64)
        })
    return context, sequences
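A record that this reader can parse pairs a fixed-length context with variable-length feature lists. A hedged writer-side sketch (names chosen to match the reader's keys; not part of the original source):

def make_sequence_example(seq_feature, label):
    # Hypothetical counterpart: builds a tf.train.SequenceExample whose keys
    # match the reader above ("seq_length", "seq_feature", "label").
    def int64_feature(values):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

    context = tf.train.Features(
        feature={"seq_length": int64_feature([len(seq_feature)])})
    feature_lists = tf.train.FeatureLists(feature_list={
        "seq_feature": tf.train.FeatureList(
            feature=[int64_feature([v]) for v in seq_feature]),
        "label": tf.train.FeatureList(feature=[int64_feature(label)]),
    })
    return tf.train.SequenceExample(context=context,
                                    feature_lists=feature_lists)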
def prepare_serialized_examples(self, serialized_examples):
    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
        feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
            [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])

    ### Newly: map each fine-grained label onto its coarse class and build a
    ### second indicator tensor.
    raw_labels = features["labels"]
    raw_coarse = tf.SparseTensor(
        indices=raw_labels.indices,
        values=tf.reshape(
            tf.gather(tf.constant(self.label_belongs, dtype=tf.int64),
                      raw_labels.values), [-1]),
        dense_shape=raw_labels.dense_shape)
    coarse_labels = tf.sparse_to_indicator(raw_coarse, self.num_coarse_classes,
                                           name='coarse_transfer')
    coarse_labels.set_shape([None, self.num_coarse_classes])
    ###

    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    # return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
    ### Newly
    return (features["video_id"], concatenated_features, labels, coarse_labels,
            tf.ones([tf.shape(serialized_examples)[0]]))
    ###
def prepare_reader(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Args:
      filename_queue: A tensorflow queue of filename locations.

    Returns:
      A tuple of video indexes, features, labels, and padding data.
    """
    reader = tf.TFRecordReader()
    _, serialized_examples = reader.read_up_to(filename_queue, batch_size)

    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
        feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
            [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return (features["video_id"], concatenated_features, labels,
            tf.ones([tf.shape(serialized_examples)[0]]))
def prepare_reader(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Args:
      filename_queue: A tensorflow queue of filename locations.

    Returns:
      A tuple of video indexes, features, labels, padding data, and
      predictions.
    """
    reader = tf.TFRecordReader()
    _, serialized_examples = reader.read_up_to(filename_queue, batch_size)

    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "predictions": tf.FixedLenFeature([self.num_classes],
                                                     tf.float32),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
        feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
            [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return (features["video_id"], concatenated_features, labels,
            tf.ones([tf.shape(serialized_examples)[0]]),
            features["predictions"])
def prepare_reader(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Args:
      filename_queue: A tensorflow queue of filename locations.

    Returns:
      The parsed "predictions" tensor for the batch.
    """
    reader = tf.TFRecordReader()
    _, serialized_examples = reader.read_up_to(filename_queue, batch_size)

    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "predictions": tf.FixedLenFeature([self.num_classes],
                                                     tf.float32),
                   "labels": tf.VarLenFeature(tf.int64)}

    features = tf.parse_example(serialized_examples, features=feature_map)
    return features["predictions"]
def prepare_writer(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Args:
      filename_queue: A tensorflow queue of filename locations.

    Returns:
      A tuple of video indexes, features, labels, and padding data.
    """
    reader = tf.TFRecordReader()
    _, serialized_examples = reader.read_up_to(filename_queue, batch_size)

    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
        feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
            [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return (features["video_id"], concatenated_features, labels,
            tf.ones([tf.shape(serialized_examples)[0]]))
def prepare_reader(self, filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    contexts, features = tf.parse_single_sequence_example(
        serialized_example,
        context_features={
            "video_id": tf.FixedLenFeature([], tf.string),
            "labels": tf.VarLenFeature(tf.int64)},
        sequence_features={
            "rgb": tf.FixedLenSequenceFeature([], dtype=tf.string),
            "audio": tf.FixedLenSequenceFeature([], dtype=tf.string),
        })

    # read ground truth labels
    labels = (tf.cast(
        tf.sparse_to_dense(contexts["labels"].values, (self.num_classes,), 1,
                           validate_indices=False),
        tf.bool))

    rgbs, num_frames = self.get_video_matrix(features["rgb"], 1024,
                                             self.max_frames)
    audios, num_frames = self.get_video_matrix(features["audio"], 1024,
                                               self.max_frames)

    batch_video_ids = tf.expand_dims(contexts["video_id"], 0)
    batch_rgbs = tf.expand_dims(rgbs, 0)
    batch_audios = tf.expand_dims(audios, 0)
    batch_labels = tf.expand_dims(labels, 0)
    batch_frames = tf.expand_dims(num_frames, 0)

    return (batch_video_ids, batch_rgbs, batch_audios, batch_labels,
            batch_frames)
def __init__(self, context_keys_to_features, sequence_keys_to_features,
             items_to_handlers):
    """Constructs the decoder.

    Args:
      context_keys_to_features: a dictionary from TF-Example context keys to
        either tf.VarLenFeature or tf.FixedLenFeature instances. See
        tensorflow's parsing_ops.py.
      sequence_keys_to_features: a dictionary from TF-Example sequence keys
        to feature instances, in the same format.
      items_to_handlers: a dictionary from items (strings) to ItemHandler
        instances. Note that the ItemHandlers are provided the keys that they
        use to return the final item Tensors.
    """
    self._context_keys_to_features = context_keys_to_features
    self._sequence_keys_to_features = sequence_keys_to_features
    self._items_to_handlers = items_to_handlers
def decode(filename_queue):
    # Create TFRecords reader
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    # Feature keys in TFRecords example
    features = tf.parse_single_example(
        serialized_example,
        features={
            'id': tf.FixedLenFeature([], tf.string),
            'vector': tf.FixedLenFeature([], tf.string),
            'label': tf.VarLenFeature(tf.int64)
        })
    video_id = features['id']

    # Decode vector and pad to fixed size
    vector = tf.decode_raw(features['vector'], tf.float32)
    vector = tf.reshape(vector, [-1, 300])
    vector = tf.pad(vector, [[0, 40 - tf.shape(vector)[0]], [0, 0]])
    vector.set_shape([40, 300])

    # Get label index
    label = tf.sparse_to_indicator(features['label'], 4716)
    label.set_shape([4716])
    label = tf.cast(label, tf.float32)

    return video_id, vector, label


# Creates input pipeline for tensorflow networks
def _read_word_record(data_queue):
    reader = tf.TFRecordReader()  # Construct a general reader
    key, example_serialized = reader.read(data_queue)

    feature_map = {
        'image/encoded': tf.FixedLenFeature([], dtype=tf.string,
                                            default_value=''),
        'image/labels': tf.VarLenFeature(dtype=tf.int64),
        'image/width': tf.FixedLenFeature([1], dtype=tf.int64,
                                          default_value=1),
        'image/filename': tf.FixedLenFeature([], dtype=tf.string,
                                             default_value=''),
        'text/string': tf.FixedLenFeature([], dtype=tf.string,
                                          default_value=''),
        'text/length': tf.FixedLenFeature([1], dtype=tf.int64,
                                          default_value=1)
    }
    features = tf.parse_single_example(example_serialized, feature_map)

    image = tf.image.decode_jpeg(features['image/encoded'], channels=1)  # gray
    width = tf.cast(features['image/width'], tf.int32)  # for ctc_loss
    label = tf.serialize_sparse(features['image/labels'])  # for batching
    length = features['text/length']
    text = features['text/string']
    filename = features['image/filename']

    return image, width, label, length, text, filename
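The labels are serialized with tf.serialize_sparse so they survive batching; downstream, the usual counterpart is tf.deserialize_many_sparse, which rebuilds the single SparseTensor that tf.nn.ctc_loss expects. A hedged sketch of that downstream step (batch size and dynamic_pad are assumptions, not from the original source):

# After _read_word_record, batch the per-record tensors; images have
# variable width, so dynamic_pad is needed.
image_b, width_b, label_b, length_b, text_b, name_b = tf.train.batch(
    [image, width, label, length, text, filename],
    batch_size=32, dynamic_pad=True)
# Rebuild one SparseTensor from the batched serialized labels.
labels_sparse = tf.deserialize_many_sparse(label_b, tf.int64)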
def read_and_decode_single_example(self, filename, test=False):
    with tf.name_scope('TFRecordReader'):
        # First construct a queue containing a list of filenames.
        # This lets a user split up their dataset into multiple files to
        # keep the size down.
        files = ([filename] if self.filenameNr == 1 or test
                 else [filename.format(i) for i in range(self.filenameNr)])
        filename_queue = tf.train.string_input_producer(files,
                                                        num_epochs=None)
        # Unlike the TFRecordWriter, the TFRecordReader is symbolic
        reader = tf.TFRecordReader()
        # One can read a single serialized example from a filename.
        # serialized_example is a Tensor of type string.
        _, serialized_example = reader.read(filename_queue)
        # The serialized example is converted back to actual values.
        # One needs to describe the format of the objects to be returned.
        features = tf.parse_single_example(
            serialized_example,
            features={
                # We know the length of 'seq_len' and 'imageInput'; for the
                # variable-length 'target', tf.VarLenFeature is used.
                'seq_len': tf.FixedLenFeature([1], tf.int64),
                'target': tf.VarLenFeature(tf.int64),
                'imageInput': tf.FixedLenFeature([self.height * self.width],
                                                 tf.float32)
            })
        # now return the converted data
        imageInput = features['imageInput']
        seq_len = features['seq_len']
        target = features['target']
        return imageInput, seq_len, target
def input_stream(record_path, scope=None):
    """Input data stream

    ARGS
      `record_path`: tf records file path

    RETURN
      `streams`: data streams
    """
    with tf.device('/cpu:0'):
        with tf.variable_scope(scope or 'input_stream'):
            reader = tf.TFRecordReader()
            filename_queue = tf.train.string_input_producer([record_path],
                                                            None)
            _, record_value = reader.read(filename_queue)
            features = tf.parse_single_example(record_value, {
                'image_jpeg': tf.FixedLenFeature([], tf.string),
                'image_name': tf.FixedLenFeature([], tf.string),
                'word_polygons': tf.VarLenFeature(tf.float32),
                # 'words': tf.VarLenFeature(tf.string)
                # FIXME: problem with parsing words
            })
            # decode jpeg image
            image = tf.cast(
                tf.image.decode_jpeg(features['image_jpeg'], channels=3),
                tf.float32)
            # extract bounding polygons
            word_polygons = tf.sparse_tensor_to_dense(
                features['word_polygons'])
            word_polygons = tf.reshape(word_polygons,
                                       [-1, WORD_POLYGON_DIM])
            # extract words
            # words = tf.sparse_tensor_to_dense(features['words'])
            # output streams
            streams = {'image': image,
                       'image_name': features['image_name'],
                       'image_jpeg': features['image_jpeg'],
                       'word_polygons': word_polygons}
            return streams
def testMakeOutputDictError(self):
    schema = self.toSchema({'a': tf.VarLenFeature(tf.string)})

    # SparseTensor that cannot be represented as VarLenFeature.
    fetches = {
        'a': tf.SparseTensorValue(indices=np.array([(0, 2), (0, 4), (0, 8)]),
                                  values=np.array([10.0, 20.0, 30.0]),
                                  dense_shape=(1, 20))
    }
    with self.assertRaisesRegexp(
        ValueError, 'cannot be decoded by ListColumnRepresentation'):
        _ = impl_helper.make_output_dict(schema, fetches)

    # SparseTensor of invalid rank.
    fetches = {
        'a': tf.SparseTensorValue(
            indices=np.array([(0, 0, 1), (0, 0, 2), (0, 0, 3)]),
            values=np.array([10.0, 20.0, 30.0]),
            dense_shape=(1, 10, 10))
    }
    with self.assertRaisesRegexp(
        ValueError, 'cannot be decoded by ListColumnRepresentation'):
        _ = impl_helper.make_output_dict(schema, fetches)

    # SparseTensor with indices that are out of order.
    fetches = {
        'a': tf.SparseTensorValue(indices=np.array([(0, 2), (2, 4), (1, 8)]),
                                  values=np.array([10.0, 20.0, 30.0]),
                                  dense_shape=(3, 20))
    }
    with self.assertRaisesRegexp(
        ValueError, 'Encountered out-of-order sparse index'):
        _ = impl_helper.make_output_dict(schema, fetches)
def testRunPreprocessingFn(self):
    schema = self.toSchema({
        'dense_1': tf.FixedLenFeature((), tf.float32),
        'dense_2': tf.FixedLenFeature((1, 2), tf.int64),
        'var_len': tf.VarLenFeature(tf.string),
        'sparse': tf.SparseFeature('ix', 'val', tf.float32, 100)
    })

    def preprocessing_fn(inputs):
        return {
            'dense_out': mappers.scale_to_0_1(inputs['dense_1']),
            'sparse_out': tf.sparse_reshape(inputs['sparse'], (1, 10)),
        }

    _, inputs, outputs = impl_helper.run_preprocessing_fn(
        preprocessing_fn, schema)

    # Verify that the input placeholders have the correct types.
    expected_dtype_and_shape = {
        'dense_1': (tf.float32, tf.TensorShape([None])),
        'dense_2': (tf.int64, tf.TensorShape([None, 1, 2])),
        'var_len': (tf.string, tf.TensorShape([None, None])),
        'sparse': (tf.float32, tf.TensorShape([None, None])),
        'dense_out': (tf.float32, tf.TensorShape([None])),
        'sparse_out': (tf.float32, tf.TensorShape([None, None])),
    }
    for key, tensor in itertools.chain(six.iteritems(inputs),
                                       six.iteritems(outputs)):
        dtype, shape = expected_dtype_and_shape[key]
        self.assertEqual(tensor.dtype, dtype)
        tensor.get_shape().assert_is_compatible_with(shape)
def as_feature_spec(self):
    """Returns a representation of this ColumnSchema as a feature spec.

    A feature spec (for a specific column) is one of a FixedLenFeature,
    SparseFeature or VarLenFeature.

    Returns:
      A representation of this ColumnSchema as a feature spec.
    """
    return self.representation.as_feature_spec(self)
def as_feature_spec(self, column):
    if column.domain.dtype not in _TF_EXAMPLE_ALLOWED_TYPES:
        raise ValueError('tf.Example parser supports only types {}, so it is '
                         'invalid to generate a feature_spec with type '
                         '{}.'.format(_TF_EXAMPLE_ALLOWED_TYPES,
                                      repr(column.domain.dtype)))
    return tf.VarLenFeature(column.domain.dtype)
def from_feature_spec(feature_spec):
    """Convert a feature_spec to a Schema.

    Args:
      feature_spec: a features specification in the format expected by
        tf.parse_example(), i.e.
        `{name: FixedLenFeature(...), name: VarLenFeature(...), ...}`

    Returns:
      A Schema representing the provided set of columns.
    """
    return Schema({
        key: _from_parse_feature(parse_feature)
        for key, parse_feature in six.iteritems(feature_spec)
    })
def _from_parse_feature(parse_feature):
    """Convert a single feature spec to a ColumnSchema."""
    # FixedLenFeature
    if isinstance(parse_feature, tf.FixedLenFeature):
        representation = FixedColumnRepresentation(parse_feature.default_value)
        return ColumnSchema(parse_feature.dtype, parse_feature.shape,
                            representation)

    # FixedLenSequenceFeature
    if isinstance(parse_feature, tf.FixedLenSequenceFeature):
        raise ValueError('DatasetSchema does not support '
                         'FixedLenSequenceFeature yet.')

    # VarLenFeature
    if isinstance(parse_feature, tf.VarLenFeature):
        representation = ListColumnRepresentation()
        return ColumnSchema(parse_feature.dtype, [None], representation)

    # SparseFeature
    if isinstance(parse_feature, tf.SparseFeature):
        index_field = SparseIndexField(name=parse_feature.index_key,
                                       is_sorted=parse_feature.already_sorted)
        representation = SparseColumnRepresentation(
            value_field_name=parse_feature.value_key,
            index_fields=[index_field])
        return ColumnSchema(parse_feature.dtype, [parse_feature.size],
                            representation)

    raise ValueError('Cannot interpret feature spec {} with type {}'.format(
        parse_feature, type(parse_feature)))
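Taken together with from_feature_spec above, these conversions are meant to round-trip. A hedged usage sketch (assuming the resulting Schema exposes an as_feature_spec() method, as the ColumnSchema example earlier suggests):

# Hypothetical round trip between a parse spec and a Schema.
spec = {
    "tokens": tf.VarLenFeature(tf.string),
    "length": tf.FixedLenFeature([], tf.int64, default_value=0),
}
schema = from_feature_spec(spec)
roundtrip_spec = schema.as_feature_spec()  # should mirror `spec`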
def infer_column_schema_from_tensor(tensor):
    """Infer a ColumnSchema from a tensor."""
    if isinstance(tensor, tf.SparseTensor):
        # For SparseTensor, there's insufficient information to distinguish
        # between ListColumnRepresentation and SparseColumnRepresentation. So
        # we just guess the former, and callers are expected to handle the
        # latter case on their own (e.g. by requiring the user to provide the
        # schema). This is a policy motivated by the prevalence of
        # VarLenFeature in current tf.Learn code.
        axes = [Axis(None)]
        representation = ListColumnRepresentation()
    else:
        axes = _shape_to_axes(tensor.get_shape(),
                              remove_batch_dimension=True)
        representation = FixedColumnRepresentation()
    return ColumnSchema(tensor.dtype, axes, representation)
def _decode(message):
    features = {
        'key': tf.FixedLenFeature([], tf.int64),
        'vector': tf.VarLenFeature(tf.int64)
    }
    parsed = tf.parse_single_example(serialized=message, features=features)
    key = parsed['key']
    vector = tf.sparse_tensor_to_dense(parsed['vector'])
    return key, vector
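Because _decode works on one serialized record, it drops straight into a tf.data pipeline. A hedged usage sketch (file name and batch size are illustrative; padded_batch is used because the densified vectors can differ in length):

dataset = (tf.data.TFRecordDataset("vectors.tfrecord")  # path is illustrative
           .map(_decode)
           .padded_batch(32, padded_shapes=([], [None])))
iterator = dataset.make_one_shot_iterator()
key_batch, vector_batch = iterator.get_next()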
def frame_example_2_np(seq_example_bytes,
                       max_quantized_value=2,
                       min_quantized_value=-2):
    feature_names = ['rgb', 'audio']
    feature_sizes = [1024, 128]
    with tf.Graph().as_default():
        contexts, features = tf.parse_single_sequence_example(
            seq_example_bytes,
            context_features={
                "video_id": tf.FixedLenFeature([], tf.string),
                "labels": tf.VarLenFeature(tf.int64)},
            sequence_features={
                feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string)
                for feature_name in feature_names
            })
        decoded_features = {
            name: tf.reshape(
                tf.cast(tf.decode_raw(features[name], tf.uint8), tf.float32),
                [-1, size])
            for name, size in zip(feature_names, feature_sizes)
        }
        feature_matrices = {
            name: utils.Dequantize(decoded_features[name],
                                   max_quantized_value,
                                   min_quantized_value)
            for name in feature_names
        }

        with tf.Session() as sess:
            vid = sess.run(contexts['video_id'])
            labs = sess.run(contexts['labels'].values)
            rgb = sess.run(feature_matrices['rgb'])
            audio = sess.run(feature_matrices['audio'])

    return vid, labs, rgb, audio


#%% Split frame level file into three video level files: all, 1st half, 2nd half.
def build_graph():
    feature_names = ['rgb', 'audio']
    feature_sizes = [1024, 128]
    max_quantized_value = 2
    min_quantized_value = -2

    seq_example_bytes = tf.placeholder(tf.string)
    contexts, features = tf.parse_single_sequence_example(
        seq_example_bytes,
        context_features={
            "video_id": tf.FixedLenFeature([], tf.string),
            "labels": tf.VarLenFeature(tf.int64)},
        sequence_features={
            feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string)
            for feature_name in feature_names
        })
    decoded_features = {
        name: tf.reshape(
            tf.cast(tf.decode_raw(features[name], tf.uint8), tf.float32),
            [-1, size])
        for name, size in zip(feature_names, feature_sizes)
    }
    feature_matrices = {
        name: utils.Dequantize(decoded_features[name],
                               max_quantized_value,
                               min_quantized_value)
        for name in feature_names
    }

    tf.add_to_collection("vid_tsr", contexts['video_id'])
    tf.add_to_collection("labs_tsr", contexts['labels'].values)
    tf.add_to_collection("rgb_tsr", feature_matrices['rgb'])
    tf.add_to_collection("audio_tsr", feature_matrices['audio'])
    tf.add_to_collection("seq_example_bytes", seq_example_bytes)

#    with tf.Session() as sess:
#        writer = tf.summary.FileWriter('./graphs', sess.graph)
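Both frame-level snippets above lean on utils.Dequantize to map the uint8-encoded features back to floats. The YouTube-8M starter code defines it along these lines (reproduced from memory as a sketch, not verbatim):

def Dequantize(feat_vector, max_quantized_value=2, min_quantized_value=-2):
    # Map values quantized into [0, 255] back onto
    # [min_quantized_value, max_quantized_value].
    quantized_range = max_quantized_value - min_quantized_value
    scalar = quantized_range / 255.0
    bias = (quantized_range / 512.0) + min_quantized_value
    return feat_vector * scalar + bias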
def input_pipeline(file_pattern, mode, capacity=64):
    keys_to_features = {
        "inputs": tf.VarLenFeature(tf.int64),
        "targets": tf.VarLenFeature(tf.int64)
    }

    items_to_handlers = {
        "inputs": tfexample_decoder.Tensor("inputs"),
        "targets": tfexample_decoder.Tensor("targets")
    }

    # Now the non-trivial case construction.
    with tf.name_scope("examples_queue"):
        training = (mode == "train")
        # Read serialized examples using slim parallel_reader.
        num_epochs = None if training else 1
        data_files = parallel_reader.get_data_files(file_pattern)
        num_readers = min(4 if training else 1, len(data_files))
        _, examples = parallel_reader.parallel_read(
            [file_pattern],
            tf.TFRecordReader,
            num_epochs=num_epochs,
            shuffle=training,
            capacity=2 * capacity,
            min_after_dequeue=capacity,
            num_readers=num_readers)

        decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                     items_to_handlers)
        decoded = decoder.decode(examples, items=list(items_to_handlers))
        examples = {}
        for (field, tensor) in zip(keys_to_features, decoded):
            examples[field] = tensor

    # We do not want int64s, as they are not supported on GPUs.
    return {k: tf.to_int32(v) for (k, v) in six.iteritems(examples)}
def read_and_decode_single_example(filename_queue):
    # Unlike the TFRecordWriter, the TFRecordReader is symbolic
    reader = tf.TFRecordReader()
    # One can read a single serialized example from a filename.
    # serialized_example is a Tensor of type string.
    _, serialized_example = reader.read(filename_queue)
    # The serialized example is converted back to actual values.
    # One needs to describe the format of the objects to be returned.
    features = tf.parse_single_example(
        serialized_example,
        features={
            # We know the length of these fields; if we did not,
            # tf.VarLenFeature could be used.
            'image/encoded': tf.FixedLenFeature((), tf.string,
                                                default_value=''),
            'image/format': tf.FixedLenFeature((), tf.string,
                                               default_value='png'),
            'image/class/label': tf.FixedLenFeature(
                [], tf.int64, default_value=tf.zeros([], dtype=tf.int64))
        })
    # now return the converted data
    label = features['image/class/label']
    image = features['image/encoded']
    # image = tf.image.decode_jpeg(image, channels=3)
    image_format = features['image/format']
    return label, image, image_format
def example_reading_spec(self):
    data_fields = {
        "inputs": tf.VarLenFeature(tf.int64),
        "targets": tf.VarLenFeature(tf.int64),
        "floats": tf.VarLenFeature(tf.float32),
    }
    data_items_to_decoders = None
    return (data_fields, data_items_to_decoders)
def example_reading_spec(self):
    data_fields = {
        "inputs": tf.VarLenFeature(tf.int64),
        "targets": tf.VarLenFeature(tf.int64)
    }
    data_items_to_decoders = None
    return (data_fields, data_items_to_decoders)
def prepare_reader(self, filename_queue,
                   max_quantized_value=2, min_quantized_value=-2):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    context_features, sequence_features = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64)}, None
    if self.sequence_data:
        sequence_features = {
            self.feature_name[0]: tf.FixedLenSequenceFeature(
                [], dtype=tf.string),
            self.feature_name[1]: tf.FixedLenSequenceFeature(
                [], dtype=tf.string),
        }
    else:
        context_features[self.feature_name[0]] = tf.FixedLenFeature(
            self.feature_size[0], tf.float32)
        context_features[self.feature_name[1]] = tf.FixedLenFeature(
            self.feature_size[1], tf.float32)

    contexts, features = tf.parse_single_sequence_example(
        serialized_example,
        context_features=context_features,
        sequence_features=sequence_features)

    labels = tf.cast(contexts["labels"].values, tf.int64)

    if self.sequence_data:
        decoded_features = tf.reshape(
            tf.cast(tf.decode_raw(features[self.feature_name[0]], tf.uint8),
                    tf.float32),
            [-1, self.feature_size[0]])
        video_matrix = Dequantize(decoded_features, max_quantized_value,
                                  min_quantized_value)

        decoded_features = tf.reshape(
            tf.cast(tf.decode_raw(features[self.feature_name[1]], tf.uint8),
                    tf.float32),
            [-1, self.feature_size[1]])
        audio_matrix = Dequantize(decoded_features, max_quantized_value,
                                  min_quantized_value)

        num_frames = tf.minimum(tf.shape(decoded_features)[0],
                                self.max_frames)
    else:
        video_matrix = contexts[self.feature_name[0]]
        audio_matrix = contexts[self.feature_name[1]]
        num_frames = tf.constant(-1)

    # Pad or truncate to 'max_frames' frames.
    # video_matrix = resize_axis(video_matrix, 0, self.max_frames)
    return contexts["video_id"], video_matrix, audio_matrix, labels, num_frames
def testFromCSVWithFeatureSpec(self):
    if not HAS_PANDAS:
        return
    num_batches = 100
    batch_size = 8
    data_path = _make_test_csv_sparse()
    feature_spec = {
        "int": tf.FixedLenFeature(None, dtypes.int16, np.nan),
        "float": tf.VarLenFeature(dtypes.float16),
        "bool": tf.VarLenFeature(dtypes.bool),
        "string": tf.FixedLenFeature(None, dtypes.string, "")
    }

    pandas_df = pd.read_csv(data_path, dtype={"string": object})
    # Pandas insanely uses NaN for empty cells in a string column.
    # And, we can't use Pandas replace() to fix them because nan != nan
    s = pandas_df["string"]
    for i in range(0, len(s)):
        if isinstance(s[i], float) and math.isnan(s[i]):
            pandas_df.set_value(i, "string", "")
    tensorflow_df = df.TensorFlowDataFrame.from_csv_with_feature_spec(
        [data_path],
        batch_size=batch_size,
        shuffle=False,
        feature_spec=feature_spec)

    # These columns were sparse; re-densify them for comparison
    tensorflow_df["float"] = densify.Densify(np.nan)(tensorflow_df["float"])
    tensorflow_df["bool"] = densify.Densify(np.nan)(tensorflow_df["bool"])

    self._assert_pandas_equals_tensorflow(pandas_df, tensorflow_df,
                                          num_batches=num_batches,
                                          batch_size=batch_size)