我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用tensorflow.FixedLenFeature()。
def parse_example(serialized_example):
    """Parse one serialized example into image, ground-truth and name tensors.

    Args:
        serialized_example: serialized example carrying the string features
            'shape', 'img_raw', 'gt_raw' and 'example_name'.

    Returns:
        A tuple `(image, ground_truth, example_name)`: the float32 image
        reshaped to the stored shape with a leading dimension of size one,
        the ground truth reshaped to the stored shape without its last entry
        and cast to float32, and the raw 'example_name' feature.
    """
    # Every key is required, so no default values are supplied.
    feature_spec = {
        'shape': tf.FixedLenFeature([], tf.string),
        'img_raw': tf.FixedLenFeature([], tf.string),
        'gt_raw': tf.FixedLenFeature([], tf.string),
        'example_name': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    with tf.variable_scope('decoder'):
        shape = tf.decode_raw(parsed['shape'], tf.int32)
        raw_image = tf.decode_raw(parsed['img_raw'], tf.float32)
        raw_ground_truth = tf.decode_raw(parsed['gt_raw'], tf.uint8)
        example_name = parsed['example_name']

    with tf.variable_scope('image'):
        # Restore the stored shape, then prepend what would be the batch axis.
        shaped_image = tf.reshape(raw_image, shape)
        image = tf.expand_dims(shaped_image, 0)

    with tf.variable_scope('ground_truth'):
        # The ground truth uses the image shape minus its last entry.
        shaped_ground_truth = tf.reshape(raw_ground_truth, shape[:-1])
        ground_truth = tf.cast(shaped_ground_truth, tf.float32)

    return image, ground_truth, example_name
def prepare_reader(self, filename_queue, batch_size=1024):
    """Build the ops that read and parse up to `batch_size` records.

    Args:
        filename_queue: queue of TFRecord filenames passed to the reader.
        batch_size: maximum number of records requested per read.

    Returns:
        A tuple `(video_ids, features, labels, ones)`: the 'video_id'
        feature, all features named in `self.feature_names` concatenated
        along axis 1, the indicator form of the sparse 'labels' feature with
        `self.num_classes` columns, and a vector of ones with one entry per
        serialized record.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(filename_queue, batch_size)

    # Describe how each proto field maps onto a tensor type.
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), (
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes)))

    feature_map = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, feature_name in enumerate(self.feature_names):
        feature_map[feature_name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=feature_map)

    labels = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])

    per_feature = [parsed[feature_name] for feature_name in self.feature_names]
    concatenated_features = tf.concat(per_feature, 1)

    ones = tf.ones([tf.shape(serialized_examples)[0]])
    return parsed["video_id"], concatenated_features, labels, ones
def prepare_serialized_examples(self, serialized_examples):
    """Parse a batch of serialized examples into ids, features and labels.

    Args:
        serialized_examples: batch of serialized examples to parse.

    Returns:
        A tuple `(video_ids, features, labels, ones)`: the 'video_id'
        feature, all features named in `self.feature_names` concatenated
        along axis 1, the indicator form of the sparse 'labels' feature with
        `self.num_classes` columns, and a vector of ones with one entry per
        serialized record.
    """
    # Describe how each proto field maps onto a tensor type.
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), (
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes)))

    feature_map = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, feature_name in enumerate(self.feature_names):
        feature_map[feature_name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=feature_map)

    labels = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])

    per_feature = [parsed[feature_name] for feature_name in self.feature_names]
    concatenated_features = tf.concat(per_feature, 1)

    ones = tf.ones([tf.shape(serialized_examples)[0]])
    return parsed["video_id"], concatenated_features, labels, ones
def parse_mnist_tfrec(tfrecord, features_shape):
    """Decode one serialized record into float features and one-hot targets.

    Args:
        tfrecord: serialized example with string features 'features' and
            'targets'.
        features_shape: shape the decoded feature bytes are reshaped to.

    Returns:
        A tuple `(features, targets)`, both float32. `targets` is a one-hot
        encoding of depth 10 of the scalar target.
    """
    feature_spec = {
        'features': tf.FixedLenFeature([], tf.string),
        'targets': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(tfrecord, features=feature_spec)

    feature_bytes = tf.decode_raw(parsed['features'], tf.uint8)
    shaped_features = tf.reshape(feature_bytes, features_shape)
    features = tf.cast(shaped_features, tf.float32)

    target_bytes = tf.decode_raw(parsed['targets'], tf.uint8)
    # Collapse the decoded bytes to a scalar before one-hot encoding.
    scalar_target = tf.reshape(target_bytes, [])
    one_hot_target = tf.one_hot(
        indices=scalar_target, depth=10, on_value=1, off_value=0)
    targets = tf.cast(one_hot_target, tf.float32)

    return features, targets
def parse_mnist_tfrec(tfrecord, name, features_shape, scalar_targs=False):
    """Decode one serialized record into float features and one-hot targets.

    Args:
        tfrecord: serialized example with string features 'features' and
            'targets'.
        name: prefix for the parse op name (the op is named `name + '_data'`).
        features_shape: shape the decoded feature bytes are reshaped to.
        scalar_targs: when true, the decoded targets are reshaped to a scalar
            before one-hot encoding.

    Returns:
        A tuple `(features, targets)`, both float32. `targets` is one-hot
        encoded with depth 10.
    """
    feature_spec = {
        'features': tf.FixedLenFeature([], tf.string),
        'targets': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(
        tfrecord, features=feature_spec, name=name + '_data')

    with tf.variable_scope('features'):
        feature_bytes = tf.decode_raw(parsed['features'], tf.uint8)
        shaped_features = tf.reshape(feature_bytes, features_shape)
        features = tf.cast(shaped_features, tf.float32)

    with tf.variable_scope('targets'):
        targets = tf.decode_raw(parsed['targets'], tf.uint8)
        if scalar_targs:
            targets = tf.reshape(targets, [])
        one_hot_targets = tf.one_hot(
            indices=targets, depth=10, on_value=1, off_value=0)
        targets = tf.cast(one_hot_targets, tf.float32)

    return features, targets
def make_input_schema(mode=tf.contrib.learn.ModeKeys.TRAIN):
    """Build the input schema for the given mode.

    Args:
        mode: a tf.contrib.learn.ModeKeys value telling whether the schema is
            used for train/eval or for prediction.

    Returns:
        The schema produced by `dataset_schema.from_feature_spec`. The
        'clicked' feature is omitted when `mode` is INFER.
    """
    feature_spec = {}
    if not mode == tf.contrib.learn.ModeKeys.INFER:
        feature_spec['clicked'] = tf.FixedLenFeature(shape=[], dtype=tf.int64)

    # Integer columns default to -1, categorical columns to the empty string.
    feature_spec.update(
        (column, tf.FixedLenFeature(shape=[], dtype=tf.int64, default_value=-1))
        for column in INTEGER_COLUMN_NAMES)
    feature_spec.update(
        (column, tf.FixedLenFeature(shape=[], dtype=tf.string, default_value=''))
        for column in CATEGORICAL_COLUMN_NAMES)

    return dataset_schema.from_feature_spec(feature_spec)
def example_serving_input_fn(default_batch_size=None):
    """Build the serving inputs from a placeholder of serialized examples.

    Args:
        default_batch_size (int): size used for the placeholder's only
            dimension.

    Returns:
        A tuple `(features, inputs)` where `features` is the parsed feature
        dict and `inputs` maps 'example' to the string placeholder.
    """
    feature_spec = {
        column: tf.FixedLenFeature(shape=[], dtype=tf.int64)
        for column in CONTINUOUS_COLS
    }
    # Only the first item of each categorical entry (the name) is used.
    feature_spec.update(
        (column, tf.FixedLenFeature(shape=[], dtype=tf.string))
        for column, _ in CATEGORICAL_COLS)

    example_bytestring = tf.placeholder(
        shape=[default_batch_size],
        dtype=tf.string,
    )
    parsed = tf.parse_example(example_bytestring, feature_spec)
    return parsed, {'example': example_bytestring}
def _make_schema(columns, types, default_values):
    """Build an input schema from parallel column descriptions.

    Args:
        columns: column names for the fields appearing in the input.
        types: column types; a list type marks a variable-length column whose
            dtype is the list's first element.
        default_values: default values, used for fixed-length columns only.

    Returns:
        The schema produced by `dataset_schema.from_feature_spec`.
    """
    assert len(columns) == len(types)
    assert len(columns) == len(default_values)

    feature_spec = {}
    for column, column_type, default in zip(columns, types, default_values):
        if not isinstance(column_type, list):
            feature_spec[column] = tf.FixedLenFeature(
                shape=[], dtype=column_type, default_value=default)
            continue
        feature_spec[column] = tf.VarLenFeature(dtype=column_type[0])

    return dataset_schema.from_feature_spec(feature_spec)
def make_input_schema(mode=tf.contrib.learn.ModeKeys.TRAIN):
    """Build the input schema for the given mode.

    Args:
        mode: a tf.contrib.learn.ModeKeys value telling whether the schema is
            used for train/eval or for prediction.

    Returns:
        The schema produced by `dataset_schema.from_feature_spec`. The
        'score' feature is omitted when `mode` is INFER.
    """
    feature_spec = {}
    if not mode == tf.contrib.learn.ModeKeys.INFER:
        feature_spec['score'] = tf.FixedLenFeature(shape=[], dtype=tf.float32)

    feature_spec['subreddit'] = tf.FixedLenFeature(shape=[], dtype=tf.string)
    feature_spec['author'] = tf.FixedLenFeature(shape=[], dtype=tf.string)
    # The two body fields fall back to the empty string when absent.
    feature_spec['comment_body'] = tf.FixedLenFeature(
        shape=[], dtype=tf.string, default_value='')
    feature_spec['comment_parent_body'] = tf.FixedLenFeature(
        shape=[], dtype=tf.string, default_value='')
    feature_spec['toplevel'] = tf.FixedLenFeature(shape=[], dtype=tf.int64)

    return dataset_schema.from_feature_spec(feature_spec)
def read(self, shuffle=True, num_epochs=None):
    """Build batched input ops that read from `self.filename`.

    Args:
        shuffle: when true, batches come from `tf.train.shuffle_batch`;
            otherwise from `tf.train.batch`.
        num_epochs: forwarded to `tf.train.string_input_producer`.

    Returns:
        A tuple `(inputs_seqs, outputs)` of batched tensors with batch size
        `self.batch_size`.
    """
    with tf.name_scope('input'):
        record_reader = tf.TFRecordReader()
        filename_queue = tf.train.string_input_producer(
            [self.filename], num_epochs=num_epochs)
        _, serialized_input = record_reader.read(filename_queue)

        feature_spec = {
            'inputs_seq': tf.FixedLenFeature([self.seq_len * 2 + 3], tf.int64),
            'output': tf.FixedLenFeature([1], tf.int64),
        }
        parsed = tf.parse_single_example(serialized_input, features=feature_spec)
        inputs_seq = parsed['inputs_seq']
        output = parsed['output']

        min_after_dequeue = 100
        if not shuffle:
            inputs_seqs, outputs = tf.train.batch(
                [inputs_seq, output], batch_size=self.batch_size)
        else:
            inputs_seqs, outputs = tf.train.shuffle_batch(
                [inputs_seq, output],
                batch_size=self.batch_size,
                num_threads=2,
                capacity=min_after_dequeue + 3 * self.batch_size,
                min_after_dequeue=min_after_dequeue)

        return inputs_seqs, outputs
def read_and_decode(self, example_serialized):
    """Parse one serialized example into a 28x28x1 image and its label.

    Args:
        example_serialized: serialized example with the features 'image',
            'label', 'height', 'width' and 'depth'.

    Returns:
        A tuple `(image, label)`: the float32 image reshaped to [28, 28, 1]
        after applying `(image - 0.5) * 2`, and the label cast to int32.
    """
    num_classes = self.flags['num_classes']

    feature_spec = {
        'image': tf.FixedLenFeature([], tf.string),
        # A missing label is filled with -1 for every class slot.
        'label': tf.FixedLenFeature(
            [num_classes], tf.int64, default_value=[-1] * num_classes),
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'depth': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(example_serialized, features=feature_spec)

    label = parsed['label']
    pixels = tf.decode_raw(parsed['image'], tf.float32)
    pixels.set_shape([784])
    image = tf.reshape(pixels, [28, 28, 1])
    # Maps inputs in [0, 1] onto [-1, 1].
    image = (image - 0.5) * 2
    return image, tf.cast(label, tf.int32)
def read_and_decode(self, example_serialized):
    """Parse one serialized example into a 28x28x1 image and its label.

    Args:
        example_serialized: serialized example with the features 'image',
            'label', 'height', 'width' and 'depth'.

    Returns:
        A tuple `(image, label)`: the float32 image reshaped to [28, 28, 1]
        after applying `(image - 0.5) * 2`, and the label cast to int32.
    """
    label_feature = tf.FixedLenFeature(
        [self.flags['num_classes']],
        tf.int64,
        # A missing label is filled with -1 for every class slot.
        default_value=[-1] * self.flags['num_classes'])
    feature_spec = {
        'image': tf.FixedLenFeature([], tf.string),
        'label': label_feature,
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'depth': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(example_serialized, features=feature_spec)

    label = parsed['label']
    pixels = tf.decode_raw(parsed['image'], tf.float32)
    pixels.set_shape([784])
    image = tf.reshape(pixels, [28, 28, 1])
    # Maps inputs in [0, 1] onto [-1, 1].
    image = (image - 0.5) * 2
    return image, tf.cast(label, tf.int32)
def read_and_decode(filename_queue):
    """Read one record and decode it into an image and a label matrix.

    Args:
        filename_queue: queue of TFRecord filenames passed to the reader.

    Returns:
        A tuple `(image, label)` of float32 tensors shaped
        [IMAGE_HEIGHT, IMAGE_WIDTH, 1] and [CHARS_NUM, CLASSES_NUM].
    """
    record_reader = tf.TFRecordReader()
    _, serialized_example = record_reader.read(filename_queue)

    feature_spec = {
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label_raw': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    pixels = tf.decode_raw(parsed['image_raw'], tf.int16)
    pixels.set_shape([IMAGE_HEIGHT * IMAGE_WIDTH])
    # Scale by 1/255 and shift by -0.5.
    scaled = tf.cast(pixels, tf.float32) * (1. / 255) - 0.5
    reshape_image = tf.reshape(scaled, [IMAGE_HEIGHT, IMAGE_WIDTH, 1])

    label_bytes = tf.decode_raw(parsed['label_raw'], tf.uint8)
    label_bytes.set_shape([CHARS_NUM * CLASSES_NUM])
    reshape_label = tf.reshape(label_bytes, [CHARS_NUM, CLASSES_NUM])

    image_out = tf.cast(reshape_image, tf.float32)
    label_out = tf.cast(reshape_label, tf.float32)
    return image_out, label_out
def parse_example_proto(example_serialized):
    """Parse a serialized Example proto holding one training image.

    Args:
        example_serialized: serialized Example with the dense features
            'image/encoded' and 'image/class/label'.

    Returns:
        A tuple `(image, label)`: the result of `decode_jpeg` on the encoded
        bytes, and the label cast to int32.
    """
    # Dense features; both carry defaults so a missing key does not fail.
    encoded_feature = tf.FixedLenFeature([], dtype=tf.string, default_value='')
    label_feature = tf.FixedLenFeature([1], dtype=tf.int64, default_value=-1)
    feature_map = {
        'image/encoded': encoded_feature,
        'image/class/label': label_feature,
    }

    with tf.name_scope('decode_tfrecord'):
        parsed = tf.parse_single_example(example_serialized, feature_map)
        image = decode_jpeg(parsed['image/encoded'])
        label = tf.cast(parsed['image/class/label'], dtype=tf.int32)
        return image, label
def test_example_proto_coder_error(self):
    """Encoding and decoding data of the wrong size must raise ValueError."""
    feature_spec = {
        '2d_vector_feature': tf.FixedLenFeature(shape=[2, 2], dtype=tf.int64),
    }
    input_schema = dataset_schema.from_feature_spec(feature_spec)
    coder = example_proto_coder.ExampleProtoCoder(input_schema)

    # Three values cannot fill the declared [2, 2] shape.
    example_decoded_value = {'2d_vector_feature': [1, 2, 3]}

    example_proto_text = """ features { feature { key: "1d_vector_feature" value { int64_list { value: [ 1, 2, 3 ] } } } } """
    example = tf.train.Example()
    text_format.Merge(example_proto_text, example)
    serialized = example.SerializeToString()

    # Encoding invalid data must raise.
    with self.assertRaisesRegexp(ValueError, 'got wrong number of values'):
        _ = coder.encode(example_decoded_value)

    # Parsing invalid data must raise as well.
    with self.assertRaisesRegexp(ValueError, 'got wrong number of values'):
        _ = coder.decode(serialized)
def __init__(self, name, feature_spec, index, reader=None, encoder=None):
    """Store the feature description and validate its size.

    Args:
        name: feature name, used in error messages.
        feature_spec: object providing `dtype`, `default_value` and `shape`.
        index: position of this feature, stored as `_index`.
        reader: optional reader; required when the feature's size is not 1.
        encoder: optional encoder, stored as `_encoder`.

    Raises:
        ValueError: if the product of the shape's dimensions is not 1 and no
            reader was given.
    """
    spec_shape = feature_spec.shape

    self._name = name
    self._cast_fn = _make_cast_fn(feature_spec.dtype)
    self._default_value = feature_spec.default_value
    self._index = index
    self._reader = reader
    self._encoder = encoder
    self._dtype = feature_spec.dtype
    self._shape = spec_shape
    self._rank = len(spec_shape)

    # Total number of values is the product of all dimensions.
    total = 1
    for dim in spec_shape:
        total *= dim
    self._size = total

    # A feature holding more than one value needs a reader.
    if total != 1 and not reader:
        raise ValueError(
            'FixedLenFeature %r was not multivalent (see CsvCoder constructor) '
            'but had shape %r whose size was not 1' % (name, spec_shape))
def encode_value(self, string_list, values):
    """Write this feature's encoded value into its slot of `string_list`.

    Args:
        string_list: mutable list of column strings; entry `self._index` is
            overwritten.
        values: the feature value; a scalar for rank 0, a flat sequence for
            rank 1, or a nested sequence for higher ranks.

    Raises:
        ValueError: if the flattened value count differs from `self._size`.
    """
    rank = self._rank
    if rank == 0:
        flat = [values]
    elif rank == 1:
        # Already flat, so the reshape needed for rank > 1 is skipped.
        flat = values
    else:
        flat = np.asarray(values).reshape(-1)

    count = len(flat)
    if count != self._size:
        raise ValueError('FixedLenFeature %r got wrong number of values. Expected'
                         ' %d but got %d' % (self._name, self._size, count))

    if not self._encoder:
        string_list[self._index] = str(flat[0])
        return
    string_list[self._index] = self._encoder.encode_record(map(str, flat))
def test_valency(self):
    """Round-trip a CSV line through a coder with multivalent columns."""
    data = ('11|12,"this is a ,text",categorical_value|other_value,1|3,89.0|'
            '91.0,12.0|15.0,False')

    # Start from the shared schema and make 'numeric1' hold two values.
    feature_spec = self._INPUT_SCHEMA.as_feature_spec().copy()
    feature_spec['numeric1'] = tf.FixedLenFeature(shape=[2], dtype=tf.int64)
    schema = dataset_schema.from_feature_spec(feature_spec)

    multivalent_columns = ['numeric1', 'numeric2', 'y']
    coder = csv_coder.CsvCoder(
        self._COLUMNS,
        schema,
        delimiter=',',
        secondary_delimiter='|',
        multivalent_columns=multivalent_columns)

    expected_decoded = {
        'category1': ['categorical_value|other_value'],
        'numeric1': [11, 12],
        'numeric2': [89.0, 91.0],
        'boolean1': [False],
        'text1': 'this is a ,text',
        'y': ([1, 3], [12.0, 15.0]),
    }
    self._assert_encode_decode(coder, data, expected_decoded)
    # Test successful decoding with a single column.
    # NOTE(review): the code for this step is not present in this excerpt.
def __init__(self, name, feature_spec):
    """Store the feature description and normalise its default value.

    Args:
        name: feature name, used in error messages.
        feature_spec: object providing `dtype`, `shape` and `default_value`.

    Raises:
        ValueError: if a default value is given whose shape differs from the
            feature's shape.
    """
    spec_shape = feature_spec.shape

    self._name = name
    self._np_dtype = feature_spec.dtype.as_numpy_dtype
    self._value_fn = _make_feature_value_fn(feature_spec.dtype)
    self._shape = spec_shape
    self._rank = len(spec_shape)

    # Total number of values is the product of all dimensions.
    total = 1
    for dim in spec_shape:
        total *= dim
    self._size = total

    default_value = feature_spec.default_value
    if default_value is not None:
        as_array = np.asarray(default_value)
        if list(as_array.shape) != self._shape:
            raise ValueError(
                'FixedLenFeature %r got default value with incorrect shape' %
                (self._name,))
        # Keep the default as a flat Python list.
        default_value = as_array.reshape(-1).tolist()
    self._default_value = default_value
def read_and_decode_embedding(filename_queue):
    """Read one record and decode its int32 sequence and label.

    Args:
        filename_queue: queue of TFRecord filenames passed to the reader.

    Returns:
        A tuple `(sequence, label)`: the int32 sequence with static shape
        [FLAGS.embed_length], and the label cast to int32.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_example = record_reader.read(filename_queue)

    # Both keys are required, so no default values are supplied.
    feature_spec = {
        'label': tf.FixedLenFeature([], tf.int64),
        'sequence_raw': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    raw_sequence = parsed['sequence_raw']
    decoded_sequence = tf.decode_raw(raw_sequence, tf.int32)
    decoded_sequence.set_shape([FLAGS.embed_length])

    # The stored label is int64; downstream code receives int32.
    label = tf.cast(parsed['label'], tf.int32)
    return decoded_sequence, label
def read_and_decode(filename_queue):
    """Read one record and build the input and target spectrograms.

    Args:
        filename_queue: queue of TFRecord filenames passed to the reader.

    Returns:
        A tuple `(input_spec, target_spec)`: the mixed spectrogram passed
        through `stack_spectrograms`, and the song and voice spectrograms
        concatenated along axis 1.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_example = record_reader.read(filename_queue)

    feature_spec = {
        'song_spec': tf.FixedLenFeature([], tf.string),
        'voice_spec': tf.FixedLenFeature([], tf.string),
        'mixed_spec': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    song_spec = transform_spec_from_raw(parsed['song_spec'])
    voice_spec = transform_spec_from_raw(parsed['voice_spec'])
    mixed_spec = transform_spec_from_raw(parsed['mixed_spec'])

    # The mixed signal is the model input.
    input_spec = stack_spectrograms(mixed_spec)
    # The target is the song and voice spectrograms side by side.
    target_spec = tf.concat([song_spec, voice_spec], axis=1)
    return input_spec, target_spec
def decode_from_tfrecords(filename, num_epoch=None):
    """Read one record from `filename` and decode its image and label.

    Args:
        filename: path of the TFRecord file to read.
        num_epoch: forwarded as `num_epochs` to
            `tf.train.string_input_producer`.

    Returns:
        A tuple `(image, label)`: the uint8 image reshaped to the stored
        (height, width, nchannel), and the label cast to int32.
    """
    # Queue of input filenames consumed by the reader.
    filename_queue = tf.train.string_input_producer(
        [filename], num_epochs=num_epoch)
    reader = tf.TFRecordReader()
    _, serialized = reader.read(filename_queue)

    example = tf.parse_single_example(serialized, features={
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'nchannel': tf.FixedLenFeature([], tf.int64),
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    })

    label = tf.cast(example['label'], tf.int32)
    image = tf.decode_raw(example['image'], tf.uint8)

    # tf.pack was renamed to tf.stack and removed in TensorFlow 1.0, so
    # tf.stack is used to assemble the dynamic shape.
    image_shape = tf.stack([
        tf.cast(example['height'], tf.int32),
        tf.cast(example['width'], tf.int32),
        tf.cast(example['nchannel'], tf.int32),
    ])
    image = tf.reshape(image, image_shape)
    return image, label
def _deserialize_image_record(cls, record):
    """Parse one serialized image record.

    Args:
        record: serialized example holding the encoded image, its class
            label and text, and variable-length bounding-box coordinates.

    Returns:
        A tuple `(imgdata, label, bbox, text)`. `bbox` stacks the ymin, xmin,
        ymax and xmax values, gains a leading dimension and is transposed
        with permutation [0, 2, 1].
    """
    feature_map = {
        'image/encoded': tf.FixedLenFeature([], tf.string, ''),
        'image/class/label': tf.FixedLenFeature([1], tf.int64, -1),
        'image/class/text': tf.FixedLenFeature([], tf.string, ''),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
    }
    with tf.name_scope('deserialize_image_record'):
        parsed = tf.parse_single_example(record, feature_map)
        imgdata = parsed['image/encoded']
        label = tf.cast(parsed['image/class/label'], tf.int32)

        # Coordinates are gathered in (ymin, xmin, ymax, xmax) order.
        coordinate_values = [
            parsed['image/object/bbox/%s' % edge].values
            for edge in ['ymin', 'xmin', 'ymax', 'xmax']
        ]
        stacked = tf.stack(coordinate_values)
        bbox = tf.transpose(tf.expand_dims(stacked, 0), [0, 2, 1])

        text = parsed['image/class/text']
        return imgdata, label, bbox, text
def read_and_decode(filename, batch_size):
    """Read records from `filename` and return batched images and labels.

    Args:
        filename: path of the TFRecord file to read.
        batch_size: number of examples per batch.

    Returns:
        A tuple `(image_batch, label_batch)`; the label batch is reshaped to
        [batch_size].
    """
    # Queue of input filenames consumed by the reader.
    filename_queue = tf.train.string_input_producer([filename])
    record_reader = tf.TFRecordReader()
    _, serialized_example = record_reader.read(filename_queue)

    # Parse the two stored features out of the serialized example.
    feature_spec = {
        'label': tf.FixedLenFeature([], tf.int64),
        'img_raw': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    img = tf.decode_raw(parsed['img_raw'], tf.uint8)
    print('xxxx: ', img.get_shape())
    img = tf.reshape(img, [512, 144, 3])
    # Scale by 1/255 and shift by -0.5.
    img = tf.cast(img, tf.float32) * (1. / 255) - 0.5
    label = tf.cast(parsed['label'], tf.int32)

    image_batch, label_batch = tf.train.batch(
        [img, label],
        batch_size=batch_size,
        num_threads=64,
        capacity=2000)
    flat_labels = tf.reshape(label_batch, [batch_size])
    return image_batch, flat_labels
def read_and_decode(filename, img_size=128, depth=1):
    """Read one record from a .tfrecords file and decode image and label.

    Args:
        filename: path of the input file; must end with '.tfrecords'.
        img_size: height and width the decoded image is reshaped to.
        depth: number of channels the decoded image is reshaped to.

    Returns:
        A tuple `(img, label_onehot)`. When `filename` does not end with
        '.tfrecords' a message is printed and `([], [])` is returned.
    """
    if not filename.endswith('.tfrecords'):
        # Call form of print: valid on Python 3 and prints the same single
        # string on Python 2.
        print("Invalid file \"{:s}\"".format(filename))
        return [], []

    data_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(data_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'img_raw': tf.FixedLenFeature([], tf.string),
        })

    img = tf.decode_raw(features['img_raw'], tf.uint8)
    img = tf.reshape(img, [img_size, img_size, depth])
    # Normalize the image: scale by 1/255 and shift by -0.5.
    img = tf.cast(img, tf.float32) * (1. / 255) - 0.5

    label = tf.cast(features['label'], tf.int32)
    # n_classes is read from module scope.
    label_onehot = tf.stack(tf.one_hot(label, n_classes))
    return img, label_onehot
def read_and_decode(filename, img_size=128, depth=1):
    """Read one record from a .tfrecords file and decode image and label.

    Args:
        filename: path of the input file; must end with '.tfrecords'.
        img_size: height and width the decoded image is reshaped to.
        depth: number of channels the decoded image is reshaped to.

    Returns:
        A tuple `(img, label_onehot)`. When `filename` does not end with
        '.tfrecords' a message is printed and `([], [])` is returned.
    """
    if not filename.endswith('.tfrecords'):
        # Call form of print: valid on Python 3 and prints the same single
        # string on Python 2.
        print("Invalid file \"{:s}\"".format(filename))
        return [], []

    data_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(data_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'img_raw': tf.FixedLenFeature([], tf.string),
        })

    img = tf.decode_raw(features['img_raw'], tf.uint8)
    img = tf.reshape(img, [img_size, img_size, depth])
    # Normalize the image: scale by 1/255 and shift by -0.5.
    img = tf.cast(img, tf.float32) * (1. / 255) - 0.5

    label = tf.cast(features['label'], tf.int32)
    # n_classes is read from module scope.
    label_onehot = tf.stack(tf.one_hot(label, n_classes))
    return img, label_onehot
def parse_example_proto(example_serialized):
    """Parse a serialized Example into encoded image, label and filename.

    Args:
        example_serialized: serialized Example with the dense image features
            listed in the feature map below.

    Returns:
        A tuple `(encoded_image, label, filename)`; the label is cast to
        int32.
    """
    def text_feature():
        # Scalar string feature defaulting to the empty string.
        return tf.FixedLenFeature([], dtype=tf.string, default_value='')

    def int_feature():
        # Single-element int64 feature defaulting to -1.
        return tf.FixedLenFeature([1], dtype=tf.int64, default_value=-1)

    feature_map = {
        'image/encoded': text_feature(),
        'image/filename': text_feature(),
        'image/class/label': int_feature(),
        'image/class/text': text_feature(),
        'image/height': int_feature(),
        'image/width': int_feature(),
    }
    parsed = tf.parse_single_example(example_serialized, feature_map)
    label = tf.cast(parsed['image/class/label'], dtype=tf.int32)
    return parsed['image/encoded'], label, parsed['image/filename']
def __init__(self):
    """Define the record keys and the feature spec used for parsing."""
    def bytes_feature():
        # Scalar string feature.
        return tf.FixedLenFeature([], tf.string)

    def int_feature():
        # Scalar int64 feature.
        return tf.FixedLenFeature([], tf.int64)

    self._Image_handle = 'image_raw'
    self._Seq_handle = 'sequence'
    self._Seq_mask = 'seq_mask'
    self._Height_handle = 'height'
    self._Width_handle = 'width'
    self._Depth_handle = 'depth'
    self._Image_name = 'image_name'

    self._Feature_dict = {
        self._Seq_handle: bytes_feature(),
        self._Seq_mask: bytes_feature(),
        self._Image_handle: bytes_feature(),
        self._Height_handle: int_feature(),
        self._Width_handle: int_feature(),
        self._Depth_handle: int_feature(),
        self._Image_name: bytes_feature(),
    }
def __init__(self):
    """Define the record keys and the feature spec used for parsing."""
    def bytes_feature():
        # Scalar string feature.
        return tf.FixedLenFeature([], tf.string)

    def int_feature():
        # Scalar int64 feature.
        return tf.FixedLenFeature([], tf.int64)

    self._Image_handle = 'image_raw'
    self._Height_handle = 'height'
    self._Width_handle = 'width'
    self._Depth_handle = 'depth'
    self._Image_name = 'image_name'
    self._Image_mask = 'image_mask'
    self._Mask_weights = 'mask_weights'

    self._Feature_dict = {
        self._Image_handle: bytes_feature(),
        self._Height_handle: int_feature(),
        self._Width_handle: int_feature(),
        self._Depth_handle: int_feature(),
        self._Image_name: bytes_feature(),
        self._Image_mask: bytes_feature(),
        self._Mask_weights: bytes_feature(),
    }
def _setup_data_ops(self, data_dir, data_dir_val, features_keys=None,
                    training_set_size=50000, val_set_size=10000,
                    dataset_name='datarandom'):
    """Create the training dataflow and, optionally, the validation one.

    Args:
        data_dir: directory of the training data.
        data_dir_val: directory of the validation data, or None to skip it.
        features_keys: feature spec for the decoder; a default image spec is
            used when None.
        training_set_size: examples per epoch for the training dataset.
        val_set_size: examples per epoch for the validation dataset.
        dataset_name: name given to both datasets.

    Returns:
        A tuple `(dataflow_train, dataflow_val)`; `dataflow_val` is None when
        `data_dir_val` is None.
    """
    num_readers = self.cnf.get('num_readers', 8)

    if features_keys is None:
        features_keys = {
            'image/encoded/image': tf.FixedLenFeature(
                (), tf.string, default_value=''),
            'image/format': tf.FixedLenFeature(
                (), tf.string, default_value='jpg'),
            'image/class/label': tf.FixedLenFeature(
                [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
        }
    decoder = Decoder(features_keys)

    train_dataset = Dataset(
        dataset_name,
        decoder,
        data_dir,
        num_examples_per_epoch=training_set_size,
        batch_size=self.cnf['batch_size_train'])
    dataflow_train = Dataflow(
        train_dataset,
        num_readers=num_readers,
        shuffle=True,
        min_queue_examples=self.cnf.get('min_queue_examples', 1000),
        capacity=self.cnf.get('capacity', 2000))

    if data_dir_val is None:
        return dataflow_train, None

    # The validation dataset reuses the training batch size and is not
    # shuffled.
    val_dataset = Dataset(
        dataset_name,
        decoder,
        data_dir_val,
        num_examples_per_epoch=val_set_size,
        batch_size=self.cnf['batch_size_train'])
    dataflow_val = Dataflow(
        val_dataset,
        num_readers=num_readers,
        shuffle=False,
        min_queue_examples=self.cnf.get('min_queue_examples', 1000),
        capacity=self.cnf.get('capacity', 2000))
    return dataflow_train, dataflow_val
def testRealValuedColumnDtypes(self):
    """Check the parsing config of real-valued columns for several dtypes."""
    # Without an explicit dtype the config uses float32.
    default_column = tf.contrib.layers.real_valued_column("rvc")
    expected_default = {"rvc": tf.FixedLenFeature([1], dtype=tf.float32)}
    self.assertDictEqual(expected_default, default_column.config)

    # An explicit int32 dtype is carried into the config.
    int_column = tf.contrib.layers.real_valued_column("rvc", dtype=tf.int32)
    expected_int = {"rvc": tf.FixedLenFeature([1], dtype=tf.int32)}
    self.assertDictEqual(expected_int, int_column.config)

    # A string dtype is rejected.
    with self.assertRaisesRegexp(ValueError,
                                 "dtype must be convertible to float"):
        tf.contrib.layers.real_valued_column("rvc", dtype=tf.string)
def testCreateFeatureSpec_RealValuedColumnWithDefaultValue(self):
    """Check that default values are carried into the parsing spec."""
    real_valued_column = tf.contrib.layers.real_valued_column

    # Scalar defaults and list defaults, for dimension 1 and larger.
    feature_columns = [
        real_valued_column("real_valued_column1", default_value=2),
        real_valued_column("real_valued_column2", 5, default_value=4),
        real_valued_column("real_valued_column3", default_value=[8]),
        real_valued_column("real_valued_column4", 3, default_value=[1, 0, 6]),
    ]

    config = tf.contrib.layers.create_feature_spec_for_parsing(feature_columns)
    self.assertEqual(4, len(config))

    expected = {
        "real_valued_column1": tf.FixedLenFeature(
            [1], dtype=tf.float32, default_value=[2.]),
        "real_valued_column2": tf.FixedLenFeature(
            [5], dtype=tf.float32, default_value=[4., 4., 4., 4., 4.]),
        "real_valued_column3": tf.FixedLenFeature(
            [1], dtype=tf.float32, default_value=[8.]),
        "real_valued_column4": tf.FixedLenFeature(
            [3], dtype=tf.float32, default_value=[1., 0., 6.]),
    }
    self.assertDictEqual(expected, config)
def read_my_file_format(filename_queue, resize_shape=None):
    """Read one record, decode its JPEG image and preprocess it.

    :param filename_queue: tf.train.string_input_producer object
    :param resize_shape: 2 element list defining the shape to resize images
        to; passed on to `preprocessing`.
    :return: the result of `preprocessing` on the decoded image.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_example = record_reader.read(filename_queue)

    feature_spec = {
        'image/encoded': tf.FixedLenFeature([], tf.string),
        'image/height': tf.FixedLenFeature([], tf.int64),
        'image/channels': tf.FixedLenFeature([], tf.int64),
        'image/width': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    # Decode with three channels.
    decoded = tf.image.decode_jpeg(parsed['image/encoded'], 3)
    return preprocessing(decoded, resize_shape)
def decode_image_objects(paths):
    """Read one record and decode its image and object annotations.

    Args:
        paths: TFRecord file paths fed to the filename queue.

    Returns:
        A tuple `(image, imageshape, objects_class, objects_coord)`. The
        image is loaded from the path stored in the record and decoded as a
        3-channel JPEG; `objects_coord` is reshaped to [-1, 4].
    """
    # The outer name scope is named after this function.
    scope_name = inspect.stack()[0][3]
    with tf.name_scope(scope_name):
        with tf.name_scope('parse_example'):
            record_reader = tf.TFRecordReader()
            _, serialized = record_reader.read(
                tf.train.string_input_producer(paths))
            feature_spec = {
                'imagepath': tf.FixedLenFeature([], tf.string),
                'imageshape': tf.FixedLenFeature([3], tf.int64),
                'objects': tf.FixedLenFeature([2], tf.string),
            }
            example = tf.parse_single_example(serialized, features=feature_spec)
        imagepath = example['imagepath']
        objects = example['objects']

        with tf.name_scope('decode_objects'):
            # Entry 0 holds the classes, entry 1 the coordinates.
            objects_class = tf.decode_raw(
                objects[0], tf.int64, name='objects_class')
            raw_coord = tf.decode_raw(objects[1], tf.float32)
            objects_coord = tf.reshape(raw_coord, [-1, 4], name='objects_coord')

        with tf.name_scope('load_image'):
            imagefile = tf.read_file(imagepath)
            image = tf.image.decode_jpeg(imagefile, channels=3)

    return image, example['imageshape'], objects_class, objects_coord
def get_dataset(dataset_name, dataset_dir, image_count, class_count, split_name):
    """Build a slim Dataset over the TFRecord files of one split.

    Args:
        dataset_name: prefix of the TFRecord file names.
        dataset_dir: directory holding the TFRecord files and the label file.
        image_count: number of samples reported by the dataset.
        class_count: number of classes reported by the dataset.
        split_name: split identifier used in the file pattern.

    Returns:
        A `slim.dataset.Dataset` reading files that match
        `<dataset_name>_<split_name>_*.tfrecord` in `dataset_dir`.
    """
    slim = tf.contrib.slim

    items_to_descriptions = {
        'image': 'A color image.',
        'label': 'An integer in range(0, class_count)',
    }
    pattern_name = '{}_{}_*.tfrecord'.format(dataset_name, split_name)
    file_pattern = os.path.join(dataset_dir, pattern_name)
    # The reader class itself is handed to the dataset, not an instance.
    reader = tf.TFRecordReader

    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),
        'image/class/label': tf.FixedLenFeature(
            [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
    }
    items_to_handlers = {
        'image': slim.tfexample_decoder.Image(),
        'label': slim.tfexample_decoder.Tensor('image/class/label'),
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(
        keys_to_features, items_to_handlers)

    labels_to_names = read_label_file(dataset_dir)

    dataset = slim.dataset.Dataset(
        data_sources=file_pattern,
        reader=reader,
        decoder=decoder,
        num_samples=image_count,
        items_to_descriptions=items_to_descriptions,
        num_classes=class_count,
        labels_to_names=labels_to_names,
        shuffle=True)
    return dataset
def decode_record(filename_queue, patch_size, channel_num=3):
    """Read one record and decode its image patch and label.

    Args:
        filename_queue: queue of TFRecord filenames passed to the reader.
        patch_size: height and width the decoded image is reshaped to.
        channel_num: number of channels the decoded image is reshaped to.

    Returns:
        A tuple `(img, label)`: the float32 image scaled by 1/255 and shifted
        by -0.5, and the label cast to int32.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_example = record_reader.read(filename_queue)

    feature_spec = {
        'label': tf.FixedLenFeature([], tf.int64),
        'image': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    pixels = tf.decode_raw(parsed['image'], tf.uint8)
    patch = tf.reshape(pixels, [patch_size, patch_size, channel_num])
    img = tf.cast(patch, tf.float32) * (1. / 255) - 0.5
    label = tf.cast(parsed['label'], tf.int32)
    return img, label
def read(filename_queue, feature_num=2, dtypes=(list, int)):
    """Read one record and parse features named 'feature1', 'feature2', ...

    Args:
        filename_queue: queue of TFRecord filenames passed to the reader.
        feature_num: number of features to parse.
        dtypes: sequence with one Python type per feature; `int` selects a
            scalar int64 feature, anything else a scalar string feature. The
            default is an immutable tuple so it cannot be altered between
            calls.

    Returns:
        The dict returned by `tf.parse_single_example`, keyed by
        'feature<i>' with i starting at 1.
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    feature_dict = {}
    for position in range(feature_num):
        # Original author's note: only three data types are allowed here,
        # tf.float32, tf.int64 and tf.string. This function emits only
        # int64 or string.
        feature_type = tf.int64 if dtypes[position] is int else tf.string
        feature_dict['feature' + str(position + 1)] = tf.FixedLenFeature(
            [], feature_type)

    features = tf.parse_single_example(
        serialized_example, features=feature_dict)
    return features
def read_and_decode(filename_queue):
    """Read one record and decode its left and right images.

    Args:
        filename_queue: queue of TFRecord filenames passed to the reader.

    Returns:
        A tuple `(image_left, image_right)` of flat uint8 tensors with static
        shape [960 * 540 * 4].
    """
    record_reader = tf.TFRecordReader()
    _, serialized_example = record_reader.read(filename_queue)

    # Both keys are required, so no default values are supplied.
    feature_spec = {
        'image_left': tf.FixedLenFeature([], tf.string),
        'image_right': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    image_left = tf.decode_raw(parsed['image_left'], tf.uint8)
    image_right = tf.decode_raw(parsed['image_right'], tf.uint8)

    width, height, depth = 960, 540, 4
    flat_length = width * height * depth
    image_left.set_shape([flat_length])
    image_right.set_shape([flat_length])
    return image_left, image_right
def prepare_reader(self, filename_queue, batch_size=1024):
    """Build the ops that read and parse up to `batch_size` records.

    Args:
        filename_queue: A tensorflow queue of filename locations.
        batch_size: maximum number of records requested per read.

    Returns:
        A tuple `(video_ids, features, labels, ones)`: the 'video_id'
        feature, all features named in `self.feature_names` concatenated
        along axis 1, the indicator form of the sparse 'labels' feature with
        `self.num_classes` columns, and a vector of ones with one entry per
        serialized record.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(filename_queue, batch_size)

    # Describe how each proto field maps onto a tensor type.
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), (
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes)))

    feature_map = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, feature_name in enumerate(self.feature_names):
        feature_map[feature_name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=feature_map)

    labels = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])

    per_feature = [parsed[feature_name] for feature_name in self.feature_names]
    concatenated_features = tf.concat(per_feature, 1)

    ones = tf.ones([tf.shape(serialized_examples)[0]])
    return parsed["video_id"], concatenated_features, labels, ones
def prepare_reader(self, filename_queue, batch_size=1024):
    """Build the ops that read records including stored predictions.

    Args:
        filename_queue: A tensorflow queue of filename locations.
        batch_size: maximum number of records requested per read.

    Returns:
        A tuple `(video_ids, features, labels, ones, predictions)`: the
        'video_id' feature, all features named in `self.feature_names`
        concatenated along axis 1, the indicator form of the sparse 'labels'
        feature with `self.num_classes` columns, a vector of ones with one
        entry per serialized record, and the 'predictions' feature.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(filename_queue, batch_size)

    # Describe how each proto field maps onto a tensor type.
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), (
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes)))

    feature_map = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "predictions": tf.FixedLenFeature([self.num_classes], tf.float32),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, feature_name in enumerate(self.feature_names):
        feature_map[feature_name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=feature_map)

    labels = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])

    per_feature = [parsed[feature_name] for feature_name in self.feature_names]
    concatenated_features = tf.concat(per_feature, 1)

    ones = tf.ones([tf.shape(serialized_examples)[0]])
    return (parsed["video_id"], concatenated_features, labels, ones,
            parsed["predictions"])
def prepare_reader(self, filename_queue, batch_size=1024):
    """Build the ops that read records and return only their predictions.

    Args:
        filename_queue: A tensorflow queue of filename locations.
        batch_size: maximum number of records requested per read.

    Returns:
        The 'predictions' feature parsed from the serialized records, with
        `self.num_classes` float32 values per record.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(filename_queue, batch_size)

    # The feature-name checks are kept even though only the fixed keys below
    # are parsed.
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), (
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes)))

    feature_map = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "predictions": tf.FixedLenFeature([self.num_classes], tf.float32),
        "labels": tf.VarLenFeature(tf.int64),
    }
    parsed = tf.parse_example(serialized_examples, features=feature_map)
    return parsed["predictions"]
def prepare_writer(self, filename_queue, batch_size=1024):
    """Build the ops that read and parse up to `batch_size` records.

    Despite its name, this method only reads: it creates a TFRecordReader
    and parses what it reads.

    Args:
        filename_queue: A tensorflow queue of filename locations.
        batch_size: maximum number of records requested per read.

    Returns:
        A tuple `(video_ids, features, labels, ones)`: the 'video_id'
        feature, all features named in `self.feature_names` concatenated
        along axis 1, the indicator form of the sparse 'labels' feature with
        `self.num_classes` columns, and a vector of ones with one entry per
        serialized record.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(filename_queue, batch_size)

    # Describe how each proto field maps onto a tensor type.
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), (
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes)))

    feature_map = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, feature_name in enumerate(self.feature_names):
        feature_map[feature_name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=feature_map)

    labels = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])

    per_feature = [parsed[feature_name] for feature_name in self.feature_names]
    concatenated_features = tf.concat(per_feature, 1)

    ones = tf.ones([tf.shape(serialized_examples)[0]])
    return parsed["video_id"], concatenated_features, labels, ones