Python tensorflow module: VarLenFeature() code examples

The following 50 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.VarLenFeature().
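Before the per-project examples, here is a minimal sketch of the pattern most of them share. It assumes the TF 1.x parsing API used throughout this page; the feature names "video_id" and "labels" are illustrative, not taken from any one project.

import tensorflow as tf

# A VarLenFeature parses a proto field whose length varies from example to
# example into a tf.SparseTensor; a FixedLenFeature yields a dense Tensor.
feature_spec = {
    "video_id": tf.FixedLenFeature([], tf.string),  # scalar string
    "labels": tf.VarLenFeature(tf.int64),           # variable-length int64 list
}

def parse_fn(serialized_example):
    features = tf.parse_single_example(serialized_example, features=feature_spec)
    # features["labels"] is a tf.SparseTensor; densify it when a dense
    # Tensor is needed downstream.
    dense_labels = tf.sparse_tensor_to_dense(features["labels"], default_value=0)
    return features["video_id"], dense_labels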

Project: youtube-8m | Author: wangheda
def prepare_reader(self, filename_queue, batch_size=1024):

    reader = tf.TFRecordReader()
    _, serialized_examples = reader.read_up_to(filename_queue, batch_size)

    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
Project: yt8m | Author: forwchen
def prepare_serialized_examples(self, serialized_examples):
    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
Project: cloudml-samples | Author: GoogleCloudPlatform
def _make_schema(columns, types, default_values):
  """Input schema definition.

  Args:
    columns: column names for fields appearing in input.
    types: column types for fields appearing in input.
    default_values: default values for fields appearing in input.
  Returns:
    feature_set dictionary of string to *Feature.
  """
  result = {}
  assert len(columns) == len(types)
  assert len(columns) == len(default_values)
  for c, t, v in zip(columns, types, default_values):
    if isinstance(t, list):
      result[c] = tf.VarLenFeature(dtype=t[0])
    else:
      result[c] = tf.FixedLenFeature(shape=[], dtype=t, default_value=v)
  return dataset_schema.from_feature_spec(result)
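A hypothetical call to the helper above (the column names, types, and defaults are made up for illustration): wrapping a dtype in a list yields a VarLenFeature, while a bare dtype yields a FixedLenFeature with the given default.

schema = _make_schema(
    columns=['age', 'tags'],
    types=[tf.int64, [tf.string]],  # [tf.string] -> VarLenFeature(tf.string)
    default_values=[0, None])       # the default is unused for VarLenFeature columns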
Project: youtube-8m | Author: google
def prepare_serialized_examples(self, serialized_examples):
    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
Project: Video-Classification | Author: boyaolin
def prepare_serialized_examples(self, serialized_examples):
    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
Project: Youtube-8M-WILLOW | Author: antoine77340
def prepare_serialized_examples(self, serialized_examples):
    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)

    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
Project: Y8M | Author: mpekalski
def prepare_serialized_examples(self, serialized_examples):
    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
Project: keras_experiments | Author: avolkov1
def _deserialize_image_record(cls, record):
        feature_map = {
            'image/encoded': tf.FixedLenFeature([], tf.string, ''),
            'image/class/label': tf.FixedLenFeature([1], tf.int64, -1),
            'image/class/text': tf.FixedLenFeature([], tf.string, ''),
            'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
            'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
            'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
            'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32)
        }
        with tf.name_scope('deserialize_image_record'):
            obj = tf.parse_single_example(record, feature_map)
            imgdata = obj['image/encoded']
            label = tf.cast(obj['image/class/label'], tf.int32)
            bbox = tf.stack([obj['image/object/bbox/%s' % x].values
                             for x in ['ymin', 'xmin', 'ymax', 'xmax']])
            bbox = tf.transpose(tf.expand_dims(bbox, 0), [0, 2, 1])
            text = obj['image/class/text']
            return imgdata, label, bbox, text
Project: Youtube8mdataset_kagglechallenge | Author: jasonlee27
def prepare_serialized_examples(self, serialized_examples):
    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
Project: lsdc | Author: febert
def testWeightedSparseColumnDtypes(self):
    ids = tf.contrib.layers.sparse_column_with_keys(
        "ids", ["marlo", "omar", "stringer"])
    weighted_ids = tf.contrib.layers.weighted_sparse_column(ids, "weights")
    self.assertDictEqual(
        {"ids": tf.VarLenFeature(tf.string),
         "weights": tf.VarLenFeature(tf.float32)},
        weighted_ids.config)

    weighted_ids = tf.contrib.layers.weighted_sparse_column(ids, "weights",
                                                            dtype=tf.int32)
    self.assertDictEqual(
        {"ids": tf.VarLenFeature(tf.string),
         "weights": tf.VarLenFeature(tf.int32)},
        weighted_ids.config)

    with self.assertRaisesRegexp(ValueError,
                                 "dtype is not convertible to float"):
      weighted_ids = tf.contrib.layers.weighted_sparse_column(ids, "weights",
                                                              dtype=tf.string)
Project: youtube | Author: taufikxu
def prepare_serialized_examples(self, serialized_examples):
    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
Project: video_subtitle_extract | Author: thewintersun
def read_and_decode(filename_queue):
  reader = tf.TFRecordReader()
  _, serialized_example = reader.read(filename_queue)
  features = tf.parse_single_example(
      serialized_example,
      features={
          'height': tf.FixedLenFeature([], tf.int64),
          'width': tf.FixedLenFeature([], tf.int64),
          'image_raw': tf.FixedLenFeature([], tf.string),
          'label': tf.VarLenFeature(tf.int64),
      })

  image = tf.decode_raw(features['image_raw'], tf.uint8)
  image = tf.reshape(image, [730, 38])

  image = tf.cast(image, tf.float32) * (1. / 255) - 0.5

  label = tf.cast(features['label'], tf.int32)

  return image, label
Project: video_subtitle_extract | Author: thewintersun
def read_and_decode(filename_queue):
  reader = tf.TFRecordReader()
  _, serialized_example = reader.read(filename_queue)
  features = tf.parse_single_example(
      serialized_example,
      features={
          'height': tf.FixedLenFeature([], tf.int64),
          'width': tf.FixedLenFeature([], tf.int64),
          'image_raw': tf.FixedLenFeature([], tf.string),
          'label': tf.VarLenFeature(tf.int64),
      })

  image = tf.decode_raw(features['image_raw'], tf.uint8)
  image = tf.reshape(image, [730, 38])

  image = tf.cast(image, tf.float32) * (1. / 255) - 0.5

  label = tf.cast(features['label'], tf.int32)

  return image, label
Project: kaggle-youtube-8m | Author: liufuyang
def prepare_serialized_examples(self, serialized_examples):
    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)

    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
Project: EasySparse | Author: physicso
def read_and_decode_batch(filename_queue, batch_size, capacity, min_after_dequeue):
    """Dequeue a batch of data from the TFRecord.
    Args:
    filename_queue: Filename Queue of the TFRecord.
    batch_size: How many records dequeued each time.
    capacity: The capacity of the queue.
    min_after_dequeue: Ensures a minimum amount of shuffling of examples.
    Returns:
     List of the dequeued (batch_label, batch_ids, batch_values).
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    batch_serialized_example = tf.train.shuffle_batch([serialized_example], 
        batch_size=batch_size, capacity=capacity, min_after_dequeue=min_after_dequeue)
    # The feature definition here should be consistent with the LibSVM-to-TFRecord conversion process.
    features = tf.parse_example(batch_serialized_example,
                                       features={
                                           "label": tf.FixedLenFeature([], tf.float32),
                                           "ids": tf.VarLenFeature(tf.int64),
                                           "values": tf.VarLenFeature(tf.float32)
                                       })
    batch_label = features["label"]
    batch_ids = features["ids"]
    batch_values = features["values"]
    return batch_label, batch_ids, batch_values
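A hypothetical way to wire the batch reader above into a queue-based input pipeline (the file name and hyperparameters are made up):

filename_queue = tf.train.string_input_producer(['train.tfrecord'])
batch_label, batch_ids, batch_values = read_and_decode_batch(
    filename_queue, batch_size=128, capacity=10000, min_after_dequeue=1000)
# batch_ids and batch_values are SparseTensors, suitable for e.g.
# tf.nn.embedding_lookup_sparse(params, sp_ids=batch_ids, sp_weights=batch_values)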
Project: u8m_test | Author: hxkk
def prepare_serialized_examples(self, serialized_examples):
    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
Project: tf-text-workshop | Author: tf-dl-workshop
def read_and_decode_single_example(filenames, shuffle=False, num_epochs=None):
    # first construct a queue containing a list of filenames.
    # this lets a user split up their dataset into multiple files to keep size down
    # filename_queue = tf.train.string_input_producer([filename], num_epochs=10)
    filename_queue = tf.train.string_input_producer(filenames,
                                                    shuffle=shuffle, num_epochs=num_epochs)

    reader = tf.TFRecordReader()
    # One can read a single serialized example from a filename
    # serialized_example is a Tensor of type string.
    _, serialized_ex = reader.read(filename_queue)
    context, sequences = tf.parse_single_sequence_example(serialized_ex,
                                                          context_features={
                                                              "seq_length": tf.FixedLenFeature([], dtype=tf.int64)
                                                          },
                                                          sequence_features={
                                                              "seq_feature": tf.VarLenFeature(dtype=tf.int64),
                                                              "label": tf.VarLenFeature(dtype=tf.int64)
                                                          })
    return context, sequences
Project: youtube-8m | Author: Tsingularity
def prepare_serialized_examples(self, serialized_examples):
    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
Project: youtube-8m | Author: Tsingularity
def prepare_serialized_examples(self, serialized_examples):
    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    ### Newly
    raw_labels = features["labels"]
    raw_coarse = tf.SparseTensor(
        indices=raw_labels.indices,
        values=tf.reshape(
            tf.gather(tf.constant(self.label_belongs, dtype=tf.int64),
                      raw_labels.values), [-1]),
        dense_shape=raw_labels.dense_shape)
    coarse_labels = tf.sparse_to_indicator(raw_coarse, self.num_coarse_classes,
                                           name='coarse_transfer')
    coarse_labels.set_shape([None, self.num_coarse_classes])
    ###
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    # return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
    ### Newly
    return features["video_id"], concatenated_features, labels, coarse_labels, tf.ones([tf.shape(serialized_examples)[0]])
    ###
Project: youtube-8m | Author: Tsingularity
def prepare_serialized_examples(self, serialized_examples):
    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
Project: youtube-8m | Author: wangheda
def prepare_reader(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Args:
      filename_queue: A tensorflow queue of filename locations.

    Returns:
      A tuple of video indexes, features, labels, and padding data.
    """
    reader = tf.TFRecordReader()
    _, serialized_examples = reader.read_up_to(filename_queue, batch_size)

    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
Project: youtube-8m | Author: wangheda
def prepare_reader(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Args:
      filename_queue: A tensorflow queue of filename locations.

    Returns:
      A tuple of video indexes, features, labels, and padding data.
    """
    reader = tf.TFRecordReader()
    _, serialized_examples = reader.read_up_to(filename_queue, batch_size)

    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "predictions": tf.FixedLenFeature([self.num_classes], tf.float32),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]]), features["predictions"]
Project: youtube-8m | Author: wangheda
def prepare_reader(self, filename_queue, batch_size=1024):
        """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

        Args:
          filename_queue: A tensorflow queue of filename locations.

        Returns:
          A tuple of video indexes, features, labels, and padding data.
        """
        reader = tf.TFRecordReader()
        _, serialized_examples = reader.read_up_to(filename_queue, batch_size)

        # set the mapping from the fields to data types in the proto
        num_features = len(self.feature_names)
        assert num_features > 0, "self.feature_names is empty!"
        assert len(self.feature_names) == len(self.feature_sizes), \
            "length of feature_names (={}) != length of feature_sizes (={})".format( \
                len(self.feature_names), len(self.feature_sizes))

        feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                       "predictions": tf.FixedLenFeature([self.num_classes], tf.float32),
                       "labels": tf.VarLenFeature(tf.int64)}

        features = tf.parse_example(serialized_examples, features=feature_map)

        return features["predictions"]
Project: youtube-8m | Author: wangheda
def prepare_writer(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Args:
      filename_queue: A tensorflow queue of filename locations.

    Returns:
      A tuple of video indexes, features, labels, and padding data.
    """
    reader = tf.TFRecordReader()
    _, serialized_examples = reader.read_up_to(filename_queue, batch_size)

    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
Project: youtube-8m | Author: wangheda
def prepare_reader(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Args:
      filename_queue: A tensorflow queue of filename locations.

    Returns:
      A tuple of video indexes, features, labels, and padding data.
    """
    reader = tf.TFRecordReader()
    _, serialized_examples = reader.read_up_to(filename_queue, batch_size)

    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
Project: youtube-8m | Author: wangheda
def prepare_reader(self, filename_queue):

    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    contexts, features = tf.parse_single_sequence_example(
        serialized_example,
        context_features={
            "video_id": tf.FixedLenFeature([], tf.string),
            "labels": tf.VarLenFeature(tf.int64)},
        sequence_features={
            "rgb": tf.FixedLenSequenceFeature([], dtype=tf.string),
            "audio": tf.FixedLenSequenceFeature([], dtype=tf.string),
        })

    # read ground truth labels
    labels = (tf.cast(
        tf.sparse_to_dense(contexts["labels"].values, (self.num_classes,), 1,
            validate_indices=False),
        tf.bool))

    rgbs, num_frames = self.get_video_matrix(features["rgb"], 1024, self.max_frames)
    audios, num_frames = self.get_video_matrix(features["audio"], 128, self.max_frames)  # YouTube-8M audio features are 128-dimensional

    batch_video_ids = tf.expand_dims(contexts["video_id"], 0)
    batch_rgbs = tf.expand_dims(rgbs, 0)
    batch_audios = tf.expand_dims(audios, 0)
    batch_labels = tf.expand_dims(labels, 0)
    batch_frames = tf.expand_dims(num_frames, 0)

    return batch_video_ids, batch_rgbs, batch_audios, batch_labels, batch_frames
Project: seq2seq | Author: google
def __init__(self, context_keys_to_features, sequence_keys_to_features,
               items_to_handlers):
    """Constructs the decoder.
    Args:
      keys_to_features: a dictionary from TF-Example keys to either
        tf.VarLenFeature or tf.FixedLenFeature instances. See tensorflow's
        parsing_ops.py.
      items_to_handlers: a dictionary from items (strings) to ItemHandler
        instances. Note that the ItemHandler's are provided the keys that they
        use to return the final item Tensors.
    """
    self._context_keys_to_features = context_keys_to_features
    self._sequence_keys_to_features = sequence_keys_to_features
    self._items_to_handlers = items_to_handlers
Project: DL2W | Author: gauravmm
def decode(filename_queue):
    # Create TFRecords reader
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    # Feature keys in TFRecords example
    features = tf.parse_single_example(serialized_example, features={
        'id': tf.FixedLenFeature([], tf.string),
        'vector': tf.FixedLenFeature([], tf.string),
        'label': tf.VarLenFeature(tf.int64)
    })

    video_id = features['id']

    # Decode vector and pad to fixed size
    vector = tf.decode_raw(features['vector'], tf.float32)
    vector = tf.reshape(vector, [-1, 300])
    vector = tf.pad(vector, [[0, 40 - tf.shape(vector)[0]], [0, 0]])
    vector.set_shape([40, 300])

    # Get label index
    label = tf.sparse_to_indicator(features['label'], 4716)
    label.set_shape([4716])
    label = tf.cast(label, tf.float32)

    return video_id, vector, label

# Creates input pipeline for tensorflow networks
Project: cnn_lstm_ctc_ocr | Author: weinman
def _read_word_record(data_queue):

    reader = tf.TFRecordReader() # Construct a general reader
    key, example_serialized = reader.read(data_queue) 

    feature_map = {
        'image/encoded':  tf.FixedLenFeature( [], dtype=tf.string, 
                                              default_value='' ),
        'image/labels':   tf.VarLenFeature( dtype=tf.int64 ), 
        'image/width':    tf.FixedLenFeature( [1], dtype=tf.int64,
                                              default_value=1 ),
        'image/filename': tf.FixedLenFeature([], dtype=tf.string,
                                             default_value='' ),
        'text/string':     tf.FixedLenFeature([], dtype=tf.string,
                                             default_value='' ),
        'text/length':    tf.FixedLenFeature( [1], dtype=tf.int64,
                                              default_value=1 )
    }
    features = tf.parse_single_example( example_serialized, feature_map )

    image = tf.image.decode_jpeg( features['image/encoded'], channels=1 ) #gray
    width = tf.cast( features['image/width'], tf.int32) # for ctc_loss
    label = tf.serialize_sparse( features['image/labels'] ) # for batching
    length = features['text/length']
    text = features['text/string']
    filename = features['image/filename']
    return image, width, label, length, text, filename
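The code comment above notes that the label is serialized "for batching". A hypothetical downstream step (the batch size and capacity are made up) batches the serialized rows and recovers a single batched SparseTensor:

image, width, label, length, text, filename = _read_word_record(data_queue)
batched_label = tf.train.batch([label], batch_size=32, capacity=1000)[0]
# Each row of batched_label is one tf.serialize_sparse() result
labels_sparse = tf.deserialize_many_sparse(batched_label, dtype=tf.int64)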
Project: handwritten-sequence-tensorflow | Author: johnsmithm
def read_and_decode_single_example(self,filename,test=False):
    with tf.name_scope('TFRecordReader'):
        # first construct a queue containing a list of filenames.
        # this lets a user split up their dataset into multiple files to keep
        # size down
        files = [filename] if self.filenameNr==1 or test else [filename.format(i) for i in range(self.filenameNr)]
        filename_queue = tf.train.string_input_producer(files,
                                                        num_epochs=None)
        # Unlike the TFRecordWriter, the TFRecordReader is symbolic
        reader = tf.TFRecordReader()
        # One can read a single serialized example from a filename
        # serialized_example is a Tensor of type string.
        _, serialized_example = reader.read(filename_queue)
        # The serialized example is converted back to actual values.
        # One needs to describe the format of the objects to be returned
        features = tf.parse_single_example(
            serialized_example,
            features={
                # We know the length of both fields. If we did not,
                # tf.VarLenFeature could be used
                'seq_len': tf.FixedLenFeature([1], tf.int64),
                'target': tf.VarLenFeature(tf.int64),     
                'imageInput': tf.FixedLenFeature([self.height*self.width], tf.float32)
            })
        # now return the converted data
        imageInput = features['imageInput']
        seq_len     = features['seq_len']
        target     = features['target']
    return imageInput, seq_len , target
Project: seglink | Author: bgshih
def input_stream(record_path, scope=None):
  """
  Input data stream
  ARGS
    `record_path`: tf records file path
  RETURN
    `streams`: data streams
  """
  with tf.device('/cpu:0'):
    with tf.variable_scope(scope or 'input_stream'):
      reader = tf.TFRecordReader()
      filename_queue = tf.train.string_input_producer([record_path], None)
      _, record_value = reader.read(filename_queue)
      features = tf.parse_single_example(record_value,
        {
          'image_jpeg': tf.FixedLenFeature([], tf.string),
          'image_name': tf.FixedLenFeature([], tf.string),
          'word_polygons': tf.VarLenFeature(tf.float32),
          # 'words': tf.VarLenFeature(tf.string) // FIXME: problem with parsing words
        })
      # decode jpeg image
      image = tf.cast(tf.image.decode_jpeg(features['image_jpeg'], channels=3), tf.float32)

      # extract bounding polygons
      word_polygons = tf.sparse_tensor_to_dense(features['word_polygons'])
      word_polygons = tf.reshape(word_polygons, [-1, WORD_POLYGON_DIM])

      # extract words
      # words = tf.sparse_tensor_to_dense(features['words'])

      # output streams
      streams = {'image': image,
                 'image_name': features['image_name'],
                 'image_jpeg': features['image_jpeg'],
                 'word_polygons': word_polygons}
      return streams
Project: conv_seq2seq | Author: tobyyouup
def __init__(self, context_keys_to_features, sequence_keys_to_features,
               items_to_handlers):
    """Constructs the decoder.
    Args:
      keys_to_features: a dictionary from TF-Example keys to either
        tf.VarLenFeature or tf.FixedLenFeature instances. See tensorflow's
        parsing_ops.py.
      items_to_handlers: a dictionary from items (strings) to ItemHandler
        instances. Note that the ItemHandler's are provided the keys that they
        use to return the final item Tensors.
    """
    self._context_keys_to_features = context_keys_to_features
    self._sequence_keys_to_features = sequence_keys_to_features
    self._items_to_handlers = items_to_handlers
Project: transform | Author: tensorflow
def testMakeOutputDictError(self):
    schema = self.toSchema({'a': tf.VarLenFeature(tf.string)})

    # SparseTensor that cannot be represented as VarLenFeature.
    fetches = {
        'a': tf.SparseTensorValue(indices=np.array([(0, 2), (0, 4), (0, 8)]),
                                  values=np.array([10.0, 20.0, 30.0]),
                                  dense_shape=(1, 20))
    }
    with self.assertRaisesRegexp(
        ValueError, 'cannot be decoded by ListColumnRepresentation'):
      _ = impl_helper.make_output_dict(schema, fetches)

    # SparseTensor of invalid rank.
    fetches = {
        'a': tf.SparseTensorValue(
            indices=np.array([(0, 0, 1), (0, 0, 2), (0, 0, 3)]),
            values=np.array([10.0, 20.0, 30.0]),
            dense_shape=(1, 10, 10))
    }
    with self.assertRaisesRegexp(
        ValueError, 'cannot be decoded by ListColumnRepresentation'):
      _ = impl_helper.make_output_dict(schema, fetches)

    # SparseTensor with indices that are out of order.
    fetches = {
        'a': tf.SparseTensorValue(indices=np.array([(0, 2), (2, 4), (1, 8)]),
                                  values=np.array([10.0, 20.0, 30.0]),
                                  dense_shape=(3, 20))
    }
    with self.assertRaisesRegexp(
        ValueError, 'Encountered out-of-order sparse index'):
      _ = impl_helper.make_output_dict(schema, fetches)
Project: transform | Author: tensorflow
def testRunPreprocessingFn(self):
    schema = self.toSchema({
        'dense_1': tf.FixedLenFeature((), tf.float32),
        'dense_2': tf.FixedLenFeature((1, 2), tf.int64),
        'var_len': tf.VarLenFeature(tf.string),
        'sparse': tf.SparseFeature('ix', 'val', tf.float32, 100)
    })
    def preprocessing_fn(inputs):
      return {
          'dense_out': mappers.scale_to_0_1(inputs['dense_1']),
          'sparse_out': tf.sparse_reshape(inputs['sparse'], (1, 10)),
      }

    _, inputs, outputs = impl_helper.run_preprocessing_fn(
        preprocessing_fn, schema)

    # Verify that the input placeholders have the correct types.
    expected_dtype_and_shape = {
        'dense_1': (tf.float32, tf.TensorShape([None])),
        'dense_2': (tf.int64, tf.TensorShape([None, 1, 2])),
        'var_len': (tf.string, tf.TensorShape([None, None])),
        'sparse': (tf.float32, tf.TensorShape([None, None])),
        'dense_out': (tf.float32, tf.TensorShape([None])),
        'sparse_out': (tf.float32, tf.TensorShape([None, None])),
    }

    for key, tensor in itertools.chain(six.iteritems(inputs),
                                       six.iteritems(outputs)):
      dtype, shape = expected_dtype_and_shape[key]
      self.assertEqual(tensor.dtype, dtype)
      tensor.get_shape().assert_is_compatible_with(shape)
Project: transform | Author: tensorflow
def as_feature_spec(self):
    """Returns a representation of this ColumnSchema as a feature spec.

    A feature spec (for a specific column) is one of a FixedLenFeature,
    SparseFeature or VarLenFeature.

    Returns:
      A representation of this ColumnSchema as a feature spec.
    """
    return self.representation.as_feature_spec(self)
Project: transform | Author: tensorflow
def as_feature_spec(self, column):
    if column.domain.dtype not in _TF_EXAMPLE_ALLOWED_TYPES:
      raise ValueError('tf.Example parser supports only types {}, so it is '
                       'invalid to generate a feature_spec with type '
                       '{}.'.format(
                           _TF_EXAMPLE_ALLOWED_TYPES,
                           repr(column.domain.dtype)))
    return tf.VarLenFeature(column.domain.dtype)
Project: transform | Author: tensorflow
def from_feature_spec(feature_spec):
  """Convert a feature_spec to a Schema.

  Args:
    feature_spec: a features specification in the format expected by
        tf.parse_example(), i.e.
        `{name: FixedLenFeature(...), name: VarLenFeature(...), ...}`

  Returns:
    A Schema representing the provided set of columns.
  """
  return Schema({
      key: _from_parse_feature(parse_feature)
      for key, parse_feature in six.iteritems(feature_spec)
  })
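A hypothetical call to from_feature_spec (the feature spec is illustrative):

schema = from_feature_spec({
    'id': tf.FixedLenFeature([], tf.string, default_value=''),
    'labels': tf.VarLenFeature(tf.int64),   # becomes a ListColumnRepresentation
})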
Project: transform | Author: tensorflow
def _from_parse_feature(parse_feature):
  """Convert a single feature spec to a ColumnSchema."""

  # FixedLenFeature
  if isinstance(parse_feature, tf.FixedLenFeature):
    representation = FixedColumnRepresentation(parse_feature.default_value)
    return ColumnSchema(parse_feature.dtype, parse_feature.shape,
                        representation)

  # FixedLenSequenceFeature
  if isinstance(parse_feature, tf.FixedLenSequenceFeature):
    raise ValueError('DatasetSchema does not support '
                     'FixedLenSequenceFeature yet.')

  # VarLenFeature
  if isinstance(parse_feature, tf.VarLenFeature):
    representation = ListColumnRepresentation()
    return ColumnSchema(parse_feature.dtype, [None], representation)

  # SparseFeature
  if isinstance(parse_feature, tf.SparseFeature):
    index_field = SparseIndexField(name=parse_feature.index_key,
                                   is_sorted=parse_feature.already_sorted)
    representation = SparseColumnRepresentation(
        value_field_name=parse_feature.value_key,
        index_fields=[index_field])
    return ColumnSchema(parse_feature.dtype, [parse_feature.size],
                        representation)

  raise ValueError('Cannot interpret feature spec {} with type {}'.format(
      parse_feature, type(parse_feature)))
Project: transform | Author: tensorflow
def infer_column_schema_from_tensor(tensor):
  """Infer a ColumnSchema from a tensor."""
  if isinstance(tensor, tf.SparseTensor):
    # For SparseTensor, there's insufficient information to distinguish between
    # ListColumnRepresentation and SparseColumnRepresentation. So we just guess
    # the former, and callers are expected to handle the latter case on their
    # own (e.g. by requiring the user to provide the schema). This is a policy
    # motivated by the prevalence of VarLenFeature in current tf.Learn code.
    axes = [Axis(None)]
    representation = ListColumnRepresentation()
  else:
    axes = _shape_to_axes(tensor.get_shape(),
                          remove_batch_dimension=True)
    representation = FixedColumnRepresentation()
  return ColumnSchema(tensor.dtype, axes, representation)
Project: LiTeFlow | Author: petrux
def _decode(message):
    features = {
        'key': tf.FixedLenFeature([], tf.int64),
        'vector': tf.VarLenFeature(tf.int64)
    }
    parsed = tf.parse_single_example(
        serialized=message,
        features=features)
    key = parsed['key']
    vector = tf.sparse_tensor_to_dense(parsed['vector'])
    return key, vector
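A hypothetical way to apply _decode with the tf.data API available in TF 1.x (the file path and batch size are made up); because the densified vectors vary in length, each batch is padded:

dataset = (tf.data.TFRecordDataset('vectors.tfrecord')
           .map(_decode)  # -> (key, dense vector) pairs
           .padded_batch(32, padded_shapes=([], [None])))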
Project: Y8M | Author: mpekalski
def frame_example_2_np(seq_example_bytes, 
                       max_quantized_value=2,
                       min_quantized_value=-2):
  feature_names=['rgb','audio']
  feature_sizes = [1024, 128]
  with tf.Graph().as_default():
    contexts, features = tf.parse_single_sequence_example(
        seq_example_bytes,
        context_features={"video_id": tf.FixedLenFeature(
            [], tf.string),
                          "labels": tf.VarLenFeature(tf.int64)},
        sequence_features={
            feature_name : tf.FixedLenSequenceFeature([], dtype=tf.string)
            for feature_name in feature_names
        })

    decoded_features = { name: tf.reshape(
        tf.cast(tf.decode_raw(features[name], tf.uint8), tf.float32),
        [-1, size]) for name, size in zip(feature_names, feature_sizes)
        }
    feature_matrices = {
        name: utils.Dequantize(decoded_features[name],
          max_quantized_value, min_quantized_value) for name in feature_names}

    with tf.Session() as sess:
      vid = sess.run(contexts['video_id'])
      labs = sess.run(contexts['labels'].values)
      rgb = sess.run(feature_matrices['rgb'])
      audio = sess.run(feature_matrices['audio'])

  return vid, labs, rgb, audio


#%% Split frame level file into three video level files: all, 1st half, 2nd half.
Project: Y8M | Author: mpekalski
def build_graph():
    feature_names=['rgb','audio']
    feature_sizes = [1024, 128] 
    max_quantized_value=2
    min_quantized_value=-2

    seq_example_bytes = tf.placeholder(tf.string)
    contexts, features = tf.parse_single_sequence_example(
        seq_example_bytes,
        context_features={"video_id": tf.FixedLenFeature(
            [], tf.string),
                          "labels": tf.VarLenFeature(tf.int64)},
        sequence_features={
            feature_name : tf.FixedLenSequenceFeature([], dtype=tf.string)
            for feature_name in feature_names
        })

    decoded_features = { name: tf.reshape(
        tf.cast(tf.decode_raw(features[name], tf.uint8), tf.float32),
        [-1, size]) for name, size in zip(feature_names, feature_sizes)
        }
    feature_matrices = {
        name: utils.Dequantize(decoded_features[name],
          max_quantized_value, min_quantized_value) for name in feature_names}

    tf.add_to_collection("vid_tsr", contexts['video_id'])
    tf.add_to_collection("labs_tsr", contexts['labels'].values)
    tf.add_to_collection("rgb_tsr", feature_matrices['rgb'])
    tf.add_to_collection("audio_tsr", feature_matrices['audio'])
    tf.add_to_collection("seq_example_bytes", seq_example_bytes)

#   with tf.Session() as sess:
#       writer = tf.summary.FileWriter('./graphs', sess.graph)
Project: XMUNMT | Author: XMUNLP
def input_pipeline(file_pattern, mode, capacity=64):
    keys_to_features = {
        "inputs": tf.VarLenFeature(tf.int64),
        "targets": tf.VarLenFeature(tf.int64)
    }

    items_to_handlers = {
        "inputs": tfexample_decoder.Tensor("inputs"),
        "targets": tfexample_decoder.Tensor("targets")
    }

    # Now the non-trivial case construction.
    with tf.name_scope("examples_queue"):
        training = (mode == "train")
        # Read serialized examples using slim parallel_reader.
        num_epochs = None if training else 1
        data_files = parallel_reader.get_data_files(file_pattern)
        num_readers = min(4 if training else 1, len(data_files))
        _, examples = parallel_reader.parallel_read([file_pattern],
                                                    tf.TFRecordReader,
                                                    num_epochs=num_epochs,
                                                    shuffle=training,
                                                    capacity=2 * capacity,
                                                    min_after_dequeue=capacity,
                                                    num_readers=num_readers)

        decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                     items_to_handlers)

        decoded = decoder.decode(examples, items=list(items_to_handlers))
        examples = {}

        for (field, tensor) in zip(keys_to_features, decoded):
            examples[field] = tensor

        # We do not want int64s, as they are not supported on GPUs.
        return {k: tf.to_int32(v) for (k, v) in six.iteritems(examples)}
Project: SSD_tensorflow_VOC | Author: LevinJ
def read_and_decode_single_example(filename_queue):

    # Unlike the TFRecordWriter, the TFRecordReader is symbolic
    reader = tf.TFRecordReader()
    # One can read a single serialized example from a filename
    # serialized_example is a Tensor of type string.
    _, serialized_example = reader.read(filename_queue)
    # The serialized example is converted back to actual values.
    # One needs to describe the format of the objects to be returned
    features = tf.parse_single_example(
        serialized_example,
        features={
            # We know the length of both fields. If we did not,
            # tf.VarLenFeature could be used
            'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
            'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),
            'image/class/label': tf.FixedLenFeature(
                [], tf.int64, default_value=tf.zeros([], dtype=tf.int64))
        })
    # now return the converted data
    label = features['image/class/label']
    image = features['image/encoded']

#     image = tf.image.decode_jpeg(image, channels=3)
    image_format = features['image/format']



    return label, image, image_format
Project: tefla | Author: openAGI
def example_reading_spec(self):
        data_fields = {
            "inputs": tf.VarLenFeature(tf.int64),
            "targets": tf.VarLenFeature(tf.int64),
            "floats": tf.VarLenFeature(tf.float32),
        }
        data_items_to_decoders = None
        return (data_fields, data_items_to_decoders)
Project: tefla | Author: openAGI
def example_reading_spec(self):
        data_fields = {
            "inputs": tf.VarLenFeature(tf.int64),
            "targets": tf.VarLenFeature(tf.int64),
            "floats": tf.VarLenFeature(tf.float32),
        }
        data_items_to_decoders = None
        return (data_fields, data_items_to_decoders)
Project: tefla | Author: openAGI
def example_reading_spec(self):
        data_fields = {
            "inputs": tf.VarLenFeature(tf.int64),
            "targets": tf.VarLenFeature(tf.int64)
        }
        data_items_to_decoders = None
        return (data_fields, data_items_to_decoders)
Project: Youtube8mdataset_kagglechallenge | Author: jasonlee27
def prepare_reader(self,
                       filename_queue,
                       max_quantized_value=2,
                       min_quantized_value=-2):
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)
        context_features, sequence_features = {"video_id": tf.FixedLenFeature([], tf.string),
                                               "labels": tf.VarLenFeature(tf.int64)}, None
        if self.sequence_data:
            sequence_features = {self.feature_name[0]: tf.FixedLenSequenceFeature([], dtype=tf.string),
                                 self.feature_name[1]: tf.FixedLenSequenceFeature([], dtype=tf.string), }
        else:
            context_features[self.feature_name[0]] = tf.FixedLenFeature(self.feature_size[0], tf.float32)
            context_features[self.feature_name[1]] = tf.FixedLenFeature(self.feature_size[1], tf.float32)

        contexts, features = tf.parse_single_sequence_example(serialized_example,
                                                              context_features=context_features,
                                                              sequence_features=sequence_features)
        labels = (tf.cast(contexts["labels"].values, tf.int64))

        if self.sequence_data:
            decoded_features = tf.reshape(tf.cast(tf.decode_raw(features[self.feature_name[0]], tf.uint8), tf.float32),
                                          [-1, self.feature_size[0]])
            video_matrix = Dequantize(decoded_features, max_quantized_value, min_quantized_value)

            decoded_features = tf.reshape(tf.cast(tf.decode_raw(features[self.feature_name[1]], tf.uint8), tf.float32),
                                          [-1, self.feature_size[1]])
            audio_matrix = Dequantize(decoded_features, max_quantized_value, min_quantized_value)

            num_frames = tf.minimum(tf.shape(decoded_features)[0], self.max_frames)
        else:
            video_matrix = contexts[self.feature_name[0]]
            audio_matrix = contexts[self.feature_name[1]]
            num_frames = tf.constant(-1)

        # Pad or truncate to 'max_frames' frames.
        # video_matrix = resize_axis(video_matrix, 0, self.max_frames)
        return contexts["video_id"], video_matrix, audio_matrix, labels, num_frames
Project: lsdc | Author: febert
def testFromCSVWithFeatureSpec(self):
    if not HAS_PANDAS:
      return
    num_batches = 100
    batch_size = 8

    data_path = _make_test_csv_sparse()
    feature_spec = {
        "int": tf.FixedLenFeature(None, dtypes.int16, np.nan),
        "float": tf.VarLenFeature(dtypes.float16),
        "bool": tf.VarLenFeature(dtypes.bool),
        "string": tf.FixedLenFeature(None, dtypes.string, "")
    }

    pandas_df = pd.read_csv(data_path, dtype={"string": object})
    # Pandas insanely uses NaN for empty cells in a string column.
    # And, we can't use Pandas replace() to fix them because nan != nan
    s = pandas_df["string"]
    for i in range(0, len(s)):
      if isinstance(s[i], float) and math.isnan(s[i]):
        pandas_df.set_value(i, "string", "")
    tensorflow_df = df.TensorFlowDataFrame.from_csv_with_feature_spec(
        [data_path],
        batch_size=batch_size,
        shuffle=False,
        feature_spec=feature_spec)

    # These columns were sparse; re-densify them for comparison
    tensorflow_df["float"] = densify.Densify(np.nan)(tensorflow_df["float"])
    tensorflow_df["bool"] = densify.Densify(np.nan)(tensorflow_df["bool"])

    self._assert_pandas_equals_tensorflow(pandas_df,
                                          tensorflow_df,
                                          num_batches=num_batches,
                                          batch_size=batch_size)
Project: lsdc | Author: febert
def testFromCSVWithFeatureSpec(self):
    if not HAS_PANDAS:
      return
    num_batches = 100
    batch_size = 8

    data_path = _make_test_csv_sparse()
    feature_spec = {
        "int": tf.FixedLenFeature(None, dtypes.int16, np.nan),
        "float": tf.VarLenFeature(dtypes.float16),
        "bool": tf.VarLenFeature(dtypes.bool),
        "string": tf.FixedLenFeature(None, dtypes.string, "")
    }

    pandas_df = pd.read_csv(data_path, dtype={"string": object})
    # Pandas insanely uses NaN for empty cells in a string column.
    # And, we can't use Pandas replace() to fix them because nan != nan
    s = pandas_df["string"]
    for i in range(0, len(s)):
      if isinstance(s[i], float) and math.isnan(s[i]):
        pandas_df.set_value(i, "string", "")
    tensorflow_df = df.TensorFlowDataFrame.from_csv_with_feature_spec(
        [data_path],
        batch_size=batch_size,
        shuffle=False,
        feature_spec=feature_spec)

    # These columns were sparse; re-densify them for comparison
    tensorflow_df["float"] = densify.Densify(np.nan)(tensorflow_df["float"])
    tensorflow_df["bool"] = densify.Densify(np.nan)(tensorflow_df["bool"])

    self._assert_pandas_equals_tensorflow(pandas_df,
                                          tensorflow_df,
                                          num_batches=num_batches,
                                          batch_size=batch_size)