Python tensorflow.python.framework.dtypes module: string usage examples

The following 50 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.python.framework.dtypes.string.
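Before the examples, a minimal sketch (assuming TensorFlow 1.x) of what `dtypes.string` actually is: a `DType` constant, not a function, passed wherever a `dtype` argument is expected.

```python
# Minimal sketch (TensorFlow 1.x assumed): dtypes.string is a DType constant.
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops

names = ops.convert_to_tensor(["brain", "salad", "surgery"],
                              dtype=dtypes.string)
print(names.dtype == dtypes.string)  # True
print(dtypes.string.is_integer)      # False: string is neither int nor float
```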

Project: lsdc | Author: febert
def next_key(self):
    """The key names of the next (in iteration) truncated unrolled examples.

    The format of the key is:

    ```python
    "%05d_of_%05d:%s" % (sequence + 1, sequence_count, original_key)
    ```

    if `sequence + 1 < sequence_count`, otherwise:

    ```python
    "STOP:%s" % original_key
    ```

    where `original_key` is the unique key read in by the prefetcher.

    Returns:
      A string vector of length `batch_size`, the keys.
    """
    return self._state_saver._received_next_key
Project: lsdc | Author: febert
def _store_index_maps(self, sequences, context, states):
    """Prepares the internal dictionaries _name_to_index and _index_to_name.

    These dictionaries are used to keep track of indices into the barrier.

    Args:
      sequences: `OrderedDict` of string, `Tensor` pairs.
      context: `OrderedDict` of string, `Tensor` pairs.
      states: `OrderedDict` of string, `Tensor` pairs.
    """
    assert isinstance(sequences, dict)
    assert isinstance(context, dict)
    assert isinstance(states, dict)
    self._name_to_index = dict((name, ix) for (ix, name) in enumerate(
        ["__length", "__total_length", "__next_key",
         "__sequence", "__sequence_count"]
        + ["__sequence__%s" % k for k in sequences.keys()]
        + ["__context__%s" % k for k in context.keys()]
        + ["__state__%s" % k for k in states.keys()]))
    self._index_to_name = [
        name for (name, _) in sorted(
            self._name_to_index.items(), key=lambda n_ix: n_ix[1])]
Project: lsdc | Author: febert
def _make_test_csv_sparse():
  f = tempfile.NamedTemporaryFile(
      dir=tf.test.get_temp_dir(), delete=False, mode="w")
  w = csv.writer(f)
  w.writerow(["int", "float", "bool", "string"])
  for _ in range(100):
    # leave columns empty; these will be read as default value (e.g. 0 or NaN)
    intvalue = np.random.randint(-10, 10) if np.random.rand() > 0.5 else ""
    floatvalue = np.random.rand() if np.random.rand() > 0.5 else ""
    boolvalue = int(np.random.rand() > 0.3) if np.random.rand() > 0.5 else ""
    stringvalue = (("S: %.4f" % np.random.rand())
                   if np.random.rand() > 0.5 else "")

    row = [intvalue, floatvalue, boolvalue, stringvalue]
    w.writerow(row)
  f.close()
  return f.name
Project: lsdc | Author: febert
def __new__(cls,
              column_name,
              hash_bucket_size,
              combiner="sum",
              dtype=dtypes.string):

    if dtype != dtypes.string and not dtype.is_integer:
      raise ValueError("dtype must be string or integer. "
                       "dtype: {}, column_name: {}".format(dtype, column_name))

    return super(_SparseColumnHashed, cls).__new__(
        cls,
        column_name,
        bucket_size=hash_bucket_size,
        combiner=combiner,
        dtype=dtype)
Project: Tensorflow-SegNet | Author: tkuanlun350
def CamVidInputs(image_filenames, label_filenames, batch_size):

  images = ops.convert_to_tensor(image_filenames, dtype=dtypes.string)
  labels = ops.convert_to_tensor(label_filenames, dtype=dtypes.string)

  filename_queue = tf.train.slice_input_producer([images, labels], shuffle=True)

  image, label = CamVid_reader(filename_queue)
  reshaped_image = tf.cast(image, tf.float32)

  min_fraction_of_examples_in_queue = 0.4
  min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                           min_fraction_of_examples_in_queue)
  print ('Filling queue with %d CamVid images before starting to train. '
         'This will take a few minutes.' % min_queue_examples)

  # Generate a batch of images and labels by building up a queue of examples.
  return _generate_image_and_label_batch(reshaped_image, label,
                                         min_queue_examples, batch_size,
                                         shuffle=True)
Project: TF-SegNet | Author: mathildor
def dataset_reader(filename_queue): #prev name: CamVid_reader

    image_filename = filename_queue[0] #tensor of type string
    label_filename = filename_queue[1] #tensor of type string

    #get png encoded image
    imageValue = tf.read_file(image_filename)
    labelValue = tf.read_file(label_filename)

    #decodes a png image into a uint8 or uint16 tensor
    #returns a tensor of type dtype with shape [height, width, depth]
    image_bytes = tf.image.decode_png(imageValue)
    label_bytes = tf.image.decode_png(labelValue) #Labels are png, not jpeg

    image = tf.reshape(image_bytes, (FLAGS.image_h, FLAGS.image_w, FLAGS.image_c))
    label = tf.reshape(label_bytes, (FLAGS.image_h, FLAGS.image_w, 1))

    return image, label
Project: TF-SegNet | Author: mathildor
def dataset_inputs(image_filenames, label_filenames, batch_size, running_train_set=True):
  images = ops.convert_to_tensor(image_filenames, dtype=dtypes.string)
  labels = ops.convert_to_tensor(label_filenames, dtype=dtypes.string)


  filename_queue = tf.train.slice_input_producer([images, labels], shuffle=True)

  image, label = dataset_reader(filename_queue)
  reshaped_image = tf.cast(image, tf.float32)
  min_fraction_of_examples_in_queue = FLAGS.fraction_of_examples_in_queue
  min_queue_examples = int(FLAGS.num_examples_epoch_train *
                           min_fraction_of_examples_in_queue)

  print ('Filling queue with %d input images before starting to train. '
         'This may take some time.' % min_queue_examples)

  # Generate a batch of images and labels by building up a queue of examples.
  return _generate_image_and_label_batch(reshaped_image, label,
                                         min_queue_examples, batch_size,
                                         shuffle=True)
Project: polyaxon | Author: polyaxon
def decode_example(self, serialized_example, item_handler, image_format):
        """Decodes the given serialized example with the specified item handler.

        Args:
            serialized_example: a serialized TF example string.
            item_handler: the item handler used to decode the image.
            image_format: the image format being decoded.

        Returns:
            the decoded image found in the serialized Example.
        """
        serialized_example = array_ops.reshape(serialized_example, shape=[])
        decoder = TFExampleDecoder(
            keys_to_features={
                'image/encoded': tf.FixedLenFeature((), dtypes.string, default_value=''),
                'image/format': tf.FixedLenFeature((), dtypes.string, default_value=image_format),
            },
            items_to_handlers={'image': item_handler})
        [tf_image] = decoder.decode(serialized_example, ['image'])
        return tf_image
Project: polyaxon | Author: polyaxon
def test_decode_example_with_string_tensor(self):
        tensor_shape = (2, 3, 1)
        np_array = np.array([[['ab'], ['cd'], ['ef']], [['ghi'], ['jkl'], ['mnop']]])

        example = example_pb2.Example(features=feature_pb2.Features(feature={
            'labels': self._bytes_feature(np_array),
        }))

        serialized_example = example.SerializeToString()

        with self.test_session():
            serialized_example = array_ops.reshape(serialized_example, shape=[])
            keys_to_features = {
                'labels': parsing_ops.FixedLenFeature(
                    tensor_shape, dtypes.string,
                    default_value=constant_op.constant('', shape=tensor_shape, dtype=dtypes.string))
            }
            items_to_handlers = {'labels': tfexample_decoder.Tensor('labels')}
            decoder = TFExampleDecoder(keys_to_features, items_to_handlers)
            [tf_labels] = decoder.decode(serialized_example, ['labels'])
            labels = tf_labels.eval()

            labels = labels.astype(np_array.dtype)
            self.assertTrue(np.array_equal(np_array, labels))
Project: polyaxon | Author: polyaxon
def _create_tfrecord_dataset(tmpdir):
    if not gfile.Exists(tmpdir):
        gfile.MakeDirs(tmpdir)

    data_sources = test_utils.create_tfrecord_files(tmpdir, num_files=1)

    keys_to_features = {
        'image/encoded': tf.FixedLenFeature(shape=(), dtype=dtypes.string, default_value=''),
        'image/format': tf.FixedLenFeature(shape=(), dtype=dtypes.string, default_value='jpeg'),
        'image/class/label': tf.FixedLenFeature(
            shape=[1], dtype=dtypes.int64,
            default_value=array_ops.zeros([1], dtype=dtypes.int64))
    }

    items_to_handlers = {
        'image': tfslim.tfexample_decoder.Image(),
        'label': tfslim.tfexample_decoder.Tensor('image/class/label'),
    }

    decoder = TFExampleDecoder(keys_to_features, items_to_handlers)

    return Dataset(
        data_sources=data_sources, reader=tf.TFRecordReader, decoder=decoder, num_samples=100)
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def testMutableHashTableDuplicateInsert(self):
    with self.test_session():
      default_val = -1
      keys = constant_op.constant(["brain", "salad", "surgery", "brain"])
      values = constant_op.constant([0, 1, 2, 3], dtypes.int64)
      table = lookup.MutableHashTable(dtypes.string, dtypes.int64,
                                      default_val)
      self.assertAllEqual(0, table.size().eval())

      table.insert(keys, values).run()
      self.assertAllEqual(3, table.size().eval())

      input_string = constant_op.constant(["brain", "salad", "tank"])
      output = table.lookup(input_string)

      result = output.eval()
      self.assertAllEqual([3, 1, -1], result)
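Distilled from the tests above, a minimal usage sketch (assuming TF 1.x and `tf.contrib.lookup`) of a string-keyed mutable table outside a test harness:

```python
# Sketch: string -> int64 MutableHashTable with a miss default of -1.
import tensorflow as tf
from tensorflow.contrib import lookup
from tensorflow.python.framework import dtypes

table = lookup.MutableHashTable(dtypes.string, dtypes.int64, default_value=-1)
insert = table.insert(tf.constant(["brain", "salad"]),
                      tf.constant([0, 1], dtypes.int64))
with tf.Session() as sess:
    sess.run(insert)
    print(sess.run(table.lookup(tf.constant(["salad", "tank"]))))  # [1, -1]
```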
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def testMutableHashTableFindHighRank(self):
    with self.test_session():
      default_val = -1
      keys = constant_op.constant(["brain", "salad", "surgery"])
      values = constant_op.constant([0, 1, 2], dtypes.int64)
      table = lookup.MutableHashTable(dtypes.string, dtypes.int64,
                                      default_val)

      table.insert(keys, values).run()
      self.assertAllEqual(3, table.size().eval())

      input_string = constant_op.constant(
          [["brain", "salad"], ["tank", "tarkus"]])
      output = table.lookup(input_string)
      self.assertAllEqual([2, 2], output.get_shape())

      result = output.eval()
      self.assertAllEqual([[0, 1], [-1, -1]], result)
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def testMutableHashTableOfTensorsFindHighRank(self):
    with self.test_session():
      default_val = constant_op.constant([-1, -1, -1], dtypes.int64)
      keys = constant_op.constant(["brain", "salad", "surgery"])
      values = constant_op.constant([[0, 1, 2], [2, 3, 4], [4, 5, 6]],
                                    dtypes.int64)
      table = lookup.MutableHashTable(dtypes.string, dtypes.int64,
                                      default_val)

      table.insert(keys, values).run()
      self.assertAllEqual(3, table.size().eval())

      input_string = constant_op.constant(
          [["brain", "salad"], ["tank", "tarkus"]])
      output = table.lookup(input_string)
      self.assertAllEqual([2, 2, 3], output.get_shape())

      result = output.eval()
      self.assertAllEqual(
          [[[0, 1, 2], [2, 3, 4]], [[-1, -1, -1], [-1, -1, -1]]], result)
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def testMutableHashTableWithTensorDefault(self):
    with self.test_session():
      default_val = constant_op.constant(-1, dtypes.int64)
      keys = constant_op.constant(["brain", "salad", "surgery"])
      values = constant_op.constant([0, 1, 2], dtypes.int64)
      table = lookup.MutableHashTable(dtypes.string, dtypes.int64,
                                      default_val)

      table.insert(keys, values).run()
      self.assertAllEqual(3, table.size().eval())

      input_string = constant_op.constant(["brain", "salad", "tank"])
      output = table.lookup(input_string)

      result = output.eval()
      self.assertAllEqual([0, 1, -1], result)
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def testMutableHashTableStringFloat(self):
    with self.test_session():
      default_val = -1.5
      keys = constant_op.constant(["brain", "salad", "surgery"])
      values = constant_op.constant([0, 1.1, 2.2], dtypes.float32)
      table = lookup.MutableHashTable(dtypes.string, dtypes.float32,
                                      default_val)
      self.assertAllEqual(0, table.size().eval())

      table.insert(keys, values).run()
      self.assertAllEqual(3, table.size().eval())

      input_string = constant_op.constant(["brain", "salad", "tank"])
      output = table.lookup(input_string)

      result = output.eval()
      self.assertAllClose([0, 1.1, -1.5], result)
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def testMapStringToFloat(self):
    with self.test_session():
      keys = constant_op.constant(["a", "b", "c"], dtypes.string)
      values = constant_op.constant([0.0, 1.1, 2.2], dtypes.float32)
      default_value = constant_op.constant(-1.5, dtypes.float32)
      table = lookup.MutableDenseHashTable(
          dtypes.string,
          dtypes.float32,
          default_value=default_value,
          empty_key="")
      self.assertAllEqual(0, table.size().eval())

      table.insert(keys, values).run()
      self.assertAllEqual(3, table.size().eval())

      input_string = constant_op.constant(["a", "b", "d"], dtypes.string)
      output = table.lookup(input_string)
      self.assertAllEqual([3], output.get_shape())

      result = output.eval()
      self.assertAllClose([0, 1.1, -1.5], result)
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def testInitializeTable(self):
    vocabulary_file = self._createVocabFile("one_column_1.txt")

    with self.test_session():
      default_value = -1
      table = lookup.HashTable(
          lookup.TextFileInitializer(vocabulary_file, dtypes.string,
                                     lookup.TextFileIndex.WHOLE_LINE,
                                     dtypes.int64,
                                     lookup.TextFileIndex.LINE_NUMBER),
          default_value)
      table.init.run()

      input_string = constant_op.constant(["brain", "salad", "tank"])
      output = table.lookup(input_string)

      result = output.eval()
      self.assertAllEqual([0, 1, -1], result)
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def testInitializeIndexTable(self):
    vocabulary_file = self._createVocabFile("one_column_2.txt")

    with self.test_session():
      default_value = "UNK"
      key_index = lookup.TextFileIndex.LINE_NUMBER
      value_index = lookup.TextFileIndex.WHOLE_LINE
      table = lookup.HashTable(
          lookup.TextFileInitializer(vocabulary_file, dtypes.int64,
                                     key_index, dtypes.string, value_index),
          default_value)
      table.init.run()

      input_values = constant_op.constant([0, 1, 2, 3], dtypes.int64)
      output = table.lookup(input_values)

      result = output.eval()
      self.assertAllEqual([b"brain", b"salad", b"surgery", b"UNK"], result)
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def testMultiColumn(self):
    vocabulary_file = os.path.join(self.get_temp_dir(), "three_columns.txt")
    with open(vocabulary_file, "w") as f:
      f.write("\n".join(["0\tbrain\t1", "1\tsalad\t5", "2\tsurgery\t6"]) + "\n")

    with self.test_session():
      default_value = -1
      key_index = 1
      value_index = 2

      table = lookup.HashTable(
          lookup.TextFileInitializer(vocabulary_file, dtypes.string,
                                     key_index, dtypes.int64, value_index),
          default_value)
      table.init.run()

      input_string = constant_op.constant(["brain", "salad", "surgery"])
      output = table.lookup(input_string)

      result = output.eval()
      self.assertAllEqual([1, 5, 6], result)
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def _make_test_csv_sparse():
  f = tempfile.NamedTemporaryFile(
      dir=test.get_temp_dir(), delete=False, mode="w")
  w = csv.writer(f)
  w.writerow(["int", "float", "bool", "string"])
  for _ in range(100):
    # leave columns empty; these will be read as default value (e.g. 0 or NaN)
    intvalue = np.random.randint(-10, 10) if np.random.rand() > 0.5 else ""
    floatvalue = np.random.rand() if np.random.rand() > 0.5 else ""
    boolvalue = int(np.random.rand() > 0.3) if np.random.rand() > 0.5 else ""
    stringvalue = (("S: %.4f" % np.random.rand()) if np.random.rand() > 0.5 else
                   "")

    row = [intvalue, floatvalue, boolvalue, stringvalue]
    w.writerow(row)
  f.close()
  return f.name
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def testParse(self):
    parser = csv_parser.CSVParser(
        column_names=["col0", "col1", "col2"], default_values=["", "", 1.4])
    csv_lines = ["one,two,2.5", "four,five,6.0"]
    csv_input = constant_op.constant(
        csv_lines, dtype=dtypes.string, shape=[len(csv_lines)])
    csv_column = mocks.MockSeries("csv", csv_input)
    expected_output = [
        np.array([b"one", b"four"]), np.array([b"two", b"five"]),
        np.array([2.5, 6.0])
    ]
    output_columns = parser(csv_column)
    self.assertEqual(3, len(output_columns))
    cache = {}
    output_tensors = [o.build(cache) for o in output_columns]
    self.assertEqual(3, len(output_tensors))
    with self.test_session() as sess:
      output = sess.run(output_tensors)
      for expected, actual in zip(expected_output, output):
        np.testing.assert_array_equal(actual, expected)
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def make_parsing_export_strategy(feature_columns, exports_to_keep=5):
  """Create an ExportStrategy for use with Experiment, using `FeatureColumn`s.

  Creates a SavedModel export that expects to be fed with a single string
  Tensor containing serialized tf.Examples.  At serving time, incoming
  tf.Examples will be parsed according to the provided `FeatureColumn`s.

  Args:
    feature_columns: An iterable of `FeatureColumn`s representing the features
      that must be provided at serving time (excluding labels!).
    exports_to_keep: Number of exports to keep.  Older exports will be
      garbage-collected.  Defaults to 5.  Set to None to disable garbage
      collection.

  Returns:
    An ExportStrategy that can be passed to the Experiment constructor.
  """
  feature_spec = feature_column.create_feature_spec_for_parsing(feature_columns)
  serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec)
  return make_export_strategy(serving_input_fn, exports_to_keep=exports_to_keep)
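The parsing that the resulting SavedModel performs at serving time amounts to `tf.parse_example` over a string placeholder; a hedged sketch with a hypothetical feature spec (TF 1.x core API only):

```python
# Sketch of serving-time parsing: serialized tf.Examples arrive as a
# dtypes.string vector and are parsed with a feature spec.
import tensorflow as tf
from tensorflow.python.framework import dtypes

serialized = tf.placeholder(dtypes.string, shape=[None], name="examples")
feature_spec = {                                   # hypothetical features
    "words": tf.VarLenFeature(dtypes.string),
    "score": tf.FixedLenFeature([1], dtypes.float32),
}
features = tf.parse_example(serialized, feature_spec)
```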
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def testExportMonitorInputFeatureKeyNoFeatures(self):
    random.seed(42)
    input_feature_key = 'my_example_key'

    def _serving_input_fn():
      return {
          input_feature_key:
              array_ops.placeholder(
                  dtype=dtypes.string, shape=(1,))
      }, None

    monitor = learn.monitors.ExportMonitor(
        every_n_steps=1,
        export_dir=tempfile.mkdtemp() + 'export/',
        input_fn=_serving_input_fn,
        input_feature_key=input_feature_key,
        exports_to_keep=2,
        signature_fn=export.generic_signature_fn)
    regressor = learn.LinearRegressor(feature_columns=[_X_COLUMN])
    with self.assertRaisesRegexp(KeyError, _X_KEY):
      regressor.fit(input_fn=_training_input_fn, steps=10, monitors=[monitor])
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def testExportMonitorInputFeature(self):
    random.seed(42)
    input_feature_key = 'my_example_key'

    def _serving_input_fn():
      return {
          input_feature_key:
              array_ops.placeholder(
                  dtype=dtypes.string, shape=(1,)),
          _X_KEY:
              random_ops.random_uniform(
                  shape=(1,), minval=0.0, maxval=1000.0)
      }, None

    export_dir = tempfile.mkdtemp() + 'export/'
    monitor = learn.monitors.ExportMonitor(
        every_n_steps=1,
        export_dir=export_dir,
        input_fn=_serving_input_fn,
        input_feature_key=input_feature_key,
        exports_to_keep=2,
        signature_fn=export.generic_signature_fn)
    regressor = learn.LinearRegressor(feature_columns=[_X_COLUMN])
    regressor.fit(input_fn=_training_input_fn, steps=10, monitors=[monitor])
    self._assert_export(monitor, export_dir, 'generic_signature')
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def test_dense(self):
    """Tests only dense inputs.
    """
    op = sparse_feature_cross_op.sparse_feature_cross([
        constant_op.constant([['batch1-FC1-F1', 'batch1-FC1-F2'],
                              ['batch2-FC1-F1', 'batch2-FC1-F2']],
                             dtypes.string),
        constant_op.constant([['batch1-FC2-F1', 'batch1-FC2-F2'],
                              ['batch2-FC2-F1', 'batch2-FC2-F2']],
                             dtypes.string),
    ])
    expected_out = self._sparse_tensor([[
        'batch1-FC1-F1_X_batch1-FC2-F1', 'batch1-FC1-F1_X_batch1-FC2-F2',
        'batch1-FC1-F2_X_batch1-FC2-F1', 'batch1-FC1-F2_X_batch1-FC2-F2'
    ], [
        'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2',
        'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
    ]])
    with self.test_session() as sess:
      self._assert_sparse_tensor_equals(expected_out, sess.run(op))
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def test_integer_mixed_string_dense(self):
    """Tests mixed dense inputs.
    """
    op = sparse_feature_cross_op.sparse_feature_cross([
        constant_op.constant([[11, 333], [55555, 999999]], dtypes.int64),
        constant_op.constant([['batch1-FC2-F1', 'batch1-FC2-F2'],
                              ['batch2-FC2-F1', 'batch2-FC2-F2']],
                             dtypes.string),
    ])
    expected_out = self._sparse_tensor([[
        '11_X_batch1-FC2-F1', '11_X_batch1-FC2-F2', '333_X_batch1-FC2-F1',
        '333_X_batch1-FC2-F2'
    ], [
        '55555_X_batch2-FC2-F1', '55555_X_batch2-FC2-F2',
        '999999_X_batch2-FC2-F1', '999999_X_batch2-FC2-F2'
    ]])
    with self.test_session() as sess:
      self._assert_sparse_tensor_equals(expected_out, sess.run(op))
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def __new__(cls,
              column_name,
              hash_bucket_size,
              combiner="sum",
              dtype=dtypes.string):

    if dtype != dtypes.string and not dtype.is_integer:
      raise ValueError("dtype must be string or integer. "
                       "dtype: {}, column_name: {}".format(dtype, column_name))

    return super(_SparseColumnHashed, cls).__new__(
        cls,
        column_name,
        bucket_size=hash_bucket_size,
        combiner=combiner,
        dtype=dtype)
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def __new__(cls,
              column_name,
              vocabulary_file,
              num_oov_buckets=0,
              vocab_size=None,
              default_value=-1,
              combiner="sum",
              dtype=dtypes.string):

    if dtype != dtypes.string and not dtype.is_integer:
      raise ValueError("dtype must be string or integer. "
                       "dtype: {}, column_name: {}".format(dtype, column_name))

    return super(_SparseColumnVocabulary, cls).__new__(
        cls,
        column_name,
        combiner=combiner,
        lookup_config=_SparseIdLookupConfig(
            vocabulary_file=vocabulary_file,
            num_oov_buckets=num_oov_buckets,
            vocab_size=vocab_size,
            default_value=default_value),
        dtype=dtype)
Project: DeepLearning_VirtualReality_BigData_Project | Author: rashmitripathi
def testWeightedSparseColumnDtypes(self):
    ids = fc.sparse_column_with_keys("ids", ["marlo", "omar", "stringer"])
    weighted_ids = fc.weighted_sparse_column(ids, "weights")
    self.assertDictEqual({
        "ids": parsing_ops.VarLenFeature(dtypes.string),
        "weights": parsing_ops.VarLenFeature(dtypes.float32)
    }, weighted_ids.config)

    weighted_ids = fc.weighted_sparse_column(ids, "weights", dtype=dtypes.int32)
    self.assertDictEqual({
        "ids": parsing_ops.VarLenFeature(dtypes.string),
        "weights": parsing_ops.VarLenFeature(dtypes.int32)
    }, weighted_ids.config)

    with self.assertRaisesRegexp(ValueError,
                                 "dtype is not convertible to float"):
      weighted_ids = fc.weighted_sparse_column(
          ids, "weights", dtype=dtypes.string)
Project: taskcv-2017-public | Author: VisionLearningGroup
def tf_ops(self, capacity=32):
        images = ops.convert_to_tensor(self._image_fn_list, dtype=dtypes.string)
        labels = ops.convert_to_tensor(self._label_list, dtype=dtypes.int32)

        # Makes an input queue
        im_fn_q, labl_q = tf.train.slice_input_producer(
            [images, labels], capacity=capacity, shuffle=True)

        file_contents_q = tf.read_file(im_fn_q)
        im_q = self._decoder(file_contents_q, channels=3)

        return im_q, labl_q
Project: lsdc | Author: febert
def _shard_indices(self, keys):
    if self._key_dtype == dtypes.string:
      indices = string_ops.string_to_hash_bucket_fast(keys, self._num_shards)
    else:
      indices = math_ops.mod(keys, self._num_shards)
    return math_ops.cast(indices, dtypes.int32)
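A short sketch of the same dispatch using the public op (TF 1.x): string keys hash into `num_shards` buckets, while integer keys would instead be taken modulo the shard count.

```python
# String keys are assigned to shards via a deterministic fast hash.
import tensorflow as tf

keys = tf.constant(["brain", "salad", "surgery"])
indices = tf.string_to_hash_bucket_fast(keys, num_buckets=4)  # int64 in [0, 4)
with tf.Session() as sess:
    print(sess.run(indices))
```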
Project: lsdc | Author: febert
def __init__(self, length, key, sequences, context):
    length = ops.convert_to_tensor(length, name="length")
    key = ops.convert_to_tensor(key, name="key")
    if not isinstance(sequences, dict):
      raise TypeError("sequences must be a dict")
    if not isinstance(context, dict):
      raise TypeError("context must be a dict")
    if not sequences:
      raise ValueError("must have at least one sequence tensor")
    for k in sequences.keys():
      if not isinstance(k, six.string_types):
        raise TypeError("sequence key must be string: %s" % k)
      if ":" in k:
        raise ValueError("sequence key may not have a colon: '%s'" % k)
    for k in context.keys():
      if not isinstance(k, six.string_types):
        raise TypeError("context key must be string: %s" % k)
      if ":" in k:
        raise ValueError("context key may not have a colon: '%s'" % k)
    sequences = dict(
        (k, ops.convert_to_tensor(v, name="sequence_%s" % k))
        for k, v in sequences.items())
    context = dict(
        (k, ops.convert_to_tensor(v, name="context_%s" % k))
        for k, v in context.items())
    self._length = length
    self._key = key
    self._sequences = sequences
    self._context = context
Project: lsdc | Author: febert
def key(self):
    """The key names of the given truncated unrolled examples.

    The format of the key is:

    ```python
    "%05d_of_%05d:%s" % (sequence, sequence_count, original_key)
    ```

    where `original_key` is the unique key read in by the prefetcher.

    Returns:
      A string vector of length `batch_size`, the keys.
    """
    return self._state_saver._received_keys
Project: lsdc | Author: febert
def _create_barrier(self):
    """Create the barrier.

    This method initializes the Barrier object with the right types and shapes.
    """
    # Create the barrier
    sequence_dtypes = [v.dtype for k, v in self._sorted_sequences.items()]
    context_dtypes = [v.dtype for k, v in self._sorted_context.items()]
    state_dtypes = [v.dtype for k, v in self._sorted_states.items()]
    types = ([dtypes.int32,   # length
              dtypes.int32,   # total_length
              dtypes.string,  # next_keys
              dtypes.int32,   # sequence
              dtypes.int32]   # expanded_sequence_count
             + sequence_dtypes + context_dtypes + state_dtypes)
    sequence_shapes = [
        [self._num_unroll] + self._sorted_sequences[k].get_shape().as_list()[1:]
        for k in self._sorted_sequences.keys()]
    context_shapes = [
        self._sorted_context[k].get_shape().as_list()
        for k in self._sorted_context.keys()]
    state_shapes = [
        self._sorted_states[k].get_shape().as_list()
        for k in self._sorted_states.keys()]
    shapes = ([(),  # length
               (),  # total_length
               (),  # next_keys
               (),  # sequence
               ()]  # expanded_sequence_count
              + sequence_shapes + context_shapes + state_shapes)

    self._barrier = data_flow_ops.Barrier(types=types, shapes=shapes)
Project: lsdc | Author: febert
def initialize(self, table):
    """Initializes the table from a text file.

    Args:
      table: The table to be initialized.

    Returns:
      The operation that initializes the table.

    Raises:
      TypeError: when the keys and values data types do not match the table
      key and value data types.
    """
    # pylint: disable=protected-access
    table._check_table_dtypes(self.key_dtype, self.value_dtype)
    with ops.name_scope(self._name, "text_file_init", [table]) as scope:
      filename = ops.convert_to_tensor(self._filename,
                                       dtypes.string,
                                       name="asset_filepath")
      init_op = gen_data_flow_ops._initialize_table_from_text_file(
          table.table_ref,
          filename,
          self._key_index,
          self._value_index,
          -1 if self._vocab_size is None else self._vocab_size,
          self._delimiter,
          name=scope)
    # pylint: enable=protected-access
    ops.add_to_collection(ops.GraphKeys.TABLE_INITIALIZERS, init_op)
    ops.add_to_collection(ops.GraphKeys.ASSET_FILEPATHS, filename)
    return init_op
Project: lsdc | Author: febert
def _file_path_value(self, path_tensor):
    """Returns the filepath value stored in constant `path_tensor`."""
    if not isinstance(path_tensor, ops.Tensor):
      raise TypeError("tensor is not a Tensor")
    if path_tensor.op.type != "Const":
      raise TypeError("Only constants tensor are supported")
    if path_tensor.dtype != dtypes.string:
      raise TypeError("File paths should be string")
    str_value = path_tensor.op.get_attr("value").string_val
    if len(str_value) != 1:
      raise TypeError("Only scalar tensors are supported")
    return str_value[0]
Project: lsdc | Author: febert
def _make_test_csv():
  f = tempfile.NamedTemporaryFile(
      dir=tf.test.get_temp_dir(), delete=False, mode="w")
  w = csv.writer(f)
  w.writerow(["int", "float", "bool", "string"])
  for _ in range(100):
    intvalue = np.random.randint(-10, 10)
    floatvalue = np.random.rand()
    boolvalue = int(np.random.rand() > 0.3)
    stringvalue = "S: %.4f" % np.random.rand()

    row = [intvalue, floatvalue, boolvalue, stringvalue]
    w.writerow(row)
  f.close()
  return f.name
Project: lsdc | Author: febert
def testFromCSVWithFeatureSpec(self):
    if not HAS_PANDAS:
      return
    num_batches = 100
    batch_size = 8

    data_path = _make_test_csv_sparse()
    feature_spec = {
        "int": tf.FixedLenFeature(None, dtypes.int16, np.nan),
        "float": tf.VarLenFeature(dtypes.float16),
        "bool": tf.VarLenFeature(dtypes.bool),
        "string": tf.FixedLenFeature(None, dtypes.string, "")
    }

    pandas_df = pd.read_csv(data_path, dtype={"string": object})
    # Pandas insanely uses NaN for empty cells in a string column.
    # And, we can't use Pandas replace() to fix them because nan != nan
    s = pandas_df["string"]
    for i in range(0, len(s)):
      if isinstance(s[i], float) and math.isnan(s[i]):
        pandas_df.set_value(i, "string", "")
    tensorflow_df = df.TensorFlowDataFrame.from_csv_with_feature_spec(
        [data_path],
        batch_size=batch_size,
        shuffle=False,
        feature_spec=feature_spec)

    # These columns were sparse; re-densify them for comparison
    tensorflow_df["float"] = densify.Densify(np.nan)(tensorflow_df["float"])
    tensorflow_df["bool"] = densify.Densify(np.nan)(tensorflow_df["bool"])

    self._assert_pandas_equals_tensorflow(pandas_df,
                                          tensorflow_df,
                                          num_batches=num_batches,
                                          batch_size=batch_size)
Project: lsdc | Author: febert
def _dtype_to_nan(dtype):
  if dtype is dtypes.string:
    return b""
  elif dtype.is_integer:
    return np.nan
  elif dtype.is_floating:
    return np.nan
  elif dtype is dtypes.bool:
    return np.nan
  else:
    raise ValueError("Can't parse type without NaN into sparse tensor: %s" %
                     dtype)
Project: lsdc | Author: febert
def split(self, index_series, proportion, batch_size=None):
    """Deterministically split a `DataFrame` into two `DataFrame`s.

    Note this split is only as deterministic as the underlying hash function;
    see `tf.string_to_hash_bucket_fast`.  The hash function is deterministic
    for a given binary, but may change occasionally.  The only way to achieve
    an absolute guarantee that the split `DataFrame`s do not change across runs
    is to materialize them.

    Note too that the allocation of a row to one partition or the
    other is evaluated independently for each row, so the exact number of rows
    in each partition is binomially distributed.

    Args:
      index_series: a `Series` of unique strings, whose hash will determine the
        partitioning; or the name in this `DataFrame` of such a `Series`.
        (This `Series` must contain strings because TensorFlow provides hash
        ops only for strings, and there are no number-to-string converter ops.)
      proportion: The proportion of the rows to select for the 'left'
        partition; the remaining (1 - proportion) rows form the 'right'
        partition.
      batch_size: the batch size to use when rebatching the left and right
        `DataFrame`s.  If None (default), the `DataFrame`s are not rebatched;
        thus their batches will have variable sizes, according to which rows
        are selected from each batch of the original `DataFrame`.

    Returns:
      Two `DataFrame`s containing the partitioned rows.
    """
    if isinstance(index_series, str):
      index_series = self[index_series]
    left_mask, = split_mask.SplitMask(proportion)(index_series)
    right_mask = ~left_mask
    left_rows = self.select_rows(left_mask)
    right_rows = self.select_rows(right_mask)

    if batch_size:
      left_rows = left_rows.batch(batch_size=batch_size, shuffle=False)
      right_rows = right_rows.batch(batch_size=batch_size, shuffle=False)

    return left_rows, right_rows
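The mask computation itself is not shown here; as a hypothetical reconstruction (the real logic lives in `split_mask.SplitMask`), the hash-and-threshold idea described in the docstring looks roughly like this:

```python
# Hypothetical sketch of the deterministic split mask: hash each key into a
# large bucket space, then threshold at proportion * num_buckets.
import tensorflow as tf

def approx_split_mask(index_strings, proportion, num_buckets=1000000):
    buckets = tf.string_to_hash_bucket_fast(index_strings, num_buckets)
    return buckets < int(proportion * num_buckets)  # True -> 'left' partition
```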
Project: lsdc | Author: febert
def key(self):
    """Returns a string which will be used as a key when we do sorting."""
    pass
Project: lsdc | Author: febert
def insert_transformed_feature(self, columns_to_tensors):
    """Apply transformation and inserts it into columns_to_tensors.

    Args:
      columns_to_tensors: A mapping from feature columns to tensors. 'string'
        key means a base feature (not-transformed). It can have _FeatureColumn
        as a key too. That means that _FeatureColumn is already transformed.
    """
    raise NotImplementedError("Transform is not implemented for {}.".format(
        self))
Project: lsdc | Author: febert
def key(self):
    """Returns a string which will be used as a key when we do sorting."""
    return "{}".format(self)
Project: lsdc | Author: febert
def sparse_column_with_integerized_feature(column_name,
                                           bucket_size,
                                           combiner=None,
                                           dtype=dtypes.int64):
  """Creates an integerized _SparseColumn.

  Use this when your features are already pre-integerized into int64 IDs.
  output_id = input_feature

  Args:
    column_name: A string defining sparse column name.
    bucket_size: An int that is > 1. The number of buckets. It should be bigger
      than maximum feature. In other words features in this column should be an
      int64 in range [0, bucket_size)
    combiner: A string specifying how to reduce if the sparse column is
      multivalent. Currently "mean", "sqrtn" and "sum" are supported, with
      "sum" the default:
        * "sum": do not normalize features in the column
        * "mean": do l1 normalization on features in the column
        * "sqrtn": do l2 normalization on features in the column
      For more information: `tf.embedding_lookup_sparse`.
    dtype: Type of features. It should be an integer type. Default value is
      dtypes.int64.

  Returns:
    An integerized _SparseColumn definition.

  Raises:
    ValueError: bucket_size is not greater than 1.
    ValueError: dtype is not integer.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"sum\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "sum"
  return _SparseColumnIntegerized(
      column_name, bucket_size, combiner=combiner, dtype=dtype)
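For instance, via the public `tf.contrib.layers` alias (TF 1.x; the column name and size are hypothetical):

```python
# Features already integerized into [0, bucket_size) are used as ids directly.
import tensorflow as tf

movie_id = tf.contrib.layers.sparse_column_with_integerized_feature(
    "movie_id", bucket_size=100000)
```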
Project: lsdc | Author: febert
def sparse_column_with_hash_bucket(column_name,
                                   hash_bucket_size,
                                   combiner=None,
                                   dtype=dtypes.string):
  """Creates a _SparseColumn with hashed bucket configuration.

  Use this when your sparse features are in string or integer format, but you
  don't have a vocab file that maps each value to an integer ID.
  output_id = Hash(input_feature_string) % bucket_size

  Args:
    column_name: A string defining sparse column name.
    hash_bucket_size: An int that is > 1. The number of buckets.
    combiner: A string specifying how to reduce if the sparse column is
      multivalent. Currently "mean", "sqrtn" and "sum" are supported, with
      "sum" the default:
        * "sum": do not normalize features in the column
        * "mean": do l1 normalization on features in the column
        * "sqrtn": do l2 normalization on features in the column
      For more information: `tf.embedding_lookup_sparse`.
    dtype: The type of features. Only string and integer types are supported.

  Returns:
    A _SparseColumn with hashed bucket configuration

  Raises:
    ValueError: hash_bucket_size is not greater than 2.
    ValueError: dtype is neither string nor integer.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"sum\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "sum"
  return _SparseColumnHashed(column_name, hash_bucket_size, combiner, dtype)
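A typical call through the public alias (TF 1.x; names hypothetical):

```python
# String values are hashed into hash_bucket_size ids; no vocabulary needed.
import tensorflow as tf

words = tf.contrib.layers.sparse_column_with_hash_bucket(
    "words", hash_bucket_size=1000)
```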
Project: lsdc | Author: febert
def __new__(cls, column_name, keys, default_value=-1, combiner="sum"):
    return super(_SparseColumnKeys, cls).__new__(
        cls,
        column_name,
        combiner=combiner,
        lookup_config=_SparseIdLookupConfig(
            keys=keys, vocab_size=len(keys), default_value=default_value),
        dtype=dtypes.string)
Project: lsdc | Author: febert
def sparse_column_with_keys(column_name, keys, default_value=-1,
                            combiner=None):
  """Creates a _SparseColumn with keys.

  Look up logic is as follows:
  lookup_id = index_of_feature_in_keys if feature in keys else default_value

  Args:
    column_name: A string defining sparse column name.
    keys: a string list defining vocabulary.
    default_value: The value to use for out-of-vocabulary feature values.
      Default is -1.
    combiner: A string specifying how to reduce if the sparse column is
      multivalent. Currently "mean", "sqrtn" and "sum" are supported, with
      "sum" the default:
        * "sum": do not normalize features in the column
        * "mean": do l1 normalization on features in the column
        * "sqrtn": do l2 normalization on features in the column
      For more information: `tf.embedding_lookup_sparse`.

  Returns:
    A _SparseColumnKeys with keys configuration.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"sum\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "sum"
  return _SparseColumnKeys(
      column_name, tuple(keys), default_value=default_value, combiner=combiner)
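And the keys variant, for a small fixed vocabulary (TF 1.x; the vocabulary is hypothetical):

```python
# ids are the index of each value in `keys`; unseen values map to -1.
import tensorflow as tf

color = tf.contrib.layers.sparse_column_with_keys(
    "color", keys=["red", "green", "blue"])
```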
Project: lsdc | Author: febert
def key(self):
    """Returns a string which will be used as a key when we do sorting."""
    return "{}".format(self)
Project: lsdc | Author: febert
def weighted_sparse_column(sparse_id_column,
                           weight_column_name,
                           dtype=dtypes.float32):
  """Creates a _SparseColumn by combining sparse_id_column with a weight column.

  Args:
    sparse_id_column: A `_SparseColumn` which is created by
      `sparse_column_with_*` functions.
    weight_column_name: A string defining a sparse column name which represents
      weight or value of the corresponding sparse id feature.
    dtype: Type of weights, such as `tf.float32`
  Returns:
    A _WeightedSparseColumn composed of two sparse features: one represents id,
    the other represents weight (value) of the id feature in that example.
  Raises:
    ValueError: if dtype is not convertible to float.

  An example usage:
    ```python
    words = sparse_column_with_hash_bucket("words", 1000)
    tfidf_weighted_words = weighted_sparse_column(words, "tfidf_score")
    ```

  This configuration assumes that input dictionary of model contains the
  following two items:
    * (key="words", value=word_tensor) where word_tensor is a SparseTensor.
    * (key="tfidf_score", value=tfidf_score_tensor) where tfidf_score_tensor
      is a SparseTensor.
  Following are assumed to be true:
    * word_tensor.indices = tfidf_score_tensor.indices
    * word_tensor.shape = tfidf_score_tensor.shape
  """
  if not (dtype.is_integer or dtype.is_floating):
    raise ValueError("dtype is not convertible to float. Given {}".format(
        dtype))

  return _WeightedSparseColumn(sparse_id_column, weight_column_name, dtype)