The following 50 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.SparseTensor().
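As orientation before the extracted examples: a tf.SparseTensor is defined by the indices of its non-zero entries, their values, and the dense shape. A minimal sketch in TF 1.x style (the names and values here are illustrative, not from any of the projects below):

import tensorflow as tf

# A 3x4 matrix with two non-zero entries: (0, 1) -> 1.0 and (2, 3) -> 2.0.
st = tf.SparseTensor(indices=[[0, 1], [2, 3]],
                     values=[1.0, 2.0],
                     dense_shape=[3, 4])
dense = tf.sparse_tensor_to_dense(st)

with tf.Session() as sess:
    print(sess.run(dense))
    # [[0. 1. 0. 0.]
    #  [0. 0. 0. 0.]
    #  [0. 0. 0. 2.]]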
def sparse_tuple_from(sequences, dtype=np.int32):
    r"""Creates a sparse representation of ``sequences``.

    Args:
        * sequences: a list of lists of type dtype where each element is a
          sequence

    Returns a tuple with (indices, values, shape)
    """
    indices = []
    values = []

    for n, seq in enumerate(sequences):
        indices.extend(zip([n]*len(seq), range(len(seq))))
        values.extend(seq)

    indices = np.asarray(indices, dtype=np.int64)
    values = np.asarray(values, dtype=dtype)
    shape = np.asarray([len(sequences), indices.max(0)[1]+1], dtype=np.int64)

    return tf.SparseTensor(indices=indices, values=values, shape=shape)
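A quick usage sketch for sparse_tuple_from (assuming numpy as np and tensorflow as tf are imported; the sample sequences are made up): each inner list becomes one row of the sparse tensor.

# Hypothetical label sequences of unequal length.
sequences = [[1, 2, 3], [4, 5]]
st = sparse_tuple_from(sequences)
# st.indices -> [[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]]
# st.values  -> [1, 2, 3, 4, 5]
# dense shape -> [2, 3]  (two sequences, the longest has length 3)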
def variable(value, dtype=_FLOATX, name=None):
    '''Instantiates a tensor.

    # Arguments
        value: numpy array, initial value of the tensor.
        dtype: tensor type.
        name: optional name string for the tensor.

    # Returns
        Tensor variable instance.
    '''
    if hasattr(value, 'tocoo'):
        sparse_coo = value.tocoo()
        indices = np.concatenate((np.expand_dims(sparse_coo.row, 1),
                                  np.expand_dims(sparse_coo.col, 1)), 1)
        # SparseTensor doesn't need initialization
        v = tf.SparseTensor(indices=indices, values=sparse_coo.data,
                            shape=sparse_coo.shape)
        v._dims = len(sparse_coo.shape)
        return v
    v = tf.Variable(value, dtype=_convert_string_dtype(dtype), name=name)
    return v
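The tocoo() branch above follows the general recipe for turning a SciPy sparse matrix into a tf.SparseTensor. A standalone sketch under that assumption (the random matrix is illustrative):

import numpy as np
import scipy.sparse
import tensorflow as tf

# Any SciPy sparse matrix converts to COO format with row/col/data arrays.
coo = scipy.sparse.random(3, 4, density=0.25).tocoo()
indices = np.stack([coo.row, coo.col], axis=1).astype(np.int64)
st = tf.SparseTensor(indices=indices, values=coo.data,
                     dense_shape=coo.shape)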
def testIsIterable(self):
    self.assertTrue(base_info._is_iterable((1, 2, 3)))
    self.assertTrue(base_info._is_iterable([1, 2, 3]))
    self.assertTrue(base_info._is_iterable({1: 1, 2: 2, 3: 3}))
    self.assertTrue(base_info._is_iterable(
        collections.OrderedDict([(1, 1), (2, 2)])))
    self.assertTrue(base_info._is_iterable(DumbNamedTuple(1, 2)))
    tensor = tf.placeholder(dtype=tf.float32, shape=(1, 10,))
    self.assertFalse(base_info._is_iterable(set([1, 2, 3])))
    self.assertFalse(base_info._is_iterable(tensor))
    sparse_tensor = tf.SparseTensor(
        indices=tf.placeholder(dtype=tf.int64, shape=(10, 2,)),
        values=tf.placeholder(dtype=tf.float32, shape=(10,)),
        dense_shape=tf.placeholder(dtype=tf.int64, shape=(2,)))
    self.assertFalse(base_info._is_iterable(sparse_tensor))
    self.assertFalse(base_info._is_iterable(NotATensor()))
    self.assertFalse(base_info._is_iterable("foo"))

    def generator():
        for count in xrange(3):
            self.assertFalse(False)
            yield count

    self.assertFalse(base_info._is_iterable(generator))
def testModuleInfo_sparsetensor(self):
    # pylint: disable=not-callable
    tf.reset_default_graph()
    dumb = DumbModule(name="dumb_a")
    sparse_tensor = tf.SparseTensor(
        indices=tf.placeholder(dtype=tf.int64, shape=(10, 2,)),
        values=tf.placeholder(dtype=tf.float32, shape=(10,)),
        dense_shape=tf.placeholder(dtype=tf.int64, shape=(2,)))
    dumb(sparse_tensor)

    def check():
        sonnet_collection = tf.get_default_graph().get_collection(
            base_info.SONNET_COLLECTION_NAME)
        connected_subgraph = sonnet_collection[0].connected_subgraphs[0]
        self.assertIsInstance(
            connected_subgraph.inputs["inputs"], tf.SparseTensor)
        self.assertIsInstance(connected_subgraph.outputs, tf.SparseTensor)

    check()
    _copy_default_graph()
    check()
def _to_proto_sparse_tensor(sparse_tensor, nested_proto,
                            process_leafs, already_processed):
    """Serializes a `tf.SparseTensor` into `nested_proto`.

    Args:
        sparse_tensor: An instance of `tf.SparseTensor`.
        nested_proto: A `module_pb2.NestedData` instance to be filled from
            `sparse_tensor`.
        process_leafs: A function to be applied to the leaf values of the
            nested structure.
        already_processed: Set of already processed objects (used to avoid
            infinite recursion).
    """
    already_processed.add(id(sparse_tensor))
    nested_proto.named_tuple.name = _SPARSE_TENSOR_NAME
    for str_key in _SPARSE_TENSOR_FIELD:
        tensor = getattr(sparse_tensor, str_key)
        nested_proto.named_tuple.map[str_key].value = process_leafs(tensor)
def combine_analyzer(x, output_dtype, output_shape, combiner_spec, name):
    """Applies the combiner over the whole dataset.

    Args:
        x: An input `Tensor` or `SparseTensor`.
        output_dtype: The dtype of the output of the analyzer.
        output_shape: The shape of the output of the analyzer.
        combiner_spec: A subclass of CombinerSpec.
        name: Similar to a TF op name. Used to define a unique scope for this
            analyzer, which can be used for debugging info.

    Returns:
        The combined values, which is a `Tensor` with type output_dtype and
        shape `output_shape`. These must be compatible with the combiner_spec.
    """
    return Analyzer([x], [(output_dtype, output_shape, False)], combiner_spec,
                    name).outputs[0]
def testSplitTFIDF(self):
    tfidfs = tf.SparseTensor(
        [[0, 0], [0, 1], [2, 1], [2, 2]],
        [0.23104906, 0.19178806, 0.14384104, 0.34657359],
        [3, 4])

    out_index, out_weight = mappers._split_tfidfs_to_outputs(tfidfs)
    self.assertSparseOutput(
        expected_indices=[[0, 0], [0, 1], [2, 0], [2, 1]],
        expected_values=[0, 1, 1, 2],
        expected_shape=[3, 2],
        actual_sparse_tensor=out_index,
        close_values=False)
    self.assertSparseOutput(
        expected_indices=[[0, 0], [0, 1], [2, 0], [2, 1]],
        expected_values=[0.23104906, 0.19178806, 0.14384104, 0.34657359],
        expected_shape=[3, 2],
        actual_sparse_tensor=out_weight,
        close_values=True)
def input_fn(df):
    """Input builder function."""
    # Creates a dictionary mapping from each continuous feature column name (k)
    # to the values of that column stored in a constant Tensor.
    continuous_cols = {k: tf.constant(df[k].values) for k in CONTINUOUS_COLUMNS}
    # Creates a dictionary mapping from each categorical feature column name (k)
    # to the values of that column stored in a tf.SparseTensor.
    categorical_cols = {
        k: tf.SparseTensor(
            indices=[[i, 0] for i in range(df[k].size)],
            values=df[k].values,
            dense_shape=[df[k].size, 1])
        for k in CATEGORICAL_COLUMNS}
    # Merges the two dictionaries into one.
    feature_cols = dict(continuous_cols)
    feature_cols.update(categorical_cols)
    label = tf.constant(df[LABEL_COLUMN].values)
    # Returns the feature columns and the label.
    return feature_cols, label
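A hedged sketch of calling this builder; the column lists and the two-row DataFrame below are illustrative stand-ins, not the original project's schema:

import pandas as pd

CONTINUOUS_COLUMNS = ['age']
CATEGORICAL_COLUMNS = ['language']
LABEL_COLUMN = 'label'

df = pd.DataFrame({'age': [25, 40],
                   'language': ['english', 'spanish'],
                   'label': [0, 1]})
feature_cols, label = input_fn(df)
# feature_cols['age']      -> constant Tensor of shape [2]
# feature_cols['language'] -> SparseTensor with dense_shape [2, 1]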
def SimpleSparseTensorFrom(x):
    """Create a very simple SparseTensor with dimensions (batch, time).

    Args:
        x: a list of lists of type int

    Returns:
        x_ix and x_val, the indices and values of the SparseTensor<2>.
    """
    x_ix = []
    x_val = []
    for batch_i, batch in enumerate(x):
        for time, val in enumerate(batch):
            x_ix.append([batch_i, time])
            x_val.append(val)
    x_shape = [len(x), np.asarray(x_ix).max(0)[1]+1]
    x_ix = tf.constant(x_ix, tf.int64)
    x_val = tf.constant(x_val, tf.int32)
    x_shape = tf.constant(x_shape, tf.int64)

    return tf.SparseTensor(x_ix, x_val, x_shape)
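Usage sketch for SimpleSparseTensorFrom (e.g. building CTC label inputs; np and tf imports assumed, label values made up):

labels = SimpleSparseTensorFrom([[0, 1, 2], [1, 1]])
# indices: [[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]]
# values:  [0, 1, 2, 1, 1]
# shape:   [2, 3]  (batch of 2, longest sequence has 3 steps)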
def is_sparse(tensor):
    """Returns whether a tensor is a sparse tensor.

    # Arguments
        tensor: A tensor instance.

    # Returns
        A boolean.

    # Example
    ```python
        >>> from keras import backend as K
        >>> a = K.placeholder((2, 2), sparse=False)
        >>> print(K.is_sparse(a))
        False
        >>> b = K.placeholder((2, 2), sparse=True)
        >>> print(K.is_sparse(b))
        True
    ```
    """
    return isinstance(tensor, tf.SparseTensor)
def compute_ler(self, labels_true, labels_pred):
    """Operation for computing LER (Label Error Rate).

    Args:
        labels_true: A SparseTensor of target labels
        labels_pred: A SparseTensor of predicted labels

    Returns:
        ler_op: operation for computing LER
    """
    # Compute LER (normalize by label length)
    ler_op = tf.reduce_mean(tf.edit_distance(
        labels_pred, labels_true, normalize=True))
    # TODO: consider variable lengths

    # Add a scalar summary for the snapshot of LER
    # with tf.name_scope("ler"):
    #     self.summaries_train.append(tf.summary.scalar(
    #         'ler_train', ler_op))
    #     self.summaries_dev.append(tf.summary.scalar(
    #         'ler_dev', ler_op))
    # TODO: feed_dict????????????????
    return ler_op
def create_placeholders(self):
    """Create placeholders and append them to list."""
    self.inputs_pl_list.append(
        tf.placeholder(tf.float32, shape=[None, None, self.input_size],
                       name='input'))
    self.labels_pl_list.append(
        tf.SparseTensor(tf.placeholder(tf.int64, name='indices'),
                        tf.placeholder(tf.int32, name='values'),
                        tf.placeholder(tf.int64, name='shape')))
    self.labels_sub_pl_list.append(
        tf.SparseTensor(tf.placeholder(tf.int64, name='indices_sub'),
                        tf.placeholder(tf.int32, name='values_sub'),
                        tf.placeholder(tf.int64, name='shape_sub')))
    self.inputs_seq_len_pl_list.append(
        tf.placeholder(tf.int32, shape=[None], name='inputs_seq_len'))
    self.keep_prob_pl_list.append(
        tf.placeholder(tf.float32, name='keep_prob'))
def compute_ler(self, decode_op, labels):
    """Operation for computing LER (Label Error Rate).

    Args:
        decode_op: operation for decoding
        labels: A SparseTensor of target labels

    Return:
        ler_op: operation for computing LER
    """
    # Compute LER (normalize by label length)
    ler_op = tf.reduce_mean(tf.edit_distance(
        decode_op, labels, normalize=True))

    # Add a scalar summary for the snapshot of LER
    self.summaries_train.append(tf.summary.scalar('ler_train', ler_op))
    self.summaries_dev.append(tf.summary.scalar('ler_dev', ler_op))
    return ler_op
def compute_edit_distance(session, labels_true_st, labels_pred_st):
    """Compute edit distance per mini-batch.

    Args:
        session:
        labels_true_st: A `SparseTensor` of ground truth
        labels_pred_st: A `SparseTensor` of prediction

    Returns:
        edit_distances: list of edit distance of each utterance
    """
    indices, values, dense_shape = labels_true_st
    labels_true_pl = tf.SparseTensor(indices, values, dense_shape)
    indices, values, dense_shape = labels_pred_st
    labels_pred_pl = tf.SparseTensor(indices, values, dense_shape)

    edit_op = tf.edit_distance(labels_pred_pl, labels_true_pl, normalize=True)
    edit_distances = session.run(edit_op)

    return edit_distances
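A sketch of feeding compute_edit_distance; the (indices, values, dense_shape) tuples below are made-up single-utterance labels, with the hypothesis differing from the truth in one symbol:

import numpy as np
import tensorflow as tf

# Ground truth [1, 2] vs. hypothesis [1, 3] for one utterance.
truth = (np.array([[0, 0], [0, 1]]), np.array([1, 2], np.int32),
         np.array([1, 2]))
hyp = (np.array([[0, 0], [0, 1]]), np.array([1, 3], np.int32),
       np.array([1, 2]))

with tf.Session() as sess:
    # One substitution over a truth of length 2 -> normalized distance 0.5.
    print(compute_edit_distance(sess, truth, hyp))  # -> [0.5]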
def loss_func_softmax(pred, gold):
    """softmax function with integers as the second argument
    (instead of zero-one encoding matrix)

    Args:
        pred: log-odds where the last dimension is the number of labels
        gold: integer array the same size as pred but the last dimension
            which is 1

    Returns:
        the softmax values applied to the predictions
    """
    pred = tf.reshape(pred, [-1, pred.get_shape()[-1].value])
    gold = tf.reshape(gold, [pred.get_shape()[0].value])
    n = pred.get_shape()[0].value
    voc_size = pred.get_shape()[1].value
    rg = tf.range(0, n)
    inds = tf.transpose(tf.pack([rg, tf.cast(gold, 'int32')]))
    vals = tf.ones([n])
    # gold_mat = tf.SparseTensor( , [n, voc_size])
    gold_mat = tf.sparse_to_dense(inds, [n, voc_size], vals)
    return tf.nn.softmax_cross_entropy_with_logits(pred, gold_mat)
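The commented-out tf.SparseTensor line hints at the same construction the code performs densely. A small sketch of the sparse_to_dense one-hot trick, using the same pre-1.0 API (tf.pack) as the example; values are made up:

import tensorflow as tf

gold = tf.constant([2, 0], tf.int32)  # integer class labels
n, voc_size = 2, 3
# Pair each row index with its label column, then scatter ones.
inds = tf.transpose(tf.pack([tf.range(0, n), gold]))
gold_mat = tf.sparse_to_dense(inds, [n, voc_size], tf.ones([n]))
# gold_mat -> [[0., 0., 1.], [1., 0., 0.]]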
def testTrainingConstructionClassificationSparse(self):
    input_data = tf.SparseTensor(
        indices=[[0, 0], [0, 3], [1, 0], [1, 7], [2, 1], [3, 9]],
        values=[-1.0, 0.0, -1., 2., 1., -2.0],
        shape=[4, 10])
    input_labels = [0, 1, 2, 3]

    params = tensor_forest.ForestHParams(
        num_classes=4, num_features=10, num_trees=10, max_nodes=1000,
        split_after_samples=25).fill()

    graph_builder = tensor_forest.RandomForestGraphs(params)
    graph = graph_builder.training_graph(input_data, input_labels)
    self.assertTrue(isinstance(graph, tf.Operation))
def testInferenceConstructionSparse(self):
    input_data = tf.SparseTensor(
        indices=[[0, 0], [0, 3], [1, 0], [1, 7], [2, 1], [3, 9]],
        values=[-1.0, 0.0, -1., 2., 1., -2.0],
        shape=[4, 10])

    params = tensor_forest.ForestHParams(
        num_classes=4, num_features=10, num_trees=10, max_nodes=1000,
        split_after_samples=25).fill()

    graph_builder = tensor_forest.RandomForestGraphs(params)
    graph = graph_builder.inference_graph(input_data)
    self.assertTrue(isinstance(graph, tf.Tensor))
def extract_dense_weights(sess):
    for key in dense_layers.keys():
        layer = dense_layers[key]

        # sparse kernel
        dense_kernel = layer.kernel
        dense_kernel_shape = dense_kernel.get_shape().as_list()
        # dense_kernel = tf.reshape(dense_kernel,
        #                           [dense_kernel_shape[0] * dense_kernel_shape[1] * dense_kernel_shape[2],
        #                            dense_kernel_shape[3]])
        # dense_kernel = tf.transpose(dense_kernel)
        idx = tf.where(tf.not_equal(dense_kernel, 0))
        sparse_kernel = tf.SparseTensor(idx, tf.gather_nd(dense_kernel, idx),
                                        dense_kernel.get_shape())

        if layer.bias is not None:
            dk, k, b = sess.run([dense_kernel, sparse_kernel, layer.bias])
        else:
            dk, k = sess.run([dense_kernel, sparse_kernel])
            b = None

        dense_weights['%s/%s' % (key, 'kernel_dense')] = dk
        dense_weights['%s/%s' % (key, 'kernel')] = k
        dense_weights['%s/%s' % (key, 'kernel_shape')] = dense_kernel_shape
        dense_weights['%s/%s' % (key, 'bias')] = b
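The kernel-sparsification step above (tf.where + tf.gather_nd) is self-contained; a minimal standalone sketch of the same dense-to-sparse conversion, with an illustrative matrix:

import tensorflow as tf

dense = tf.constant([[0., 3., 0.],
                     [1., 0., 0.]])
# Indices of non-zero entries, their values, and the static shape.
idx = tf.where(tf.not_equal(dense, 0))
sparse = tf.SparseTensor(idx, tf.gather_nd(dense, idx), dense.get_shape())

with tf.Session() as sess:
    print(sess.run(sparse))
    # SparseTensorValue(indices=[[0, 1], [1, 0]], values=[3., 1.],
    #                   dense_shape=[2, 3])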
def input_fn(df):
    """Input builder function."""
    # Creates a dictionary mapping from each continuous feature column name (k)
    # to the values of that column stored in a constant Tensor.
    continuous_cols = {k: tf.constant(df[k].values) for k in CONTINUOUS_COLUMNS}
    # Creates a dictionary mapping from each categorical feature column name (k)
    # to the values of that column stored in a tf.SparseTensor.
    categorical_cols = {
        k: tf.SparseTensor(
            indices=[[i, 0] for i in range(df[k].size)],
            values=df[k].values,
            dense_shape=[df[k].size, 1])
        for k in CATEGORICAL_COLUMNS}
    # Merges the two dictionaries into one.
    feature_cols = dict(continuous_cols)
    feature_cols.update(categorical_cols)
    # Converts the label column into a constant Tensor.
    label = tf.constant(df[LABEL_COLUMN].values)
    # Returns the feature columns and the label.
    return feature_cols, label
def tensors_to_item(self, keys_to_tensors):
    tensor = keys_to_tensors[self._tensor_key]
    shape = self._shape
    if self._shape_keys:
        shape_dims = []
        for k in self._shape_keys:
            shape_dim = keys_to_tensors[k]
            if isinstance(shape_dim, tf.SparseTensor):
                shape_dim = tf.sparse_tensor_to_dense(shape_dim)
            shape_dims.append(shape_dim)
        shape = tf.reshape(tf.stack(shape_dims), [-1])
    if isinstance(tensor, tf.SparseTensor):
        if shape is not None:
            tensor = tf.sparse_reshape(tensor, shape)
        tensor = tf.sparse_tensor_to_dense(tensor, self._default_value)
    else:
        if shape is not None:
            tensor = tf.reshape(tensor, shape)
    return tensor
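The sparse branch above chains tf.sparse_reshape and tf.sparse_tensor_to_dense; a minimal sketch of that pair in isolation, with made-up values:

import tensorflow as tf

st = tf.SparseTensor(indices=[[0, 0], [1, 2]], values=[7, 9],
                     dense_shape=[2, 3])
# Reshape keeps the flattened positions: (0, 0) -> (0, 0), (1, 2) -> (2, 1).
reshaped = tf.sparse_reshape(st, [3, 2])
dense = tf.sparse_tensor_to_dense(reshaped, default_value=0)

with tf.Session() as sess:
    print(sess.run(dense))
    # [[7 0]
    #  [0 0]
    #  [0 9]]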
def sparse_boolean_mask(tensor, mask):
    """
    Creates a sparse tensor from masked elements of `tensor`

    Inputs:
        tensor: a 2-D tensor, [batch_size, T]
        mask: a 2-D mask, [batch_size, T]

    Output:
        a 2-D sparse tensor
    """
    mask_lens = tf.reduce_sum(tf.cast(mask, tf.int32), -1, keep_dims=True)
    mask_shape = tf.shape(mask)
    left_shifted_mask = tf.tile(
        tf.expand_dims(tf.range(mask_shape[1]), 0),
        [mask_shape[0], 1]
    ) < mask_lens
    return tf.SparseTensor(
        indices=tf.where(left_shifted_mask),
        values=tf.boolean_mask(tensor, mask),
        shape=tf.cast(tf.pack([mask_shape[0], tf.reduce_max(mask_lens)]),
                      tf.int64)  # For 2D only
    )
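A usage sketch for sparse_boolean_mask, assuming the mask is already left-aligned per row (as the helper's length-based index construction requires); values are made up:

import tensorflow as tf

tensor = tf.constant([[1, 2, 3], [4, 5, 6]])
mask = tf.constant([[True, True, False], [True, False, False]])
st = sparse_boolean_mask(tensor, mask)

with tf.Session() as sess:
    print(sess.run(st))
    # indices: [[0, 0], [0, 1], [1, 0]]; values: [1, 2, 4]; dense_shape: [2, 2]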
def input_fn(df):
    """Input builder function."""
    # Creates a dictionary mapping from each continuous feature column name (k)
    # to the values of that column stored in a constant Tensor.
    continuous_cols = {k: tf.constant(df[k].values) for k in CONTINUOUS_COLUMNS}
    # Creates a dictionary mapping from each categorical feature column name (k)
    # to the values of that column stored in a tf.SparseTensor.
    categorical_cols = {k: tf.SparseTensor(
        indices=[[i, 0] for i in range(df[k].size)],
        values=df[k].values,
        shape=[df[k].size, 1]) for k in CATEGORICAL_COLUMNS}
    # Merges the two dictionaries into one.
    feature_cols = dict(continuous_cols)
    feature_cols.update(categorical_cols)
    # Converts the label column into a constant Tensor.
    label = tf.constant(df[LABEL_COLUMN].values)
    # Returns the feature columns and the label.
    return feature_cols, label
def testLinearModel(self):
    """Tests that loss goes down with training."""

    def input_fn():
        return {
            'age': tf.constant([1]),
            'language': tf.SparseTensor(values=['english'],
                                        indices=[[0, 0]],
                                        shape=[1, 1])
        }, tf.constant([[1]])

    language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100)
    age = tf.contrib.layers.real_valued_column('age')

    target_column = layers.multi_class_target(n_classes=2)
    classifier = LinearEstimator(target_column,
                                 feature_columns=[age, language])

    classifier.fit(input_fn=input_fn, steps=1000)
    loss1 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
    classifier.fit(input_fn=input_fn, steps=2000)
    loss2 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
    self.assertLess(loss2, loss1)
    self.assertLess(loss2, 0.01)
def testJointLinearModel(self):
    """Tests that loss goes down with training."""

    def input_fn():
        return {
            'age': tf.SparseTensor(values=['1'], indices=[[0, 0]],
                                   shape=[1, 1]),
            'language': tf.SparseTensor(values=['english'],
                                        indices=[[0, 0]],
                                        shape=[1, 1])
        }, tf.constant([[1]])

    language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100)
    age = tf.contrib.layers.sparse_column_with_hash_bucket('age', 2)

    target_column = layers.multi_class_target(n_classes=2)
    classifier = JointLinearEstimator(target_column,
                                      feature_columns=[age, language])

    classifier.fit(input_fn=input_fn, steps=1000)
    loss1 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
    classifier.fit(input_fn=input_fn, steps=2000)
    loss2 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
    self.assertLess(loss2, loss1)
    self.assertLess(loss2, 0.01)
def testLinearOnlyOneFeature(self):
    """Tests that linear-only instantiation works for one feature only."""

    def input_fn():
        return {
            'language': tf.SparseTensor(values=['english'],
                                        indices=[[0, 0]],
                                        shape=[1, 1])
        }, tf.constant([[1]])

    language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 99)

    classifier = tf.contrib.learn.DNNLinearCombinedClassifier(
        linear_feature_columns=[language])
    classifier.fit(input_fn=input_fn, steps=100)
    loss1 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
    classifier.fit(input_fn=input_fn, steps=200)
    loss2 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
    self.assertLess(loss2, loss1)
    self.assertLess(loss2, 0.01)
    self.assertTrue('centered_bias_weight' in classifier.get_variable_names())
    self.assertNotIn('dnn/logits/biases', classifier.get_variable_names())
    self.assertNotIn('dnn/logits/weights', classifier.get_variable_names())
    self.assertEquals(1, len(classifier.linear_bias_))
    self.assertEquals(99, len(classifier.linear_weights_))
def testTrain(self):
    """Tests that loss goes down with training."""

    def input_fn():
        return {
            'age': tf.constant([1]),
            'language': tf.SparseTensor(values=['english'],
                                        indices=[[0, 0]],
                                        shape=[1, 1])
        }, tf.constant([[1]])

    language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100)
    age = tf.contrib.layers.real_valued_column('age')

    classifier = tf.contrib.learn.LinearClassifier(
        feature_columns=[age, language])
    classifier.fit(input_fn=input_fn, steps=100)
    loss1 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
    classifier.fit(input_fn=input_fn, steps=200)
    loss2 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
    self.assertLess(loss2, loss1)
    self.assertLess(loss2, 0.01)
    self.assertTrue('centered_bias_weight' in classifier.get_variable_names())
def testJointTrain(self):
    """Tests that loss goes down with training with joint weights."""

    def input_fn():
        return {
            'age': tf.SparseTensor(values=['1'], indices=[[0, 0]],
                                   shape=[1, 1]),
            'language': tf.SparseTensor(values=['english'],
                                        indices=[[0, 0]],
                                        shape=[1, 1])
        }, tf.constant([[1]])

    language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100)
    age = tf.contrib.layers.sparse_column_with_hash_bucket('age', 2)

    classifier = tf.contrib.learn.LinearClassifier(
        _joint_weight=True, feature_columns=[age, language])
    classifier.fit(input_fn=input_fn, steps=100)
    loss1 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
    classifier.fit(input_fn=input_fn, steps=200)
    loss2 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
    self.assertLess(loss2, loss1)
    self.assertLess(loss2, 0.01)
    self.assertTrue('centered_bias_weight' in classifier.get_variable_names())
def testExport(self):
    """Tests that export model for servo works."""

    def input_fn():
        return {
            'age': tf.constant([1]),
            'language': tf.SparseTensor(values=['english'],
                                        indices=[[0, 0]],
                                        shape=[1, 1])
        }, tf.constant([[1]])

    language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100)
    age = tf.contrib.layers.real_valued_column('age')

    classifier = tf.contrib.learn.LinearClassifier(
        feature_columns=[age, language])
    classifier.fit(input_fn=input_fn, steps=100)

    export_dir = tempfile.mkdtemp()
    classifier.export(export_dir)
def testTrainOptimizerWithL1Reg(self):
    """Tests l1 regularized model has higher loss."""

    def input_fn():
        return {
            'language': tf.SparseTensor(values=['hindi'],
                                        indices=[[0, 0]],
                                        shape=[1, 1])
        }, tf.constant([[1]])

    language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100)
    classifier_no_reg = tf.contrib.learn.LinearClassifier(
        feature_columns=[language])
    classifier_with_reg = tf.contrib.learn.LinearClassifier(
        feature_columns=[language],
        optimizer=tf.train.FtrlOptimizer(learning_rate=1.0,
                                         l1_regularization_strength=100.))
    loss_no_reg = classifier_no_reg.fit(
        input_fn=input_fn, steps=100).evaluate(
            input_fn=input_fn, steps=1)['loss']
    loss_with_reg = classifier_with_reg.fit(
        input_fn=input_fn, steps=100).evaluate(
            input_fn=input_fn, steps=1)['loss']
    self.assertLess(loss_no_reg, loss_with_reg)
def testSdcaOptimizerSparseFeatures(self):
    """Tests LinearClassifier with SDCAOptimizer and sparse features."""

    def input_fn():
        return {
            'example_id': tf.constant(['1', '2', '3']),
            'price': tf.constant([[0.4], [0.6], [0.3]]),
            'country': tf.SparseTensor(values=['IT', 'US', 'GB'],
                                       indices=[[0, 0], [1, 3], [2, 1]],
                                       shape=[3, 5]),
            'weights': tf.constant([[1.0], [1.0], [1.0]])
        }, tf.constant([[1], [0], [1]])

    price = tf.contrib.layers.real_valued_column('price')
    country = tf.contrib.layers.sparse_column_with_hash_bucket(
        'country', hash_bucket_size=5)
    sdca_optimizer = tf.contrib.linear_optimizer.SDCAOptimizer(
        example_id_column='example_id')
    classifier = tf.contrib.learn.LinearClassifier(
        feature_columns=[price, country],
        weight_column_name='weights',
        optimizer=sdca_optimizer)
    classifier.fit(input_fn=input_fn, steps=50)
    scores = classifier.evaluate(input_fn=input_fn, steps=1)
    self.assertGreater(scores['accuracy'], 0.9)
def testEval(self):
    """Tests that eval produces correct metrics."""

    def input_fn():
        return {
            'age': tf.constant([[1], [2]]),
            'language': tf.SparseTensor(values=['greek', 'chinese'],
                                        indices=[[0, 0], [1, 0]],
                                        shape=[2, 1]),
        }, tf.constant([[1], [0]])

    language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100)
    age = tf.contrib.layers.real_valued_column('age')

    classifier = tf.contrib.learn.LinearClassifier(
        feature_columns=[age, language])

    # Evaluate on trained model
    classifier.fit(input_fn=input_fn, steps=100)
    classifier.evaluate(input_fn=input_fn, steps=1)

    # TODO(ispir): Enable accuracy check after resolving the randomness issue.
    # self.assertLess(evaluated_values['loss/mean'], 0.3)
    # self.assertGreater(evaluated_values['accuracy/mean'], .95)
def testRegression(self):
    """Tests that loss goes down with training."""

    def input_fn():
        return {
            'age': tf.constant([1]),
            'language': tf.SparseTensor(values=['english'],
                                        indices=[[0, 0]],
                                        shape=[1, 1])
        }, tf.constant([[10.]])

    language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100)
    age = tf.contrib.layers.real_valued_column('age')

    classifier = tf.contrib.learn.LinearRegressor(
        feature_columns=[age, language])

    classifier.fit(input_fn=input_fn, steps=100)
    loss1 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
    classifier.fit(input_fn=input_fn, steps=200)
    loss2 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']

    self.assertLess(loss2, loss1)
    self.assertLess(loss2, 0.5)
def testSparseFeatures(self):
    """Tests SVM classifier with (hashed) sparse features."""

    def input_fn():
        return {
            'example_id': tf.constant(['1', '2', '3']),
            'price': tf.constant([[0.8], [0.6], [0.3]]),
            'country': tf.SparseTensor(
                values=['IT', 'US', 'GB'],
                indices=[[0, 0], [1, 0], [2, 0]],
                shape=[3, 1]),
        }, tf.constant([[0], [1], [1]])

    price = tf.contrib.layers.real_valued_column('price')
    country = tf.contrib.layers.sparse_column_with_hash_bucket(
        'country', hash_bucket_size=5)
    svm_classifier = tf.contrib.learn.SVM(feature_columns=[price, country],
                                          example_id_column='example_id',
                                          l1_regularization=0.0,
                                          l2_regularization=1.0)
    svm_classifier.fit(input_fn=input_fn, steps=30)
    accuracy = svm_classifier.evaluate(input_fn=input_fn, steps=1)['accuracy']
    self.assertAlmostEqual(accuracy, 1.0, places=3)
def testWeightedSparseColumn(self):
    ids = tf.contrib.layers.sparse_column_with_keys(
        "ids", ["marlo", "omar", "stringer"])
    ids_tensor = tf.SparseTensor(values=["stringer", "stringer", "marlo"],
                                 indices=[[0, 0], [1, 0], [1, 1]],
                                 shape=[2, 2])
    weighted_ids = tf.contrib.layers.weighted_sparse_column(ids, "weights")
    weights_tensor = tf.SparseTensor(values=[10.0, 20.0, 30.0],
                                     indices=[[0, 0], [1, 0], [1, 1]],
                                     shape=[2, 2])
    features = {"ids": ids_tensor, "weights": weights_tensor}
    output = feature_column_ops._Transformer(features).transform(weighted_ids)
    with self.test_session():
        tf.initialize_all_tables().run()
        self.assertAllEqual(output[0].shape.eval(), ids_tensor.shape.eval())
        self.assertAllEqual(output[0].indices.eval(),
                            ids_tensor.indices.eval())
        self.assertAllEqual(output[0].values.eval(), [2, 2, 0])
        self.assertAllEqual(output[1].shape.eval(),
                            weights_tensor.shape.eval())
        self.assertAllEqual(output[1].indices.eval(),
                            weights_tensor.indices.eval())
        self.assertEqual(output[1].values.dtype, tf.float32)
        self.assertAllEqual(output[1].values.eval(),
                            weights_tensor.values.eval())
def testCrossColumn(self):
    language = tf.contrib.layers.sparse_column_with_hash_bucket(
        "language", hash_bucket_size=3)
    country = tf.contrib.layers.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    country_language = tf.contrib.layers.crossed_column(
        [language, country], hash_bucket_size=15)
    features = {
        "language": tf.SparseTensor(values=["english", "spanish"],
                                    indices=[[0, 0], [1, 0]],
                                    shape=[2, 1]),
        "country": tf.SparseTensor(values=["US", "SV"],
                                   indices=[[0, 0], [1, 0]],
                                   shape=[2, 1])
    }
    output = feature_column_ops._Transformer(features).transform(
        country_language)
    with self.test_session():
        self.assertEqual(output.values.dtype, tf.int64)
        self.assertTrue(all(x < 15 and x >= 0 for x in output.values.eval()))
def testCrossWithBucketizedColumn(self):
    price_bucket = tf.contrib.layers.bucketized_column(
        tf.contrib.layers.real_valued_column("price"),
        boundaries=[0., 10., 100.])
    country = tf.contrib.layers.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    country_price = tf.contrib.layers.crossed_column(
        [country, price_bucket], hash_bucket_size=15)
    features = {
        "price": tf.constant([[20.]]),
        "country": tf.SparseTensor(values=["US", "SV"],
                                   indices=[[0, 0], [0, 1]],
                                   shape=[1, 2])
    }
    output = feature_column_ops._Transformer(features).transform(country_price)
    with self.test_session():
        self.assertEqual(output.values.dtype, tf.int64)
        self.assertTrue(all(x < 15 and x >= 0 for x in output.values.eval()))
def testCrossWithCrossedColumn(self):
    price_bucket = tf.contrib.layers.bucketized_column(
        tf.contrib.layers.real_valued_column("price"),
        boundaries=[0., 10., 100.])
    country = tf.contrib.layers.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    country_price = tf.contrib.layers.crossed_column(
        [country, price_bucket], hash_bucket_size=15)
    wire = tf.contrib.layers.sparse_column_with_hash_bucket("wire", 10)
    wire_country_price = tf.contrib.layers.crossed_column(
        [wire, country_price], hash_bucket_size=15)
    features = {
        "price": tf.constant([[20.]]),
        "country": tf.SparseTensor(values=["US", "SV"],
                                   indices=[[0, 0], [0, 1]],
                                   shape=[1, 2]),
        "wire": tf.SparseTensor(values=["omar", "stringer", "marlo"],
                                indices=[[0, 0], [0, 1], [0, 2]],
                                shape=[1, 3])
    }
    output = feature_column_ops._Transformer(features).transform(
        wire_country_price)
    with self.test_session():
        self.assertEqual(output.values.dtype, tf.int64)
        self.assertTrue(all(x < 15 and x >= 0 for x in output.values.eval()))
def testAllDNNColumns(self):
    sparse_column = tf.contrib.layers.sparse_column_with_keys(
        "ids", ["a", "b", "c", "unseen"])
    real_valued_column = tf.contrib.layers.real_valued_column("income", 2)
    one_hot_column = tf.contrib.layers.one_hot_column(sparse_column)
    embedding_column = tf.contrib.layers.embedding_column(sparse_column, 10)
    features = {
        "ids": tf.SparseTensor(
            values=["c", "b", "a"],
            indices=[[0, 0], [1, 0], [2, 0]],
            shape=[3, 1]),
        "income": tf.constant([[20.3, 10], [110.3, 0.4], [-3.0, 30.4]])
    }
    output = tf.contrib.layers.input_from_feature_columns(
        features, [one_hot_column, embedding_column, real_valued_column])
    with self.test_session():
        tf.initialize_all_variables().run()
        tf.initialize_all_tables().run()
        self.assertAllEqual(output.eval().shape, [3, 2 + 4 + 10])
def testOneHotColumnFromWeightedSparseColumnFails(self):
    ids_column = tf.contrib.layers.sparse_column_with_keys(
        "ids", ["a", "b", "c", "unseen"])
    ids_tensor = tf.SparseTensor(
        values=["c", "b", "a", "c"],
        indices=[[0, 0], [1, 0], [2, 0], [2, 1]],
        shape=[3, 2])
    weighted_ids_column = tf.contrib.layers.weighted_sparse_column(
        ids_column, "weights")
    weights_tensor = tf.SparseTensor(
        values=[10.0, 20.0, 30.0, 40.0],
        indices=[[0, 0], [1, 0], [2, 0], [2, 1]],
        shape=[3, 2])
    features = {"ids": ids_tensor, "weights": weights_tensor}
    one_hot_column = tf.contrib.layers.one_hot_column(weighted_ids_column)
    with self.test_session():
        tf.initialize_all_variables().run()
        tf.initialize_all_tables().run()
        with self.assertRaisesRegexp(
            ValueError,
            "one_hot_column does not yet support weighted_sparse_column"):
            _ = tf.contrib.layers.input_from_feature_columns(
                features, [one_hot_column])
def testOneHotColumnFromMultivalentSparseColumnWithKeysSucceedsForDNN(self):
    ids_column = tf.contrib.layers.sparse_column_with_keys(
        "ids", ["a", "b", "c", "unseen"])
    ids_tensor = tf.SparseTensor(
        values=["c", "b", "a", "c"],
        indices=[[0, 0], [1, 0], [2, 0], [2, 1]],
        shape=[3, 2])
    one_hot_sparse = tf.contrib.layers.one_hot_column(ids_column)
    features = {"ids": ids_tensor}
    output = tf.contrib.layers.input_from_feature_columns(features,
                                                          [one_hot_sparse])
    with self.test_session():
        tf.initialize_all_variables().run()
        tf.initialize_all_tables().run()
        self.assertAllEqual([[0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 1, 0]],
                            output.eval())
def testHashedEmbeddingColumnSucceedsForDNN(self):
    wire_tensor = tf.SparseTensor(values=["omar", "stringer", "marlo", "omar"],
                                  indices=[[0, 0], [1, 0], [1, 1], [2, 0]],
                                  shape=[3, 2])
    features = {"wire": wire_tensor}
    # Big enough hash space so that hopefully there is no collision
    embedded_sparse = tf.contrib.layers.hashed_embedding_column("wire", 1000, 3)
    output = tf.contrib.layers.input_from_feature_columns(
        features, [embedded_sparse], weight_collections=["my_collection"])
    weights = tf.get_collection("my_collection")
    grad = tf.gradients(output, weights)
    with self.test_session():
        tf.initialize_all_variables().run()
        gradient_values = []
        # Collect the gradient from the different partitions (one in this test)
        for p in range(len(grad)):
            gradient_values.extend(grad[p].values.eval())
        gradient_values.sort()
        self.assertAllEqual(gradient_values, [0.5]*6 + [2]*3)
def testEmbeddingColumnWithInitializerSucceedsForDNN(self):
    hashed_sparse = tf.contrib.layers.sparse_column_with_hash_bucket("wire", 10)
    wire_tensor = tf.SparseTensor(values=["omar", "stringer", "marlo"],
                                  indices=[[0, 0], [1, 0], [1, 1]],
                                  shape=[2, 2])
    features = {"wire": wire_tensor}
    init_value = 133.7
    embeded_sparse = tf.contrib.layers.embedding_column(
        hashed_sparse, 10,
        initializer=tf.constant_initializer(init_value))
    output = tf.contrib.layers.input_from_feature_columns(features,
                                                          [embeded_sparse])
    with self.test_session():
        tf.initialize_all_variables().run()
        output_eval = output.eval()
        self.assertAllEqual(output_eval.shape, [2, 10])
        self.assertAllClose(output_eval, np.tile(init_value, [2, 10]))
def testEmbeddingColumnWithMultipleInitializersFails(self):
    hashed_sparse = tf.contrib.layers.sparse_column_with_hash_bucket("wire", 10)
    wire_tensor = tf.SparseTensor(values=["omar", "stringer", "marlo"],
                                  indices=[[0, 0], [1, 0], [1, 1]],
                                  shape=[2, 2])
    features = {"wire": wire_tensor}
    embedded_sparse = tf.contrib.layers.embedding_column(
        hashed_sparse, 10,
        initializer=tf.truncated_normal_initializer(mean=42, stddev=1337))
    embedded_sparse_alternate = tf.contrib.layers.embedding_column(
        hashed_sparse, 10,
        initializer=tf.truncated_normal_initializer(mean=1337, stddev=42))
    # Makes sure that trying to use different initializers with the same
    # embedding column explicitly fails.
    with self.test_session():
        with self.assertRaisesRegexp(
            ValueError,
            "Duplicate feature column key found for column: wire_embedding"):
            tf.contrib.layers.input_from_feature_columns(
                features, [embedded_sparse, embedded_sparse_alternate])
def testEmbeddingColumnWithCrossedColumnSucceedsForDNN(self):
    a = tf.contrib.layers.sparse_column_with_hash_bucket(
        "aaa", hash_bucket_size=100)
    b = tf.contrib.layers.sparse_column_with_hash_bucket(
        "bbb", hash_bucket_size=100)
    crossed = tf.contrib.layers.crossed_column(
        set([a, b]), hash_bucket_size=10000)
    wire_tensor = tf.SparseTensor(values=["omar", "stringer", "marlo"],
                                  indices=[[0, 0], [1, 0], [1, 1]],
                                  shape=[2, 2])
    features = {"aaa": wire_tensor, "bbb": wire_tensor}
    embeded_sparse = tf.contrib.layers.embedding_column(crossed, 10)
    output = tf.contrib.layers.input_from_feature_columns(features,
                                                          [embeded_sparse])
    with self.test_session():
        tf.initialize_all_variables().run()
        self.assertAllEqual(output.eval().shape, [2, 10])
def testWeightedSparseColumnFailsForDNN(self):
    ids = tf.contrib.layers.sparse_column_with_keys(
        "ids", ["marlo", "omar", "stringer"])
    ids_tensor = tf.SparseTensor(values=["stringer", "stringer", "marlo"],
                                 indices=[[0, 0], [1, 0], [1, 1]],
                                 shape=[2, 2])
    weighted_ids = tf.contrib.layers.weighted_sparse_column(ids, "weights")
    weights_tensor = tf.SparseTensor(values=[10.0, 20.0, 30.0],
                                     indices=[[0, 0], [1, 0], [1, 1]],
                                     shape=[2, 2])
    features = {"ids": ids_tensor, "weights": weights_tensor}
    with self.test_session():
        with self.assertRaisesRegexp(
            ValueError,
            "Error creating input layer for column: ids_weighted_by_weights"):
            tf.initialize_all_tables().run()
            tf.contrib.layers.input_from_feature_columns(features,
                                                         [weighted_ids])
def testCrossedColumnFailsForDNN(self):
    a = tf.contrib.layers.sparse_column_with_hash_bucket(
        "aaa", hash_bucket_size=100)
    b = tf.contrib.layers.sparse_column_with_hash_bucket(
        "bbb", hash_bucket_size=100)
    crossed = tf.contrib.layers.crossed_column(
        set([a, b]), hash_bucket_size=10000)
    wire_tensor = tf.SparseTensor(values=["omar", "stringer", "marlo"],
                                  indices=[[0, 0], [1, 0], [1, 1]],
                                  shape=[2, 2])
    features = {"aaa": wire_tensor, "bbb": wire_tensor}
    with self.test_session():
        with self.assertRaisesRegexp(
            ValueError, "Error creating input layer for column: aaa_X_bbb"):
            tf.initialize_all_variables().run()
            tf.contrib.layers.input_from_feature_columns(features, [crossed])
def testDeepColumnsSucceedForDNN(self):
    real_valued = tf.contrib.layers.real_valued_column("income", 3)
    bucket = tf.contrib.layers.bucketized_column(
        tf.contrib.layers.real_valued_column("price", 2),
        boundaries=[0., 10., 100.])
    hashed_sparse = tf.contrib.layers.sparse_column_with_hash_bucket("wire", 10)
    features = {
        "income": tf.constant([[20., 10, -5], [110, 0, -7], [-3, 30, 50]]),
        "price": tf.constant([[20., 200], [110, 2], [-20, -30]]),
        "wire": tf.SparseTensor(values=["omar", "stringer", "marlo"],
                                indices=[[0, 0], [1, 0], [2, 0]],
                                shape=[3, 1])
    }
    embeded_sparse = tf.contrib.layers.embedding_column(
        hashed_sparse, 10,
        initializer=tf.constant_initializer(133.7))
    output = tf.contrib.layers.input_from_feature_columns(
        features, [real_valued, bucket, embeded_sparse])
    with self.test_session():
        tf.initialize_all_variables().run()
        # size of output = 3 (real_valued) + 2 * 4 (bucket) + 10 (embedding)
        #                = 21
        self.assertAllEqual(output.eval().shape, [3, 21])
def testEmbeddingColumnWithWeightedSparseColumnForDNN(self):
    ids = tf.contrib.layers.sparse_column_with_keys(
        "ids", ["marlo", "omar", "stringer"])
    ids_tensor = tf.SparseTensor(values=["stringer", "stringer", "marlo"],
                                 indices=[[0, 0], [1, 0], [1, 1]],
                                 shape=[2, 2])
    weighted_ids = tf.contrib.layers.weighted_sparse_column(ids, "weights")
    weights_tensor = tf.SparseTensor(values=[10.0, 20.0, 30.0],
                                     indices=[[0, 0], [1, 0], [1, 1]],
                                     shape=[2, 2])
    features = {"ids": ids_tensor, "weights": weights_tensor}
    embeded_sparse = tf.contrib.layers.embedding_column(
        weighted_ids, 1, combiner="sum",
        initializer=init_ops.ones_initializer)
    output = tf.contrib.layers.input_from_feature_columns(features,
                                                          [embeded_sparse])
    with self.test_session():
        tf.initialize_all_variables().run()
        tf.initialize_all_tables().run()
        # score: (sum of weights)
        self.assertAllEqual(output.eval(), [[10.], [50.]])