def _to_tfidf(term_frequency, reduced_term_freq, corpus_size, smooth):
    """Weights per-document term frequencies by inverse document frequency.

    Args:
        term_frequency: The `SparseTensor` output of _to_term_frequency.
        reduced_term_freq: A `Tensor` of shape (vocabSize,) that represents
            the count of the number of documents with each term.
        corpus_size: A scalar count of the number of documents in the corpus.
        smooth: A bool indicating if the idf value should be smoothed. See
            tfidf_weights documentation for details.

    Returns:
        A `SparseTensor` with indices=<doc_index_in_batch>,
        <term_index_in_vocab>, values=term frequency * inverse document
        frequency, and the same indices and dense_shape as `term_frequency`.
    """
    num_docs = tf.to_double(corpus_size)
    docs_with_term = tf.to_double(reduced_term_freq)

    if smooth:
        # Adding one to numerator and denominator keeps the ratio finite even
        # for a term whose document count is zero.
        idf = tf.log((num_docs + 1.0) / (1.0 + docs_with_term)) + 1
    else:
        # NOTE: a zero document count divides by zero in this branch.
        idf = tf.log(num_docs / docs_with_term) + 1

    # Flatten to exactly rank 1 so the per-term lookup below always works.
    # tf.squeeze would also drop the only axis of a single-term vocabulary,
    # leaving a scalar that tf.gather cannot index.
    idf_by_term = tf.reshape(idf, [-1])

    # Column 1 of the sparse indices is the vocabulary id of each entry.
    gathered_idfs = tf.gather(idf_by_term, term_frequency.indices[:, 1])
    tfidf_values = (tf.to_float(term_frequency.values) *
                    tf.to_float(gathered_idfs))

    return tf.SparseTensor(
        indices=term_frequency.indices,
        values=tfidf_values,
        dense_shape=term_frequency.dense_shape)
def per_image_standardizer(self, image):
    """Runs `image` through a SamplewiseStandardizer wrapped as a TF op.

    Args:
        image: Tensor holding the image; it is cast to float64 before being
            handed to the standardizer.

    Returns:
        The float64 tensor produced by the `tf.py_func` op.
    """
    # NOTE(review): clip=6 presumably bounds the standardized values; confirm
    # against SamplewiseStandardizer.
    standardize = SamplewiseStandardizer(clip=6)
    # The second positional input is a literal False passed straight to the
    # standardizer call; its meaning is defined by SamplewiseStandardizer.
    py_inputs = [tf.to_double(image), False]
    return tf.py_func(standardize, py_inputs, tf.float64)
def tf_apply(self, x, update):
    """Collects the configured inputs and concatenates them along self.axis.

    Each entry of `self.inputs` is either "*" / "previous" (meaning the
    incoming tensor `x`) or a key of `self.named_tensors`. Tensors of mixed
    dtype are promoted to the most precise dtype present before concatenation.

    Args:
        x: The tensor coming from the previous layer.
        update: Not read by this method.

    Returns:
        The concatenation of the selected (and, where needed, cast) tensors.

    Raises:
        TensorForceError: If an input name is unknown, or a selected tensor
            has a dtype outside bool/int32/int64/float32/float64.
    """
    selected = []
    for name in self.inputs:
        # "*" and "previous" both refer to the tensor handed to this layer.
        if name in ("*", "previous"):
            selected.append(x)
            continue
        if name not in self.named_tensors:
            # Tell the user which names would have been valid.
            available = list(self.named_tensors)
            raise TensorForceError(
                'ComplexNetwork input "{}" doesn\'t exist, Available inputs: {}'.format(name, available)
            )
        selected.append(self.named_tensors[name])

    # Promotion only ever goes upwards in this ranking, so no tensor is cast
    # to a less precise dtype than it started with.
    rank_of_dtype = {
        'bool': 0,
        'int32': 10,
        'int64': 20,
        'float32': 30,
        'float64': 40,
    }
    cast_for_rank = {
        0: tf.identity,
        10: tf.to_int32,
        20: tf.to_int64,
        30: tf.to_float,
        40: tf.to_double,
    }

    # First pass: validate every dtype and find the highest rank present.
    target_rank = 0
    for tensor in selected:
        dtype_name = str(tensor.dtype.name)
        if dtype_name not in rank_of_dtype:
            raise TensorForceError('Network spec input does not support dtype {}'.format(dtype_name))
        target_rank = max(target_rank, rank_of_dtype[dtype_name])

    # Second pass: cast only the tensors that sit below the target rank.
    promote = cast_for_rank[target_rank]
    selected = [
        promote(tensor)
        if rank_of_dtype[str(tensor.dtype.name)] < target_rank
        else tensor
        for tensor in selected
    ]

    return tf.concat(selected, self.axis)
def _to_term_frequency(x, vocab_size):
    """Builds a SparseTensor of term frequency for every doc/term pair.

    Args:
        x: a SparseTensor of int64 representing string indices in vocab.
        vocab_size: An int - the count of vocab used to turn the string into
            int64s including any OOV buckets.

    Returns:
        a SparseTensor holding, at indices <doc_index_in_batch>,
        <term_index_in_vocab>, the number of times the term appears in the
        document divided by the number of terms in that document, with size
        (num_docs_in_batch, vocab_size).
    """
    # One "1" per token of the input; shared by both reductions below.
    token_ones = tf.ones_like(x.values)

    # Step 1: lift the input to a rank-3 sparse tensor indexed by
    # [<doc>, <term_index_in_doc>, <vocab_id>] whose values are all ones.
    doc_col, position_col = tf.split(
        x.indices, axis=1, num_or_size_splits=2)
    vocab_col = tf.to_int64(tf.expand_dims(x.values, 1))
    lifted_indices = tf.concat(
        [tf.to_int64(doc_col), tf.to_int64(position_col), vocab_col],
        axis=1)
    lifted_shape = tf.concat(
        [x.dense_shape, tf.constant([vocab_size], dtype=tf.int64)], 0)
    occurrences = tf.SparseTensor(
        indices=lifted_indices,
        values=token_ones,
        dense_shape=lifted_shape)

    # Step 2: summing away the term_index_in_doc axis leaves indices
    # [<doc_id>, <term_id>] with values [count_of_term_in_doc].
    counts = tf.sparse_reduce_sum_sparse(occurrences, 1)

    # Step 3: the number of tokens in each document, as a dense double vector.
    tokens_per_doc = tf.to_double(
        tf.sparse_reduce_sum(
            tf.SparseTensor(
                indices=x.indices,
                values=token_ones,
                dense_shape=x.dense_shape),
            1))

    # Step 4: divide every count by the length of the document it belongs to.
    # Column 0 of the count indices is the document id.
    doc_length_per_entry = tf.gather(tokens_per_doc, counts.indices[:, 0])
    frequencies = tf.to_double(counts.values) / doc_length_per_entry

    return tf.SparseTensor(
        indices=counts.indices,
        values=frequencies,
        dense_shape=counts.dense_shape)
def classifier_score(images, classifier_fn, num_batches=1):
    """Classifier score for evaluating a conditional generative model.

    This is the Inception Score generalized to an arbitrary classifier; the
    technique is described in https://arxiv.org/abs/1606.03498. The value
    computed is

        exp( E[ KL(p(y|x) || p(y)) ] )

    which measures how far the classifier's per-image prediction is from the
    prior distribution over classes.

    Args:
        images: Images to calculate the classifier score for.
        classifier_fn: A function that takes images and produces logits based
            on a classifier.
        num_batches: Number of batches to split `images` into so they can be
            run through the classifier network one batch at a time.

    Returns:
        The classifier score. A floating-point scalar of the same type as the
        output of `classifier_fn`.
    """
    image_batches = tf.stack(
        tf.split(images, num_or_size_splits=num_batches))

    # Run the classifier one batch at a time via `map_fn` to limit memory use.
    batched_logits = tf.map_fn(
        fn=classifier_fn,
        elems=image_batches,
        parallel_iterations=1,
        back_prop=False,
        swap_memory=True,
        name='RunClassifier')
    logits = tf.concat(tf.unstack(batched_logits), 0)
    logits.shape.assert_has_rank(2)

    # Do the arithmetic in float64, remembering the classifier's own dtype so
    # the result can be returned in it.
    classifier_dtype = logits.dtype
    upcast = classifier_dtype != tf.float64
    if upcast:
        logits = tf.to_double(logits)

    class_probs = tf.nn.softmax(logits)
    # Mean prediction over all images, used as the class prior p(y).
    class_prior = tf.reduce_mean(class_probs, axis=0)
    per_image_kl = _kl_divergence(class_probs, logits, class_prior)
    per_image_kl.shape.assert_has_rank(1)

    score = tf.exp(tf.reduce_mean(per_image_kl))
    return tf.cast(score, classifier_dtype) if upcast else score