我们从Python开源项目中,提取了以下40个代码示例,用于说明如何使用tensorflow.mod()。
def __call__(self, inputs, state, scope=None):
    """Run one step of the dilated long short-term memory (LSTM) cell.

    Args:
        inputs: Input tensor for this step.
        state: Tensor that is split in two along axis 1 into the cell
            state ``c`` and the hidden state ``h``.
        scope: Optional variable scope; defaults to the class name.

    Returns:
        A pair ``(updated_h, new_state)`` where ``new_state`` is ``new_c``
        and ``updated_h`` concatenated along axis 1.
    """
    with tf.variable_scope(scope or type(self).__name__):  # "DilatedLSTMCell"
        c, h = tf.split(state, 2, axis=1)

        # All four gate pre-activations come out of a single linear map
        # for efficiency, then get split apart.
        gates = self._linear([inputs, h], 4 * self._num_units, True)
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = tf.split(gates, 4, axis=1)

        retained = c * tf.sigmoid(f + self._forget_bias)
        written = tf.sigmoid(i) * tf.tanh(j)
        new_c = retained + written
        new_h = tf.tanh(new_c) * tf.sigmoid(o)

        # Advance the step counter and select which core gets refreshed;
        # the masks blend the previous and the freshly computed hidden state.
        timestep = tf.assign_add(self._timestep, 1)
        core_to_update = tf.mod(timestep, self._cores)
        held_part = self._hold_mask[core_to_update] * h
        fresh_part = self._dilated_mask[core_to_update] * new_h
        updated_h = held_part + fresh_part

        return updated_h, tf.concat([new_c, updated_h], axis=1)
def ternary_encoder(input_data):
    """Encode the signs of ``input_data``, packing four values per byte.

    Args:
        input_data: Tensor whose element signs are to be encoded.

    Returns:
        A ``tf.uint8`` tensor in which every element holds four 2-bit codes.
    """
    signs = tf.sign(input_data)  # -1, 0, 1
    # Shift -1, 0, 1 to 0, 1, 2 (2'b00, 2'b01, 2'b10).
    codes = tf.reshape(tf.add(signs, 1), [-1])

    # Pad the flat vector so that its length is divisible by 4. Note that
    # pad_size is in 1..4: a full group is appended even when the length is
    # already a multiple of 4.
    pad_size = 4 - tf.mod(tf.size(codes), 4)
    padding = tf.range(0.0, pad_size)
    codes = tf.concat([codes, padding], 0)

    # Split into four equal quarters; each quarter occupies its own pair of
    # bits in the output byte.
    quarter_1, quarter_2, quarter_3, quarter_4 = tf.split(codes, 4)
    low_bits = tf.add(quarter_1, quarter_2 * 4)
    high_bits = tf.add(quarter_3 * 16, quarter_4 * 64)
    packed = tf.add(low_bits, high_bits)
    return tf.cast(packed, tf.uint8)
def ternary_decoder(encoded_data, scaler, shape):
    """Decode packed 2-bit sign codes back to a float tensor.

    Args:
        encoded_data: Tensor of packed bytes, four 2-bit codes per element.
        scaler: Factor the decoded signs (-1, 0, 1) are multiplied by.
        shape: Shape of the decoded tensor; only the first
            ``reduce_prod(shape)`` decoded values are kept.

    Returns:
        Float tensor of shape ``shape`` holding ``sign * scaler``.
    """
    a = tf.cast(encoded_data, tf.int32)
    # Extract each 2-bit field with explicit integer division. The previous
    # ``a / 4`` relied on the Python-version-dependent meaning of ``/`` and
    # went through floating point under true division.
    a_split1 = tf.mod(a, 4)
    a_split2 = tf.mod(tf.floordiv(a, 4), 4)
    a_split3 = tf.mod(tf.floordiv(a, 16), 4)
    a_split4 = tf.mod(tf.floordiv(a, 64), 4)
    a = tf.concat([a_split1, a_split2, a_split3, a_split4], 0)

    # Drop the values beyond the requested number of elements.
    real_size = tf.reduce_prod(shape)
    a = tf.to_float(a)
    a = tf.gather(a, tf.range(0, real_size))
    a = tf.reshape(a, shape)
    # Shift the codes 0, 1, 2 back to the signs -1, 0, 1.
    a = tf.subtract(a, 1)
    decoded = a * scaler
    return decoded
def fast_rotate(input_image, dx=0, dy=0):
    """Shift an image cyclically by integer offsets.

    Basic rotations (constant disparities) for equirectangular images. For
    image augmentations (y-axis rotations), this method is preferable
    compared to the more general rotation function.

    Args:
        input_image: Image tensor indexed as [row, column, ...].
        dx: Horizontal shift, wrapped around the image width.
        dy: Vertical shift, wrapped around the image height.

    Returns:
        The image gathered at the shifted coordinates.
    """
    image_shape = tf.shape(input_image)
    height, width = image_shape[0], image_shape[1]

    # Shift coordinate grid for inverse warp.
    grid_x, grid_y = tf.meshgrid(tf.range(width), tf.range(height))
    source_x = tf.mod(grid_x - dx, width)
    source_y = tf.mod(grid_y - dy, height)
    source_indices = tf.stack([source_y, source_x], 2)

    # Perform exact sampling (as we are using integer coordinates).
    return tf.gather_nd(input_image, source_indices)


# Project equirectangular image onto a cube face.
def random_exp_initializer(minval=0, maxval=None, seed=None, dtype=dtypes.float32):
    """Return an initializer that exponentiates uniform random samples.

    Args:
        minval: A python scalar or a scalar tensor. Lower bound of the range
            of random values to generate.
        maxval: A python scalar or a scalar tensor. Upper bound of the range
            of random values to generate. Defaults to 1 for float types.
        seed: A Python integer. Used to create random seeds. See
            [`set_random_seed`](../../api_docs/python/constant_op.md#set_random_seed)
            for behavior.
        dtype: The data type.

    Returns:
        An initializer that draws uniform values between `minval` and
        `maxval` and applies `tf.exp` to them.
    """

    def _initializer(shape, dtype=dtype, partition_info=None):
        # partition_info is accepted for signature compatibility and unused.
        uniform_sample = random_ops.random_uniform(
            shape, minval, maxval, dtype, seed=seed)
        return tf.exp(uniform_sample)

    return _initializer


# Here we need to register the gradient for the mod operation
def tf_mod(x, y, name=None):
    """Differentiable mod based on numpy.

    Args:
        x: first argument
        y: second argument
        name: optional name for the op scope

    Returns:
        mod between x and y, reshaped to the shape of x
    """

    def np_mod(x, y):
        return np.mod(x, y, dtype=np.float32)

    def modgrad(op, grad):
        # The propagated gradient with respect to the first and second
        # argument respectively: passed through for x, zeroed for y.
        grad_x = grad * 1
        grad_y = grad * 0
        return grad_x, grad_y

    def py_func(func, inp, Tout, stateful=True, name=None, grad=None):
        # Need to generate a unique name to avoid duplicates.
        suffix = str(np.random.randint(0, 1E+8))
        rnd_name = 'PyFuncGrad' + suffix
        tf.RegisterGradient(rnd_name)(grad)
        graph = tf.get_default_graph()
        # Route the gradient of the wrapped PyFunc op to the one registered
        # above.
        with graph.gradient_override_map({"PyFunc": rnd_name}):
            return tf.py_func(func, inp, Tout, stateful=stateful, name=name)

    with ops.name_scope(name, "mod", [x, y]) as name:
        # Here is the call that attaches the custom gradient.
        outputs = py_func(np_mod, [x, y], [tf.float32], name=name, grad=modgrad)
        return tf.reshape(outputs[0], tf.shape(x))
def sample_k_fids_for_pid(pid, all_fids, all_pids, batch_k):
    """Given a PID, select K FIDs of that specific PID.

    Args:
        pid: The PID to sample for.
        all_fids: Tensor of all FIDs.
        all_pids: Tensor of PIDs, compared element-wise against `pid` to
            mask `all_fids`.
        batch_k: Number of FIDs to select.

    Returns:
        A pair `(selected_fids, pids)` where `pids` is `pid` repeated
        `batch_k` times.
    """
    belongs_to_pid = tf.equal(all_pids, pid)
    possible_fids = tf.boolean_mask(all_fids, belongs_to_pid)

    # If K or more FIDs are available, a subset of K is used. Otherwise the
    # index list is padded to a multiple of the original FID count so that
    # all of them are sampled equally likely.
    count = tf.shape(possible_fids)[0]
    repeats = tf.cast(tf.ceil(batch_k / count), tf.int32)
    padded_count = repeats * count
    full_range = tf.mod(tf.range(padded_count), count)

    # Sampling is always performed by shuffling and taking the first k.
    first_k = tf.random_shuffle(full_range)[:batch_k]
    selected_fids = tf.gather(possible_fids, first_k)

    return selected_fids, tf.fill([batch_k], pid)
def random_exp_initializer(minval=0, maxval=None, seed=None, dtype=dtypes.float32):
    """Return an initializer that exponentiates uniform random samples.

    Args:
        minval: A python scalar or a scalar tensor. Lower bound of the range
            of random values to generate.
        maxval: A python scalar or a scalar tensor. Upper bound of the range
            of random values to generate. Defaults to 1 for float types.
        seed: A Python integer. Used to create random seeds. See
            [`set_random_seed`](../../api_docs/python/constant_op.md#set_random_seed)
            for behavior.
        dtype: The data type.

    Returns:
        An initializer that draws uniform values between `minval` and
        `maxval` and applies `tf.exp` to them.
    """

    def _initializer(shape, dtype=dtype, partition_info=None):
        # partition_info is accepted for signature compatibility and unused.
        uniform_sample = random_ops.random_uniform(
            shape, minval, maxval, dtype, seed=seed)
        return tf.exp(uniform_sample)

    return _initializer


# Register the gradient for the mod operation. tf.mod() does not have a gradient implemented.
def _process_batch(self, batch):
    """Gather the weight rows selected by `batch`.

    The batch is optionally cast to int32 (`self._cast`) and optionally
    wrapped into the bucket range (`self._mod_inputs`) before the lookup.
    """
    indices = tf.cast(batch, tf.int32) if self._cast else batch
    if self._mod_inputs:
        # tf.abs has to be applied before tf.mod, because tf.mod gives
        # negative outputs when given negative inputs.
        indices = tf.mod(tf.abs(indices), self._num_buckets)
    return tf.gather(self._weights, indices)
def __mod__(self, other):
    """Implement ``self % other`` by delegating to ``tf.mod``."""
    remainder = tf.mod(self, other)
    return remainder
def __rmod__(self, other):
    """Implement ``other % self`` (reflected operands) via ``tf.mod``."""
    remainder = tf.mod(other, self)
    return remainder
def unpack_colors(color, axis, normalize=True):
    """Unpack integer colors into separate r, g, b channels.

    The packed value is read as ``r + 256 * g + 256**2 * b``, one byte per
    channel.

    Args:
        color: Integer tensor of packed colors.
        axis: Axis along which the three channels are stacked.
        normalize: If true, convert to float and divide by 255.

    Returns:
        Tensor with the r, g, b channels stacked along `axis`.
    """
    r = tf.mod(color, 256)
    g = tf.mod(tf.floordiv(color, 256), 256)
    # Each channel is a single byte, so blue is masked with 256 like the
    # other channels. The previous modulus of 256 ** 2 let any bits above
    # bit 23 leak into the blue channel.
    b = tf.mod(tf.floordiv(color, 256 ** 2), 256)
    color = tf.stack([r, g, b], axis=axis)
    if normalize:
        color = tf.div(tf.to_float(color), 255.)
    return color
def ternary_decoder(encoded_data, scaler, shape):
    """Decode packed 2-bit sign codes back to a float tensor.

    Args:
        encoded_data: Tensor of packed bytes, four 2-bit codes per element.
        scaler: Factor the decoded signs (-1, 0, 1) are multiplied by.
        shape: Shape of the decoded tensor; only the first
            ``reduce_prod(shape)`` decoded values are kept.

    Returns:
        Float tensor of shape ``shape`` holding ``sign * scaler``.
    """
    a = tf.cast(encoded_data, tf.int32)
    # Extract each 2-bit field with explicit integer division. The previous
    # ``a / 4`` relied on the Python-version-dependent meaning of ``/`` and
    # went through floating point under true division.
    a_split1 = tf.mod(a, 4)
    a_split2 = tf.mod(tf.floordiv(a, 4), 4)
    a_split3 = tf.mod(tf.floordiv(a, 16), 4)
    a_split4 = tf.mod(tf.floordiv(a, 64), 4)
    a = tf.concat([a_split1, a_split2, a_split3, a_split4], 0)

    # Drop the values beyond the requested number of elements.
    real_size = tf.reduce_prod(shape)
    a = tf.to_float(a)
    a = tf.gather(a, tf.range(0, real_size))
    a = tf.reshape(a, shape)
    # Shift the codes 0, 1, 2 back to the signs -1, 0, 1.
    a = tf.subtract(a, 1)
    decoded = a * scaler
    return decoded
def rejection_resample(self, ds):
    """Resample `ds` towards a uniform distribution over ``uid % 1000``.

    Args:
        ds: Dataset to resample.

    Returns:
        The result of ``tf.contrib.data.rejection_resample`` for `ds`.
    """
    nclasses = 1000

    def _classfunc(*tensors):
        # The class of an element is its uid modulo the number of classes.
        as_dict = self.dictify(tensors)
        uids = as_dict['uid']
        return tf.mod(uids, nclasses)

    # Float literal so the per-class probability is not truncated to 0 by
    # integer division under Python 2.
    target_dist = tf.constant(1.0 / nclasses, shape=(nclasses,))
    return tf.contrib.data.rejection_resample(ds, _classfunc, target_dist)
def setUp(self):
    """Prepare broadcast tensors and the table of binary ops under test."""
    super(CoreBinaryOpsTest, self).setUp()

    x_broadcast_shape = [self.x_size, 1, self.probs_size]
    channel_broadcast_shape = [1, self.channel_size, self.probs_size]
    self.x_probs_broadcast_tensor = tf.reshape(
        self.x_probs_lt.tensor, x_broadcast_shape)
    self.channel_probs_broadcast_tensor = tf.reshape(
        self.channel_probs_lt.tensor, channel_broadcast_shape)

    # Each entry: (name, python operator or None, tf op, core op).
    # == and != are not element-wise for tf.Tensor, so they shouldn't be
    # elementwise for LabeledTensor, either.
    arithmetic_ops = [
        ('add', operator.add, tf.add, core.add),
        ('sub', operator.sub, tf.sub, core.sub),
        ('mul', operator.mul, tf.mul, core.mul),
        ('div', operator.truediv, tf.div, core.div),
        ('mod', operator.mod, tf.mod, core.mod),
        ('pow', operator.pow, tf.pow, core.pow_function),
    ]
    comparison_ops = [
        ('equal', None, tf.equal, core.equal),
        ('less', operator.lt, tf.less, core.less),
        ('less_equal', operator.le, tf.less_equal, core.less_equal),
        ('not_equal', None, tf.not_equal, core.not_equal),
        ('greater', operator.gt, tf.greater, core.greater),
        ('greater_equal', operator.ge, tf.greater_equal, core.greater_equal),
    ]
    self.ops = arithmetic_ops + comparison_ops

    self.test_lt_1 = self.x_probs_lt
    self.test_lt_2 = self.channel_probs_lt
    self.test_lt_1_broadcast = self.x_probs_broadcast_tensor
    self.test_lt_2_broadcast = self.channel_probs_broadcast_tensor
    self.broadcast_axes = [self.a0, self.a1, self.a3]
def append(self, tensors):
    """Write `tensors` into the buffers at the current ring position.

    Args:
        tensors: One tensor per buffer, paired with the buffer keys in
            iteration order.

    Returns:
        The op incrementing the index; it runs only after all writes.
    """
    # The write slot wraps around once the index reaches the capacity.
    position = tf.mod(self.index, self.capacity)

    append_ops = []
    for key, tensor in zip(self.buffers, tensors):
        slot = self.buffers[key][position]
        append_ops.append(slot.assign(tensor))

    with tf.control_dependencies(append_ops):
        inc_index_op = self.index.assign_add(1)
    return inc_index_op
def _tile_encoders_for_beamsearch(self, projected_sentinel):
    """Tile the encoder projections to match the sentinel's batch size.

    The sentinel batch size must be an integer multiple of the encoders'
    batch size (asserted in the graph); that multiple is the beam size.

    Args:
        projected_sentinel: Tensor whose first dimension is the
            (beam-expanded) batch size.

    Returns:
        List with every encoder projection tiled `beam_size` times along
        the first axis.
    """
    sentinel_batch_size = tf.shape(projected_sentinel)[0]
    first_projection = self.encoder_projections_for_ctx[0]
    encoders_batch_size = tf.shape(first_projection)[0]

    remainder = tf.mod(sentinel_batch_size, encoders_batch_size)
    divisibility_check = tf.assert_equal(remainder, 0)
    with tf.control_dependencies([divisibility_check]):
        beam_size = tf.div(sentinel_batch_size, encoders_batch_size)

    tiled = []
    for proj in self.encoder_projections_for_ctx:
        tiled.append(tf.tile(proj, [beam_size, 1, 1]))
    return tiled
def lat_long_to_equirectangular_uv(S, T):
    """Convert latitude and longitude to UV coordinates on an
    equirectangular plane.

    Args:
        S: Angle mapped to the u coordinate (scaled by 1 / (2 * pi) and
            offset by -0.25).
        T: Angle mapped to the v coordinate (scaled by 1 / pi).

    Returns:
        The pair `(u, v)`, each wrapped with a modulus of 1.0.
    """
    full_turn = 2.0 * np.pi
    u = tf.mod(S / full_turn - 0.25, 1.0)
    v = tf.mod(T / np.pi, 1.0)
    return u, v


# General rotation function given angles in (x, y, z) axes.
def add_timing_signal(x, min_timescale=1.0, max_timescale=1.0e4, name=None):
    """Add a bunch of sinusoids of different frequencies to a Tensor.

    See paper: Attention is all you need

    :param x: A tensor with shape [batch, length, channels]
    :param min_timescale: A floating point number
    :param max_timescale: A floating point number
    :param name: An optional string
    :returns: a Tensor the same shape as x.
    """
    with tf.name_scope(name, default_name="add_timing_signal", values=[x]):
        length = tf.shape(x)[1]
        channels = tf.shape(x)[2]
        position = tf.to_float(tf.range(length))
        num_timescales = channels // 2

        # Guard the denominator: with a single timescale it would be zero,
        # which turns the increment into inf and the signal into NaN.
        log_timescale_increment = (
            math.log(float(max_timescale) / float(min_timescale)) /
            tf.maximum(tf.to_float(num_timescales) - 1, 1.0)
        )
        inv_timescales = min_timescale * tf.exp(
            tf.to_float(tf.range(num_timescales)) * -log_timescale_increment
        )

        scaled_time = (tf.expand_dims(position, 1) *
                       tf.expand_dims(inv_timescales, 0))
        signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
        # An odd channel count leaves one channel without a sinusoid; pad it
        # with zeros.
        signal = tf.pad(signal, [[0, 0], [0, tf.mod(channels, 2)]])
        signal = tf.reshape(signal, [1, length, channels])
        return x + signal
def gather_forced_att_logits(encoder_input_symbols, encoder_decoder_vocab_map,
                             att_logit, batch_size, attn_length,
                             target_vocab_size):
    """Gathers attention weights as logits for forced attention.

    Each attention logit is assigned to the target-vocabulary label that
    `encoder_decoder_vocab_map` gives for its input symbol; logits are then
    summed over the attention positions. Entries mapped to -1 are excluded.

    Returns:
        Tensor reshaped to [-1, target_vocab_size].
    """
    flat_input_symbols = tf.reshape(encoder_input_symbols, [-1])
    flat_label_symbols = tf.gather(encoder_decoder_vocab_map,
                                   flat_input_symbols)
    flat_att_logits = tf.reshape(att_logit, [-1])

    # Recover (batch, position) from the flat index.
    num_entries = tf.shape(flat_label_symbols)[0]
    flat_range = tf.to_int64(tf.range(num_entries))
    batch_inds = tf.floordiv(flat_range, attn_length)
    position_inds = tf.mod(flat_range, attn_length)
    index_columns = tf.pack(
        [batch_inds, position_inds, tf.to_int64(flat_label_symbols)])
    attn_vocab_inds = tf.transpose(index_columns)

    # Exclude indexes of entries with flat_label_symbols[i] = -1.
    has_label = tf.not_equal(flat_label_symbols, -1)
    included_flat_indexes = tf.reshape(tf.where(has_label), [-1])
    included_attn_vocab_inds = tf.gather(attn_vocab_inds,
                                         included_flat_indexes)
    included_flat_att_logits = tf.gather(flat_att_logits,
                                         included_flat_indexes)

    sparse_shape = tf.to_int64(
        tf.pack([batch_size, attn_length, target_vocab_size]))
    sparse_label_logits = tf.SparseTensor(
        included_attn_vocab_inds, included_flat_att_logits, sparse_shape)

    # Sum over the attention-position axis.
    forced_att_logit_sum = tf.sparse_reduce_sum(sparse_label_logits, [1])
    return tf.reshape(forced_att_logit_sum, [-1, target_vocab_size])
def dk_mod(x, y):
    """Differentiable mod, Donald Knuth style.

    Args:
        x: first argument
        y: second argument

    Returns:
        mod between x and y, computed as ``x - y * floor(x / y)``
    """
    whole_quotient = tf.floor(x / y)
    return x - y * whole_quotient


# Register the gradient for the mod operation. tf.mod() does not have a gradient implemented.
def add_timing_signal_1d_given_position(x, position, min_timescale=1.0,
                                        max_timescale=1.0e4):
    """Adds sinusoids of diff frequencies to a Tensor, with timing position given.

    Args:
        x: a Tensor with shape [batch, length, channels]
        position: a Tensor with shape [batch, length]
        min_timescale: a float
        max_timescale: a float

    Returns:
        a Tensor the same shape as x.
    """
    channels = common_layers.shape_list(x)[2]
    num_timescales = channels // 2

    # Guard the denominator: with a single timescale it would be zero,
    # which turns the increment into inf and the signal into NaN.
    log_timescale_increment = (
        math.log(float(max_timescale) / float(min_timescale)) /
        tf.maximum(tf.to_float(num_timescales) - 1, 1.0))
    inv_timescales = min_timescale * tf.exp(
        tf.to_float(tf.range(num_timescales)) * -log_timescale_increment)

    scaled_time = (
        tf.expand_dims(tf.to_float(position), 2) *
        tf.expand_dims(tf.expand_dims(inv_timescales, 0), 0))
    signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=2)
    # An odd channel count leaves one channel without a sinusoid; pad it
    # with zeros.
    signal = tf.pad(signal, [[0, 0], [0, 0], [0, tf.mod(channels, 2)]])
    return x + signal
def attention_image_summary(attn, image_shapes=None):
    """Compute color image summary.

    Args:
        attn: a Tensor with shape
            [batch, num_heads, query_length, memory_length]
        image_shapes: optional tuple of integer scalars.
            If the query positions and memory positions represent the pixels
            of flattened images, then pass in their dimensions:
                (query_rows, query_cols, memory_rows, memory_cols).
            If the query positions and memory positions represent the
            pixels x channels of flattened images, then pass in their
            dimensions:
                (query_rows, query_cols, query_channels,
                 memory_rows, memory_cols, memory_channels).
    """
    num_heads = common_layers.shape_list(attn)[1]

    # [batch, query_length, memory_length, num_heads]
    image = tf.transpose(attn, [0, 2, 3, 1])
    image = tf.pow(image, 0.2)  # for high-dynamic-range

    # Each head will correspond to one of RGB.
    # Pad the heads to be a multiple of 3.
    head_padding = tf.mod(-num_heads, 3)
    image = tf.pad(image, [[0, 0], [0, 0], [0, 0], [0, head_padding]])
    image = split_last_dimension(image, 3)
    image = tf.reduce_max(image, 4)

    if image_shapes is not None:
        dims = list(image_shapes)
        if len(image_shapes) == 4:
            q_rows, q_cols, m_rows, m_cols = dims
            image = tf.reshape(image, [-1, q_rows, q_cols, m_rows, m_cols, 3])
            image = tf.transpose(image, [0, 1, 3, 2, 4, 5])
            image = tf.reshape(
                image, [-1, q_rows * m_rows, q_cols * m_cols, 3])
        else:
            assert len(image_shapes) == 6
            q_rows, q_cols, q_channels, m_rows, m_cols, m_channels = dims
            image = tf.reshape(
                image,
                [-1, q_rows, q_cols, q_channels,
                 m_rows, m_cols, m_channels, 3])
            image = tf.transpose(image, [0, 1, 4, 3, 2, 5, 6, 7])
            image = tf.reshape(
                image,
                [-1, q_rows * m_rows * q_channels,
                 q_cols * m_cols * m_channels, 3])

    tf.summary.image("attention", image, max_outputs=1)
def fast_dlstm(s_t, state_in):
    """Run a dilated LSTM over `s_t` with `tf.scan`.

    The hidden size (256) and the number of chunks (8) are fixed. A step
    counter cycling through the chunks is carried along the scan and selects,
    via a one-hot switch, how each chunk of the state is passed through
    `conditional_backprop`.

    Args:
        s_t: Input tensor; transposed with [1, 0, 2] before scanning
            (assumes [batch, time, features] — TODO confirm).
        state_in: Pair used to build the initial `LSTMStateTuple`.

    Returns:
        `(out_states, cell_states, state_out)` sliced from the scanned states.
    """

    def dilate_one_time_step(one_h, switcher, num_chunks):
        h_size = 256
        chunk_step_size = h_size // num_chunks
        chunk_starts = range(0, h_size, chunk_step_size)
        h_slices = [
            conditional_backprop(
                switcher[switch_step],
                one_h[h_step: h_step + chunk_step_size])
            for switch_step, h_step in zip(range(num_chunks), chunk_starts)
        ]
        stacked = tf.stack(h_slices)
        return tf.reshape(stacked, [-1, 256])

    lstm = rnn.LSTMCell(256, state_is_tuple=True)
    chunks = 8

    def dlstm_scan_fn(previous_output, current_input):
        _, previous_state, step = previous_output
        out, state_out = lstm(current_input, previous_state)
        # One-hot switch marking the chunk that belongs to this step.
        basis_i = tf.one_hot(step, depth=chunks)
        state_out_dilated = dilate_one_time_step(
            tf.squeeze(state_out[0]), basis_i, chunks)
        state_out = rnn.LSTMStateTuple(state_out_dilated, state_out[1])
        # Advance the counter, wrapping around after the last chunk.
        next_step = tf.mod(step + tf.constant(1), chunks)
        return out, state_out, next_step

    time_major_input = tf.transpose(s_t, [1, 0, 2])
    initial_carry = (
        state_in[1], rnn.LSTMStateTuple(*state_in), tf.constant(0))
    rnn_outputs, final_states, mod_idxs = tf.scan(
        dlstm_scan_fn, time_major_input, initializer=initial_carry)

    state_out = [final_states[0][-1, 0, :], final_states[1][-1, 0, :]]
    cell_states = final_states[0][:, 0, :]
    out_states = final_states[1][:, 0, :]
    return out_states, cell_states, state_out
def _filter_function(n_gpus):
    """Build a predicate keeping pairs whose sizes divide evenly by n_gpus.

    Args:
        n_gpus: Number the first dimension of both tensors must be
            divisible by.

    Returns:
        A function ``f(x, y)`` returning a boolean tensor.
    """

    def _first_dim_divisible(tensor):
        return tf.equal(tf.mod(tf.shape(tensor)[0], n_gpus), 0)

    def f(x, y):
        x_ok = _first_dim_divisible(x)
        y_ok = _first_dim_divisible(y)
        return tf.logical_and(x_ok, y_ok)

    return f
def test_Mod(self):
    """Check `tf.mod` on random operands of shape (4, 3)."""
    operands = self.random((4, 3), (4, 3))
    result = tf.mod(*operands)
    self.check(result)
def __unpool(self, updates, mask, ksize=[1, 2, 2, 1], output_shape=None,
             feature_count=None, name=''):
    """Scatter `updates` into a larger tensor at the positions in `mask`.

    Args:
        updates: Values to scatter (4-D; indexed as batch, height, width,
            features below).
        mask: Flat positions for the values, cast to int32
            (presumably the argmax output of a max-pool — verify against
            caller).
        ksize: Scale factors; entries 1 and 2 multiply height and width when
            `output_shape` is not given.
        output_shape: Optional shape of the result.
        feature_count: Optional size of the last output dimension; defaults
            to that of `updates`.
        name: Variable scope name.

    Returns:
        The tensor produced by `tf.scatter_nd`.
    """
    with tf.variable_scope(name):
        mask = tf.cast(mask, tf.int32)
        input_shape = tf.shape(updates, out_type=tf.int32)

        # Work out the output shape when it is not supplied.
        if feature_count is None:
            feature_count = input_shape[3]
        if output_shape is None:
            out_height = input_shape[1] * ksize[1]
            out_width = input_shape[2] * ksize[2]
            output_shape = (1, out_height, out_width, feature_count)
        output_shape = tf.cast(output_shape, tf.int32)

        # Indices for batch, height, width and feature maps.
        one_like_mask = tf.cast(tf.ones_like(mask, dtype=tf.int16), tf.int32)
        batch_shape = tf.concat([[input_shape[0]], [1], [1], [1]], 0)
        batch_range = tf.reshape(
            tf.range(output_shape[0], dtype=tf.int32), shape=batch_shape)
        b = one_like_mask * batch_range
        y = tf.floordiv(mask, output_shape[2] * output_shape[3])
        # Equivalent form:
        # mask % (output_shape[2] * output_shape[3]) // output_shape[3]
        x = tf.mod(tf.floordiv(mask, output_shape[3]), output_shape[2])
        feature_range = tf.range(output_shape[3], dtype=tf.int32)
        f = one_like_mask * feature_range

        # Transpose indices & reshape update values to one dimension.
        updates_size = tf.size(updates)
        stacked_indices = tf.reshape(
            tf.stack([b, y, x, f]), [4, updates_size])
        indices = tf.transpose(stacked_indices)
        values = tf.reshape(updates, [updates_size])
        return tf.scatter_nd(indices, values, output_shape)
def phi(times, s, tau):
    """Return ``((times - s) mod tau) / tau``.

    Args:
        times: Time values.
        s: Offset subtracted from `times`.
        tau: Period used as modulus and as divisor.
    """
    # Earlier variant, kept for reference:
    # return tf.div(tf.mod(tf.mod(times - s, tau) + tau, tau), tau)
    wrapped = tf.mod(times - s, tau)
    return tf.div(wrapped, tau)
def __init__(self, num_buckets, num_units_out, initializer=None, name=None,
             trainable=True, mod_inputs=True):
    """Initializes the layer.

    Args:
        num_buckets: How many buckets the embedding has.
        num_units_out: The number of output units in the layer.
        initializer: the initializer for the weights. Defaults to uniform
            unit scaling. The initializer can also be a Tensor or numpy
            array, in which case the weights are initialized to this value
            and shape. Note that in this case the weights will still be
            trainable unless you also pass `trainable=False`.
        name: An optional string name. Defaults to
            `Embedding_%d_%d % (num_buckets, num_units_out)`. Used to name
            the variable scope where the variables for the layer live.
        trainable: Whether or not to make the weights trainable.
        mod_inputs: Whether or not to mod the input by the number of
            buckets.

    Raises:
        ValueError: If the shape of `weights` is not
            `(num_buckets, num_units_out)`.
    """
    # Kept as the very first statement, before any other local is bound.
    self.set_constructor_args(
        'td.Embedding', *get_local_arguments(Embedding.__init__, True))

    self._weights_shape = (num_buckets, num_units_out)
    if name is None:
        name = 'Embedding_%d_%d' % self._weights_shape

    if initializer is None:
        initializer = tf.uniform_unit_scaling_initializer(1.0)
    elif isinstance(initializer, np.ndarray):
        initializer = tf.convert_to_tensor(initializer)

    if isinstance(initializer, tf.Tensor):
        # A tensor initializer carries the shape itself.
        initializer.set_shape(self._weights_shape)
        self._weights_shape = None  # otherwise get_variable barfs

    self._initializer = initializer
    self._num_buckets = num_buckets
    self._num_units_out = num_units_out
    self._trainable = trainable
    self._mod_inputs = bool(mod_inputs)

    output_type = tdt.TensorType([num_units_out])
    super(Embedding, self).__init__(
        output_type=output_type, name_or_scope=name)
def time_error_loss(model_h, model_m, label_h, label_m):
    """Compute the time error (in minutes) of the current model.

    Total time difference is expressed in minutes:
        1/N sum( delta(PP, TT))
    where PP and TT are the predicted and true times, expressed in number
    of minutes. The delta operator takes care of 'wraparound', so that the
    difference between 9'58 and 10'02 is 4 minutes.

    We also return the individual errors for hours and minutes. Just for
    fun.

    :param model_h: hour classifier output; argmax over axis 1 is taken
    :param model_m: minute classifier output; argmax over axis 1 is taken
    :param label_h: true hours
    :param label_m: true minutes
    :return: losses for (combined, hours, minutes)
    """

    def positive_mod(val, div):
        # TF's mod operator returns negative values:
        #   -7 mod 3 = -1 (we want 2)
        # so return the positive result: x = ((v % div) + div) % div
        return tf.mod(tf.add(tf.mod(val, div), div), div)

    def wrapped_error(delta, period):
        # Handle wrapping around by comparing the mod of the positive and
        # the negative difference and keeping the smaller one.
        forward = positive_mod(delta, period)
        backward = positive_mod(-1 * delta, period)
        return tf.minimum(forward, backward)

    # Take classifier argmax for most likely hour/minute, and cast
    # everything to float32.
    hours_predicted = tf.cast(tf.argmax(model_h, 1), tf.float32)
    hours_true = tf.cast(label_h, tf.float32)
    minutes_predicted = tf.cast(tf.argmax(model_m, 1), tf.float32)
    minutes_true = tf.cast(label_m, tf.float32)

    total_predicted = tf.add(60 * hours_predicted, minutes_predicted)
    total_true = tf.add(60 * hours_true, minutes_true)
    delta_time = tf.sub(total_predicted, total_true)
    delta_hours = tf.sub(hours_predicted, hours_true)
    delta_minutes = tf.sub(minutes_predicted, minutes_true)

    time_error_c = wrapped_error(delta_time, 720)
    time_error_h = wrapped_error(delta_hours, 12.0)
    time_error_m = wrapped_error(delta_minutes, 60.0)

    avg_error_c = tf.reduce_mean(time_error_c)
    avg_error_h = tf.reduce_mean(time_error_h)
    avg_error_m = tf.reduce_mean(time_error_m)
    return avg_error_c, avg_error_h, avg_error_m
def tfidf(x, vocab_size, smooth=True, name=None):
    """Maps the terms in x to their term frequency * inverse document frequency.

    The inverse document frequency of a term is calculated as
    1 + log((corpus size + 1) / (document frequency of term + 1)) by default.

    Example usage:
        example strings [["I", "like", "pie", "pie", "pie"],
                         ["yum", "yum", "pie"]]
        in: SparseTensor(indices=[[0, 0], [0, 1], [0, 2], [0, 3], [0, 4],
                                  [1, 0], [1, 1], [1, 2]],
                         values=[1, 2, 0, 0, 0, 3, 3, 0])
        out: SparseTensor(indices=[[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]],
                          values=[1, 2, 0, 3, 0])
             SparseTensor(indices=[[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]],
                          values=[(1/5)*(log(3/2)+1), (1/5)*(log(3/2)+1),
                                  (1/5), (1/3), (2/3)*(log(3/2)+1)])
        NOTE that the first doc's duplicate "pie" strings have been combined
        to one output, as have the second doc's duplicate "yum" strings.

    Args:
        x: A `SparseTensor` representing int64 values (most likely that are
            the result of calling string_to_int on a tokenized string).
        vocab_size: An int - the count of vocab used to turn the string into
            int64s including any OOV buckets.
        smooth: A bool indicating if the inverse document frequency should
            be smoothed. If True, which is the default, then the idf is
            calculated as
            1 + log((corpus size + 1) / (document frequency of term + 1)).
            Otherwise, the idf is
            1 + log((corpus size) / (document frequency of term)), which
            could result in a division by zero error.
        name: (Optional) A name for this operation.

    Returns:
        Two `SparseTensor`s with indices
        [index_in_batch, index_in_bag_of_words]. The first has values
        vocab_index, which is taken from input `x`. The second has values
        tfidf_weight.
    """
    with tf.name_scope(name, 'tfidf'):
        # Enforce that the vocab ids are positive by taking them modulo the
        # vocabulary size.
        cleaned_input = tf.SparseTensor(
            indices=x.indices,
            values=tf.mod(x.values, vocab_size),
            dense_shape=x.dense_shape)

        term_frequencies = _to_term_frequency(cleaned_input, vocab_size)
        count_docs_with_term_column = _count_docs_with_term(term_frequencies)

        # Expand dims to get around the min_tensor_rank checks.
        batch_size = tf.shape(cleaned_input)[0]
        sizes = tf.expand_dims(batch_size, 0)

        docs_with_term = analyzers.sum(
            count_docs_with_term_column, reduce_instance_dims=False)
        corpus_size = analyzers.sum(sizes)

        # [batch, vocab] - tfidf
        tfidfs = _to_tfidf(
            term_frequencies, docs_with_term, corpus_size, smooth)
        return _split_tfidfs_to_outputs(tfidfs)
def sample(self, logits, log_probs, prev_finished, time):
    """Sample based on logits.

    :param logits: [_batch_size * beam_size, vocab.vocab_size]
    :param log_probs: [_batch_size * beam_size,], log_probs of current
        decoded sequence.
    :param prev_finished: [_batch_size * beam_size,], indicate each beam is
        finished or not.
    :param time: current decoding step; at step 0 only the first
        `vocab_size` candidates of every batch row are considered.
    :return: (sample_ids, beam_ids, next_log_probs)
    """
    # [_batch_size * beam_size, target_vocab_size]
    probs = tf.nn.log_softmax(logits)

    # Per-token mask applied to finished beams: -1 at the EOS position and
    # float32 max everywhere else.
    mask_values = [tf.float32.max] * self.vocab_size
    mask_values[self.eos_id] = -1.
    mask_tensor = tf.expand_dims(
        tf.constant(mask_values, dtype=tf.float32), 0)
    finished_column = tf.expand_dims(tf.to_float(prev_finished), 1)
    mask_probs = (finished_column * mask_tensor + 1.) * probs

    # [_batch_size * beam_size, target_vocab_size]
    log_probs = mask_probs + tf.expand_dims(log_probs, 1)
    # Flatten, then regroup so that every row holds all beams of one batch
    # entry.
    flattened = tf.reshape(log_probs, [-1])
    log_probs = tf.reshape(flattened, [self._batch_size, -1])

    log_probs_flat = tf.cond(
        tf.convert_to_tensor(time) > 0,
        lambda: log_probs,
        lambda: tf.slice(log_probs, [0, 0], [-1, self.vocab_size]))

    next_log_probs, word_ids = tf.nn.top_k(log_probs_flat, k=self.beam_size)
    next_log_probs = tf.reshape(next_log_probs, [-1])
    word_ids = tf.reshape(word_ids, [-1])

    # The flat candidate index encodes both the token and the source beam.
    sample_ids = tf.mod(word_ids, self.vocab_size)

    # Beam ids should be adjusted according to _batch_size.
    batch_rows = tf.tile([tf.range(self._batch_size)], [self.beam_size, 1])
    beam_add = batch_rows * self.beam_size
    beam_offsets = tf.reshape(tf.transpose(beam_add), [-1])
    beam_ids = tf.div(word_ids, self.vocab_size) + beam_offsets

    return sample_ids, beam_ids, next_log_probs
def extract_features(inputs, k_idxs, map_h):
    """Extract top k fine features.

    NOTE. do not use tf.image.extract_glimpse ops to get input patches
    (cf. https://github.com/tensorflow/tensorflow/issues/2134)

    Args:
        inputs: Tensor the patches are cut from.
        k_idxs: Flat feature-map indices; column `i` selects the i-th patch.
        map_h: Divisor used to split a flat index into (i, j).

    Returns:
        `(k_features, k_src_idxs, k_patches)`: the fine features split per
        patch, the (i, j) source indices and the extracted patches.
    """

    def _origin(idx):
        # NOTE: the origins are starting points, not centers!
        return 2*(2*idx+1)+3 - 5 + 2

    def _extract_feature(inputs, idxs):
        idxs = tf.expand_dims(idxs, 1)
        idx_i = tf.floordiv(idxs, map_h)
        idx_j = tf.mod(idxs, map_h)
        origin_i = _origin(idx_i)
        origin_j = _origin(idx_j)
        origin_centers = tf.concat(1, [origin_i, origin_j])
        # NOTE: size also depends on the architecture
        # patches = tf.image.extract_glimpse(inputs, size=[14,14], offsets=origin_centers,
        #                                    centered=False, normalized=False)
        patches = extract_patches(inputs, size=[14, 14], offsets=origin_centers)
        # fine_features = fine_layers(patches)
        fine_features = []
        src_idxs = tf.concat(1, [idx_i, idx_j])
        return fine_features, src_idxs, patches

    k_features = []
    k_src_idxs = []
    k_patches = []
    for patch_index in xrange(N_PATCHES):
        fine_feature, src_idx, patches = _extract_feature(
            inputs, k_idxs[:, patch_index])
        k_features.append(fine_feature)
        k_src_idxs.append(src_idx)
        k_patches.append(patches)

    # Run the fine layers once on all patches, then split per patch again.
    concat_patches = tf.concat(0, k_patches)
    concat_k_features = fine_layers(concat_patches)
    k_features = tf.split(0, N_PATCHES, concat_k_features)
    return k_features, k_src_idxs, k_patches
def get_timing_signal_1d(length, channels, min_timescale=1.0,
                         max_timescale=1.0e4):
    """Gets a bunch of sinusoids of different frequencies.

    Each channel of the input Tensor is incremented by a sinusoid of a
    different frequency and phase.

    This allows attention to learn to use absolute and relative positions.
    Timing signals should be added to some precursors of both the query and
    the memory inputs to attention.

    The use of relative position is possible because sin(x+y) and cos(x+y)
    can be expressed in terms of y, sin(x) and cos(x).

    In particular, we use a geometric sequence of timescales starting with
    min_timescale and ending with max_timescale. The number of different
    timescales is equal to channels / 2. For each timescale, we generate the
    two sinusoidal signals sin(timestep/timescale) and
    cos(timestep/timescale). All of these sinusoids are concatenated in the
    channels dimension.

    Args:
        length: scalar, length of timing signal sequence.
        channels: scalar, size of timing embeddings to create. The number of
            different timescales is equal to channels / 2.
        min_timescale: a float
        max_timescale: a float

    Returns:
        a Tensor of timing signals [1, length, channels]
    """
    position = tf.to_float(tf.range(length))
    num_timescales = channels // 2

    # Guard the denominator: with a single timescale it would be zero,
    # which turns the increment into inf and the signal into NaN.
    log_timescale_increment = (
        math.log(float(max_timescale) / float(min_timescale)) /
        tf.maximum(tf.to_float(num_timescales) - 1, 1.0))
    inv_timescales = min_timescale * tf.exp(
        tf.to_float(tf.range(num_timescales)) * -log_timescale_increment)

    scaled_time = (tf.expand_dims(position, 1) *
                   tf.expand_dims(inv_timescales, 0))
    signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
    # An odd channel count leaves one channel without a sinusoid; pad it
    # with zeros.
    signal = tf.pad(signal, [[0, 0], [0, tf.mod(channels, 2)]])
    signal = tf.reshape(signal, [1, length, channels])
    return signal
def fast_dlstm(self, s_t, state_in, lstm, chunks, h_size):
    """Run a dilated LSTM over `s_t` with `tf.scan`.

    The state of width `h_size` is divided into `chunks` equal slices. At
    every step only the slice selected by a cycling step counter is fed
    through `lstm`; the result is merged back into the full state.

    Args:
        s_t: Input tensor; transposed with [1, 0, 2] before scanning
            (assumes [batch, time, features] — TODO confirm).
        state_in: Initial state with `c` and `h` parts.
        lstm: Cell called as ``lstm(input, sub_state)``.
        chunks: Number of state slices.
        h_size: Total width of the state.

    Returns:
        `(rnn_outputs, final_states)` as produced by the scan.
    """
    chunk_step_size = h_size // chunks

    def get_sub_state(state, state_step):
        # Cut the slice for the current step out of both state parts.
        c, h = state
        start = state_step * chunk_step_size
        stop = start + chunk_step_size
        sub_state_h = h[:, start:stop]
        sub_state_c = c[:, start:stop]
        sub_state_h.set_shape([1, chunk_step_size])
        sub_state_c.set_shape([1, chunk_step_size])
        return tf.contrib.rnn.LSTMStateTuple(sub_state_c, sub_state_h)

    def build_new_state(new_sub_state, previous_state, state_step):
        # Rebuild the full state slice by slice; `conditional_sub_state`
        # decides per slice between the new sub-state and the previous one.
        c_previous_state, h_previous_state = previous_state
        c_new_sub_state, h_new_sub_state = new_sub_state
        one_hot_state_step = tf.one_hot(state_step, depth=chunks)

        h_slices = []
        c_slices = []
        slice_starts = range(0, h_size, chunk_step_size)
        for switch_step, h_step in zip(range(chunks), slice_starts):
            h_stop = h_step + chunk_step_size
            is_this_current_step = one_hot_state_step[switch_step]

            h_s = self.conditional_sub_state(
                is_this_current_step, h_new_sub_state,
                h_previous_state[:, h_step:h_stop])
            h_s.set_shape([1, chunk_step_size])

            c_s = self.conditional_sub_state(
                is_this_current_step, c_new_sub_state,
                c_previous_state[:, h_step:h_stop])
            c_s.set_shape([1, chunk_step_size])

            h_slices.append(h_s)
            c_slices.append(c_s)

        h_new_state = tf.concat(h_slices, axis=1)
        c_new_state = tf.concat(c_slices, axis=1)
        return tf.contrib.rnn.LSTMStateTuple(c_new_state, h_new_state)

    def dlstm_scan_fn(previous_output, current_input):
        # out, state_out = lstm(current_input, previous_output[1])
        previous_state = previous_output[1]
        state_step = previous_output[2]
        sub_state = get_sub_state(previous_state, state_step)
        out, sub_state_out = lstm(current_input, sub_state)
        state_out = build_new_state(sub_state_out, previous_state, state_step)
        # Advance the counter, wrapping around after the last slice.
        new_state_step = tf.mod(state_step + tf.constant(1), chunks)
        return out, state_out, new_state_step

    first_input = state_in.c[:, 0: chunk_step_size]
    time_major_input = tf.transpose(s_t, [1, 0, 2])
    initial_carry = (first_input, state_in, tf.constant(0))
    rnn_outputs, final_states, mod_idxs = tf.scan(
        dlstm_scan_fn, time_major_input,
        initializer=initial_carry, name="dlstm")
    return rnn_outputs, final_states
def test_copy_from_works_with_control_flow(self):
    """Check that `copy_from` only runs when the `tf.cond` predicate holds."""

    def graph_fn1(mode, x):
        return plx.layers.Dense(units=1)(x)

    def graph_fn2(mode, x):
        return plx.layers.Dense(units=1, trainable=False)(x)

    l1 = plx.FunctionModule(mode=plx.Modes.TRAIN, build_fn=graph_fn1, name='fn1')
    l2 = plx.FunctionModule(mode=plx.Modes.TRAIN, build_fn=graph_fn2, name='fn2')

    x = tf.placeholder(dtype=tf.float32, shape=[1, 1])
    lx1 = l1(x)
    lx2 = l2(x)

    init_all_op = tf.global_variables_initializer()

    def copy():
        # Note that the copy op has to be created inside a function,
        # otherwise it would always be evaluated no matter what the
        # condition is.
        return l2.copy_from(l1, tf.GraphKeys.GLOBAL_VARIABLES)

    a = tf.placeholder(tf.int32, ())
    is_multiple_of_five = tf.equal(tf.mod(a, 5), 0)
    cond = tf.cond(is_multiple_of_five, copy, lambda: tf.no_op())
    assign_op = l1.get_variables()[0].assign_add([[1]])
    group_op = tf.group(*[assign_op, cond])

    feed_x = {x: [[1]]}

    with self.test_session() as sess:
        sess.run(init_all_op)

        # Check that initially they have different values.
        lx1_results = lx1.eval(feed_x)
        lx2_results = lx2.eval(feed_x)
        assert lx1_results[0] != lx2_results[0]

        # Set condition to True: 10 % 5 == 0.
        sess.run(cond, feed_dict={a: 10})
        lx1_results = lx1.eval(feed_x)
        lx2_results = lx2.eval(feed_x)
        assert lx1_results[0] == lx2_results[0]

        # Assign and set condition to False: 2 % 5 != 0.
        sess.run(group_op, feed_dict={a: 2})
        lx1_results = lx1.eval(feed_x)
        lx2_results = lx2.eval(feed_x)
        assert lx1_results[0] != lx2_results[0]