We extracted the following 32 code examples from open-source Python projects to illustrate how to use tensorflow.scatter_update().
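Before the project examples, here is a minimal sketch of the op itself (written for this page, not taken from any project below; TF 1.x graph mode assumed). tf.scatter_update(ref, indices, updates) overwrites the rows of the variable ref selected by indices with updates and returns the mutated variable, so the result can be chained or grouped with other ops.

import tensorflow as tf  # TF 1.x graph mode

# A 4-row variable; rows 1 and 3 are overwritten in place.
v = tf.Variable([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])
update_op = tf.scatter_update(v, [1, 3], [[0., 0.], [9., 9.]])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(update_op))
    # [[1. 1.]
    #  [0. 0.]
    #  [3. 3.]
    #  [9. 9.]]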
def _generate_labels(self, overlaps):
    labels = tf.Variable(tf.ones(shape=(tf.shape(overlaps)[0],), dtype=tf.float32) * -1,
                         trainable=False, validate_shape=False)
    gt_max_overlaps = tf.arg_max(overlaps, dimension=0)
    anchor_max_overlaps = tf.arg_max(overlaps, dimension=1)
    mask = tf.one_hot(anchor_max_overlaps, tf.shape(overlaps)[1], on_value=True, off_value=False)
    max_overlaps = tf.boolean_mask(overlaps, mask)
    if self._debug:
        max_overlaps = tf.Print(max_overlaps, [max_overlaps])
    labels = tf.scatter_update(labels, gt_max_overlaps, tf.ones((tf.shape(gt_max_overlaps)[0],)))
    # TODO: extract config object
    over_threshold_mask = tf.reshape(tf.where(max_overlaps > 0.5), (-1,))
    if self._debug:
        over_threshold_mask = tf.Print(over_threshold_mask, [over_threshold_mask],
                                       message='over threshold index : ')
    labels = tf.scatter_update(labels, over_threshold_mask, tf.ones((tf.shape(over_threshold_mask)[0],)))
    # TODO: support clobber positive in the origin implement
    below_threshold_mask = tf.reshape(tf.where(max_overlaps < 0.3), (-1,))
    if self._debug:
        below_threshold_mask = tf.Print(below_threshold_mask, [below_threshold_mask],
                                        message='below threshold index : ')
    labels = tf.scatter_update(labels, below_threshold_mask, tf.zeros((tf.shape(below_threshold_mask)[0],)))
    return labels
def test_scatter_nd_2():
    gt_bboxes = tf.constant([[0, 0, 1, 2], [1, 0, 3, 4], [100, 100, 105, 102.5]])
    gt_labels = tf.constant([1, 2, 6])
    gt_anchors_labels = tf.Variable([100, 100, 100, 100], trainable=False,
                                    collections=[ops.GraphKeys.LOCAL_VARIABLES])
    gt_anchors_bboxes = tf.Variable([[100, 100, 105, 105], [2, 1, 3, 3.5], [0, 0, 10, 10], [0.5, 0.5, 0.8, 1.5]],
                                    trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES],
                                    dtype=tf.float32)
    max_inds = [1, 0, 3]
    gt_anchors_labels = tf.scatter_update(gt_anchors_labels, max_inds, gt_labels)
    gt_anchors_bboxes = tf.scatter_update(gt_anchors_bboxes, max_inds, gt_bboxes)
    return gt_anchors_labels, gt_anchors_bboxes
def make_update_op(self, upd_idxs, upd_keys, upd_vals,
                   batch_size, use_recent_idx, intended_output):
    """Function that creates all the update ops."""
    mem_age_incr = self.mem_age.assign_add(tf.ones([self.memory_size], dtype=tf.float32))
    with tf.control_dependencies([mem_age_incr]):
        mem_age_upd = tf.scatter_update(
            self.mem_age, upd_idxs, tf.zeros([batch_size], dtype=tf.float32))

    mem_key_upd = tf.scatter_update(
        self.mem_keys, upd_idxs, upd_keys)
    mem_val_upd = tf.scatter_update(
        self.mem_vals, upd_idxs, upd_vals)

    if use_recent_idx:
        recent_idx_upd = tf.scatter_update(
            self.recent_idx, intended_output, upd_idxs)
    else:
        recent_idx_upd = tf.group()

    return tf.group(mem_age_upd, mem_key_upd, mem_val_upd, recent_idx_upd)
def insert(self, ids, scores):
    """Insert the ids and scores into the TopN."""
    with tf.control_dependencies(self.last_ops):
        scatter_op = tf.scatter_update(self.id_to_score, ids, scores)
        larger_scores = tf.greater(scores, self.sl_scores[0])

        def shortlist_insert():
            larger_ids = tf.boolean_mask(tf.to_int64(ids), larger_scores)
            larger_score_values = tf.boolean_mask(scores, larger_scores)
            shortlist_ids, new_ids, new_scores = self.ops.top_n_insert(
                self.sl_ids, self.sl_scores, larger_ids, larger_score_values)
            u1 = tf.scatter_update(self.sl_ids, shortlist_ids, new_ids)
            u2 = tf.scatter_update(self.sl_scores, shortlist_ids, new_scores)
            return tf.group(u1, u2)

        # We only need to insert into the shortlist if there are any
        # scores larger than the threshold.
        cond_op = tf.cond(
            tf.reduce_any(larger_scores), shortlist_insert, tf.no_op)
        with tf.control_dependencies([cond_op]):
            self.last_ops = [scatter_op, cond_op]
def remove(self, ids):
    """Remove the ids (and their associated scores) from the TopN."""
    with tf.control_dependencies(self.last_ops):
        scatter_op = tf.scatter_update(
            self.id_to_score, ids,
            tf.ones_like(ids, dtype=tf.float32) * tf.float32.min)
        # We assume that removed ids are almost always in the shortlist,
        # so it makes no sense to hide the Op behind a tf.cond
        shortlist_ids_to_remove, new_length = self.ops.top_n_remove(self.sl_ids, ids)
        u1 = tf.scatter_update(
            self.sl_ids,
            tf.concat(0, [[0], shortlist_ids_to_remove]),
            tf.concat(0, [new_length, tf.ones_like(shortlist_ids_to_remove) * -1]))
        u2 = tf.scatter_update(
            self.sl_scores,
            shortlist_ids_to_remove,
            tf.float32.min * tf.ones_like(shortlist_ids_to_remove, dtype=tf.float32))
        self.last_ops = [scatter_op, u1, u2]
def scatter_update(cls, factor, indices, values, sharding_func):
    """Helper function for doing sharded scatter update."""
    assert isinstance(factor, list)
    if len(factor) == 1:
        with ops.colocate_with(factor[0]):
            # TODO(agarwal): assign instead of scatter update for full batch update.
            return tf.scatter_update(factor[0], indices, values).op
    else:
        num_shards = len(factor)
        assignments, new_ids = sharding_func(indices)
        assert assignments is not None
        assignments = tf.cast(assignments, tf.int32)
        sharded_ids = tf.dynamic_partition(new_ids, assignments, num_shards)
        sharded_values = tf.dynamic_partition(values, assignments, num_shards)
        updates = []
        for i in xrange(num_shards):
            updates.append(
                tf.scatter_update(factor[i], sharded_ids[i], sharded_values[i]))
        return tf.group(*updates)
def _sparse_moving_average(self, x_tm1, idxs, a_t_, name, beta=.9):
    """"""
    b_tm1 = self.get_accumulator(x_tm1, '%s' % name)
    b_tm1_ = tf.gather(b_tm1, idxs)
    shape = self.get_variable_shape(x_tm1)
    tm1 = self.get_accumulator(x_tm1, '%s/tm1' % name, shape=[shape[0]] + [1] * (len(shape) - 1))
    tm1_ = tf.gather(tm1, idxs)
    t = tf.scatter_add(tm1, idxs, tf.ones_like(tm1_))
    t_ = tf.gather(t, idxs)
    if beta < 1:
        beta_t = tf.convert_to_tensor(beta, name='%s/decay' % name)
        beta_t_ = beta_t * (1 - beta_t**tm1_) / (1 - beta_t**t_)
    else:
        beta_t_ = tm1_ / t_
    b_t = tf.scatter_update(b_tm1, idxs, beta_t_ * b_tm1_)
    b_t = tf.scatter_add(b_t, idxs, (1 - beta_t_) * a_t_)
    return b_t, t

#=============================================================
def curvature_range(self):
    # set up the curvature window
    self._curv_win = tf.Variable(np.zeros([self._curv_win_width, ]), dtype=tf.float32,
                                 name="curv_win", trainable=False)
    self._curv_win = tf.scatter_update(self._curv_win,
                                       self._global_step % self._curv_win_width,
                                       self._grad_norm_squared)
    # note here the iterations start from iteration 0
    valid_window = tf.slice(self._curv_win, tf.constant([0, ]),
                            tf.expand_dims(tf.minimum(tf.constant(self._curv_win_width),
                                                      self._global_step + 1), dim=0))
    self._h_min_t = tf.reduce_min(valid_window)
    self._h_max_t = tf.reduce_max(valid_window)

    curv_range_ops = []
    with tf.control_dependencies([self._h_min_t, self._h_max_t]):
        avg_op = self._moving_averager.apply([self._h_min_t, self._h_max_t])
        with tf.control_dependencies([avg_op]):
            self._h_min = tf.identity(self._moving_averager.average(self._h_min_t))
            self._h_max = tf.identity(self._moving_averager.average(self._h_max_t))
    curv_range_ops.append(avg_op)
    return curv_range_ops
def get_mu_tensor(self):
    const_fact = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var
    coef = tf.Variable([-1.0, 3.0, 0.0, 1.0], dtype=tf.float32, name="cubic_solver_coef")
    coef = tf.scatter_update(coef, tf.constant(2), -(3 + const_fact))
    roots = tf.py_func(np.roots, [coef], Tout=tf.complex64, stateful=False)

    # filter out the correct root
    root_idx = tf.logical_and(
        tf.logical_and(tf.greater(tf.real(roots), tf.constant(0.0)),
                       tf.less(tf.real(roots), tf.constant(1.0))),
        tf.less(tf.abs(tf.imag(roots)), 1e-5))
    # in case there are two duplicated roots satisfying the above condition
    root = tf.reshape(tf.gather(tf.gather(roots, tf.where(root_idx)), tf.constant(0)), shape=[])
    tf.assert_equal(tf.size(root), tf.constant(1))

    dr = self._h_max / self._h_min
    mu = tf.maximum(tf.real(root)**2, ((tf.sqrt(dr) - 1) / (tf.sqrt(dr) + 1))**2)
    return mu
def _thin_stack_update_gradient(op, stack_grad, *rest):
    stack = op.inputs[2]
    batch_size = op.inputs[4].get_shape().as_list()[0]
    t = op.get_attr("timestep")

    # We usually slice off the head of the stack output in feedforward and
    # send it off to downstream computation. The Slice feedforward op will
    # generate a sparse gradient in the backward pass. Nix this sparsity
    # at the very start.
    if isinstance(stack_grad, ops.IndexedSlices):
        # Trick: re-use our stack structure to store new gradients.
        # Recover the original stack variable from the lookup/update chain.
        stack = _fetch_stack(stack)

        stack = tf.assign(stack, tf.zeros_like(stack))
        stack = tf.scatter_update(stack, stack_grad.indices, stack_grad.values)
        stack_grad = stack

    with tf.control_dependencies([stack_grad]):
        input_grad = tf.slice(stack_grad, [t * batch_size, 0], [batch_size, -1])

    return input_grad, None, stack_grad, None, None, None
def test_scatter_update():
    a = tf.Variable(initial_value=[2, 5, -4, 0])
    b = tf.scatter_update(a, [2, 2], [9, 100])
    return b
def _curvature_range(self):
    """Curvature range.

    Returns:
      h_max_t, h_min_t ops
    """
    self._curv_win = tf.get_variable("curv_win",
                                     dtype=tf.float32,
                                     trainable=False,
                                     shape=[self.curvature_window_width, ],
                                     initializer=tf.zeros_initializer)
    # We use log smoothing for curvature range
    self._curv_win = tf.scatter_update(self._curv_win,
                                       self._step % self.curvature_window_width,
                                       tf.log(self._grad_norm_squared))
    # Note here the iterations start from iteration 0
    valid_window = tf.slice(self._curv_win,
                            tf.constant([0, ]),
                            tf.expand_dims(
                                tf.minimum(
                                    tf.constant(self.curvature_window_width),
                                    self._step + 1),
                                dim=0))
    self._h_min_t = tf.reduce_min(valid_window)
    self._h_max_t = tf.reduce_max(valid_window)

    curv_range_ops = []
    with tf.control_dependencies([self._h_min_t, self._h_max_t]):
        avg_op = self._moving_averager.apply([self._h_min_t, self._h_max_t])
        with tf.control_dependencies([avg_op]):
            self._h_min = tf.exp(
                tf.identity(self._moving_averager.average(self._h_min_t)))
            self._h_max = tf.exp(
                tf.identity(self._moving_averager.average(self._h_max_t)))
            if self._sparsity_debias:
                self._h_min *= self._sparsity_avg
                self._h_max *= self._sparsity_avg
    curv_range_ops.append(avg_op)
    return curv_range_ops  # h_max_t, h_min_t
def _apply_sparse(self, grad, var):
    beta1_power = tf.cast(self._beta1_power, var.dtype.base_dtype)
    beta2_power = tf.cast(self._beta2_power, var.dtype.base_dtype)
    lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
    beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = tf.cast(self._epsilon_t, var.dtype.base_dtype)
    lr = (lr_t * tf.sqrt(1 - beta2_power) / (1 - beta1_power))

    # m := beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_t = tf.scatter_update(m, grad.indices,
                            beta1_t * tf.gather(m, grad.indices) +
                            (1 - beta1_t) * grad.values,
                            use_locking=self._use_locking)

    # v := beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_t = tf.scatter_update(v, grad.indices,
                            beta2_t * tf.gather(v, grad.indices) +
                            (1 - beta2_t) * tf.square(grad.values),
                            use_locking=self._use_locking)

    # variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t))
    m_t_slice = tf.gather(m_t, grad.indices)
    v_t_slice = tf.gather(v_t, grad.indices)
    denominator_slice = tf.sqrt(v_t_slice) + epsilon_t
    var_update = tf.scatter_sub(var, grad.indices,
                                lr * m_t_slice / denominator_slice,
                                use_locking=self._use_locking)
    return tf.group(var_update, m_t, v_t)
def get_state_update_op(state_variables, new_states):
    """Returns an operation to update an LSTM's state variables.

    See get_state_variables() for more info.

    Args:
        state_variables (tuple[tf.contrib.rnn.LSTMStateTuple]): The LSTM's state variables.
        new_states (tuple[tf.contrib.rnn.LSTMStateTuple]): The new values for the state variables.
            new_states may have state tuples with state sizes < max_batch_size.
            Then, only the first rows of the corresponding state variables will be updated.

    Returns:
        tf.Operation: An operation that updates the LSTM's state variables.
    """
    # Add an operation to update the train states with the last state tensors.
    update_ops = []
    for state_variable, new_state in zip(state_variables, new_states):
        # new_state[0] might be smaller than state_variable[0], because state_variable[0]
        # contains max_batch_size entries.

        # Get the update indices for both states in the tuple
        update_indices = (tf.range(0, tf.shape(new_state[0])[0]),
                          tf.range(0, tf.shape(new_state[1])[0]))
        update_ops.extend([
            tf.scatter_update(state_variable[0], update_indices[0], new_state[0]),
            tf.scatter_update(state_variable[1], update_indices[1], new_state[1])
        ])
    return tf.tuple(update_ops)
def test_state_grads():
    with tf.Session() as sess:
        v = tf.Variable([0., 0., 0.])
        x = tf.ones((3,))

        y0 = tf.assign(v, x)
        y1 = tf.assign_add(v, x)

        grad0 = tf.gradients(y0, [v, x])
        grad1 = tf.gradients(y1, [v, x])

        grad_vals = sess.run((grad0, grad1))

        assert np.allclose(grad_vals[0][0], 0)
        assert np.allclose(grad_vals[0][1], 1)
        assert np.allclose(grad_vals[1][0], 1)
        assert np.allclose(grad_vals[1][1], 1)

    with tf.Session() as sess:
        v = tf.Variable([0., 0., 0.])
        x = tf.ones((1,))

        y0 = tf.scatter_update(v, [0], x)
        y1 = tf.scatter_add(v, [0], x)

        grad0 = tf.gradients(y0, [v._ref(), x])
        grad1 = tf.gradients(y1, [v._ref(), x])

        grad_vals = sess.run((grad0, grad1))

        assert np.allclose(grad_vals[0][0], [0, 1, 1])
        assert np.allclose(grad_vals[0][1], 1)
        assert np.allclose(grad_vals[1][0], 1)
        assert np.allclose(grad_vals[1][1], 1)
def _scatter_f_var(self, dst, src, mode="update"):
    # create a temporary variable for dst so that we can use the sparse
    # variable updates. despite this looking incredibly inefficient, it is
    # actually faster than the scatter_nd approach
    # from tensorflow.python.ops import gen_state_ops
    # var = gen_state_ops._temporary_variable(
    #     self.bases[dst.key].get_shape(), self.bases[dst.key].dtype)
    # var_name = var.op.name
    # var = tf.assign(var, self.bases[dst.key])

    var = self.bases[dst.key]

    if (dst.as_slice is not None and
            var.get_shape().is_compatible_with(src.get_shape()) and
            dst.indices[0] == 0 and
            dst.indices[-1] == var.get_shape()[0].value - 1 and
            len(dst.indices) == var.get_shape()[0]):
        if mode == "inc":
            result = tf.assign_add(var, src, use_locking=False)
        else:
            result = tf.assign(var, src, use_locking=False)
    elif mode == "inc":
        result = tf.scatter_add(var, dst.tf_indices, src, use_locking=False)
    else:
        result = tf.scatter_update(var, dst.tf_indices, src, use_locking=False)

    # result = gen_state_ops._destroy_temporary_variable(var, var_name)

    return result
def combine_messages(self, forward_messages, backward_messages, self_loop_messages, previous_code, mode='train'):
    mtr_f = self.get_graph().forward_incidence_matrix(normalization=('none', 'recalculated'))
    mtr_b = self.get_graph().backward_incidence_matrix(normalization=('none', 'recalculated'))

    if mode == 'train':
        forward_messages_comp = forward_messages - tf.nn.embedding_lookup(self.cached_messages_f, self.I)
        backward_messages_comp = backward_messages - tf.nn.embedding_lookup(self.cached_messages_b, self.I)

        with tf.control_dependencies([forward_messages, backward_messages]):
            self.f_upd = tf.scatter_update(self.cached_messages_f, self.I, forward_messages)
            self.b_upd = tf.scatter_update(self.cached_messages_b, self.I, backward_messages)

        collected_messages_f = tf.sparse_tensor_dense_matmul(mtr_f, forward_messages_comp)
        collected_messages_b = tf.sparse_tensor_dense_matmul(mtr_b, backward_messages_comp)

        new_embedding = collected_messages_f + collected_messages_b
        updated_vertex_embeddings = new_embedding + self.cached_vertex_embeddings

        with tf.control_dependencies([updated_vertex_embeddings]):
            self.v_upd = tf.assign(self.cached_vertex_embeddings, updated_vertex_embeddings)
    else:
        collected_messages_f = tf.sparse_tensor_dense_matmul(mtr_f, forward_messages)
        collected_messages_b = tf.sparse_tensor_dense_matmul(mtr_b, backward_messages)

        new_embedding = collected_messages_f + collected_messages_b
        updated_vertex_embeddings = new_embedding

    if self.use_nonlinearity:
        activated = tf.nn.relu(updated_vertex_embeddings + self_loop_messages)
    else:
        activated = updated_vertex_embeddings + self_loop_messages

    return activated
def floaty_scatter_update(ref, indices, updates, **kwargs):
    return tf.scatter_update(ref, tf.to_int32(indices), updates, **kwargs)
def __create_embedding_ops(self, last_hidden):
    if self.n_embeddings > 0:
        # Preallocate memory to save embeddings
        self.embedding_var = tf.Variable(tf.zeros([self.n_embeddings, self.layers_size[-2]]),
                                         name='representation')
        self.next_embedding = tf.Variable(tf.zeros([1], dtype=tf.int32), name="next_embedding_counter")
        self.save_embedding_op = tf.scatter_update(self.embedding_var, self.next_embedding, last_hidden)
        self.increment_next_embedding_op = self.next_embedding.assign_add(tf.constant([1]))
        self.embeddings_saver = tf.train.Saver([self.embedding_var])
def update_diff(self, accuracy, batch_idxs, batch_losses, batch_plens, loss_w=0.5, smooth_w=0.5):
    with tf.control_dependencies([tf.assign(self.acc_coef, accuracy)]):
        current_entropy = tf.gather(self.seq_entropy, batch_idxs)
        loss_coef = batch_losses / (tf.reduce_max(batch_losses) + 1e-8)
        new_entropy = (loss_coef * loss_w) + (batch_plens / self.max_plen * (1 - loss_w))
        updated_entropy = (current_entropy * smooth_w) + (new_entropy * (1 - smooth_w))
        update_op = tf.scatter_update(self.seq_entropy, batch_idxs, updated_entropy)
    return update_op
def batch_norm_layer_in_time(x, max_length, step, is_training, epsilon=1e-3, decay=0.99, scope="layer"):
    '''Assume 2d [batch, values] 3d [batch, width, values] or 4d [batch, width, height, values] tensor'''
    with tf.variable_scope('bn_' + scope):
        dim_x = len(x.get_shape().as_list())
        size = x.get_shape().as_list()[dim_x - 1]
        step_idcs = tf.range(step * size, (step + 1) * size)

        scale_var = tf.get_variable('scale', [size * max_length], initializer=tf.constant_initializer(0.1))
        scale = tf.gather(scale_var, step_idcs)
        offset_var = tf.get_variable('offset', [size * max_length])
        offset = tf.gather(offset_var, step_idcs)
        pop_mean_var = tf.get_variable('pop_mean', [size * max_length],
                                       initializer=tf.zeros_initializer(), trainable=False)
        pop_mean = tf.gather(pop_mean_var, step_idcs)
        pop_var_var = tf.get_variable('pop_var', [size * max_length],
                                      initializer=tf.ones_initializer(), trainable=False)
        pop_var = tf.gather(pop_var_var, step_idcs)
        batch_mean, batch_var = tf.nn.moments(x, [i for i in range(dim_x - 1)])

        train_mean_op = tf.scatter_update(pop_mean_var, step_idcs, pop_mean * decay + batch_mean * (1 - decay))
        train_var_op = tf.scatter_update(pop_var_var, step_idcs, pop_var * decay + batch_var * (1 - decay))

        def batch_statistics():
            with tf.control_dependencies([train_mean_op, train_var_op]):
                return tf.nn.batch_normalization(x, batch_mean, batch_var, offset, scale, epsilon)

        def population_statistics():
            return tf.nn.batch_normalization(x, pop_mean, pop_var, offset, scale, epsilon)

        if is_training:
            return batch_statistics()
        else:
            return population_statistics()
def _curvature_range(self):
    """Curvature range.

    Returns:
      h_max_t, h_min_t ops
    """
    self._curv_win = tf.get_variable("curv_win",
                                     dtype=tf.float32,
                                     trainable=False,
                                     shape=[self.curvature_window_width,],
                                     initializer=tf.zeros_initializer)
    # We use log smoothing for curvature range
    self._curv_win = tf.scatter_update(self._curv_win,
                                       self._step % self.curvature_window_width,
                                       tf.log(self._grad_norm_squared))
    # Note here the iterations start from iteration 0
    valid_window = tf.slice(self._curv_win,
                            tf.constant([0,]),
                            tf.expand_dims(
                                tf.minimum(
                                    tf.constant(self.curvature_window_width),
                                    self._step + 1),
                                dim=0))
    self._h_min_t = tf.reduce_min(valid_window)
    self._h_max_t = tf.reduce_max(valid_window)

    curv_range_ops = []
    with tf.control_dependencies([self._h_min_t, self._h_max_t]):
        avg_op = self._moving_averager.apply([self._h_min_t, self._h_max_t])
        with tf.control_dependencies([avg_op]):
            self._h_min = tf.exp(
                tf.identity(self._moving_averager.average(self._h_min_t)))
            self._h_max = tf.exp(
                tf.identity(self._moving_averager.average(self._h_max_t)))
            if self._sparsity_debias:
                self._h_min *= self._sparsity_avg
                self._h_max *= self._sparsity_avg
    curv_range_ops.append(avg_op)
    return curv_range_ops  # h_max_t, h_min_t
def run(self, x, eta, idx_center=None, idx_sample=None):
    h = [None] * self.num_layer
    embeddings = []
    reg_ops = []
    reset_ops = []
    clustering_ops = []

    with tf.variable_scope(self.scope):
        for ii in xrange(self.num_layer):
            with tf.variable_scope('layer_{}'.format(ii)):
                if ii == 0:
                    input_vec = x
                else:
                    input_vec = h[ii - 1]

                h[ii] = tf.matmul(input_vec, self.w[ii])

                if self.add_bias:
                    h[ii] += self.b[ii]

                if self.clustering_shape[ii] is not None:
                    embedding = h[ii]
                    embeddings += [embedding]
                    clustering_ops += [kmeans_clustering(
                        embedding, self.cluster_center[ii], self.cluster_label[ii],
                        self.num_cluster[ii], eta)]
                    sample_center = tf.stop_gradient(
                        tf.gather(self.cluster_center[ii], self.cluster_label[ii]))
                    reg_ops += [tf.reduce_mean(
                        tf.square(embedding - sample_center)) * self.alpha[ii] / 2.0]
                    reset_ops += [tf.scatter_update(
                        self.cluster_center[ii], idx_center[ii],
                        tf.gather(h[ii], idx_sample[ii]))]

                if self.act_func and self.act_func[ii] is not None:
                    h[ii] = self.act_func[ii](h[ii])

    return h, embeddings, clustering_ops, reg_ops, reset_ops
def curvature_range(self):
    # set up the curvature window
    self._curv_win = tf.Variable(
        np.zeros([self._curv_win_width, ]), dtype=tf.float32,
        name="curv_win", trainable=False)
    # we can use log smoothing for curvature range to follow trend faster
    # self._curv_win = tf.scatter_update(
    #     self._curv_win, self._global_step % self._curv_win_width,
    #     tf.log(self._grad_norm_squared + EPS))
    self._curv_win = tf.scatter_update(
        self._curv_win, self._global_step % self._curv_win_width,
        self._grad_norm_squared + EPS)
    # note here the iterations start from iteration 0
    valid_window = tf.slice(
        self._curv_win, tf.constant([0, ]),
        tf.expand_dims(
            tf.minimum(tf.constant(self._curv_win_width),
                       self._global_step + 1),
            dim=0))

    if self._h_min_log_smooth:
        self._h_min_t = tf.log(tf.reduce_min(valid_window) + EPS)
    else:
        self._h_min_t = tf.reduce_min(valid_window)
    if self._h_max_log_smooth:
        self._h_max_t = tf.log(tf.reduce_max(valid_window) + EPS)
    else:
        self._h_max_t = tf.reduce_max(valid_window)

    curv_range_ops = []
    with tf.control_dependencies([self._h_min_t, self._h_max_t]):
        avg_op = self._moving_averager.apply([self._h_min_t, self._h_max_t])
        with tf.control_dependencies([avg_op]):
            if self._h_min_log_smooth:
                self._h_min = tf.exp(
                    tf.identity(self._moving_averager.average(self._h_min_t)))
            else:
                self._h_min = \
                    tf.identity(self._moving_averager.average(self._h_min_t))
            if self._h_max_log_smooth:
                self._h_max = tf.exp(
                    tf.identity(self._moving_averager.average(self._h_max_t)))
            else:
                self._h_max = \
                    tf.identity(self._moving_averager.average(self._h_max_t))
            if self._sparsity_debias:
                self._h_min = self._h_min * self._sparsity_avg
                self._h_max = self._h_max * self._sparsity_avg
    curv_range_ops.append(avg_op)
    return curv_range_ops
def cnn_sen_enc(word_vocab_size,
                word_embed_size=50,
                batch_size=20,
                num_highway_layers=2,
                max_sen_length=65,
                kernels=[1, 2, 3, 4, 5, 6, 7],
                kernel_features=[50, 100, 150, 200, 200, 200, 200],
                max_doc_length=35,
                pretrained=None):
    # cnn sentence encoder
    assert len(kernels) == len(kernel_features), 'Kernel and Features must have the same size'

    input_ = tf.placeholder(tf.int32, shape=[batch_size, max_doc_length, max_sen_length], name="input")

    ''' First, embed words to sentence '''
    with tf.variable_scope('embedding'):
        if pretrained is not None:
            word_embedding = tf.get_variable(name='word_embedding',
                                             shape=[word_vocab_size, word_embed_size],
                                             initializer=tf.constant_initializer(pretrained))
        else:
            word_embedding = tf.get_variable(name='word_embedding',
                                             shape=[word_vocab_size, word_embed_size])

        ''' this op clears embedding vector of first symbol (symbol at position 0, which is by convention
        the position of the padding symbol). It can be used to mimic Torch7 embedding operator that keeps
        padding mapped to zero embedding vector and ignores gradient updates. For that do the following in TF:
        1. after parameter initialization, apply this op to zero out padding embedding vector
        2. after each gradient update, apply this op to keep padding at zero'''
        clear_word_embedding_padding = tf.scatter_update(word_embedding, [0],
                                                         tf.constant(0.0, shape=[1, word_embed_size]))

        # [batch_size, max_doc_length, max_sen_length, word_embed_size]
        input_embedded = tf.nn.embedding_lookup(word_embedding, input_)

        input_embedded = tf.reshape(input_embedded, [-1, max_sen_length, word_embed_size])

    ''' Second, apply convolutions '''
    # [batch_size x max_doc_length, cnn_size]
    # where cnn_size=sum(kernel_features)
    input_cnn = tdnn(input_embedded, kernels, kernel_features)

    ''' Maybe apply Highway '''
    if num_highway_layers > 0:
        input_cnn = highway(input_cnn, input_cnn.get_shape()[-1], num_layers=num_highway_layers)

    return adict(
        input=input_,
        clear_word_embedding_padding=clear_word_embedding_padding,
        input_embedded=input_embedded,
        input_cnn=input_cnn
    )
def cnn_sen_enc(word_vocab_size,
                word_embed_size=50,
                batch_size=20,
                num_highway_layers=2,
                max_sen_length=65,
                kernels=[1, 2, 3, 4, 5, 6, 7],
                kernel_features=[50, 100, 150, 200, 200, 200, 200],
                max_doc_length=35,
                pretrained=None):
    # cnn sentence encoder
    assert len(kernels) == len(kernel_features), 'Kernel and Features must have the same size'

    input_ = tf.placeholder(tf.int32, shape=[batch_size, max_doc_length, max_sen_length], name="input")

    ''' First, embed words to sentence '''
    with tf.variable_scope('Embedding'):
        if pretrained is not None:
            word_embedding = tf.get_variable(name='word_embedding',
                                             shape=[word_vocab_size, word_embed_size],
                                             initializer=tf.constant_initializer(pretrained))
        else:
            word_embedding = tf.get_variable(name='word_embedding',
                                             shape=[word_vocab_size, word_embed_size])

        ''' this op clears embedding vector of first symbol (symbol at position 0, which is by convention
        the position of the padding symbol). It can be used to mimic Torch7 embedding operator that keeps
        padding mapped to zero embedding vector and ignores gradient updates. For that do the following in TF:
        1. after parameter initialization, apply this op to zero out padding embedding vector
        2. after each gradient update, apply this op to keep padding at zero'''
        clear_word_embedding_padding = tf.scatter_update(word_embedding, [0],
                                                         tf.constant(0.0, shape=[1, word_embed_size]))

        # [batch_size, max_doc_length, max_sen_length, word_embed_size]
        input_embedded = tf.nn.embedding_lookup(word_embedding, input_)

        input_embedded = tf.reshape(input_embedded, [-1, max_sen_length, word_embed_size])

    ''' Second, apply convolutions '''
    # [batch_size x max_doc_length, cnn_size]
    # where cnn_size=sum(kernel_features)
    input_cnn = tdnn(input_embedded, kernels, kernel_features)

    ''' Maybe apply Highway '''
    if num_highway_layers > 0:
        input_cnn = highway(input_cnn, input_cnn.get_shape()[-1], num_layers=num_highway_layers)

    return adict(
        input=input_,
        clear_word_embedding_padding=clear_word_embedding_padding,
        input_embedded=input_embedded,
        input_cnn=input_cnn
    )
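The docstrings in the two cnn_sen_enc examples above describe a two-step usage of the returned clear_word_embedding_padding op: run it once after variable initialization, then again after every gradient update so the padding row stays zero. A minimal sketch of that loop, assuming the repository's graph-building code (tdnn, highway, adict) is available; train_op and feed_data below are hypothetical placeholders, not names from the original project:

model = cnn_sen_enc(word_vocab_size=10000)  # hypothetical call; real code adds a loss and train_op

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # 1. zero out the padding embedding right after initialization
    sess.run(model.clear_word_embedding_padding)
    for batch in feed_data:  # hypothetical data iterator
        sess.run(train_op, feed_dict={model.input: batch})  # hypothetical train_op
        # 2. re-zero the padding embedding after each gradient update
        sess.run(model.clear_word_embedding_padding)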
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Accumulates gradients."""
    grad_add_ops = []
    if self._count <= self.num_passes - 1:
        for grad, var in grads_and_vars:
            if grad is not None:
                _grad_cache = self.grad_cache[var]
                if self._method == "cumsum":
                    _div = tf.div(grad, self.num_passes)
                    _add_op = _grad_cache.assign_add(_div)
                    grad_add_ops.append(_add_op)
                else:
                    _add = tf.expand_dims(grad, 0)
                    _assign_op = tf.scatter_update(_grad_cache, [self._count], _add)
                    grad_add_ops.append(_assign_op)
            else:
                if var not in self._grad_cache:
                    self._grad_cache[var] = None
    else:
        raise Exception("You cannot call more apply_gradients")

    grad_add_op = tf.group(*grad_add_ops)

    if self._count < self.num_passes - 1:
        final_op = grad_add_op
    else:
        zero_out_ops = []
        with tf.control_dependencies([grad_add_op]):
            if self._method == "cumsum":
                grad_avg = [(tf.identity(gg), var)
                            for var, gg in self._grad_cache.items()]
            else:
                grad_avg = [(tf.reduce_mean(gg, [0]), var)
                            for var, gg in self._grad_cache.items()]

        # Update the weight variables.
        with tf.control_dependencies([grad_add_op]):
            weight_update = self.opt.apply_gradients(
                grad_avg, global_step=global_step, name=name)

        # Zero out gradient cache.
        with tf.control_dependencies([weight_update]):
            for grad, var in grad_avg:
                _grad_cache = self._grad_cache[var]
                if _grad_cache is not None:
                    _grad_shape = _grad_cache.get_shape()
                    _zeros = tf.zeros(_grad_shape, dtype=_grad_cache.dtype)
                    _zero_out_grad = _grad_cache.assign(_zeros)
                    zero_out_ops.append(_zero_out_grad)
            zero_out_op = tf.group(*zero_out_ops)
        final_op = zero_out_op

    self._count += 1
    return final_op
def run(self, x, eta, idx_center=None, idx_sample=None):
    """ x must be of size [B H W C] """
    h = [None] * self.num_layer
    embeddings = []
    reg_ops = []
    reset_ops = []
    clustering_ops = []

    with tf.variable_scope(self.scope):
        for ii in xrange(self.num_layer):
            if ii == 0:
                input_vec = x
            else:
                input_vec = h[ii - 1]

            h[ii] = tf.nn.conv2d(input_vec, self.w[ii],
                                 self.conv_filters['filter_stride'][ii],
                                 padding='SAME')

            if self.add_bias:
                h[ii] += self.b[ii]

            if self.clustering_type[ii] == 'sample':
                embedding = h[ii]
            elif self.clustering_type[ii] == 'spatial':
                embedding = h[ii]
            elif self.clustering_type[ii] == 'channel':
                embedding = tf.transpose(h[ii], [0, 3, 1, 2])

            if self.clustering_shape[ii] is not None:
                embedding = tf.reshape(embedding, [-1, self.clustering_shape[ii][1]])
                embeddings += [embedding]
                clustering_ops += [kmeans_clustering(
                    embedding, self.cluster_center[ii], self.cluster_label[ii],
                    self.num_cluster[ii], eta)]
                sample_center = tf.stop_gradient(
                    tf.gather(self.cluster_center[ii], self.cluster_label[ii]))
                reg_ops += [tf.reduce_mean(tf.square(embedding - sample_center)) *
                            self.alpha[ii] / 2.0]
                reset_ops += [tf.scatter_update(
                    self.cluster_center[ii], idx_center[ii],
                    tf.gather(embedding, idx_sample[ii]))]

            if self.act_func[ii] is not None:
                h[ii] = self.act_func[ii](h[ii])

            if self.pool_func[ii] is not None:
                h[ii] = self.pool_func[ii](h[ii],
                                           ksize=self.pooling['pool_size'][ii],
                                           strides=self.pooling['pool_stride'][ii],
                                           padding='SAME')

    return h, embeddings, clustering_ops, reg_ops, reset_ops