我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用tensorflow.multinomial()。
def _sample(self, n_samples):
    """Draw `n_samples` category indices from `self.logits`.

    Logits whose static rank is not 2 are flattened to
    `[-1, self.n_categories]` before sampling, and the samples are reshaped
    back to `[n_samples] + self.batch_shape` afterwards.

    Args:
        n_samples: number of samples to draw; a Python int or a tensor.

    Returns:
        The sampled indices cast to `self.dtype`, sample axis first.
    """
    logits_are_2d = self.logits.get_shape().ndims == 2
    if logits_are_2d:
        flat_logits = self.logits
    else:
        flat_logits = tf.reshape(self.logits, [-1, self.n_categories])

    # tf.multinomial returns [rows, n_samples]; move the sample axis first.
    drawn = tf.multinomial(flat_logits, n_samples)
    flat_samples = tf.cast(tf.transpose(drawn), self.dtype)
    if logits_are_2d:
        return flat_samples

    target_shape = tf.concat([[n_samples], self.batch_shape], 0)
    samples = tf.reshape(flat_samples, target_shape)
    # Only a Python int gives a statically known leading dimension.
    leading_dim = n_samples if isinstance(n_samples, int) else None
    static_shape = tf.TensorShape([leading_dim]).concatenate(
        self.get_batch_shape())
    samples.set_shape(static_shape)
    return samples
def _sample(self, n_samples):
    """Draw binomial samples by summing `self.n_experiments` two-way draws.

    Each logit is expanded into a pair of log-probabilities
    (failure, success); `n_samples * n_experiments` draws are taken per
    entry and summed over the experiment axis.

    Args:
        n_samples: number of samples to draw; a Python int or a tensor.

    Returns:
        Success counts cast to `self.dtype`, sample axis first.
    """
    n_trials = self.n_experiments
    if self.logits.get_shape().ndims == 1:
        flat_logits = self.logits
    else:
        flat_logits = tf.reshape(self.logits, [-1])

    # With p = sigmoid(logit): log(1 - p) = -softplus(logit) and
    # log(p) = logit - softplus(logit).
    log_failure = -tf.nn.softplus(flat_logits)
    log_success = flat_logits + log_failure
    two_way_logits = tf.stack([log_failure, log_success], axis=-1)

    draws = tf.multinomial(two_way_logits, n_samples * n_trials)
    flat_draws = tf.transpose(draws)
    trial_shape = tf.concat([[n_trials, n_samples], self.batch_shape], 0)
    # Each draw is 0 or 1, so summing over the trial axis counts successes.
    successes = tf.reduce_sum(tf.reshape(flat_draws, trial_shape), axis=0)

    leading_dim = n_samples if isinstance(n_samples, int) else None
    successes.set_shape(
        tf.TensorShape([leading_dim]).concatenate(self.get_batch_shape()))
    return tf.cast(successes, self.dtype)
def _sample(self, n_samples):
    """Draw per-category counts over `self.n_experiments` categorical draws.

    Args:
        n_samples: number of samples to draw; a Python int or a tensor.

    Returns:
        A tensor of dtype `self.dtype` holding, for every sample and batch
        entry, how many of the experiments landed in each category.
    """
    if self.logits.get_shape().ndims == 2:
        flat_logits = self.logits
    else:
        flat_logits = tf.reshape(self.logits, [-1, self.n_categories])

    draws = tf.multinomial(flat_logits, n_samples * self.n_experiments)
    flat_draws = tf.transpose(draws)
    target_shape = tf.concat(
        [[n_samples, self.n_experiments], self.batch_shape], 0)
    indices = tf.reshape(flat_draws, target_shape)

    # Static dimensions are only known when the counts are Python ints.
    known_samples = n_samples if isinstance(n_samples, int) else None
    known_experiments = (
        self.n_experiments if isinstance(self.n_experiments, int) else None)
    static_shape = tf.TensorShape(
        [known_samples, known_experiments]).concatenate(
            self.get_batch_shape())
    indices.set_shape(static_shape)

    # One-hot every draw, then add up over the experiment axis (axis 1).
    encoded = tf.one_hot(indices, self.n_categories, dtype=self.dtype)
    return tf.reduce_sum(encoded, axis=1)
def _sample(self, n_samples):
    """Draw `n_samples` one-hot encoded categories from `self.logits`.

    Args:
        n_samples: number of samples to draw; a Python int or a tensor.

    Returns:
        One-hot vectors of depth `self.n_categories` and dtype `self.dtype`.
    """
    logits_are_2d = self.logits.get_shape().ndims == 2
    if logits_are_2d:
        flat_logits = self.logits
    else:
        flat_logits = tf.reshape(self.logits, [-1, self.n_categories])

    # tf.multinomial returns [rows, n_samples]; move the sample axis first.
    indices = tf.transpose(tf.multinomial(flat_logits, n_samples))
    if not logits_are_2d:
        target_shape = tf.concat([[n_samples], self.batch_shape], 0)
        indices = tf.reshape(indices, target_shape)
        leading_dim = n_samples if isinstance(n_samples, int) else None
        indices.set_shape(
            tf.TensorShape([leading_dim]).concatenate(
                self.get_batch_shape()))

    return tf.one_hot(indices, self.n_categories, dtype=self.dtype)
def sampleAction(self, states):
    """Pick an action for `states` using epsilon-greedy exploration.

    With probability `self.exploration` a uniformly random action index is
    returned. Otherwise the action scores are fetched from the session,
    turned into probabilities with a softmax, and one action is sampled.

    Args:
        states: value fed for the `self.states` placeholder; only the first
            row of the fetched scores is used.

    Returns:
        The index of the chosen action.
    """
    # TODO: use this code piece when tf.multinomial gets better
    # sample action from current policy
    # actions = self.session.run(self.predicted_actions, {self.states: states})[0]
    # return actions[0]

    # temporary workaround
    def softmax(y):
        """Return the softmax of the unnormalized log-probabilities `y`."""
        # float64 keeps the result summing to 1 within the tolerance that
        # np.random.choice enforces, even if the scores arrive as float32.
        y = np.asarray(y, dtype=np.float64)
        e = np.exp(y - np.amax(y))
        return e / np.sum(e)

    # epsilon-greedy exploration strategy
    if random.random() < self.exploration:
        return random.randint(0, self.num_actions - 1)

    action_scores = self.session.run(self.action_scores, {self.states: states})[0]
    action_probs = softmax(action_scores)
    # Sample directly from the probabilities. The previous "- 1e-5" fudge
    # turned any probability below 1e-5 negative, which made
    # np.random.multinomial raise ValueError.
    return np.random.choice(len(action_probs), p=action_probs)
def __init__(self, name, inputs, conv_outputs, reward_scaling, config):
    """Build the value head and the policy head on top of `conv_outputs`.

    Args:
        name: variable scope under which all layers are created.
        inputs: object exposing `alive`, which multiplies the value output.
        conv_outputs: tensor fed to the shared 256-unit hidden layer.
        reward_scaling: object exposing `unnormalize_output`.
        config: object exposing `num_actions`.
    """
    with tf.variable_scope(name):
        hidden = tf.layers.dense(conv_outputs, 256, tf.nn.relu, name='hidden')

        raw_value = tf.layers.dense(hidden, 1)
        scaled_value = inputs.alive * reward_scaling.unnormalize_output(raw_value)
        self.value = tf.squeeze(scaled_value, axis=1, name='value')

        action_logits = tf.layers.dense(
            hidden, config.num_actions, name='actions')
        self.policy = tf.nn.softmax(action_logits, name='policy')
        self.log_policy = tf.nn.log_softmax(action_logits, name='log_policy')

        # Despite the attribute name, this is a stochastic draw from the
        # policy (tf.multinomial), not an argmax.
        drawn = tf.multinomial(self.log_policy, num_samples=1)
        self.greedy_action = tf.squeeze(drawn, axis=1, name='greedy_action')
def build_forward(self, _input): output = _input # [batch_size, num_steps, rnn_units] feature_dim = int(output.get_shape()[2]) # rnn_units output = tf.reshape(output, [-1, feature_dim]) # [batch_size * num_steps, rnn_units] final_activation = 'sigmoid' if self.out_dim == 1 else 'softmax' if self.net_type == 'simple': net_config = [] if self.net_config is None else self.net_config with tf.variable_scope('wider_actor'): for layer in net_config: units, activation = layer.get('units'), layer.get('activation', 'relu') output = BasicModel.fc_layer(output, units, use_bias=True) output = BasicModel.activation(output, activation) logits = BasicModel.fc_layer(output, self.out_dim, use_bias=True) # [batch_size * num_steps, out_dim] probs = BasicModel.activation(logits, final_activation) # [batch_size * num_steps, out_dim] probs_dim = self.out_dim if self.out_dim == 1: probs = tf.concat([1 - probs, probs], axis=1) probs_dim = 2 self.decision = tf.multinomial(tf.log(probs), 1) # [batch_size * num_steps, 1] self.decision = tf.reshape(self.decision, [-1, self.num_steps]) # [batch_size, num_steps] self.probs = tf.reshape(probs, [-1, self.num_steps, probs_dim]) # [batch_size, num_steps, out_dim] else: raise ValueError('Do not support %s' % self.net_type)
def _argmax_or_mcsearch(embedding, output_projection=None, update_embedding=True, mc_search=False):
    """Build a decoder loop function that embeds the previously chosen symbol.

    Args:
        embedding: embedding matrix the chosen symbol ids are looked up in.
        output_projection: optional (weights, biases) pair applied to the
            previous output before a symbol is chosen.
        update_embedding: when False, gradients are stopped at the lookup.
        mc_search: a Python bool selecting sampling (True) or argmax (False)
            at graph-build time; any other value is passed to `tf.cond` as
            the predicate.

    Returns:
        A function `(prev, _) -> embedded symbol`.
    """
    def _draw(scores):
        # tf.multinomial yields one column per row; flatten it to a vector.
        return tf.reshape(tf.multinomial(scores, 1), [-1])

    def loop_function(prev, _):
        if output_projection is not None:
            weights, biases = output_projection[0], output_projection[1]
            prev = nn_ops.xw_plus_b(prev, weights, biases)

        if not isinstance(mc_search, bool):
            prev_symbol = tf.cond(mc_search,
                                  lambda: _draw(prev),
                                  lambda: tf.argmax(prev, 1))
        elif mc_search:
            prev_symbol = _draw(prev)
        else:
            prev_symbol = math_ops.argmax(prev, 1)

        emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
        if update_embedding:
            return emb_prev
        return array_ops.stop_gradient(emb_prev)

    return loop_function
def sample(params, eps, dist='gauss'):
    """Sample from a distribution described by `params`, given noise `eps`.

    Args:
        params: sequence of distribution parameters. For 'gauss':
            (mean, cov). For 'gm': (means, covs, pi_logits). When `dist`
            contains 'bin', one extra trailing element holds logits.
        eps: noise scaled by the square root of the covariance.
        dist: string naming the distribution; matched by substring.

    Returns:
        The sample; when `dist` contains 'bin', the sigmoid of the trailing
        logits is concatenated along axis 1.

    Raises:
        NotImplementedError: if `dist` contains neither 'gauss' nor 'gm'.
    """
    has_binary = 'bin' in dist
    if has_binary:
        logits = params[-1]
        params = params[:-1]

    if 'gauss' in dist:
        mean, cov = params
        s = mean + tf.sqrt(cov) * eps
    elif 'gm' in dist:
        means, covs, pi_logits = params
        # Pick one mixture component per row.
        choices = tf.multinomial(pi_logits, num_samples=1)
        batch_size = choices.get_shape()[0]
        row_ids = tf.constant(list(range(batch_size)),
                              dtype=tf.int64,
                              shape=(batch_size, 1))
        # (row, component) index pairs for gather_nd.
        idx_tensor = tf.concat([row_ids, choices], axis=1)
        chosen_means = tf.gather_nd(means, idx_tensor)
        chosen_covs = tf.gather_nd(covs, idx_tensor)
        s = chosen_means + tf.sqrt(chosen_covs) * eps
    else:
        raise NotImplementedError

    if has_binary:
        sig = tf.sigmoid(logits)
        s = tf.concat([s, sig], axis=1)
    return s
def sample_with_temperature(logits, temperature):
    """Choose ids by argmax or by temperature-scaled random sampling.

    Args:
        logits: a Tensor.
        temperature: a float; 0.0 selects argmax, larger values sample from
            the logits divided by the temperature.

    Returns:
        a Tensor with one fewer dimension than logits.
    """
    if temperature == 0.0:
        return tf.argmax(logits, -1)

    assert temperature > 0.0
    # tf.multinomial needs a 2-D input, so fold all leading axes together.
    last_dim = shape_list(logits)[-1]
    flat_logits = tf.reshape(logits, [-1, last_dim]) / temperature
    flat_choices = tf.multinomial(flat_logits, 1)
    leading_dims = shape_list(logits)[:logits.get_shape().ndims - 1]
    return tf.reshape(flat_choices, leading_dims)
def sample_action(self, observation):
    r"""Sample an action from \pi_\theta(a|s) for a single observation.

    tf ops are eliminated on purpose here since this is a hot code path and
    we are optimizing for CPU usage... or maybe tf.multinomial is just slow
    in general.

    Using TF ops:
        sample_action_op = tf.squeeze(tf.nn.softmax(self.net.logits))
        action = tf.multinomial(sample_action_op)

    Args:
        observation: one observation; it is wrapped in a list before being
            fed for `self.net.obs`.

    Returns:
        A one-element list holding the sampled action index.
    """
    # TODO: ensure this works when num_actions > 1
    action_logits = self.net.sess.run(
        self.net.logits,
        {self.net.obs: [observation]}
    )[0]
    action = np.random.choice(np.arange(len(action_logits)),
                              p=self.softmax(action_logits))
    return [action]
def choose_action(self):
    """Build the op that picks one action for the configured mode.

    For 'discrete' mode a single index is sampled from `self.a_prob`. For
    'continuous' mode one sample of `self.action_normal_dist` is scaled by
    `ACTION_GAP`, shifted by the lower action bound and clipped to the
    bounds. Any other mode yields None.
    """
    mode = self.config.mode
    if mode == 'discrete':
        # tf.multinomial expects log-probabilities, hence the tf.log.
        drawn = tf.multinomial(tf.log(self.a_prob), 1)
        return drawn[0][0]
    if mode == 'continuous':
        scaled = (self.action_normal_dist.sample(1) * self.config.ACTION_GAP
                  + self.config.ACTION_BOUND[0])
        # sample(1) adds a leading axis of size 1; squeeze it away.
        squeezed = tf.squeeze(scaled, axis=0)
        clipped = tf.clip_by_value(squeezed,
                                   self.config.ACTION_BOUND[0],
                                   self.config.ACTION_BOUND[1])
        return clipped[0]
    return None
def __call__(self, prev_output):
    """Sample the next input from the previous output with a temperature.

    Each row of `prev_output` is divided by `self.temperature` and used as
    unnormalized log-probabilities: label k is drawn with probability
    exp(p_k / t) / sum_j exp(p_j / t). The drawn labels are appended to
    `self.chosen_labels` and re-encoded as one-hot vectors.

    Args:
        prev_output (tf.Tensor): the output on which to apply the
            transformation, of size [batch_size, nb_labels].

    Return:
        tf.Tensor: the one-hot encoded next input, [batch_size, nb_labels].
    """
    # prev_output size: [batch_size, nb_labels]
    nb_labels = prev_output.get_shape().as_list()[-1]

    # TODO: Add option to control argmax (label_draws = tf.argmax(prev_output, 1))

    # tf.multinomial takes unnormalized log-probabilities, so the scaled
    # scores are passed directly. The earlier exp -> normalize -> log round
    # trip described the same distribution but overflowed to inf/NaN for
    # large scores or small temperatures.
    scaled_logits = tf.div(prev_output, self.temperature)  # pi / t
    label_draws = tf.multinomial(scaled_logits, 1)  # [batch_size, 1]
    label_draws = tf.squeeze(label_draws, [1])  # [batch_size,]
    self.chosen_labels.append(label_draws)
    next_input = tf.one_hot(label_draws, nb_labels)  # Reencode the next input vector
    # next_input size: [batch_size, nb_labels]
    return next_input
def sample(x):
    """Draw one index per row of `x`, treating the rows as probabilities.

    The log is taken because tf.multinomial expects log-probabilities.
    """
    log_probs = tf.log(x)
    return tf.multinomial(log_probs, 1)
def __init__(self, dim):
    """Compile a sampler that draws one category per row of weights.

    Args:
        dim: number of categories; the second dimension of the weights.
    """
    self._dim = dim
    weights_var = tf.placeholder(
        dtype=tf.float32,
        shape=(None, dim),
        name="weights"
    )
    # tf.multinomial expects unnormalized log-probabilities, so the weights
    # are logged first. Feeding them raw would sample proportionally to
    # exp(weight) rather than to the weight itself. The small epsilon keeps
    # log finite for zero weights.
    self._f_sample = tensor_utils.compile_function(
        inputs=[weights_var],
        outputs=tf.multinomial(tf.log(weights_var + 1e-8), num_samples=1)[:, 0],
    )
def sample_sym(self, dist_info):
    """Symbolically sample one-hot vectors from `dist_info["prob"]`.

    Args:
        dist_info: dict whose "prob" entry holds per-row probabilities.

    Returns:
        One float32 one-hot row of width `self.dim` per input row.
    """
    # Epsilon keeps the log finite for zero probabilities.
    log_probs = tf.log(dist_info["prob"] + 1e-8)
    drawn_ids = tf.multinomial(log_probs, num_samples=1)[:, 0]
    # Rows of the identity matrix are the one-hot encodings.
    identity = np.eye(self.dim, dtype=np.float32)
    return tf.nn.embedding_lookup(identity, drawn_ids)
def random_category(self, batch_size, size, dtype):
    """Draw `batch_size` one-hot categories uniformly over `size` classes.

    Args:
        batch_size: number of rows to draw.
        size: number of categories; also the one-hot depth.
        dtype: dtype of the returned one-hot tensor.
    """
    uniform_prior = tf.ones([batch_size, size])*1./size
    log_prior = tf.log(uniform_prior + TINY)
    drawn_ids = tf.multinomial(log_prior, num_samples=1)[:, 0]
    return tf.one_hot(drawn_ids, size, dtype=dtype)
def sample_and_embed(embedding, temperature, output_list=None,
                     output_projection=None):
    """Returns a callable (usable as a loop_fn for seq2seq) which takes a
    sample from a batch of outputs and embeds them. Optionally applies a
    projection first.

    Args:
        embedding: an embedding matrix to lookup symbols in.
        temperature: temperature to control the pointiness of the softmax.
        output_list (Optional): a list in which to collect the samples.
            Default None means don't collect them at all.
        output_projection (Optional): tuple (weight, biases) used to project
            outputs. If None (default), no projection is performed.

    Returns:
        A function `(prev, _)` returning the embedding of the sampled symbol.
    """
    def _sample_embed(prev, _):
        var = _maybe_project(prev, output_projection)
        var /= temperature
        next_ = tf.multinomial(var, 1)
        # Drop only the num_samples dimension. An axis-less squeeze would
        # also remove the batch dimension whenever the batch size is 1.
        next_ = tf.squeeze(next_, [1])
        # maybe store it
        if output_list is not None:
            output_list.append(next_)
        # look up the embedding
        next_ = tf.nn.embedding_lookup(embedding, next_)
        return next_

    return _sample_embed
def sample(self, dist_info):
    """Sample one-hot vectors of width `self.dim` from `dist_info["prob"]`.

    Args:
        dist_info: dict whose "prob" entry holds per-row probabilities.
    """
    # TINY keeps the log finite for zero probabilities.
    log_prob = tf.log(dist_info["prob"] + TINY)
    ids = tf.multinomial(log_prob, num_samples=1)[:, 0]
    # Rows of the identity matrix serve as the one-hot encodings.
    identity = np.eye(self.dim, dtype=np.float32)
    onehot_table = tf.constant(identity)
    return tf.nn.embedding_lookup(onehot_table, ids)
def multinomial_3d(x):
    """Samples from a multinomial distribution from 3D Tensor.

    Args:
        x: Tensor with shape (batch_size, timesteps, classes)

    Returns:
        Tensor with shape (batch_size, timesteps), sampled from `classes`.
    """
    batch = tf.shape(x)[0]
    steps = tf.shape(x)[1]
    # tf.multinomial needs 2-D input: merge batch and time, sample, unmerge.
    flat = tf.reshape(x, (batch * steps, -1))
    drawn = tf.multinomial(flat, 1)
    return tf.reshape(drawn, (batch, steps))
def multinomial_2d(x):
    """Samples from a multinomial distribution from 2D Tensor.

    Args:
        x: Tensor with shape (batch_size, classes)

    Returns:
        Tensor with shape (batch_size), sampled from `classes`.
    """
    batch = tf.shape(x)[0]
    drawn = tf.multinomial(x, 1)
    # Collapse the single-sample column into a vector.
    return tf.reshape(drawn, (batch,))
def categorical_sample(logits, d, exploration=True):
    """Pick one category per row of `logits` and return it one-hot encoded.

    Args:
        logits: 2-D tensor of unnormalized log-probabilities.
        d: depth of the returned one-hot encoding.
        exploration: truthy to sample from the distribution, falsy to take
            the argmax.

    Returns:
        A one-hot tensor of depth `d` with one row per row of `logits`.
    """
    # Subtract the row maximum before sampling, as the original did.
    shifted = logits - tf.reduce_max(logits, [1], keep_dims=True)
    # Plain truthiness instead of `is True` / `is False`: an identity test
    # left flags such as np.bool_ or 1 unhandled, so the float logits fell
    # through to squeeze/one_hot and failed there.
    if exploration:
        picked = tf.multinomial(shifted, 1)
    else:
        picked = tf.expand_dims(tf.argmax(shifted, 1), -1)
    picked = tf.squeeze(picked, [1])
    return tf.one_hot(picked, d)
def __init__(self, dim):
    """Compile a sampler that draws one category per row of weights.

    Args:
        dim: number of categories; the second dimension of the weights.
    """
    self._dim = dim
    weights_var = tf.placeholder(dtype=tf.float32, shape=(None, dim), name="weights")
    # tf.multinomial wants log-probabilities; epsilon keeps the log finite.
    log_weights = tf.log(weights_var + 1e-8)
    drawn = tf.multinomial(log_weights, num_samples=1)[:, 0]
    self._f_sample = tensor_utils.compile_function(
        inputs=[weights_var],
        outputs=drawn,
    )
def rollout(self, doing_eval=False):
    """Run one episode, collecting observations, actions and rewards.

    Args:
        doing_eval: forwarded unchanged to `self.sample_action_given`.

    Returns:
        A tuple `(observations, actions, rewards)` of equal-length lists,
        one entry per environment step.
    """
    observations, actions, rewards = [], [], []
    observation = self.env.reset()
    done = False
    while not done:
        observations.append(observation)
        action = self.sample_action_given(observation, doing_eval)
        # Debugging guard kept from the original.
        assert action != 5, "FAIL! (multinomial logits sampling bug?"
        observation, reward, done, _ = self.env.step(action)
        actions.append(action)
        rewards.append(reward)
    if VERBOSE_DEBUG:
        # Function-call form works on both Python 2 and Python 3; the
        # original print statement is a syntax error on Python 3.
        print("rollout: actions=%s" % (actions))
    return observations, actions, rewards
def __init__(self, n_options, sess):
    """Create an op that picks one of `n_options` indices uniformly.

    Args:
        n_options: number of options to choose between.
        sess: session stored on the instance as `self.sess`.
    """
    self.n_options = n_options
    self.sess = sess
    # A single row of equal logits gives a uniform distribution.
    uniform_logits = [self.n_options * [1.]]
    drawn = tf.multinomial(uniform_logits, 1)
    self.picker = drawn[0][0]
def corrupt(tensor, corruption_level=0.05):
    """Uses the masking noise algorithm to mask corruption_level proportion
    of the input.

    Every element is independently zeroed with probability
    `corruption_level` and kept otherwise.

    :param tensor: A tensor whose values are to be corrupted.
    :param corruption_level: A float in [0, 1] giving the probability of
        corrupting each value.
    :return: The corrupted tensor.
    """
    total_samples = tf.reduce_prod(tf.shape(tensor))
    # Class 0 (probability corruption_level) means "mask", class 1 means
    # "keep", so the sampled indices double as a 0/1 multiplicative mask.
    corruption_matrix = tf.multinomial(
        tf.log([[corruption_level, 1 - corruption_level]]), total_samples)
    corruption_matrix = tf.cast(
        tf.reshape(corruption_matrix, shape=tf.shape(tensor)),
        dtype=tf.float32)
    # The `*` operator replaces tf.mul, which was removed in TensorFlow 1.0;
    # the operator form works on old and new releases alike.
    return tensor * corruption_matrix
def _create_model(self):
    """Build the loss and sampling ops on top of the RNN output.

    Returns:
        A tuple `(self.loss, self.sample, self.in_state, self.out_state)`.
    """
    # One extra class beyond the vocabulary entries.
    depth = len(self.vocab)+1
    self._create_rnn()

    with tf.name_scope('loss'):
        self.logits = tf.contrib.layers.fully_connected(
            inputs=self.output, num_outputs=depth, activation_fn=None)
        self.labels = tf.one_hot(self.seq, depth=depth)
        # Position t predicts the symbol at t + 1: drop the first label and
        # the last logit so the two line up.
        shifted_labels = self.labels[:, 1:]
        aligned_logits = self.logits[:, :-1]
        per_step_loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=shifted_labels, logits=aligned_logits)
        self.loss = tf.reduce_mean(per_step_loss, name='loss')

    with tf.name_scope('sample'):
        # Sample the next symbol from the last step's logits, scaled by the
        # temperature.
        last_step_logits = self.logits[:, -1] / self.temp
        self.sample = tf.multinomial(last_step_logits, 1)[:, 0]

    return self.loss, self.sample, self.in_state, self.out_state
def _compute_specific(self, predicted, targets):
    """Score a uniformly random guesser against `targets`.

    `predicted` is not used; the result is the fraction of random class
    draws that happen to equal the targets.

    Returns:
        A scalar float32 tensor with the ratio of matches.
    """
    # Generate random predictions with equal probabilities
    equal_logits = tf.log([[10.] * self._num_classes])
    n_targets = tf.shape(targets)[0]
    guesses = tf.multinomial(logits=equal_logits, num_samples=n_targets)

    # Output prediction as ratio of matches
    matches = tf.equal(x=guesses, y=tf.cast(targets, tf.int64))
    return tf.reduce_mean(tf.cast(matches, tf.float32))
def sample(self, amount, temperature=1):
    """Draw `amount` buffer positions in proportion to their priority.

    Args:
        amount: number of positions to draw.
        temperature: divisor applied to the log-probabilities.

    Returns:
        A list with the gathered entries of every buffer except 'priority'.
    """
    priorities = self.buffer.buffers['priority'].value()[:self.size()]
    normalized = priorities / tf.reduce_sum(priorities)
    logprobs = tf.log(normalized) / temperature
    # tf.multinomial wants a batch axis; add one and strip it afterwards.
    positions = tf.multinomial(logprobs[None, ...], amount)[0]

    gathered = []
    for key, b in self.buffer.buffers.items():
        if key == 'priority':
            continue
        gathered.append(tf.gather(b, positions))
    return gathered
def sample(self):
    """Draw one sample per row of `self.inputs`; return the first row's."""
    drawn = tf.multinomial(self.inputs, 1)
    return drawn[0]
def sampler(symbols_to_logits_fn, initial_ids, sample_num, decode_length,
            vocab_size, eos_id, features=None):
    """Extend `sample_num` copies of each initial id by random sampling.

    Every loop step calls `symbols_to_logits_fn` on the sequences built so
    far, samples one new symbol per sequence and appends it. The loop runs
    `decode_length` times.

    `vocab_size`, `eos_id` and `features` are accepted but not used here.

    Returns:
        An int32 tensor of sampled sequences, one per row.
    """
    # Not used below; kept so graph construction matches the original.
    batch_size = tf.shape(initial_ids)[0]

    step0 = tf.constant(0)
    # Expand each batch entry to sample_num copies.
    tiled = tf.tile(tf.expand_dims(initial_ids, 1), [1, sample_num])
    tiled = tf.expand_dims(tiled, 2)  # (batch_size, sample_num, 1)
    tiled_shape = tf.shape(tiled)
    sequences = tf.reshape(tiled, [tiled_shape[0]*tiled_shape[1], 1])

    def _should_continue(step, seqs):
        # Loop condition: keep going until decode_length steps are done.
        return step < decode_length

    def _extend(step, seqs):
        logit = symbols_to_logits_fn(seqs)[0]
        drawn = tf.to_int32(tf.multinomial(logit, 1))
        longer = tf.concat([seqs, drawn], 1)
        return (step + 1, longer)

    (_, sequences) = tf.while_loop(
        _should_continue,
        _extend,
        [step0, sequences],
        shape_invariants=[
            tf.TensorShape([]),
            tf.TensorShape([None, None])
        ],
        parallel_iterations=1,
        back_prop=False
    )
    # NOTE(review): the row count above is batch * sample_num, so this static
    # shape presumably assumes a batch size of 1 - confirm.
    sequences.set_shape((sample_num, None))
    return sequences
def Monte_Carlo_Rollout(self):
    """Build the graph for greedy decoding plus sampled words per step.

    A single image feature vector (batch size 1) is embedded and fed to the
    LSTM. At every step the argmax word is taken as the next input, and for
    every step but the last, `lstm_step - 1 - i` independent draws of three
    words each are sampled from that step's logits.

    Returns:
        images: the input placeholder of shape [1, self.feats_dim].
        gen_sentences: list with the argmax word tensor of every step.
        all_sample_sentences: list (one entry per step except the last) of
            lists of sampled word tensors.
    """
    images = tf.placeholder(tf.float32, [1, self.feats_dim])
    images_embed = tf.matmul(images, self.encode_img_W) + self.encode_img_b
    state = self.lstm.zero_state(batch_size=1, dtype=tf.float32)
    gen_sentences = []
    all_sample_sentences = []
    with tf.variable_scope("LSTM"):
        # The first LSTM call consumes the image embedding.
        output, state = self.lstm(images_embed, state)
        with tf.device("/cpu:0"):
            # Decoding starts from word id 1.
            current_emb = tf.nn.embedding_lookup(self.Wemb, tf.ones([1], dtype=tf.int64))
        for i in range(0, self.lstm_step):
            tf.get_variable_scope().reuse_variables()
            output, state = self.lstm(current_emb, state)
            logit_words = tf.matmul(output, self.embed_word_W) + self.embed_word_b
            max_prob_word = tf.argmax(logit_words, 1)[0]
            with tf.device("/cpu:0"):
                current_emb = tf.nn.embedding_lookup(self.Wemb, max_prob_word)
                # Restore the batch axis removed by the [0] index above.
                current_emb = tf.expand_dims(current_emb, 0)
            gen_sentences.append(max_prob_word)
            if i < self.lstm_step-1:
                # Fewer draws are taken the later the step is.
                num_sample = self.lstm_step - 1 - i
                sample_sentences = []
                for idx_sample in range(num_sample):
                    sample = tf.multinomial(logit_words, 3)
                    sample_sentences.append(sample[0])
                all_sample_sentences.append(sample_sentences)
    return images, gen_sentences, all_sample_sentences

########################################################################
#
# Class function for step 4
#
########################################################################
def Monte_Carlo_and_Baseline(self):
    """Build the batched graph for greedy decoding, samples and baselines.

    Works like the single-image rollout but on `self.batch_size` images, and
    additionally computes a baseline from the LSTM state at every step
    except the last.

    Returns:
        images: the input placeholder, [self.batch_size, self.feats_dim].
        gen_sentences: list with the argmax word tensor of every step.
        all_sample_sentences: list (one entry per step except the last) of
            lists of sampled word tensors.
        all_baselines: list with one baseline tensor per step except the
            last.
    """
    images = tf.placeholder(tf.float32, [self.batch_size, self.feats_dim])
    images_embed = tf.matmul(images, self.encode_img_W) + self.encode_img_b
    state = self.lstm.zero_state(batch_size=self.batch_size, dtype=tf.float32)
    gen_sentences = []
    all_sample_sentences = []
    all_baselines = []
    with tf.variable_scope("LSTM"):
        # The first LSTM call consumes the image embedding.
        output, state = self.lstm(images_embed, state)
        with tf.device("/cpu:0"):
            # Decoding starts from word id 1 for every batch entry.
            current_emb = tf.nn.embedding_lookup(self.Wemb, tf.ones([self.batch_size], dtype=tf.int64))
        for i in range(0, self.lstm_step):
            tf.get_variable_scope().reuse_variables()
            output, state = self.lstm(current_emb, state)
            logit_words = tf.matmul(output, self.embed_word_W) + self.embed_word_b
            max_prob_word = tf.argmax(logit_words, 1)
            with tf.device("/cpu:0"):
                current_emb = tf.nn.embedding_lookup(self.Wemb, max_prob_word)
                #current_emb = tf.expand_dims(current_emb, 0)
            gen_sentences.append(max_prob_word)
            # compute Q for gt with K Monte Carlo rollouts
            if i < self.lstm_step-1:
                num_sample = self.lstm_step - 1 - i
                sample_sentences = []
                for idx_sample in range(num_sample):
                    sample = tf.multinomial(logit_words, 3)
                    sample_sentences.append(sample)
                all_sample_sentences.append(sample_sentences)
                # compute estimated baseline
                # NOTE(review): state[1] is presumably the LSTM hidden state - confirm.
                baseline = tf.nn.relu(tf.matmul(state[1], self.baseline_MLP_W) + self.baseline_MLP_b)
                all_baselines.append(baseline)
    return images, gen_sentences, all_sample_sentences, all_baselines
def random_multinomial(logits, seed=None):
    """Sample one one-hot row per row of `logits` on the active backend.

    Despite the parameter name, the values are used as probabilities: they
    are passed as `pvals` on Theano and logged before sampling on
    TensorFlow.

    Args:
        logits: 2-D tensor of per-row class probabilities.
        seed: optional integer seed. On Theano a random seed is drawn when
            it is None; on TensorFlow it is forwarded to `tf.multinomial`.

    Returns:
        A one-hot encoded sample per row, or None (implicitly) when the
        backend is neither "theano" nor "tensorflow".
    """
    if K.backend() == "theano":
        if seed is None:
            # randint needs integer bounds; 10e6 was a float.
            seed = numpy.random.randint(1, int(10e6))
        rng = RandomStreams(seed=seed)
        return rng.multinomial(n=1, pvals=logits, ndim=None, dtype=_FLOATX)
    elif K.backend() == "tensorflow":
        draws = tf.multinomial(K.log(logits), num_samples=1, seed=seed)
        # Squeeze only the num_samples axis. An axis-less squeeze also
        # dropped the batch axis for a batch of one, yielding a 1-D result.
        return tf.one_hot(tf.squeeze(draws, [1]), int(logits.shape[1]))
def multinomial_sample(x, vocab_size, temperature):
    """Multinomial sampling from a n-dimensional tensor.

    Args:
        x: tensor whose last dimension has size `vocab_size`.
        vocab_size: size of the last dimension of `x`.
        temperature: positive values sample from `x / temperature`;
            otherwise the argmax over the last axis is taken.

    Returns:
        An int32 tensor shaped like `x` without its last dimension.
    """
    if temperature > 0:
        # tf.multinomial needs 2-D input, so fold the leading axes together.
        flat_logits = tf.reshape(x, [-1, vocab_size]) / temperature
        picked = tf.multinomial(flat_logits, 1)
    else:
        picked = tf.argmax(x, axis=-1)
    leading_dims = common_layers.shape_list(x)[:-1]
    return tf.to_int32(tf.reshape(picked, leading_dims))
def sample(self, features):
    """Run the model and extract samples.

    Args:
        features: an map of string to `Tensor`.

    Returns:
        samples: an integer `Tensor`.
        logits: a list of `Tensor`s, one per datashard.
        losses: a dictionary: {loss-name (string): floating point `Scalar`}.
    """
    logits, losses = self(features)  # pylint: disable=not-callable
    if self.hparams.sampling_method == "argmax":
        return tf.argmax(logits, axis=-1), logits, losses

    assert self.hparams.sampling_method == "random"
    # Fold the leading axes so tf.multinomial sees 2-D input, sample one id
    # per row, then restore the leading shape.
    logits_shape = common_layers.shape_list(logits)
    flat_logits = (
        tf.reshape(logits, [-1, logits_shape[-1]]) / self.hparams.sampling_temp)
    flat_choices = tf.multinomial(flat_logits, 1)
    samples = tf.reshape(flat_choices, logits_shape[:-1])
    return samples, logits, losses
def __init__(self, dim):
    '''
    DESCRIPTION:
        multinomial sample one output from the softmax probability
    PARAM:
        dim (int): layer dimension
    '''
    # Diagonal matrix of ones; stored for later row lookups.
    unit_diagonal = tf.ones(dim)
    self.diag = tf.diag(unit_diagonal)
def _train_fprop(self, state_below):
    """Sample one class per row and return the matching rows of `self.diag`.

    Args:
        state_below: 2-D tensor passed to `tf.multinomial` as logits.

    Returns:
        One row of `self.diag` per row of `state_below`.
    """
    samples = tf.multinomial(state_below, num_samples=1)
    # Squeeze only the num_samples axis. An axis-less squeeze also removed
    # the batch axis for a batch of one, returning a 1-D result.
    samples = tf.squeeze(samples, [1])
    return tf.gather(self.diag, samples)
def scale_distortions(image, gt_bboxes, gt_cats, params):
    """Samples a random box according to overlapping with gt objects criteria
    and crops it from an image.

    Steps performed:
      1. With probability `args.zoomout_prob`, apply `zoomout` first.
      2. For every IoU threshold in `params['sample_jaccards']`, ask
         `tf.image.sample_distorted_bounding_box` for a candidate crop.
      3. Pick one candidate uniformly at random and crop the image.
      4. Keep only ground-truth boxes whose centre lies inside the crop, and
         re-express them relative to the crop, clipped to its extent.

    Args:
        image: image tensor; its last dimension is the channel count.
        gt_bboxes: ground-truth boxes, unstacked below as (x, y, w, h).
        gt_cats: per-box categories, masked together with `gt_bboxes`.
        params: dict providing 'sample_jaccards' and 'crop_max_tries'.

    Returns:
        A tuple `(cropped_image, gt_bboxes, gt_cats)`.
    """
    image, gt_bboxes = tf.cond(tf.random_uniform([], 0, 1.0) < args.zoomout_prob,
                               lambda: zoomout(image, gt_bboxes, params),
                               lambda: (image, gt_bboxes))
    n_channels = image.shape[-1]

    def tf_random_choice(slices, bbox):
        # Equal logits give a uniform choice among the candidate crops.
        sample = tf.multinomial(tf.log([[10.]*len(slices)]), 1)
        slices = tf.convert_to_tensor(slices)
        bbox = tf.convert_to_tensor(bbox)
        bbox_begin, bbox_size = tf.unstack(slices[tf.cast(sample[0][0], tf.int32)])
        distort_bbox = bbox[tf.cast(sample[0][0], tf.int32)]
        return bbox_begin, bbox_size, distort_bbox

    bboxes = tf.expand_dims(xywh_to_yxyx(gt_bboxes), 0)
    samplers = []
    boxes = []
    for iou in params['sample_jaccards']:
        sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
            tf.shape(image),
            bounding_boxes=bboxes,
            min_object_covered=iou,
            aspect_ratio_range=[0.5, 2.0],
            area_range=[0.3, 1.0],
            max_attempts=params['crop_max_tries'],
            use_image_if_no_bounding_boxes=True)
        # The first two outputs are the (begin, size) pair for tf.slice; the
        # third holds the chosen box.
        samplers.append(sample_distorted_bounding_box[:2])
        boxes.append(sample_distorted_bounding_box[2][0][0])
    bbox_begin, bbox_size, distort_bbox = tf_random_choice(samplers, boxes)
    cropped_image = tf.slice(image, bbox_begin, bbox_size)
    # Nope TF, you are wrong, cropping does not change channels.
    cropped_image.set_shape([None, None, n_channels])
    y1, x1, y2, x2 = tf.unstack(distort_bbox)

    def check(center, mini, maxi):
        # True where mini <= center <= maxi.
        return tf.logical_and((center >= mini), (center <= maxi))

    # Box centre = top-left corner plus half of (w, h).
    gt_centers = gt_bboxes[:, :2] + gt_bboxes[:, 2:] / 2
    mask = tf.logical_and(check(gt_centers[:, 0], x1, x2),
                          check(gt_centers[:, 1], y1, y2))
    gt_bboxes = tf.boolean_mask(gt_bboxes, mask)
    gt_cats = tf.boolean_mask(gt_cats, mask)
    w = tf.to_float(x2-x1)
    h = tf.to_float(y2-y1)
    gt_x, gt_y, gt_w, gt_h = tf.unstack(gt_bboxes, axis=1)
    gt_x2 = gt_x + gt_w
    gt_y2 = gt_y + gt_h
    # Shift into crop coordinates, clip to the crop, and divide by its size.
    gt_x1_clip = tf.clip_by_value(gt_x - x1, 0, w)/w
    gt_x2_clip = tf.clip_by_value(gt_x2 - x1, 0, w)/w
    gt_y1_clip = tf.clip_by_value(gt_y - y1, 0, h)/h
    gt_y2_clip = tf.clip_by_value(gt_y2 - y1, 0, h)/h
    gt_w_clip = gt_x2_clip - gt_x1_clip
    gt_h_clip = gt_y2_clip - gt_y1_clip
    gt_bboxes = tf.stack([gt_x1_clip, gt_y1_clip, gt_w_clip, gt_h_clip],
                         axis=1)
    return cropped_image, gt_bboxes, gt_cats
def create_variables(self):
    """Build the policy-gradient graph: inputs, policy, loss and train op.

    Creates the state placeholder, runs the policy network on it, defines
    the cross-entropy plus L2 regularization loss, scales the gradients by
    the discounted rewards, registers summaries, and builds the training op.
    All results are stored as attributes on `self`.
    """
    with tf.name_scope("model_inputs"):
        # raw state representation
        self.states = tf.placeholder(tf.float32, (None, self.state_dim), name="states")

    # rollout action based on current policy
    with tf.name_scope("predict_actions"):
        # initialize policy network
        with tf.variable_scope("policy_network"):
            self.policy_outputs = self.policy_network(self.states)

        # predict actions from policy network
        self.action_scores = tf.identity(self.policy_outputs, name="action_scores")
        # Note 1: tf.multinomial is not good enough to use yet
        # so we don't use self.predicted_actions for now
        self.predicted_actions = tf.multinomial(self.action_scores, 1)

    # regularization loss
    policy_network_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="policy_network")

    # compute loss and gradients
    with tf.name_scope("compute_pg_gradients"):
        # gradients for selecting action from policy network
        self.taken_actions = tf.placeholder(tf.int32, (None,), name="taken_actions")
        self.discounted_rewards = tf.placeholder(tf.float32, (None,), name="discounted_rewards")

        # Reuse the variables created above rather than making a new network.
        with tf.variable_scope("policy_network", reuse=True):
            self.logprobs = self.policy_network(self.states)

        # compute policy loss and regularization loss
        self.cross_entropy_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logprobs, labels=self.taken_actions)
        self.pg_loss = tf.reduce_mean(self.cross_entropy_loss)
        self.reg_loss = tf.reduce_sum([tf.reduce_sum(tf.square(x)) for x in policy_network_variables])
        self.loss = self.pg_loss + self.reg_param * self.reg_loss

        # compute gradients
        self.gradients = self.optimizer.compute_gradients(self.loss)

        # compute policy gradients
        # NOTE(review): the gradient of the mean loss is multiplied by the
        # per-sample reward vector here - confirm the intended broadcasting.
        for i, (grad, var) in enumerate(self.gradients):
            if grad is not None:
                self.gradients[i] = (grad * self.discounted_rewards, var)

        for grad, var in self.gradients:
            tf.summary.histogram(var.name, var)
            if grad is not None:
                tf.summary.histogram(var.name + '/gradients', grad)

        # emit summaries
        tf.summary.scalar("policy_loss", self.pg_loss)
        tf.summary.scalar("reg_loss", self.reg_loss)
        tf.summary.scalar("total_loss", self.loss)

    # training update
    with tf.name_scope("train_policy_network"):
        # apply gradients to update policy network
        self.train_op = self.optimizer.apply_gradients(self.gradients)

    self.summarize = tf.summary.merge_all()
    self.no_op = tf.no_op()