我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用tensorflow.exp()。
def build_model(self):
    """Create placeholders, the encoder/decoder sub-graphs and the training op.

    Sets ``self.q`` / ``self.a`` (vocabulary-sized float placeholders),
    ``self.e_loss`` (KL term), ``self.g_loss`` (summed log-likelihood),
    ``self.loss`` (evidence lower bound) and ``self.optim``.
    """
    vocab_size = self.reader.vocab_size
    self.q = tf.placeholder(tf.float32, [vocab_size], name="question")
    self.a = tf.placeholder(tf.float32, [vocab_size], name="answer")

    self.build_encoder()
    self.build_decoder()

    # Kullback Leibler divergence
    self.e_loss = -0.5 * tf.reduce_sum(
        1 + self.log_sigma_sq - tf.square(self.mu) - tf.exp(self.log_sigma_sq))

    # Log likelihood
    self.g_loss = tf.reduce_sum(tf.log(self.p_x_i))

    # Evidence lower bound: log-likelihood minus KL.  The previous form added
    # the two terms, so minimising ``-loss`` also *maximised* the KL term.
    self.loss = tf.reduce_mean(self.g_loss - self.e_loss)
    self.optim = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(-self.loss)

    _ = tf.scalar_summary("encoder loss", self.e_loss)
    _ = tf.scalar_summary("decoder loss", self.g_loss)
    _ = tf.scalar_summary("loss", self.loss)
def build_encoder(self):
    """Inference network q(h|X).

    Builds two ReLU layers on top of ``self.x``, derives ``self.mu`` and
    ``self.log_sigma_sq`` from them, and samples ``self.h`` as
    ``mu + sigma * eps`` with ``eps`` drawn from a standard normal.
    """
    linear = tf.nn.rnn_cell.linear
    with tf.variable_scope("encoder"):
        # Constructed but not used further inside this method.
        q_cell = tf.nn.rnn_cell.LSTMCell(self.embed_dim, self.vocab_size)
        a_cell = tf.nn.rnn_cell.LSTMCell(self.embed_dim, self.vocab_size)

        batched_x = tf.expand_dims(self.x, 0)
        hidden_1 = tf.nn.relu(
            linear(batched_x, self.embed_dim, bias=True, scope="l1"))
        hidden_2 = tf.nn.relu(
            linear(hidden_1, self.embed_dim, bias=True, scope="l2"))

        self.mu = linear(hidden_2, self.h_dim, bias=True, scope="mu")
        self.log_sigma_sq = linear(
            hidden_2, self.h_dim, bias=True, scope="log_sigma_sq")

        noise = tf.random_normal((1, self.h_dim), 0, 1, dtype=tf.float32)
        std = tf.sqrt(tf.exp(self.log_sigma_sq))

        tf.histogram_summary("mu", self.mu)
        tf.histogram_summary("sigma", std)

        self.h = self.mu + std * noise
def build_encoder(self):
    """Inference network q(h|X).

    Stores every intermediate tensor on ``self`` (``l1_lin``, ``l1``,
    ``l2_lin``, ``l2``, ``mu``, ``log_sigma_sq``, ``eps``, ``sigma``, ``h``)
    and registers histogram summaries for the latent statistics.
    """
    embed_dim, latent_dim = self.embed_dim, self.h_dim
    with tf.variable_scope("encoder"):
        batched_x = tf.expand_dims(self.x, 0)

        self.l1_lin = linear(batched_x, embed_dim, bias=True, scope="l1")
        self.l1 = tf.nn.relu(self.l1_lin)

        self.l2_lin = linear(self.l1, embed_dim, bias=True, scope="l2")
        self.l2 = tf.nn.relu(self.l2_lin)

        self.mu = linear(self.l2, latent_dim, bias=True, scope="mu")
        self.log_sigma_sq = linear(
            self.l2, latent_dim, bias=True, scope="log_sigma_sq")

        # Reparameterised sample: h = mu + sigma * eps.
        self.eps = tf.random_normal((1, latent_dim), 0, 1, dtype=tf.float32)
        self.sigma = tf.sqrt(tf.exp(self.log_sigma_sq))
        self.h = tf.add(self.mu, tf.mul(self.sigma, self.eps))

        for tag, tensor in (("mu", self.mu),
                            ("sigma", self.sigma),
                            ("h", self.h)):
            tf.histogram_summary(tag, tensor)
        tf.histogram_summary("mu + sigma", self.mu + self.sigma)
def __call__(self, z):
    """Return the negative log of an equal-weight mixture of six bumps.

    ``z`` is sliced into its first two columns; each mixture component is
    ``exp(-0.5 * v**2) / sqrt(2 * pi * 0.25)`` where ``v`` is twice the
    Euclidean distance from one of six fixed centres.
    """
    z1 = tf.reshape(tf.slice(z, [0, 0], [-1, 1]), [-1])
    z2 = tf.reshape(tf.slice(z, [0, 1], [-1, 1]), [-1])

    def scaled_distance(dx, dy):
        # Twice the Euclidean norm of the offset (dx, dy).
        return tf.sqrt(dx * dx + dy * dy) * 2

    offset = 2.5 * np.sqrt(3)
    distances = [
        scaled_distance(z1 - 5, z2),
        scaled_distance(z1 + 5, z2),
        scaled_distance(z1 - 2.5, z2 - offset),
        scaled_distance(z1 + 2.5, z2 + offset),
        scaled_distance(z1 - 2.5, z2 + offset),
        scaled_distance(z1 + 2.5, z2 - offset),
    ]

    densities = [
        tf.exp(-0.5 * v * v) / tf.sqrt(2 * np.pi * 0.25) for v in distances
    ]

    # Accumulate left to right so the summation order is unchanged.
    mixture = densities[0]
    for density in densities[1:]:
        mixture = mixture + density
    return -tf.log(mixture / 6)
def tf_truncexpon(batch_size, rate, right):
    """Sample from an Exp(rate) distribution truncated to [0, right].

    Uses the fact that minus the log of a uniform variable is exponential:
    drawing U uniformly from [exp(-rate * right), 1] and returning
    ``-log(U) / rate`` yields values in [0, right].

    batch_size: a tensorflow placeholder to sync batch_size everywhere
    rate: lambda rate parameter for exponential dist
    right: float in (0, inf) where to truncate exp distribution
    """
    lower_bound = tf.exp(-1 * rate * right)
    uniform = tf.random_uniform(
        shape=(batch_size, 1), minval=lower_bound, maxval=1)
    return (-1 / rate) * tf.log(uniform)
def build_train_op(self):
    """Create one Adam minimisation op per objective and group them.

    Also defines ``self.k_t_update``, which multiplies ``self.k_t`` by
    ``exp(-1 / config.tau)``.
    """
    config = self.config

    def minimize(loss, var_list):
        # Each objective gets its own Adam optimizer instance.
        optimizer = tf.train.AdamOptimizer(
            config.learning_rate, beta1=config.beta1)
        return optimizer.minimize(loss, var_list=var_list)

    self.g_optim = minimize(self.g_loss, self.g_vars)
    self.d_optim = minimize(self.d_loss, self.d_vars)
    self.d_label_optim = minimize(self.d_labelLossReal, self.dl_vars)
    self.d_gen_label_optim = minimize(
        self.g_lossLabels_GLabeler, self.dl_gen_vars)

    z_objective = (self.g_loss_on_z
                   + self.rec_loss_coeff * self.real_reconstruction_loss)
    self.d_on_z_optim = minimize(z_objective, self.dz_vars)

    decay = tf.exp(-1.0 / config.tau)
    self.k_t_update = tf.assign(self.k_t, self.k_t * decay)

    self.train_op = tf.group(
        self.d_gen_label_optim,
        self.d_label_optim,
        self.d_optim,
        self.g_optim,
        self.d_on_z_optim,
    )
def recode_cost(self, inputs, variation, eps=1e-5, **kwargs):
    """
    Cost for given input batch of samples, under current params.

    Args:
        inputs: batch fed to ``self.get_h_inputs`` and compared against the
            decoded mean.
        variation: noise multiplied by the latent standard deviation.
        eps: small constant added to the decoded variance wherever it is
            used (log and division) for numerical stability.

    Returns:
        Half the batch mean of (likelihood term + latent KL term).
    """
    h = self.get_h_inputs(inputs)
    z_mu = tf.matmul(h, self.params['Mhz']) + self.params['bMhz']
    z_sig = tf.matmul(h, self.params['Shz']) + self.params['bShz']

    # KL divergence between latent space induced by encoder and ...
    lat_loss = -tf.reduce_sum(1 + z_sig - z_mu**2 - tf.exp(z_sig), 1)

    z = z_mu + tf.sqrt(tf.exp(z_sig)) * variation
    h = self.get_h_latents(z)
    x_mu = self.decoding(tf.matmul(h, self.params['Mhx']) + self.params['bMhx'])
    x_sig = self.decoding(tf.matmul(h, self.params['Shx']) + self.params['bShx'])
    # x_sig = tf.clip_by_value(x_mu * (1 - x_mu), .05, 1)

    # decoding likelihood term
    # The same stabilised variance is used in the log and in the quotient;
    # previously only the log was guarded, so x_sig == 0 divided by zero.
    x_var = x_sig + eps
    like_loss = tf.reduce_sum(tf.log(x_var) + (inputs - x_mu)**2 / x_var, 1)

    # # Mean cross entropy between input and encode-decoded input.
    # like_loss = 2 * tf.reduce_sum(functions.cross_entropy(inputs, x_mu), 1)

    return .5 * tf.reduce_mean(like_loss + lat_loss)
def calfilter(X):
    """Build 4-neighbour adjacency and similarity filters for image batches.

    X is an nbatch x boxheight x boxwidth image array.  Returns ``(k1, k2)``,
    each of shape nbatch x (boxheight*boxwidth) x (boxheight*boxwidth):
    ``k1`` is 1 where the column pixel is a direct up/down/left/right
    neighbour of the row pixel, and ``k2`` holds
    ``exp(-(difference of the two pixel values) ** 2)`` at those positions.
    All other entries are 0.
    """
    n_batch, height, width = X.shape[0], X.shape[1], X.shape[2]
    pairwise_shape = (n_batch, height, width, height, width)
    k1 = np.zeros(pairwise_shape)
    k2 = np.zeros(pairwise_shape)

    neighbour_offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))
    for row in range(height):
        for col in range(width):
            for d_row, d_col in neighbour_offsets:
                n_row, n_col = row + d_row, col + d_col
                # Skip neighbours that fall outside the image.
                if not (0 <= n_row < height and 0 <= n_col < width):
                    continue
                k1[:, row, col, n_row, n_col] = 1
                k2[:, row, col, n_row, n_col] = np.exp(
                    -(X[:, row, col] - X[:, n_row, n_col]) ** 2)

    n_pixels = height * width
    flat_shape = (n_batch, n_pixels, n_pixels)
    return k1.reshape(flat_shape), k2.reshape(flat_shape)
def sample_encoded_context(self, embeddings):
    """Helper function for init_opt.

    Returns the conditioning vector together with the KL term scaled by
    ``cfg.TRAIN.COEFF.KL``.  With conditioning augmentation enabled the
    vector is ``mean + exp(log_sigma) * epsilon``; otherwise it is the mean
    and the KL term is 0.
    """
    c_mean_logsigma = self.model.generate_condition(embeddings)
    mean = c_mean_logsigma[0]

    if not cfg.TRAIN.COND_AUGMENTATION:
        return mean, cfg.TRAIN.COEFF.KL * 0

    # epsilon = tf.random_normal(tf.shape(mean))
    noise = tf.truncated_normal(tf.shape(mean))
    std = tf.exp(c_mean_logsigma[1])
    context = mean + std * noise
    kl_term = KL_loss(c_mean_logsigma[0], c_mean_logsigma[1])
    return context, cfg.TRAIN.COEFF.KL * kl_term
def sample_encoded_context(self, embeddings):
    """Helper function for init_opt.

    Builds the conditioning-augmentation structure for a text embedding
    (used under different variable scopes: 'g_net' and 'hr_g_net') and
    returns ``(c, weighted_kl)``.
    """
    c_mean_logsigma = self.model.generate_condition(embeddings)
    mean, log_sigma = c_mean_logsigma[0], c_mean_logsigma[1]

    if cfg.TRAIN.COND_AUGMENTATION:
        # epsilon = tf.random_normal(tf.shape(mean))
        epsilon = tf.truncated_normal(tf.shape(mean))
        c = mean + tf.exp(log_sigma) * epsilon
        kl_loss = KL_loss(mean, log_sigma)
    else:
        c, kl_loss = mean, 0

    # TODO: play with the coefficient for KL
    weighted_kl = cfg.TRAIN.COEFF.KL * kl_loss
    return c, weighted_kl
def forward(self, z):
    """Apply the flow step to ``z`` and return ``(y, log_det)``.

    ``mu`` and ``log_sigma`` come from ``self._get_mu_and_sigma`` or, when
    ``self.ar`` is set, from ``ar_layer``.  ``log_sigma`` is clipped to
    [-5, 5].  Unless ``hps.ignore_sigma_flow`` is set the output is
    ``z * exp(log_sigma) + mu`` with ``log_det = -log_sigma``; otherwise it
    is ``z + mu`` with ``log_det = 0.0``.
    """
    z_size = self.hps.z_size
    if self.ar:
        # permute z
        # NOTE(review): the reshape gives every permuted axis length 1, so
        # the transpose appears not to reorder any data -- confirm the
        # intended permutation.
        z = tf.reshape(z, [-1] + [1] * z_size)
        axis_order = np.random.permutation(z_size) + 1
        z = tf.transpose(z, np.append([0], axis_order))
        z = tf.reshape(z, [-1, z_size])
        mu, log_sigma = ar_layer(z, self.hps, n_hidden=self.n_hidden)
    else:
        mu, log_sigma = self._get_mu_and_sigma(z)

    log_sigma = tf.clip_by_value(log_sigma, -5, 5)

    if self.hps.ignore_sigma_flow:
        return z + mu, 0.0

    y = z * tf.exp(log_sigma) + mu
    return y, -1 * log_sigma
def shrink_bgest(r, rvar, theta):
    """Bernoulli-Gaussian MMSE estimator.

    Perform MMSE estimation E[x|r] for
        x ~ BernoulliGaussian(lambda, xvar1)
        r|x ~ Normal(x, rvar)

    The parameters theta[..., 0], theta[..., 1] represent
        the variance of non-zero x[i]:    xvar1  = abs(theta[..., 0])
        the probability of nonzero x[i]:  lambda = 1 / (exp(theta[..., 1]) + 1)

    Returns ``(xhat, dxdr)`` where ``dxdr`` is averaged over axis 0.
    """
    signal_var = abs(theta[..., 0])
    log_odds = theta[..., 1]  # log(1/lambda - 1)

    gain = 1 / (1 + rvar / signal_var)
    scaled_energy = r * r * gain / rvar
    ratio = tf.exp(log_odds - .5 * scaled_energy) * tf.sqrt(1 + signal_var / rvar)
    ratio_plus_one = ratio + 1

    xhat = gain * r / ratio_plus_one
    slope = gain * ((1 + ratio * (1 + scaled_energy)) / tf.square(ratio_plus_one))
    return (xhat, tf.reduce_mean(slope, 0))
def rickerWavelet(scale, sampleCount):
    """Build a Ricker wavelet through ``waveletHelper`` with sigma = 1.

    The wave equation evaluated at time t is
    ``2 / (sqrt(3 * sigma) * pi ** 0.25) * (1 - t**2 / sigma**2)
    * exp(-t**2 / (2 * sigma**2))``.
    """
    def waveEquation(time):
        t_squared = tf.to_float(time) ** 2.

        width = 1.
        width_squared = width ** 2.

        # 2 / ((3 * a) ** .5 * np.pi ** .25)
        normalizer = 2. / ((3. * width) ** .5 * np.pi ** .25)
        # 1 - t**2 / a**2
        polynomial = 1. - t_squared / width_squared
        # np.exp(-(t**2) / (2 * a ** 2))
        envelope = tf.exp((-1. * t_squared) / (2. * width_squared))

        return normalizer * polynomial * envelope

    return waveletHelper(scale, sampleCount, waveEquation)
def logistic_loss(positive_scores, negative_scores):
    r"""
    Pairwise logistic loss [1]:
        loss(p, n) = \sum_i log(1 + e^(1 - p_i + n_i))

    [1] http://yann.lecun.com/exdb/publis/pdf/lecun-06.pdf

    Args:
        positive_scores: (N,) Tensor containing scores of positive examples.
        negative_scores: (N,) Tensor containing scores of negative examples.
    Returns:
        Loss value.
    """
    # softplus(x) == log(1 + exp(x)) but does not overflow to inf when the
    # margin violation is large, unlike the explicit log/exp composition.
    margins = 1 - positive_scores + negative_scores
    logistic_losses = tf.nn.softplus(margins)
    loss = tf.reduce_sum(logistic_losses)
    return loss
def square_exponential_loss(positive_scores, negative_scores, gamma=1.0):
    r"""
    Square-Exponential loss [1], as implemented here:
        loss(p, n) = \sum_i - p_i + \gamma e^(n_i)

    NOTE(review): the formula previously documented for this function used
    - p_i^2; the code applies no square to the positive scores -- confirm
    which one is intended.

    [1] http://yann.lecun.com/exdb/publis/pdf/lecun-06.pdf

    Args:
        positive_scores: (N,) Tensor containing scores of positive examples.
        negative_scores: (N,) Tensor containing scores of negative examples.
        gamma: Gamma hyper-parameter.
    Returns:
        Loss value.
    """
    per_example = - positive_scores + gamma * tf.exp(negative_scores)
    return tf.reduce_sum(per_example)
def segment_softmax(scores, segment_ids):
    """Softmax over all rows that share a segment id.

    Scores are first shifted by the per-segment maximum, then exponentiated
    and divided by the per-segment sum.  Rank-2 scores are reduced over
    axis 1 before the segment reductions and the per-row statistic is
    broadcast back over that axis.
    """
    num_segments = tf.reduce_max(segment_ids) + 1
    is_matrix = len(scores.get_shape()) == 2

    def per_row(segment_values):
        # Look up each row's segment statistic, broadcasting over columns.
        gathered = tf.gather(segment_values, segment_ids)
        return tf.expand_dims(gathered, axis=1) if is_matrix else gathered

    # Subtract max
    row_max = tf.reduce_max(scores, axis=1) if is_matrix else scores
    max_per_partition = tf.unsorted_segment_max(
        row_max, segment_ids, num_segments)
    scores = scores - per_row(max_per_partition)

    # Compute probs
    scores_exp = tf.exp(scores)
    row_sum = tf.reduce_sum(scores_exp, axis=1) if is_matrix else scores_exp
    sum_per_partition = tf.unsorted_segment_sum(
        row_sum, segment_ids, num_segments)
    return scores_exp / per_row(sum_per_partition)
def __call__(self, u_t, a, b, scope=None):
    """
    Combine the gated inputs ``a * u_t`` through a batch matmul with
    ``L * exp(L @ (log(b) * sL))``.

    :param u_t: [N, M, d]
    :param a: [N, M, 1]
    :param b: [N, M, 1]
    :return: u, [N, M, d]
    """
    batch, _mem_size, _hidden = self.batch_size, self.mem_size, self.hidden_size
    with tf.name_scope(scope or self.__class__.__name__):
        tiled_L = tf.tile(tf.expand_dims(self.L, 0), [batch, 1, 1])
        tiled_sL = tf.tile(tf.expand_dims(self.sL, 0), [batch, 1, 1])

        log_b = tf.log(b + 1e-9)
        # The first slot along axis 1 is replaced by zeros.
        tail = tf.slice(log_b, [0, 1, 0], [-1, -1, -1])
        log_b = tf.concat(1, [tf.zeros([batch, 1, 1]), tail])

        weights = tiled_L * tf.exp(
            tf.batch_matmul(tiled_L, log_b * tiled_sL))  # [N, M, M]
        gated = a * u_t  # [N, M, d]
        u = tf.batch_matmul(weights, gated)  # [N, M, d]
    return u
def __call__(self, u_t, a, b, scope=None):
    """
    Per-dimension variant: the weighting matrix is built separately for each
    of the d hidden dimensions.

    :param u_t: [N, M, d]
    :param a: [N, M, d]
    :param b: [N, M, d]
    :return: u, [N, M, d]
    """
    batch, _mem_size, hidden = self.batch_size, self.mem_size, self.hidden_size

    def tile_per_dim(matrix):
        # [M, M] -> [N, d, M, M]
        expanded = tf.expand_dims(tf.expand_dims(matrix, 0), 0)
        return tf.tile(expanded, [batch, hidden, 1, 1])

    def to_columns(tensor):
        # [N, M, d] -> [N, d, M, 1]
        return tf.expand_dims(tf.transpose(tensor, [0, 2, 1]), -1)

    with tf.name_scope(scope or self.__class__.__name__):
        tiled_L = tile_per_dim(self.L)
        tiled_sL = tile_per_dim(self.sL)

        log_b = tf.log(b + 1e-9)  # [N, M, d]
        # The first slot along axis 1 is replaced by zeros.
        tail = tf.slice(log_b, [0, 1, 0], [-1, -1, -1])
        log_b = tf.concat(1, [tf.zeros([batch, 1, hidden]), tail])  # [N, M, d]
        log_b = to_columns(log_b)  # [N, d, M, 1]

        weights = tiled_L * tf.exp(
            tf.batch_matmul(tiled_L, log_b * tiled_sL))  # [N, d, M, M]
        gated = to_columns(a * u_t)  # [N, d, M, 1]

        mixed = tf.batch_matmul(weights, gated)  # [N, d, M, 1]
        u = tf.transpose(tf.squeeze(mixed, [3]), [0, 2, 1])  # [N, M, d]
    return u
def global_attention(state, hidden_states, encoder, encoder_input_length, scope=None, context=None, **kwargs):
    """Masked, temperature-scaled softmax attention over ``hidden_states``.

    Energies come from ``compute_energy_with_filter`` when
    ``encoder.attn_filters`` is set, otherwise from ``compute_energy``.
    Returns ``(weighted_average, weights)``.
    """
    with tf.variable_scope(scope or 'attention_{}'.format(encoder.name)):
        if context is not None and encoder.use_context:
            state = tf.concat([state, context], axis=1)

        if encoder.attn_filters:
            energies = compute_energy_with_filter(
                hidden_states, state,
                attn_size=encoder.attn_size,
                attn_filters=encoder.attn_filters,
                attn_filter_length=encoder.attn_filter_length,
                **kwargs)
        else:
            energies = compute_energy(
                hidden_states, state,
                attn_size=encoder.attn_size,
                attn_keep_prob=encoder.attn_keep_prob,
                pervasive_dropout=encoder.pervasive_dropout,
                layer_norm=encoder.layer_norm,
                mult_attn=encoder.mult_attn,
                **kwargs)

        # Shift by the row maximum before exponentiating.
        energies = energies - tf.reduce_max(energies, axis=1, keep_dims=True)

        mask = tf.sequence_mask(
            encoder_input_length,
            maxlen=tf.shape(hidden_states)[1],
            dtype=tf.float32)

        temperature = encoder.attn_temperature or 1.0
        unnormalized = tf.exp(energies / temperature) * mask
        weights = unnormalized / tf.reduce_sum(
            unnormalized, axis=-1, keep_dims=True)

        weighted_average = tf.reduce_sum(
            tf.expand_dims(weights, 2) * hidden_states, axis=1)
        return weighted_average, weights
def kl(self, old_dist_info, new_dist_info):
    """
    Compute the KL divergence of two multivariate Gaussian distributions
    with diagonal covariance matrices.

    Both arguments are mappings with "mean" and "log_std" entries; the
    result is summed over the last axis.  Per dimension the formula is
        ((mu_1 - mu_2)^2 + sigma_1^2 - sigma_2^2) / (2 sigma_2^2)
        + ln(sigma_2 / sigma_1)
    with 1e-8 added to the denominator.
    """
    mean_old, log_std_old = old_dist_info["mean"], old_dist_info["log_std"]
    mean_new, log_std_new = new_dist_info["mean"], new_dist_info["log_std"]

    # means: (N*A)
    # std: (N*A)
    var_old = np.square(np.exp(log_std_old))
    var_new = np.square(np.exp(log_std_new))

    numerator = np.square(mean_old - mean_new) + var_old - var_new
    denominator = 2 * var_new + 1e-8

    # The log-std difference is used directly; a more lossy version would
    # take TT.log(new_std) - TT.log(old_std) instead.
    per_dimension = numerator / denominator + log_std_new - log_std_old
    return np.sum(per_dimension, axis=-1)
def kl_sym(self, old_dist_info_vars, new_dist_info_vars):
    """
    Symbolic KL divergence of two multivariate Gaussian distributions with
    diagonal covariance matrices.

    Both arguments are mappings with "mean" and "log_std" entries; the
    result is reduced over the last axis.  Per dimension the formula is
        ((mu_1 - mu_2)^2 + sigma_1^2 - sigma_2^2) / (2 sigma_2^2)
        + ln(sigma_2 / sigma_1)
    with 1e-8 added to the denominator.
    """
    mean_old = old_dist_info_vars["mean"]
    log_std_old = old_dist_info_vars["log_std"]
    mean_new = new_dist_info_vars["mean"]
    log_std_new = new_dist_info_vars["log_std"]

    # means: (N*A)
    # std: (N*A)
    var_old = tf.square(tf.exp(log_std_old))
    var_new = tf.square(tf.exp(log_std_new))

    numerator = tf.square(mean_old - mean_new) + var_old - var_new
    denominator = 2 * var_new + 1e-8

    per_dimension = numerator / denominator + log_std_new - log_std_old
    return tf.reduce_sum(per_dimension, reduction_indices=-1)
def decode_bboxes(tcoords, anchors):
    """Turn predicted offsets and anchors into ``[y1, x1, y2, x2]`` boxes.

    Offsets are scaled by ``config['prior_variance']``; centres are shifted
    relative to the anchor centre and sizes are ``exp(offset) * anchor
    size``.  Corners are clipped to the range [0, 1].
    """
    var_x, var_y, var_w, var_h = config['prior_variance']

    anchor_w = anchors[:, 2]
    anchor_h = anchors[:, 3]
    anchor_cx = anchors[:, 0] + anchor_w / 2
    anchor_cy = anchors[:, 1] + anchor_h / 2

    center_x = tcoords[:, 0] * var_x * anchor_w + anchor_cx
    center_y = tcoords[:, 1] * var_y * anchor_h + anchor_cy
    width = tf.exp(tcoords[:, 2] * var_w) * anchor_w
    height = tf.exp(tcoords[:, 3] * var_h) * anchor_h

    x1 = tf.maximum(0., center_x - width / 2)
    y1 = tf.maximum(0., center_y - height / 2)
    # NOTE(review): x2/y2 are derived from the already-clipped x1/y1, so a
    # box clipped at 0 keeps its full width/height rather than ending at
    # centre + size / 2 -- confirm this is intended.
    x2 = tf.minimum(1., width + x1)
    y2 = tf.minimum(1., height + y1)

    return tf.stack([y1, x1, y2, x2], axis=1)
def __init__(self, name, shape, initial_stdev = 2.0, initial_prec_a = 5.0, initial_prec_b = 1.0, a0 = 1.0, b0 = 1.0, fixed_prec = False, mean_init_std = None):
    """Create the mean / log-variance variables and the precision parameters.

    ``mean`` is initialised uniformly in [-mean_init_std, mean_init_std]
    (default ``1 / sqrt(shape[-1])``) and ``logvar`` to
    ``log(initial_stdev ** 2)``.  ``prec_a`` / ``prec_b`` are constants when
    ``fixed_prec`` is true and variables otherwise.
    """
    if mean_init_std is None:
        mean_init_std = 1.0 / np.sqrt(shape[-1])

    # Precision parameters are either frozen or trainable.
    make_prec = tf.constant if fixed_prec else tf.Variable

    with tf.variable_scope(name) as scope:
        #self.mean = tf.get_variable(name="mean", shape=shape, initializer=tf.contrib.layers.xavier_initializer(), dtype = tf.float32)
        #self.var = tf.Variable(initial_var * np.ones(shape), name = name + ".var", dtype = tf.float32)
        self.mean = tf.Variable(
            tf.random_uniform(shape, minval=-mean_init_std, maxval=mean_init_std))
        self.logvar = tf.Variable(
            np.log(initial_stdev**2.0) * np.ones(shape),
            name = "logvar", dtype = tf.float32)

        self.prec_a = make_prec(
            initial_prec_a * np.ones(shape[-1]), name = "prec_a", dtype = tf.float32)
        self.prec_b = make_prec(
            initial_prec_b * np.ones(shape[-1]), name = "prec_b", dtype = tf.float32)

        self.prec = tf.div(self.prec_a, self.prec_b, name = "prec")
        self.var = tf.exp(self.logvar, name = "var")

    self.a0, self.b0 = a0, b0
    self.shape = shape
def __init__(self, name, shape, initial_stdev = 2.0, initial_prec = 5.0, a0 = 1.0, b0 = 1.0):
    """Create the mean / log-variance variables and a precision placeholder.

    ``mean`` is initialised uniformly in +/- ``1 / sqrt(shape[-1])`` and
    ``logvar`` to ``log(initial_stdev ** 2)``.  ``prec`` is a numpy array
    holding ``initial_prec`` repeated ``shape[-1]`` times; ``prec_ph`` is
    the placeholder of that length.
    """
    last_dim = shape[-1]
    bound = 1.0 / np.sqrt(last_dim)

    with tf.variable_scope(name) as scope:
        self.mean = tf.Variable(
            tf.random_uniform(shape, minval=-bound, maxval=bound))
        self.logvar = tf.Variable(
            np.log(initial_stdev**2.0) * np.ones(shape),
            name = "logvar", dtype = tf.float32)
        self.prec = np.repeat(initial_prec, last_dim)
        self.prec_ph = tf.placeholder(
            shape=last_dim, name="prec", dtype = tf.float32)
        self.var = tf.exp(self.logvar, name = "var")

    self.a0, self.b0 = a0, b0
    self.shape = shape

# def prec_div(self):
#     return - tf.reduce_sum(gammaPrior(self.prec_a, self.prec_b, self.a0, self.b0))

## outputs E_q[ log N( x | 0, prec^-1) ] + Entropy(q(x))
## where x is the normally distributed variable
def gauss_log_prob(mu, logstd, x):
    """Log density of ``x`` under a diagonal Gaussian, summed over axis 1.

    All inputs are expected to have shape (n, a).  Each component
    contributes ``-(x - mu)^2 / (2 * var) - 0.5 * log(2 * pi) - logstd``
    with ``var = exp(2 * logstd)``; summing over axis 1 gives one log
    probability per row, i.e. a tensor of size (n,).  The covariance matrix
    is assumed to be diagonal.
    """
    variance = tf.exp(2*logstd)
    squared_error = tf.square(x - mu)
    log_norm_const = 0.5*tf.log(tf.constant(2*np.pi))
    per_component = -squared_error/(2*variance) - log_norm_const - logstd
    return tf.reduce_sum(per_component, axis=[1])
def gauss_KL(mu1, logstd1, mu2, logstd2):
    """KL divergence between two diagonal Gaussians, one value per row.

    All inputs are expected to have shape (n, a).  Per component the term is
    ``2 * (logstd2 - logstd1) + (var1 + (mu1 - mu2)^2) / var2 - 1``, which
    is halved and summed over axis 1.  The returned tensor is guarded by an
    assertion that every entry is at least -1e-7.
    """
    var1 = tf.exp(2.*logstd1)
    var2 = tf.exp(2.*logstd2)

    log_ratio = 2.*(logstd2 - logstd1)
    scaled_spread = (var1 + tf.square(mu1-mu2))/var2
    per_component = log_ratio + scaled_spread - 1

    # Don't forget the 1/2 !!
    kl_n = tf.reduce_sum(0.5 * per_component, axis=[1])

    non_negative = tf.Assert(tf.reduce_all(kl_n >= -0.0000001), [kl_n])
    with tf.control_dependencies([non_negative]):
        return tf.identity(kl_n)
def vae(observed, n, n_x, n_z, n_k, tau, n_particles, relaxed=False):
    """Build the BayesianNet with a categorical latent ``z`` and Bernoulli ``x``.

    With ``relaxed`` the latent is an ``ExpConcrete`` node that is
    exponentiated after flattening; otherwise it is a ``OnehotCategorical``
    node.  Two tanh dense layers of 200 units map it to the logits of ``x``.
    """
    flat_shape = [n_particles, n, n_z * n_k]
    with zs.BayesianNet(observed=observed) as model:
        z_stacked_logits = tf.zeros([n, n_z, n_k])
        if relaxed:
            log_z = zs.ExpConcrete(
                'z', tau, z_stacked_logits,
                n_samples=n_particles, group_ndims=1)
            z = tf.exp(tf.reshape(log_z, flat_shape))
        else:
            onehot_z = zs.OnehotCategorical(
                'z', z_stacked_logits,
                n_samples=n_particles, group_ndims=1, dtype=tf.float32)
            z = tf.reshape(onehot_z, flat_shape)

        hidden = tf.layers.dense(z, 200, activation=tf.tanh)
        hidden = tf.layers.dense(hidden, 200, activation=tf.tanh)
        x_logits = tf.layers.dense(hidden, n_x)
        x = zs.Bernoulli('x', x_logits, group_ndims=1)
    return model
def testGetBackwardOpsSplit(self):
    """get_backward_ops on a graph where one node feeds two branches."""
    # a -> b -> c
    #       \-> d
    a = tf.placeholder(tf.float32)
    b = tf.exp(a)
    c = tf.log(b)
    d = tf.negative(b)

    # (requested tensors, extra keyword arguments, expected op list)
    cases = [
        ([d], {}, [a.op, b.op, d.op]),
        ([c], {}, [a.op, b.op, c.op]),
        ([c, d], {}, [a.op, b.op, c.op, d.op]),
        ([b, d], {}, [a.op, b.op, d.op]),
        ([a, d], {}, [a.op, b.op, d.op]),
        ([c, d], {'treat_as_inputs': [b]}, [c.op, d.op]),
        ([c], {'treat_as_inputs': [d]}, [a.op, b.op, c.op]),
    ]
    for tensors, extra, expected in cases:
        self.assertEqual(get_backward_ops(tensors, **extra), expected)
def test_Normal(self):
    """Normal nodes depend on their parameters when sampling and scoring."""
    with BayesianNet():
        mean = tf.zeros([2, 3])
        logstd = tf.zeros([2, 3])
        std = tf.exp(logstd)
        n_samples = tf.placeholder(tf.int32, shape=[])
        group_ndims = tf.placeholder(tf.int32, shape=[])
        a = Normal('a', mean, logstd=logstd, n_samples=n_samples,
                   group_ndims=group_ndims)
        b = Normal('b', mean, std=std, n_samples=n_samples,
                   group_ndims=group_ndims)

    for node in [a, b]:
        sample_ops = set(get_backward_ops(node.tensor))
        for dependency in [mean, logstd, n_samples]:
            self.assertTrue(dependency.op in sample_ops)

        log_p = node.log_prob(np.ones([2, 3]))
        log_p_ops = set(get_backward_ops(log_p))
        for dependency in [mean, logstd, group_ndims]:
            self.assertTrue(dependency.op in log_p_ops)

    # NOTE(review): assertTrue treats its second argument as the failure
    # message, so this only checks that the sliced shape is truthy --
    # confirm whether an equality check was intended.
    self.assertTrue(a.get_shape()[1:], mean.get_shape())
def get_acceptance_rate(q, p, new_q, new_p, log_posterior, mass, data_axes):
    """Compute the acceptance rate from old and new Hamiltonians.

    The rate is ``exp(min(old_hamiltonian - new_hamiltonian, 0))`` and is
    replaced by 0 wherever it or the new log probability is not finite.

    Returns ``(old_hamiltonian, new_hamiltonian, old_log_prob,
    new_log_prob, acceptance_rate)``.
    """
    old_hamiltonian, old_log_prob = hamiltonian(
        q, p, log_posterior, mass, data_axes)
    new_hamiltonian, new_log_prob = hamiltonian(
        new_q, new_p, log_posterior, mass, data_axes)

    old_log_prob = tf.check_numerics(
        old_log_prob,
        'HMC: old_log_prob has numeric errors! Try better initialization.')

    energy_drop = -new_hamiltonian + old_hamiltonian
    raw_rate = tf.exp(tf.minimum(energy_drop, 0.0))

    usable = tf.logical_and(tf.is_finite(raw_rate),
                            tf.is_finite(new_log_prob))
    acceptance_rate = tf.where(usable, raw_rate, tf.zeros_like(raw_rate))

    return (old_hamiltonian, new_hamiltonian, old_log_prob, new_log_prob,
            acceptance_rate)
def tune(self, acceptance_rate, fresh_start):
    """Return the step size, adapting it when ``self.adapt_step_size`` holds.

    When adapting, the step counter, ``h_bar`` and ``log_epsilon_bar`` are
    updated via assignments (``fresh_start`` scales out their previous
    values) and ``exp(log_epsilon)`` is returned; otherwise the result is
    ``exp(self.log_epsilon_bar)``.
    """
    def adapt_stepsize():
        step = tf.assign(self.step, (1 - fresh_start) * self.step + 1)

        h_rate = tf.div(1.0, step + self.t0)
        h_bar = tf.assign(
            self.h_bar,
            (1 - fresh_start) * (1 - h_rate) * self.h_bar +
            h_rate * (self.delta - acceptance_rate))

        log_epsilon = self.mu - tf.sqrt(step) / self.gamma * h_bar

        avg_rate = tf.pow(step, -self.kappa)
        log_epsilon_bar = tf.assign(
            self.log_epsilon_bar,
            avg_rate * log_epsilon +
            (1 - fresh_start) * (1 - avg_rate) * self.log_epsilon_bar)

        # Force the averaged value to be written before the step size is read.
        with tf.control_dependencies([log_epsilon_bar]):
            synced_log_epsilon = tf.identity(log_epsilon)

        return tf.exp(synced_log_epsilon)

    def frozen_stepsize():
        return tf.exp(self.log_epsilon_bar)

    return tf.cond(self.adapt_step_size, adapt_stepsize, frozen_stepsize)
def log_sum_exp(x, axis=None, keep_dims=False):
    """
    Deprecated: Use tf.reduce_logsumexp().

    Tensorflow numerically stable log sum of exps across the `axis`.

    :param x: A Tensor or numpy array.
    :param axis: An int or list or tuple. The dimensions to reduce.
        If `None` (the default), reduces all dimensions.
    :param keep_dims: Bool. If true, retains reduced dimensions with length 1.
        Default to be False.

    :return: A Tensor after the computation of log sum exp along given axes of
        x.
    """
    x = tf.cast(x, dtype=tf.float32)
    peak = tf.reduce_max(x, axis=axis, keep_dims=True)
    shifted_total = tf.reduce_sum(tf.exp(x - peak), axis=axis, keep_dims=True)
    result = tf.log(shifted_total) + peak
    if keep_dims:
        return result
    # The reduced axes all have length 1 here, so this only drops them.
    return tf.reduce_sum(result, axis=axis)
def log_mean_exp(x, axis=None, keep_dims=False):
    """
    Tensorflow numerically stable log mean of exps across the `axis`.

    :param x: A Tensor or numpy array.
    :param axis: An int or list or tuple. The dimensions to reduce.
        If `None` (the default), reduces all dimensions.
    :param keep_dims: Bool. If true, retains reduced dimensions with length 1.
        Default to be False.

    :return: A Tensor after the computation of log mean exp along given axes of
        x.
    """
    x = tf.cast(x, dtype=tf.float32)
    peak = tf.reduce_max(x, axis=axis, keep_dims=True)
    shifted_mean = tf.reduce_mean(tf.exp(x - peak), axis=axis, keep_dims=True)
    result = tf.log(shifted_mean) + peak
    if keep_dims:
        return result
    # The reduced axes all have length 1 here, so this only drops them.
    return tf.reduce_mean(result, axis=axis)
def decode(roi, deltas):
    """Apply predicted deltas to ``roi`` and return ``[x1, y1, x2, y2]`` boxes.

    ``deltas`` is split along axis 1 into (dx, dy, dw, dh).  The reference
    point is shifted by ``d * size`` and the sizes are scaled by
    ``exp(d)``.
    """
    with tf.name_scope('BoundingBoxTransform/decode'):
        (roi_width, roi_height, roi_urx, roi_ury) = get_width_upright(roi)

        dx, dy, dw, dh = tf.split(deltas, 4, axis=1)

        ref_x = dx * roi_width + roi_urx
        ref_y = dy * roi_height + roi_ury
        box_w = tf.exp(dw) * roi_width
        box_h = tf.exp(dh) * roi_height

        x_min = ref_x - 0.5 * box_w
        y_min = ref_y - 0.5 * box_h

        # This -1. extra is different from reference implementation.
        x_max = ref_x + 0.5 * box_w - 1.
        y_max = ref_y + 0.5 * box_h - 1.

        return tf.concat([x_min, y_min, x_max, y_max], axis=1)
def _create_network(self):
    """Wire the recognition network, the latent sample and the generator.

    Sets ``self.z_mean``, ``self.z_log_sigma_sq``, ``self.z`` and
    ``self.x_reconstr_mean``.
    """
    # Initialize autoencode network weights and biases
    params = self._initialize_weights(**self.network_architecture)

    # Use recognition network to determine mean and
    # (log) variance of Gaussian distribution in latent space
    self.z_mean, self.z_log_sigma_sq = self._recognition_network(
        params["weights_recog"], params["biases_recog"])

    # Draw one sample z from Gaussian distribution
    latent_dim = self.network_architecture["n_z"]
    noise = tf.random_normal(
        (self.batch_size, latent_dim), 0, 1, dtype=tf.float32)

    # z = mu + sigma*epsilon
    sigma = tf.sqrt(tf.exp(self.z_log_sigma_sq))
    self.z = tf.add(self.z_mean, tf.mul(sigma, noise))

    # Use generator to determine mean of
    # Bernoulli distribution of reconstructed input
    self.x_reconstr_mean = self._generator_network(
        params["weights_gener"], params["biases_gener"])
def _setup_training(self):
    """
    Set up a data flow graph for fine tuning.

    Scores ``self.data1`` and ``self.data2`` with the shared weights, then
    sets ``self.cost`` (the pairwise logistic cost divided by the batch
    size) and ``self.optimize``, and registers weight/bias histograms and a
    cost summary.
    """
    layer_num = self.layer_num
    act_func = ACTIVATE_FUNC[self.activate_func]
    sigma = self.sigma
    lr = self.learning_rate
    weights = self.weights
    biases = self.biases
    data1, data2 = self.data1, self.data2
    batch_size = self.batch_size
    optimizer = OPTIMIZER[self.optimizer]

    with tf.name_scope("training"):
        s1 = self._obtain_score(data1, weights, biases, act_func, "1")
        s2 = self._obtain_score(data2, weights, biases, act_func, "2")
        with tf.name_scope("cost"):
            # softplus(x) == log(1 + exp(x)), but it stays finite when
            # -sigma * (s1 - s2) is large, where exp() would overflow.
            sum_cost = tf.reduce_sum(tf.nn.softplus(-sigma*(s1-s2)))
            self.cost = cost = sum_cost / batch_size

    self.optimize = optimizer(lr).minimize(cost)

    for n in range(layer_num-1):
        tf.histogram_summary("weight"+str(n), weights[n])
        tf.histogram_summary("bias"+str(n), biases[n])

    tf.scalar_summary("cost", cost)
def _setup_prediction(self):
    """Create the prediction placeholders, the score and the pair probability.

    Sets ``self.input1`` / ``self.input2`` (float placeholders of width
    ``self.input_dim``), ``self.score`` (score of the first input) and
    ``self.prob = 1 / (1 + exp(-sigma * (s1 - s2)))``.
    """
    placeholder_shape = [None, self.input_dim]
    self.input1 = tf.placeholder("float", shape=placeholder_shape, name="input1")
    self.input2 = tf.placeholder("float", shape=placeholder_shape, name="input2")

    activation = ACTIVATE_FUNC[self.activate_func]
    weights, biases, sigma = self.weights, self.biases, self.sigma

    with tf.name_scope("prediction"):
        score_1 = self._obtain_score(
            self.input1, weights, biases, activation, "1")
        score_2 = self._obtain_score(
            self.input2, weights, biases, activation, "2")
        self.score = score_1
        with tf.name_scope("probability"):
            self.prob = 1 / (1 + tf.exp(-sigma*(score_1-score_2)))
def get_e_qval(self, observations, policy):
    """Return Q-value estimates for ``observations`` under ``policy``.

    For a ``StochasticPolicy`` the distribution mean is evaluated directly,
    unless ``self.eqf_use_full_qf`` is set and ``self.eqf_sample_size > 1``;
    in that case ``eqf_sample_size`` Gaussian actions are sampled per
    observation and their Q-values are averaged.  For any other policy the
    actions returned by ``policy.get_actions`` are evaluated.
    """
    if isinstance(policy, StochasticPolicy):
        agent_info = policy.dist_info(observations)
        means, log_stds = agent_info['mean'], agent_info['log_std']
        if self.eqf_use_full_qf and self.eqf_sample_size > 1:
            n_samples = self.eqf_sample_size
            observations = np.repeat(observations, n_samples, axis=0)
            means = np.repeat(means, n_samples, axis=0)
            stds = np.repeat(np.exp(log_stds), n_samples, axis=0)
            # randn takes the dimensions as separate integers; unpacking the
            # array itself (the previous code) passed its rows and raised
            # TypeError.
            randoms = np.random.randn(*means.shape)
            actions = means + stds * randoms
            all_qvals = self.get_qval(observations, actions)
            # Consecutive groups of n_samples rows belong to one observation.
            qvals = np.mean(all_qvals.reshape((-1, n_samples)), axis=1)
        else:
            qvals = self.get_qval(observations, means)
    else:
        actions, _ = policy.get_actions(observations)
        qvals = self.get_qval(observations, actions)
    return qvals
def generate_proposals(coefficients, anchors):
    '''Generate proposals from static anchors and normalizing coefficients

    coefficients: N x 4 tensor: N x (ty, tx, th, tw)
    anchors: N x 4 tensor with boxes N x (y, x, h, w)

    Sizes are the anchor sizes scaled by exp(coefficient); positions are the
    anchor positions shifted by coefficient * anchor size.

    NOTE(review): earlier documentation said anchors hold box centers while
    the returned x, y are top-left corners; no half-size offset is applied
    here -- confirm which convention is intended.

    returns: N x 4 tensor with bounding box proposals, N x (y, x, h, w)
    '''
    t_y, t_x, t_h, t_w = tf.unstack(coefficients, axis=1)
    anchor_y, anchor_x, anchor_h, anchor_w = tf.unstack(anchors, axis=1)

    width = anchor_w * tf.exp(t_w)
    height = anchor_h * tf.exp(t_h)

    pos_x = anchor_x + t_x * anchor_w
    pos_y = anchor_y + t_y * anchor_h

    return tf.stack([pos_y, pos_x, height, width], axis=1)
def pos_loss_pred(self, i, pos_embeddings, pos_logit, NUM_POS, gold_pos, pos_trainables):
    """Return ``(loss_or_prediction, pos_emb)`` for sentence ``i``.

    With ``self.args.no_pos`` the gold tags are embedded directly.
    Otherwise a softmax over ``pos_logit[1:]`` is computed; in training
    mode the first element is the summed negative log-likelihood of the
    gold tags, and in prediction mode it is the argmax tag per position.
    """
    if self.args.no_pos:
        pos_emb = tf.nn.embedding_lookup(pos_embeddings, gold_pos[i])
        if self.train:
            return 0, pos_emb
        # NOTE(review): this branch reads self.sent_length while the
        # training loss reads self.sent_lengths[i] -- confirm both exist.
        return tf.gather(gold_pos[i], tf.range(1, self.sent_length)), pos_emb

    pos_logit = pos_logit[1:]
    log_partition = tf.reduce_logsumexp(pos_logit, [1])
    pos_pred = tf.exp(pos_logit - tf.reshape(log_partition, (-1, 1)))

    # The embedding looked up at index NUM_POS is placed in front of the
    # prediction-weighted embeddings.
    first_row = tf.reshape(
        tf.nn.embedding_lookup(pos_embeddings, NUM_POS), (1, -1))
    pos_emb = tf.concat([first_row, tf.matmul(pos_pred, pos_trainables)], 0)

    if not self.train:
        return tf.cast(tf.argmax(pos_pred, 1), tf.int32), pos_emb

    length = self.sent_lengths[i]
    partition_terms = tf.gather(log_partition, tf.range(length - 1))
    # Index into the flattened logits: row * NUM_POS + gold tag.
    gold_tags = tf.gather(gold_pos[i], tf.range(1, length))
    flat_index = tf.range(length - 1) * NUM_POS + gold_tags
    gold_logits = tf.gather(tf.reshape(pos_logit, [-1]), flat_index)
    loss = tf.reduce_sum(partition_terms - gold_logits)
    return loss, pos_emb
def build_model(self):
    """Create placeholders, encoder/generator graphs, losses and optimizers.

    Trainable variables are split by name into encoder and generator lists
    so that the two losses can be updated alternately (``optim_e`` /
    ``optim_g``) or jointly (``optim``).
    """
    self.x = tf.placeholder(tf.float32, [self.reader.vocab_size], name="input")
    self.x_idx = tf.placeholder(tf.int32, [None], name="x_idx")

    self.build_encoder()
    self.build_generator()

    # Kullback Leibler divergence
    kl_terms = (1 + self.log_sigma_sq
                - tf.square(self.mu)
                - tf.exp(self.log_sigma_sq))
    self.e_loss = -0.5 * tf.reduce_sum(kl_terms)

    # Log likelihood
    picked_probs = tf.gather(self.p_x_i, self.x_idx)
    self.g_loss = -tf.reduce_sum(tf.log(picked_probs + 1e-10))

    self.loss = self.e_loss + self.g_loss

    # A variable whose name contains "encoder" goes to the encoder list even
    # if it also contains "generator".
    trainables = tf.trainable_variables()
    self.encoder_var_list = [
        var for var in trainables if "encoder" in var.name]
    self.generator_var_list = [
        var for var in trainables
        if "encoder" not in var.name and "generator" in var.name]

    def adam():
        return tf.train.AdamOptimizer(learning_rate=self.lr)

    # optimizer for alternative update
    self.optim_e = adam().minimize(
        self.e_loss, global_step=self.step, var_list=self.encoder_var_list)
    self.optim_g = adam().minimize(
        self.g_loss, global_step=self.step, var_list=self.generator_var_list)

    # optimizer for one shot update
    self.optim = adam().minimize(self.loss, global_step=self.step)

    tf.scalar_summary("encoder loss", self.e_loss)
    tf.scalar_summary("generator loss", self.g_loss)
    tf.scalar_summary("total loss", self.loss)
def safe_exp(w, thresh):
    """Safe exponential function for tensors.

    Returns ``exp(w)`` where ``w <= thresh`` and the linear continuation
    ``exp(thresh) * (w - thresh + 1)`` where ``w > thresh``.

    Args:
        w: input tensor.
        thresh: scalar above which the linear branch is used.
    """
    slope = np.exp(thresh)
    with tf.variable_scope('safe_exponential'):
        lin_region = tf.to_float(w > thresh)
        lin_out = slope*(w - thresh + 1.)
        # Clamp the exponent: the two branches are blended by multiplying
        # with a 0/1 mask, so an overflowing exp(w) in the linear region
        # would yield 0 * inf = NaN.  Below the threshold the clamp is a
        # no-op.
        exp_out = tf.exp(tf.minimum(w, thresh))
        out = lin_region*lin_out + (1.-lin_region)*exp_out
    return out
def compute_kernel(x, y):
    """Pairwise kernel between the rows of ``x`` and the rows of ``y``.

    Entry (i, j) is ``exp(-mean((x[i] - y[j]) ** 2) / dim)`` where ``dim``
    is the size of axis 1 of ``x``.
    """
    n_x = tf.shape(x)[0]
    n_y = tf.shape(y)[0]
    dim = tf.shape(x)[1]

    # Broadcast both inputs to [n_x, n_y, dim] by explicit tiling.
    x_grid = tf.tile(
        tf.reshape(x, tf.stack([n_x, 1, dim])), tf.stack([1, n_y, 1]))
    y_grid = tf.tile(
        tf.reshape(y, tf.stack([1, n_y, dim])), tf.stack([n_x, 1, 1]))

    mean_sq_diff = tf.reduce_mean(tf.square(x_grid - y_grid), axis=2)
    return tf.exp(-mean_sq_diff / tf.cast(dim, tf.float32))