We extracted the following 50 code examples from open source Python projects to illustrate how to use tensorflow.square().
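Before the project examples, a minimal sketch of tf.square() itself may help (assuming TensorFlow 1.x graph mode, which all of the examples below use): it squares a tensor element-wise, and is most often combined with tf.reduce_mean or tf.reduce_sum to build squared-error style losses.

import tensorflow as tf

# tf.square() is element-wise; a common pattern is mean squared error.
predictions = tf.constant([1.0, 2.0, 3.0])
targets = tf.constant([1.5, 2.0, 2.0])
mse = tf.reduce_mean(tf.square(predictions - targets))  # mean of squared differences

with tf.Session() as sess:
    print(sess.run(tf.square([2.0, 3.0])))  # [4. 9.]
    print(sess.run(mse))                    # (0.25 + 0.0 + 1.0) / 3 ≈ 0.4167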
def calculate_loss_mix2(self, predictions, predictions_class, predictions_encoder, labels, **unused_params):
    with tf.name_scope("loss_mix2"):
        float_labels = tf.cast(labels, tf.float32)
        float_encoders = float_labels
        for i in range(FLAGS.encoder_layers):
            var_i = np.loadtxt(FLAGS.autoencoder_dir + 'autoencoder_layer%d.model' % i)
            weight_i = tf.constant(var_i[:-1, :], dtype=tf.float32)
            bias_i = tf.reshape(tf.constant(var_i[-1, :], dtype=tf.float32), [-1])
            float_encoders = tf.nn.xw_plus_b(float_encoders, weight_i, bias_i)
            if i < FLAGS.encoder_layers - 1:
                float_encoders = tf.nn.relu(float_encoders)
            else:
                hidden_mean = tf.reduce_mean(float_encoders, axis=1, keep_dims=True)
                hidden_std = tf.sqrt(tf.reduce_mean(tf.square(float_encoders - hidden_mean), axis=1, keep_dims=True))
                float_encoders = (float_encoders - hidden_mean) / (hidden_std + 1e-6)
                #float_encoders = tf.nn.sigmoid(float_encoders)
        cross_entropy_encoder = 0.1 * self.calculate_mseloss(predictions_encoder, float_encoders)
        cross_entropy_loss = self.calculate_loss(predictions, labels)
        return cross_entropy_encoder + cross_entropy_loss, float_encoders
        #return cross_entropy_encoder, float_encoders
def build_model(self):
    self.q = tf.placeholder(tf.float32, [self.reader.vocab_size], name="question")
    self.a = tf.placeholder(tf.float32, [self.reader.vocab_size], name="answer")

    self.build_encoder()
    self.build_decoder()

    # Kullback Leibler divergence
    self.e_loss = -0.5 * tf.reduce_sum(1 + self.log_sigma_sq - tf.square(self.mu) - tf.exp(self.log_sigma_sq))

    # Log likelihood
    self.g_loss = tf.reduce_sum(tf.log(self.p_x_i))

    self.loss = tf.reduce_mean(self.e_loss + self.g_loss)
    self.optim = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(-self.loss)

    _ = tf.scalar_summary("encoder loss", self.e_loss)
    _ = tf.scalar_summary("decoder loss", self.g_loss)
    _ = tf.scalar_summary("loss", self.loss)
def log_variable(variable, gradient=None):
    r'''
    We introduce a function for logging a tensor variable's current state.
    It logs scalar values for the mean, standard deviation, minimum and maximum.
    Furthermore it logs a histogram of its state and (if given) of an
    optimization gradient.
    '''
    name = variable.name
    mean = tf.reduce_mean(variable)
    tf.summary.scalar(name='%s/mean' % name, tensor=mean)
    tf.summary.scalar(name='%s/stddev' % name, tensor=tf.sqrt(tf.reduce_mean(tf.square(variable - mean))))
    tf.summary.scalar(name='%s/max' % name, tensor=tf.reduce_max(variable))
    tf.summary.scalar(name='%s/min' % name, tensor=tf.reduce_min(variable))
    tf.summary.histogram(name=name, values=variable)
    if gradient is not None:
        if isinstance(gradient, tf.IndexedSlices):
            grad_values = gradient.values
        else:
            grad_values = gradient
        if grad_values is not None:
            tf.summary.histogram(name='%s/gradients' % name, values=grad_values)
def batchnorm(x, name, phase, updates, gamma=0.96):
    k = x.get_shape()[1]
    runningmean = tf.get_variable(name + "/mean", shape=[1, k],
                                  initializer=tf.constant_initializer(0.0), trainable=False)
    runningvar = tf.get_variable(name + "/var", shape=[1, k],
                                 initializer=tf.constant_initializer(1e-4), trainable=False)
    testy = (x - runningmean) / tf.sqrt(runningvar)
    mean_ = mean(x, axis=0, keepdims=True)
    var_ = mean(tf.square(x), axis=0, keepdims=True)
    std = tf.sqrt(var_)
    trainy = (x - mean_) / std
    updates.extend([
        tf.assign(runningmean, runningmean * gamma + mean_ * (1 - gamma)),
        tf.assign(runningvar, runningvar * gamma + var_ * (1 - gamma))
    ])
    y = switch(phase, trainy, testy)
    out = y * tf.get_variable(name + "/scaling", shape=[1, k],
                              initializer=tf.constant_initializer(1.0), trainable=True) \
        + tf.get_variable(name + "/translation", shape=[1, k],
                          initializer=tf.constant_initializer(0.0), trainable=True)
    return out

# ================================================================
# Mathematical utils
# ================================================================
def huber_loss(x, delta=1.0):
    """Reference: https://en.wikipedia.org/wiki/Huber_loss"""
    return tf.where(
        tf.abs(x) < delta,
        tf.square(x) * 0.5,
        delta * (tf.abs(x) - 0.5 * delta)
    )

# ================================================================
# Basic Stuff
# ================================================================

# ================================================================
# Theano-like Function
# ================================================================

# ================================================================
# Optimizer utils
# ================================================================
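A quick sanity check of the piecewise behavior of huber_loss above (a hypothetical usage sketch, not part of the original repository): residuals with |x| < delta are penalized quadratically, and the rest linearly.

# Hypothetical check: with delta=1.0, x=0.5 gives 0.5 * 0.25 = 0.125 (quadratic branch),
# and x=2.0 gives 1.0 * (2.0 - 0.5) = 1.5 (linear branch).
x = tf.constant([0.5, 2.0])
with tf.Session() as sess:
    print(sess.run(huber_loss(x)))  # [0.125 1.5]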
def batchnormalize(X, eps=1e-8, g=None, b=None):
    if X.get_shape().ndims == 4:
        mean = tf.reduce_mean(X, [0, 1, 2])
        std = tf.reduce_mean(tf.square(X - mean), [0, 1, 2])
        X = (X - mean) / tf.sqrt(std + eps)

        if g is not None and b is not None:
            g = tf.reshape(g, [1, 1, 1, -1])
            b = tf.reshape(b, [1, 1, 1, -1])
            X = X * g + b

    elif X.get_shape().ndims == 2:
        mean = tf.reduce_mean(X, 0)
        std = tf.reduce_mean(tf.square(X - mean), 0)
        X = (X - mean) / tf.sqrt(std + eps)

        if g is not None and b is not None:
            g = tf.reshape(g, [1, -1])
            b = tf.reshape(b, [1, -1])
            X = X * g + b

    else:
        raise NotImplementedError

    return X
def Grad_Penalty(real_data, fake_data, Discriminator, config):
    '''
    Implementation from "Improved Training of Wasserstein GANs":
    interpolation-based estimation of the gradient of the discriminator,
    used to penalize the gradient norm rather than explicitly constrain
    the Lipschitz constant.
    '''
    batch_size = config.batch_size
    LAMBDA = config.lambda_W
    n_hidden = config.critic_hidden_size
    alpha = tf.random_uniform([batch_size, 1], 0., 1.)
    interpolates = alpha * real_data + ((1 - alpha) * fake_data)  # Could do more if not fixed batch_size
    disc_interpolates = Discriminator(interpolates, batch_size, n_hidden=n_hidden,
                                      config=config, reuse=True)[1]  # logits
    gradients = tf.gradients(disc_interpolates, [interpolates])[0]  # orig
    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
    gradient_penalty = tf.reduce_mean((slopes - 1) ** 2)
    grad_cost = LAMBDA * gradient_penalty
    return grad_cost, slopes
def Minibatch_Discriminator(input, num_kernels=100, dim_per_kernel=5, init=False, name='MD'):
    num_inputs = df_dim * 4
    theta = tf.get_variable(name + "/theta", [num_inputs, num_kernels, dim_per_kernel],
                            initializer=tf.random_normal_initializer(stddev=0.05))
    log_weight_scale = tf.get_variable(name + "/lws", [num_kernels, dim_per_kernel],
                                       initializer=tf.constant_initializer(0.0))
    W = tf.mul(theta, tf.expand_dims(tf.exp(log_weight_scale) / tf.sqrt(tf.reduce_sum(tf.square(theta), 0)), 0))
    W = tf.reshape(W, [-1, num_kernels * dim_per_kernel])
    x = input
    x = tf.reshape(x, [batchsize, num_inputs])
    activation = tf.matmul(x, W)
    activation = tf.reshape(activation, [-1, num_kernels, dim_per_kernel])
    abs_dif = tf.mul(tf.reduce_sum(tf.abs(tf.sub(tf.expand_dims(activation, 3),
                                                 tf.expand_dims(tf.transpose(activation, [1, 2, 0]), 0))), 2),
                     1 - tf.expand_dims(tf.constant(np.eye(batchsize), dtype=np.float32), 1))
    f = tf.reduce_sum(tf.exp(-abs_dif), 2) / tf.reduce_sum(tf.exp(-abs_dif))
    print(f.get_shape())
    print(input.get_shape())
    return tf.concat(1, [x, f])
def layer_norm_all(h, base, num_units, scope):
    # Layer Norm (faster version)
    #
    # Performs layer norm on multiple bases at once (i.e., i, g, j, o for an LSTM)
    #
    # Reshapes h to perform layer norm in parallel
    with tf.variable_scope(scope):
        h_reshape = tf.reshape(h, [-1, base, num_units])
        mean = tf.reduce_mean(h_reshape, [2], keep_dims=True)
        var = tf.reduce_mean(tf.square(h_reshape - mean), [2], keep_dims=True)
        epsilon = tf.constant(1e-3)
        rstd = tf.rsqrt(var + epsilon)
        h_reshape = (h_reshape - mean) * rstd
        # reshape back to original
        h = tf.reshape(h_reshape, [-1, base * num_units])

        alpha = tf.get_variable('layer_norm_alpha', [4 * num_units],
                                initializer=tf.constant_initializer(1.0), dtype=tf.float32)
        bias = tf.get_variable('layer_norm_bias', [4 * num_units],
                               initializer=tf.constant_initializer(0.0), dtype=tf.float32)

    return (h * alpha) + bias
def shrink_bgest(r, rvar, theta):
    """Bernoulli-Gaussian MMSE estimator.

    Perform MMSE estimation E[x|r] for
        x ~ BernoulliGaussian(lambda, xvar1)
        r|x ~ Normal(x, rvar)

    The parameters theta[0], theta[1] represent
        the variance of nonzero x[i]:     xvar1 = abs(theta[0])
        the probability of nonzero x[i]:  lambda = 1/(exp(theta[1])+1)
    """
    xvar1 = abs(theta[..., 0])
    loglam = theta[..., 1]  # log(1/lambda - 1)
    beta = 1 / (1 + rvar / xvar1)
    r2scale = r * r * beta / rvar
    rho = tf.exp(loglam - .5 * r2scale) * tf.sqrt(1 + xvar1 / rvar)
    rho1 = rho + 1
    xhat = beta * r / rho1
    dxdr = beta * ((1 + rho * (1 + r2scale)) / tf.square(rho1))
    dxdr = tf.reduce_mean(dxdr, 0)
    return (xhat, dxdr)
def shrink_spline(r, rvar, theta):
    """Spline-based shrinkage function."""
    scale = theta[0] * tf.sqrt(rvar)
    rs = tf.sign(r)
    ar = tf.abs(r / scale)
    ar2 = tf.square(ar)
    ar3 = ar * ar2
    reg1 = tf.to_float(ar < 1)
    reg2 = tf.to_float(ar < 2) - reg1
    ar_m2 = 2 - ar
    ar_m2_p2 = tf.square(ar_m2)
    ar_m2_p3 = ar_m2 * ar_m2_p2
    beta3 = ((2. / 3 - ar2 + .5 * ar3) * reg1 + (1. / 6 * ar_m2_p3) * reg2)
    xhat = r * (theta[1] + theta[2] * beta3)
    return (xhat, auto_gradients(xhat, r))
def __init__(self, preds, labels, model, num_nodes, pos_weight, norm):
    preds_sub = preds
    labels_sub = labels

    self.cost = norm * tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(
        logits=preds_sub, targets=labels_sub, pos_weight=pos_weight))
    self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)  # Adam Optimizer

    # Latent loss
    self.log_lik = self.cost
    self.kl = (0.5 / num_nodes) * tf.reduce_mean(tf.reduce_sum(
        1 + 2 * model.z_log_std - tf.square(model.z_mean) - tf.square(tf.exp(model.z_log_std)), 1))
    self.cost -= self.kl

    self.opt_op = self.optimizer.minimize(self.cost)
    self.grads_vars = self.optimizer.compute_gradients(self.cost)

    self.correct_prediction = tf.equal(tf.cast(tf.greater_equal(tf.sigmoid(preds_sub), 0.5), tf.int32),
                                       tf.cast(labels_sub, tf.int32))
    self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))
def smooth_l1_loss(offsets, gt_offsets, scope=None):
    """
    Smooth L1 loss between offsets and encoded_gt
    ARGS
        offsets: [m?, 5], predicted offsets for one example
        gt_offsets: [m?, 5], corresponding groundtruth offsets
    RETURN
        loss: scalar
    """
    with tf.variable_scope(scope or 'smooth_l1_loss'):
        gt_offsets = tf.stop_gradient(gt_offsets)
        diff = tf.abs(offsets - gt_offsets)
        lesser_mask = tf.cast(tf.less(diff, 1.0), tf.float32)
        larger_mask = 1.0 - lesser_mask
        losses = (0.5 * tf.square(diff)) * lesser_mask + (diff - 0.5) * larger_mask
        return tf.reduce_sum(losses, 1)
def negative_l2_distance(x1, x2, axis=1):
    """
    Negative L2 Distance.

    .. math:: L = - \\sqrt{\\sum_i (x1_i - x2_i)^2}

    Args:
        x1: First term.
        x2: Second term.
        axis: Reduction Indices.

    Returns:
        Similarity Value.
    """
    distance = tf.sqrt(tf.reduce_sum(tf.square(x1 - x2), axis=axis))
    return -distance
def negative_square_l2_distance(x1, x2, axis=1):
    """
    Negative Square L2 Distance.

    .. math:: L = - \\sum_i (x1_i - x2_i)^2

    Args:
        x1: First term.
        x2: Second term.
        axis: Reduction Indices.

    Returns:
        Similarity Value.
    """
    distance = tf.reduce_sum(tf.square(x1 - x2), axis=axis)
    return -distance
def adam_updates(params, cost_or_grads, lr=0.001, mom1=0.9, mom2=0.999):
    ''' Adam optimizer '''
    updates = []
    if type(cost_or_grads) is not list:
        grads = tf.gradients(cost_or_grads, params)
    else:
        grads = cost_or_grads
    t = tf.Variable(1., 'adam_t')
    for p, g in zip(params, grads):
        mg = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_mg')
        if mom1 > 0:
            v = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_v')
            v_t = mom1 * v + (1. - mom1) * g
            v_hat = v_t / (1. - tf.pow(mom1, t))
            updates.append(v.assign(v_t))
        else:
            v_hat = g
        mg_t = mom2 * mg + (1. - mom2) * tf.square(g)
        mg_hat = mg_t / (1. - tf.pow(mom2, t))
        g_t = v_hat / tf.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append(mg.assign(mg_t))
        updates.append(p.assign(p_t))
    updates.append(t.assign_add(1))
    return tf.group(*updates)
def add_param(self, spec, shape, name, **kwargs):
    param = self.add_param_plain(spec, shape, name, **kwargs)
    if name is not None and name.startswith("W") and self.weight_normalization:
        # Hacky: check if the parameter is a weight matrix. If so, apply weight normalization
        if len(param.get_shape()) == 2:
            v = param
            g = self.add_param_plain(tf.ones_initializer, (shape[1],), name=name + "_wn/g")
            param = v * (tf.reshape(g, (1, -1)) /
                         tf.sqrt(tf.reduce_sum(tf.square(v), 0, keep_dims=True)))
        elif len(param.get_shape()) == 4:
            v = param
            g = self.add_param_plain(tf.ones_initializer, (shape[3],), name=name + "_wn/g")
            param = v * (tf.reshape(g, (1, 1, 1, -1)) /
                         tf.sqrt(tf.reduce_sum(tf.square(v), [0, 1, 2], keep_dims=True)))
        else:
            raise NotImplementedError
    return param
def kl(self, old_dist_info, new_dist_info):
    old_means = old_dist_info["mean"]
    old_log_stds = old_dist_info["log_std"]
    new_means = new_dist_info["mean"]
    new_log_stds = new_dist_info["log_std"]
    """
    Compute the KL divergence of two multivariate Gaussian distributions with
    diagonal covariance matrices
    """
    old_std = np.exp(old_log_stds)
    new_std = np.exp(new_log_stds)
    # means: (N*A)
    # std: (N*A)
    # formula:
    # { (\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2 } / (2\sigma_2^2) +
    # ln(\sigma_2/\sigma_1)
    numerator = np.square(old_means - new_means) + \
        np.square(old_std) - np.square(new_std)
    denominator = 2 * np.square(new_std) + 1e-8
    return np.sum(
        numerator / denominator + new_log_stds - old_log_stds, axis=-1)
    # more lossy version
    # return TT.sum(
    #     numerator / denominator + TT.log(new_std) - TT.log(old_std), axis=-1)
def kl_sym(self, old_dist_info_vars, new_dist_info_vars):
    old_means = old_dist_info_vars["mean"]
    old_log_stds = old_dist_info_vars["log_std"]
    new_means = new_dist_info_vars["mean"]
    new_log_stds = new_dist_info_vars["log_std"]
    """
    Compute the KL divergence of two multivariate Gaussian distributions with
    diagonal covariance matrices
    """
    old_std = tf.exp(old_log_stds)
    new_std = tf.exp(new_log_stds)
    # means: (N*A)
    # std: (N*A)
    # formula:
    # { (\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2 } / (2\sigma_2^2) +
    # ln(\sigma_2/\sigma_1)
    numerator = tf.square(old_means - new_means) + \
        tf.square(old_std) - tf.square(new_std)
    denominator = 2 * tf.square(new_std) + 1e-8
    return tf.reduce_sum(
        numerator / denominator + new_log_stds - old_log_stds,
        reduction_indices=-1)
def test_sgld_sparse(self):
    tf.reset_default_graph()

    z = tf.Variable(tf.zeros((5, 2)), dtype=tf.float32)
    idx = tf.placeholder(tf.int32)
    zi = tf.gather(z, idx)
    zloss = tf.square(zi - [10.0, 5.0])

    sgld = SGLD(learning_rate=0.4)
    train_op_sgld = sgld.minimize(zloss)

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    self.assertTrue(np.alltrue(sess.run(z) == 0.0))

    sess.run(train_op_sgld, feed_dict={idx: 3})
    zh = sess.run(z)
    self.assertTrue(np.alltrue(zh[[0, 1, 2, 4], :] == 0.0))
    self.assertTrue(zh[3, 0] > 0)
def test_psgld_sparse(self):
    tf.reset_default_graph()

    z = tf.Variable(tf.zeros((5, 2)), dtype=tf.float32)
    idx = tf.placeholder(tf.int32)
    zi = tf.gather(z, idx)
    zloss = tf.square(zi - [10.0, 5.0])

    psgld = pSGLD(learning_rate=0.4)
    train_op_psgld = psgld.minimize(zloss)

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    self.assertTrue(np.alltrue(sess.run(z) == 0.0))

    sess.run(train_op_psgld, feed_dict={idx: 3})
    zh = sess.run(z)
    self.assertTrue(np.alltrue(zh[[0, 1, 2, 4], :] == 0.0))
    self.assertTrue(zh[3, 0] > 0)
def gradient_penalty(self):
    config = self.config
    gan = self.gan
    gradient_penalty = config.gradient_penalty
    if has_attr(gan.inputs, 'gradient_penalty_label'):
        x = gan.inputs.gradient_penalty_label
    else:
        x = gan.inputs.x
    generator = self.generator or gan.generator
    g = generator.sample
    discriminator = self.discriminator or gan.discriminator
    shape = [1 for t in g.get_shape()]
    shape[0] = gan.batch_size()
    uniform_noise = tf.random_uniform(shape=shape, minval=0., maxval=1.)
    print("[gradient penalty] applying x:", x, "g:", g, "noise:", uniform_noise)
    interpolates = x + uniform_noise * (g - x)
    reused_d = discriminator.reuse(interpolates)
    gradients = tf.gradients(reused_d, [interpolates])[0]
    penalty = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=1))
    penalty = tf.reduce_mean(tf.square(penalty - 1.))
    return float(gradient_penalty) * penalty
def setup_param_noise(self, normalized_obs0):
    assert self.param_noise is not None

    # Configure perturbed actor.
    param_noise_actor = copy(self.actor)
    param_noise_actor.name = 'param_noise_actor'
    self.perturbed_actor_tf = param_noise_actor(normalized_obs0)
    logger.info('setting up param noise')
    self.perturb_policy_ops = get_perturbed_actor_updates(self.actor, param_noise_actor,
                                                          self.param_noise_stddev)

    # Configure separate copy for stddev adaptation.
    adaptive_param_noise_actor = copy(self.actor)
    adaptive_param_noise_actor.name = 'adaptive_param_noise_actor'
    adaptive_actor_tf = adaptive_param_noise_actor(normalized_obs0)
    self.perturb_adaptive_policy_ops = get_perturbed_actor_updates(self.actor, adaptive_param_noise_actor,
                                                                   self.param_noise_stddev)
    self.adaptive_policy_distance = tf.sqrt(tf.reduce_mean(tf.square(self.actor_tf - adaptive_actor_tf)))
def setup_critic_optimizer(self):
    logger.info('setting up critic optimizer')
    normalized_critic_target_tf = tf.clip_by_value(normalize(self.critic_target, self.ret_rms),
                                                   self.return_range[0], self.return_range[1])
    self.critic_loss = tf.reduce_mean(tf.square(self.normalized_critic_tf - normalized_critic_target_tf))
    if self.critic_l2_reg > 0.:
        critic_reg_vars = [var for var in self.critic.trainable_vars
                           if 'kernel' in var.name and 'output' not in var.name]
        for var in critic_reg_vars:
            logger.info('  regularizing: {}'.format(var.name))
        logger.info('  applying l2 regularization with {}'.format(self.critic_l2_reg))
        critic_reg = tc.layers.apply_regularization(
            tc.layers.l2_regularizer(self.critic_l2_reg),
            weights_list=critic_reg_vars
        )
        self.critic_loss += critic_reg
    critic_shapes = [var.get_shape().as_list() for var in self.critic.trainable_vars]
    critic_nb_params = sum([reduce(lambda x, y: x * y, shape) for shape in critic_shapes])
    logger.info('  critic shapes: {}'.format(critic_shapes))
    logger.info('  critic params: {}'.format(critic_nb_params))
    self.critic_grads = U.flatgrad(self.critic_loss, self.critic.trainable_vars, clip_norm=self.clip_norm)
    self.critic_optimizer = MpiAdam(var_list=self.critic.trainable_vars,
                                    beta1=0.9, beta2=0.999, epsilon=1e-08)
def __init__(self, ob_dim, ac_dim):  # pylint: disable=W0613
    X = tf.placeholder(tf.float32, shape=[None, ob_dim * 2 + ac_dim * 2 + 2])  # batch of observations
    vtarg_n = tf.placeholder(tf.float32, shape=[None], name='vtarg')
    wd_dict = {}
    h1 = tf.nn.elu(dense(X, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict))
    h2 = tf.nn.elu(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict))
    vpred_n = dense(h2, 1, "hfinal", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict)[:, 0]
    sample_vpred_n = vpred_n + tf.random_normal(tf.shape(vpred_n))
    wd_loss = tf.get_collection("vf_losses", None)
    loss = U.mean(tf.square(vpred_n - vtarg_n)) + tf.add_n(wd_loss)
    loss_sampled = U.mean(tf.square(vpred_n - tf.stop_gradient(sample_vpred_n)))
    self._predict = U.function([X], vpred_n)
    optim = kfac.KfacOptimizer(learning_rate=0.001, cold_lr=0.001 * (1 - 0.9), momentum=0.9,
                               clip_kl=0.3, epsilon=0.1, stats_decay=0.95,
                               async=1, kfac_update=2, cold_iter=50,
                               weight_decay_dict=wd_dict, max_grad_norm=None)
    vf_var_list = []
    for var in tf.trainable_variables():
        if "vf" in var.name:
            vf_var_list.append(var)
    update_op, self.q_runner = optim.minimize(loss, loss_sampled, var_list=vf_var_list)
    self.do_update = U.function([X, vtarg_n], update_op)  # pylint: disable=E1101
    U.initialize()  # Initialize uninitialized TF variables
def __init__(self, epsilon=1e-2, shape=()):
    self._sum = tf.get_variable(
        dtype=tf.float64,
        shape=shape,
        initializer=tf.constant_initializer(0.0),
        name="runningsum", trainable=False)
    self._sumsq = tf.get_variable(
        dtype=tf.float64,
        shape=shape,
        initializer=tf.constant_initializer(epsilon),
        name="runningsumsq", trainable=False)
    self._count = tf.get_variable(
        dtype=tf.float64,
        shape=(),
        initializer=tf.constant_initializer(epsilon),
        name="count", trainable=False)
    self.shape = shape

    self.mean = tf.to_float(self._sum / self._count)
    self.std = tf.sqrt(tf.maximum(tf.to_float(self._sumsq / self._count) - tf.square(self.mean), 1e-2))

    newsum = tf.placeholder(shape=self.shape, dtype=tf.float64, name='sum')
    newsumsq = tf.placeholder(shape=self.shape, dtype=tf.float64, name='var')
    newcount = tf.placeholder(shape=[], dtype=tf.float64, name='count')
    self.incfiltparams = U.function([newsum, newsumsq, newcount], [],
                                    updates=[tf.assign_add(self._sum, newsum),
                                             tf.assign_add(self._sumsq, newsumsq),
                                             tf.assign_add(self._count, newcount)])
def gauss_log_prob(mu, logstd, x):
    """
    Used for computing the log probability, following the formula for the
    multivariate Gaussian density.

    All the inputs should have shape (n,a). The `gp_na` contains component-wise
    probabilities, then the reduce_sum results in a tensor of size (n,), which
    contains the log probability for each of the n elements. (We later perform
    a mean on this.)

    Also, the 2*pi part needs a 1/2, but doesn't need the sum over the number
    of components (# of actions) because of the reduce_sum here. Finally,
    logstd doesn't need a 1/2 constant because log(\sigma_i^2) will bring the
    2 over.

    This formula generalizes for an arbitrary number of actions, BUT it
    assumes that the covariance matrix is diagonal.
    """
    var_na = tf.exp(2 * logstd)
    gp_na = -tf.square(x - mu) / (2 * var_na) - 0.5 * tf.log(tf.constant(2 * np.pi)) - logstd
    return tf.reduce_sum(gp_na, axis=[1])
def gauss_KL(mu1, logstd1, mu2, logstd2):
    """
    Returns the KL divergence between two multivariate Gaussians,
    component-wise. It assumes the covariance matrix is diagonal.

    All inputs have shape (n,a). It is not necessary to know the number of
    actions because reduce_sum will sum over this to get the `d` constant
    offset. The part consisting of the trace in the formula is blended with
    the mean difference squared due to the common "denominator" of var2_na.
    This formula generalizes for an arbitrary number of actions. I think mu2
    and logstd2 should represent the policy before the update.

    Returns the KL divergence for each of the n components in the minibatch,
    then we do a reduce_mean outside this.
    """
    var1_na = tf.exp(2. * logstd1)
    var2_na = tf.exp(2. * logstd2)
    tmp_matrix = 2. * (logstd2 - logstd1) + (var1_na + tf.square(mu1 - mu2)) / var2_na - 1
    kl_n = tf.reduce_sum(0.5 * tmp_matrix, axis=[1])  # Don't forget the 1/2 !!
    assert_op = tf.Assert(tf.reduce_all(kl_n >= -0.0000001), [kl_n])
    with tf.control_dependencies([assert_op]):
        kl_n = tf.identity(kl_n)
    return kl_n
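For reference, the diagonal-Gaussian KL divergence that the comments in kl_sym and gauss_KL above spell out is the standard identity (written here in LaTeX; not taken from either repository):

\[
D_{\mathrm{KL}}\big(\mathcal{N}(\mu_1, \sigma_1^2) \,\|\, \mathcal{N}(\mu_2, \sigma_2^2)\big)
= \sum_i \left[ \frac{(\mu_{1,i} - \mu_{2,i})^2 + \sigma_{1,i}^2 - \sigma_{2,i}^2}{2\sigma_{2,i}^2}
+ \log\frac{\sigma_{2,i}}{\sigma_{1,i}} \right]
\]

In gauss_KL, 0.5 * tmp_matrix is exactly the per-component summand, and the reduce_sum carries out the sum over i.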
def testGetBackwardOpsChain(self):
    # a -> b -> c
    a = tf.placeholder(tf.float32)
    b = tf.sqrt(a)
    c = tf.square(b)
    for n in range(4):
        for seed_tensors in permutations([a, b, c], n):
            if c in seed_tensors:
                truth = [a.op, b.op, c.op]
            elif b in seed_tensors:
                truth = [a.op, b.op]
            elif a in seed_tensors:
                truth = [a.op]
            else:
                truth = []
            self.assertEqual(get_backward_ops(seed_tensors), truth)

    self.assertEqual(get_backward_ops([c], treat_as_inputs=[b]), [c.op])
    self.assertEqual(
        get_backward_ops([b, c], treat_as_inputs=[b]), [c.op])
    self.assertEqual(
        get_backward_ops([a, c], treat_as_inputs=[b]), [a.op, c.op])
def variable_summaries(var, name, collections=None):
    """Attach a lot of summaries to a Tensor (for TensorBoard visualization).

    Args:
        - var: Tensor for variable from which we want to log.
        - name: Variable name.
        - collections: List of collections to save the summary to.
    """
    with tf.name_scope(name):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean, collections)
        num_params = tf.reduce_prod(tf.shape(var))
        tf.summary.scalar('num_params', num_params, collections)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev, collections)
        tf.summary.scalar('max', tf.reduce_max(var), collections)
        tf.summary.scalar('min', tf.reduce_min(var), collections)
        tf.summary.histogram('histogram', var, collections)
        tf.summary.scalar('sparsity', tf.nn.zero_fraction(var), collections)
def pop(tensor, shape):
    """
    Pop art filter

    :param Tensor tensor:
    :param list[int] shape:
    """
    images = []

    freq = random.randint(1, 3) * 2

    ref = _downsample(resample(tensor, shape), shape,
                      [int(shape[0] / (freq * 2)), int(shape[1] / (freq * 2)), shape[2]])

    for i in range(freq * freq):
        image = posterize(ref, random.randint(3, 6))
        image = image % tf.random_normal([3], mean=.5, stddev=.25)
        images.append(image)

    x, y = point_cloud(freq, distrib=PointDistribution.square, shape=shape, corners=True)

    out = voronoi(None, shape,
                  diagram_type=VoronoiDiagramType.collage,
                  xy=(x, y, len(x)),
                  nth=random.randint(0, 3),
                  collage_images=images,
                  image_count=4)

    return outline(out, shape, sobel_func=1)
def loss(self, predictions, real_values):
    """Return the loss operation between predictions and real_values.

    Add L2 weight decay term if any.

    Args:
        predictions: predicted values
        real_values: real values

    Returns:
        Loss tensor of type float.
    """
    with tf.variable_scope('loss'):
        # 1/2n \sum^{n}_{i=1}{(x_i - x'_i)^2}
        mse = tf.divide(
            tf.reduce_mean(tf.square(tf.subtract(predictions, real_values))),
            2.,
            name="mse")
        tf.add_to_collection(LOSSES, mse)

        # mse + weight_decay per layer
        error = tf.add_n(tf.get_collection(LOSSES), name='total_loss')

    return error
def contrastive_loss(y_pred, y_true, margin=1.0):
    """Contrastive Loss.

    Computes the contrastive loss between y_pred (logits) and y_true (labels).

    http://yann.lecun.com/exdb/publis/pdf/chopra-05.pdf
    Sumit Chopra, Raia Hadsell and Yann LeCun (2005).
    Learning a Similarity Metric Discriminatively, with Application to Face Verification.

    Arguments:
        y_pred: `Tensor`. Predicted values.
        y_true: `Tensor`. Targets (labels).
        margin: A self-set parameter that indicates the desired distance
            between features of different identities. Defaults to 1.
    """
    with tf.name_scope("ContrastiveLoss"):
        dis1 = y_true * tf.square(y_pred)
        dis2 = (1 - y_true) * tf.square(tf.maximum(margin - y_pred, 0))
        return tf.reduce_sum(dis1 + dis2) / 2.
def __init__(self, input_dim, hidden_dim, epoch=250, learning_rate=0.001):
    self.epoch = epoch
    self.learning_rate = learning_rate

    x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim])
    with tf.name_scope("encode"):
        weights = tf.Variable(tf.random_normal([input_dim, hidden_dim], dtype=tf.float32), name="weights")
        biases = tf.Variable(tf.zeros([hidden_dim]), name="biases")
        encoded = tf.nn.tanh(tf.matmul(x, weights) + biases)
    with tf.name_scope("decode"):
        weights = tf.Variable(tf.random_normal([hidden_dim, input_dim], dtype=tf.float32), name="weights")
        biases = tf.Variable(tf.zeros([input_dim]), name="biases")
        decoded = tf.matmul(encoded, weights) + biases

    self.x = x
    self.encoded = encoded
    self.decoded = decoded

    self.loss = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(self.x, self.decoded))))
    self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
    self.saver = tf.train.Saver()
def __init__(self, actions, input_dim):
    self.epsilon = 0.9
    self.gamma = 0.01
    self.actions = actions
    output_dim = len(actions)
    h1_dim = 200

    self.x = tf.placeholder(tf.float32, [None, input_dim])
    self.y = tf.placeholder(tf.float32, [output_dim])
    W1 = tf.Variable(tf.random_normal([input_dim, h1_dim]))
    b1 = tf.Variable(tf.constant(0.1, shape=[h1_dim]))
    h1 = tf.nn.relu(tf.matmul(self.x, W1) + b1)
    W2 = tf.Variable(tf.random_normal([h1_dim, output_dim]))
    b2 = tf.Variable(tf.constant(0.1, shape=[output_dim]))
    self.q = tf.nn.relu(tf.matmul(h1, W2) + b2)

    loss = tf.square(self.y - self.q)
    self.train_op = tf.train.AdagradOptimizer(0.01).minimize(loss)
    self.sess = tf.Session()
    self.sess.run(tf.global_variables_initializer())
def build_model(self):
    self.x = tf.placeholder(tf.float32, [self.reader.vocab_size], name="input")
    self.x_idx = tf.placeholder(tf.int32, [None], name="x_idx")

    self.build_encoder()
    self.build_generator()

    # Kullback Leibler divergence
    self.e_loss = -0.5 * tf.reduce_sum(1 + self.log_sigma_sq - tf.square(self.mu) - tf.exp(self.log_sigma_sq))

    # Log likelihood
    self.g_loss = -tf.reduce_sum(tf.log(tf.gather(self.p_x_i, self.x_idx) + 1e-10))

    self.loss = self.e_loss + self.g_loss

    self.encoder_var_list, self.generator_var_list = [], []
    for var in tf.trainable_variables():
        if "encoder" in var.name:
            self.encoder_var_list.append(var)
        elif "generator" in var.name:
            self.generator_var_list.append(var)

    # optimizer for alternative update
    self.optim_e = tf.train.AdamOptimizer(learning_rate=self.lr) \
        .minimize(self.e_loss, global_step=self.step, var_list=self.encoder_var_list)
    self.optim_g = tf.train.AdamOptimizer(learning_rate=self.lr) \
        .minimize(self.g_loss, global_step=self.step, var_list=self.generator_var_list)

    # optimizer for one shot update
    self.optim = tf.train.AdamOptimizer(learning_rate=self.lr) \
        .minimize(self.loss, global_step=self.step)

    _ = tf.scalar_summary("encoder loss", self.e_loss)
    _ = tf.scalar_summary("generator loss", self.g_loss)
    _ = tf.scalar_summary("total loss", self.loss)
def calculate_mseloss(self, predictions, labels, **unused_params):
    with tf.name_scope("loss_mse"):
        float_labels = tf.cast(labels, tf.float32)
        mse_loss = tf.square(predictions - float_labels)
        return tf.reduce_mean(tf.reduce_sum(mse_loss, 1))
def calculate_loss(self, predictions, labels, **unused_params):
    with tf.name_scope("loss_xent"):
        epsilon = 10e-6
        if FLAGS.label_smoothing:
            float_labels = smoothing(labels)
        else:
            float_labels = tf.cast(labels, tf.float32)
        mse_loss = tf.square(float_labels - predictions)
        return tf.reduce_mean(tf.reduce_sum(mse_loss, 1))
def calculate_loss(self, predictions, support_predictions, labels, **unused_params):
    """
    support_predictions batch_size x num_models x num_classes
    predictions = tf.reduce_mean(support_predictions, axis=1)
    """
    model_count = tf.shape(support_predictions)[1]
    vocab_size = tf.shape(support_predictions)[2]

    mean_predictions = tf.reduce_mean(support_predictions, axis=1, keep_dims=True)
    support_labels = tf.tile(tf.expand_dims(tf.cast(labels, dtype=tf.float32), axis=1),
                             multiples=[1, model_count, 1])
    support_means = tf.stop_gradient(tf.tile(mean_predictions, multiples=[1, model_count, 1]))

    support_predictions = tf.reshape(support_predictions, shape=[-1, model_count * vocab_size])
    support_labels = tf.reshape(support_labels, shape=[-1, model_count * vocab_size])
    support_means = tf.reshape(support_means, shape=[-1, model_count * vocab_size])

    ce_loss_fn = CrossEntropyLoss()
    # The cross entropy between predictions and ground truth
    cross_entropy_loss = ce_loss_fn.calculate_loss(support_predictions, support_labels, **unused_params)

    mse_loss_fn = MeanSquareErrorLoss()
    # The square error between predictions and mean predictions
    divergence = mse_loss_fn.calculate_loss(support_predictions, support_means, **unused_params)

    loss = cross_entropy_loss * (1.0 - FLAGS.support_loss_percent) - divergence * FLAGS.support_loss_percent
    return loss
def layer_normalize(self, input_raw, epsilon=1e-8):
    feature_dim = len(input_raw.get_shape()) - 1
    mean_input = tf.reduce_mean(input_raw, axis=feature_dim, keep_dims=True)
    std_input = tf.sqrt(tf.reduce_mean(tf.square(input_raw - mean_input), axis=feature_dim, keep_dims=True))
    std_input = tf.maximum(std_input, epsilon)
    output = (input_raw - mean_input) / std_input
    return output
def build_graph(all_readers, input_reader, input_data_pattern,
                all_eval_data_patterns, batch_size=256):
    original_video_id, original_input, unused_labels_batch, unused_num_frames = (
        get_input_evaluation_tensors(
            input_reader, input_data_pattern, batch_size=batch_size))

    video_id_notequal_tensors = []
    model_input_tensor = None
    input_distance_tensors = []
    for reader, data_pattern in zip(all_readers, all_eval_data_patterns):
        video_id, model_input_raw, labels_batch, unused_num_frames = (
            get_input_evaluation_tensors(
                reader, data_pattern, batch_size=batch_size))
        video_id_notequal_tensors.append(
            tf.reduce_sum(tf.cast(tf.not_equal(original_video_id, video_id), dtype=tf.float32)))
        if model_input_tensor is None:
            model_input_tensor = model_input_raw
        input_distance_tensors.append(
            tf.reduce_mean(tf.reduce_sum(tf.square(model_input_tensor - model_input_raw), axis=1)))

    video_id_mismatch_tensor = tf.stack(video_id_notequal_tensors)
    input_distance_tensor = tf.stack(input_distance_tensors)
    actual_batch_size = tf.shape(original_video_id)[0]

    tf.add_to_collection("video_id_mismatch", video_id_mismatch_tensor)
    tf.add_to_collection("input_distance", input_distance_tensor)
    tf.add_to_collection("actual_batch_size", actual_batch_size)
def build_graph(all_readers, input_reader, input_data_pattern,
                all_eval_data_patterns, batch_size=256):
    original_video_id, original_input, unused_labels_batch, unused_num_frames = (
        get_input_evaluation_tensors(
            input_reader, input_data_pattern, batch_size=batch_size))

    video_id_equal_tensors = []
    model_input_tensor = None
    input_distance_tensors = []
    for reader, data_pattern in zip(all_readers, all_eval_data_patterns):
        video_id, model_input_raw, labels_batch, unused_num_frames = (
            get_input_evaluation_tensors(
                reader, data_pattern, batch_size=batch_size))
        video_id_equal_tensors.append(
            tf.reduce_sum(tf.cast(tf.not_equal(original_video_id, video_id), dtype=tf.float32)))
        if model_input_tensor is None:
            model_input_tensor = model_input_raw
        input_distance_tensors.append(
            tf.reduce_mean(tf.reduce_sum(tf.square(model_input_tensor - model_input_raw), axis=1)))

    video_id_equal_tensor = tf.stack(video_id_equal_tensors)
    input_distance_tensor = tf.stack(input_distance_tensors)

    tf.add_to_collection("video_id_equal", video_id_equal_tensor)
    tf.add_to_collection("input_distance", input_distance_tensor)
def compute_kernel(x, y):
    x_size = tf.shape(x)[0]
    y_size = tf.shape(y)[0]
    dim = tf.shape(x)[1]
    tiled_x = tf.tile(tf.reshape(x, tf.stack([x_size, 1, dim])), tf.stack([1, y_size, 1]))
    tiled_y = tf.tile(tf.reshape(y, tf.stack([1, y_size, dim])), tf.stack([x_size, 1, 1]))
    return tf.exp(-tf.reduce_mean(tf.square(tiled_x - tiled_y), axis=2) / tf.cast(dim, tf.float32))
def testSquare(self):
    with self.test_session():
        x = tf.square([2, 3])
        self.assertAllEqual(x.eval(), [4, 9])
def kernel_pred(x_data, prediction_grid):
    A = tf.reshape(tf.reduce_sum(tf.square(x_data), 1), [-1, 1])
    B = tf.reshape(tf.reduce_sum(tf.square(prediction_grid), 1), [-1, 1])
    square_distance = tf.add(tf.subtract(A, tf.multiply(2., tf.matmul(x_data, tf.transpose(prediction_grid)))),
                             tf.transpose(B))
    return tf.exp(tf.multiply(gamma, tf.abs(square_distance)))
def layer_norm(x, axes=1, initial_bias_value=0.0, epsilon=1e-3, name="var"):
    """
    Apply layer normalization to x.

    Args:
        x: input variable.
        axes: axes over which to compute the normalization statistics.
        initial_bias_value: initial value for the LN bias.
        epsilon: small constant value to avoid division by zero.
        name: scope or name for the LN op.

    Returns:
        LN(x) with same shape as x
    """
    if not isinstance(axes, list):
        axes = [axes]

    scope = tf.get_variable_scope()
    with tf.variable_scope(scope):
        with tf.variable_scope(name):
            mean = tf.reduce_mean(x, axes, keep_dims=True)
            # note: despite the name, this holds the standard deviation (sqrt of the variance)
            variance = tf.sqrt(tf.reduce_mean(tf.square(x - mean), axes, keep_dims=True))

            with tf.device('/cpu:0'):
                gain = tf.get_variable('gain', x.get_shape().as_list()[1:],
                                       initializer=tf.constant_initializer(1.0))
                bias = tf.get_variable('bias', x.get_shape().as_list()[1:],
                                       initializer=tf.constant_initializer(initial_bias_value))

            return gain * (x - mean) / (variance + epsilon) + bias
def preproc(self, X):
    # Features: a bias column of ones, X itself, and elementwise X^2 / 2
    return np.concatenate([np.ones([X.shape[0], 1]), X, np.square(X) / 2.0], axis=1)
def gauss_prob(mu, logstd, x):
    std = tf.exp(logstd)
    var = tf.square(std)
    # Component-wise diagonal Gaussian density; the deviation must be squared
    # in the exponent: exp(-(x - mu)^2 / (2 var)) / (sqrt(2 pi) * std)
    gp = tf.exp(-tf.square(x - mu) / (2 * var)) / ((2 * np.pi) ** .5 * std)
    return tf.reduce_prod(gp, [1])