我们从 Python 开源项目中，提取了以下 50 个代码示例，用于说明如何使用 tensorflow.sqrt()。
def calculate_loss_mix2(self, predictions, predictions_class, predictions_encoder, labels, **unused_params):
    """Return the classification loss plus a weighted encoder-regression loss.

    The labels are pushed through a stack of ``FLAGS.encoder_layers`` affine
    layers whose parameters are read from text files. Every layer but the
    last is followed by a ReLU; the output of the last layer is standardised
    per row and serves as the regression target for ``predictions_encoder``.

    Args:
        predictions: forwarded unchanged to ``self.calculate_loss``.
        predictions_class: not used by this method.
        predictions_encoder: forwarded to ``self.calculate_mseloss`` together
            with the encoded labels.
        labels: label tensor; cast to float32 before being encoded.
        **unused_params: ignored.

    Returns:
        Tuple ``(loss, encoded_labels)`` where ``loss`` is
        ``0.1 * mse_loss + classification_loss``.
    """
    with tf.name_scope("loss_mix2"):
        layer_count = FLAGS.encoder_layers
        encoded = tf.cast(labels, tf.float32)
        for layer in range(layer_count):
            # One text file per layer: every row but the last is the weight
            # matrix, the last row is the bias vector.
            params = np.loadtxt(FLAGS.autoencoder_dir + 'autoencoder_layer%d.model' % layer)
            weights = tf.constant(params[:-1, :], dtype=tf.float32)
            bias = tf.reshape(tf.constant(params[-1, :], dtype=tf.float32), [-1])
            encoded = tf.nn.xw_plus_b(encoded, weights, bias)
            if layer < layer_count - 1:
                encoded = tf.nn.relu(encoded)
                continue
            # Last layer: standardise each row instead of applying the ReLU.
            row_mean = tf.reduce_mean(encoded, axis=1, keep_dims=True)
            centered = encoded - row_mean
            row_std = tf.sqrt(tf.reduce_mean(tf.square(centered), axis=1, keep_dims=True))
            encoded = centered / (row_std + 1e-6)
        encoder_loss = 0.1 * self.calculate_mseloss(predictions_encoder, encoded)
        label_loss = self.calculate_loss(predictions, labels)
        return encoder_loss + label_loss, encoded
def ae(x):
    """Run ``x`` through an eight-layer fully connected network.

    The hidden activation is chosen by ``nonlinearity_name`` from the
    enclosing scope. The weights ``W`` and biases ``b`` also come from the
    enclosing scope and are indexed by the string keys '1' to '8'. The
    eighth layer is linear.

    Raises:
        NameError: if ``nonlinearity_name`` is not a supported name.
    """
    def gelu_fast(_x):
        # tanh-based form of the GELU (the name suggests a fast approximation).
        return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (_x + 0.044715 * tf.pow(_x, 3))))

    def silu(_x):
        return _x * tf.sigmoid(_x)

    if nonlinearity_name == 'relu':
        f = tf.nn.relu
    elif nonlinearity_name == 'elu':
        f = tf.nn.elu
    elif nonlinearity_name == 'gelu':
        f = gelu_fast
    elif nonlinearity_name == 'silu':
        f = silu
    else:
        raise NameError("Need 'relu', 'elu', 'gelu', or 'silu' for nonlinearity_name")

    hidden = x
    for key in ('1', '2', '3', '4', '5', '6', '7'):
        hidden = f(tf.matmul(hidden, W[key]) + b[key])
    return tf.matmul(hidden, W['8']) + b['8']
def build_encoder(self):
    """Build the inference network q(h|X).

    Two ReLU layers of width ``self.embed_dim`` feed two linear heads that
    produce ``self.mu`` and ``self.log_sigma_sq`` (width ``self.h_dim``).
    ``self.h`` is then formed as ``mu + sigma * eps`` with ``eps`` drawn from
    a standard normal, and histogram summaries are registered for ``mu`` and
    ``sigma``.
    """
    with tf.variable_scope("encoder"):
        # NOTE(review): both cells are constructed but never used below.
        q_cell = tf.nn.rnn_cell.LSTMCell(self.embed_dim, self.vocab_size)
        a_cell = tf.nn.rnn_cell.LSTMCell(self.embed_dim, self.vocab_size)

        dense = tf.nn.rnn_cell.linear
        batched_x = tf.expand_dims(self.x, 0)
        hidden1 = tf.nn.relu(dense(batched_x, self.embed_dim, bias=True, scope="l1"))
        hidden2 = tf.nn.relu(dense(hidden1, self.embed_dim, bias=True, scope="l2"))

        self.mu = dense(hidden2, self.h_dim, bias=True, scope="mu")
        self.log_sigma_sq = dense(hidden2, self.h_dim, bias=True, scope="log_sigma_sq")

        noise = tf.random_normal((1, self.h_dim), 0, 1, dtype=tf.float32)
        # sigma = sqrt(exp(log sigma^2))
        std = tf.sqrt(tf.exp(self.log_sigma_sq))

        tf.histogram_summary("mu", self.mu)
        tf.histogram_summary("sigma", std)

        self.h = self.mu + std * noise
def build_encoder(self):
    """Build the inference network q(h|X) and keep every stage on ``self``.

    Stores the pre-activations (``l1_lin``, ``l2_lin``), the ReLU outputs
    (``l1``, ``l2``), the heads ``mu`` and ``log_sigma_sq``, the noise
    ``eps``, ``sigma = sqrt(exp(log_sigma_sq))`` and the sample
    ``h = mu + sigma * eps``. Histogram summaries are registered for ``mu``,
    ``sigma``, ``h`` and ``mu + sigma``.
    """
    with tf.variable_scope("encoder"):
        batched_x = tf.expand_dims(self.x, 0)

        # Two hidden layers of width embed_dim.
        self.l1_lin = linear(batched_x, self.embed_dim, bias=True, scope="l1")
        self.l1 = tf.nn.relu(self.l1_lin)
        self.l2_lin = linear(self.l1, self.embed_dim, bias=True, scope="l2")
        self.l2 = tf.nn.relu(self.l2_lin)

        # Two linear heads of width h_dim.
        self.mu = linear(self.l2, self.h_dim, bias=True, scope="mu")
        self.log_sigma_sq = linear(self.l2, self.h_dim, bias=True, scope="log_sigma_sq")

        noise_shape = (1, self.h_dim)
        self.eps = tf.random_normal(noise_shape, 0, 1, dtype=tf.float32)
        self.sigma = tf.sqrt(tf.exp(self.log_sigma_sq))
        scaled_noise = tf.mul(self.sigma, self.eps)
        self.h = tf.add(self.mu, scaled_noise)

        tf.histogram_summary("mu", self.mu)
        tf.histogram_summary("sigma", self.sigma)
        tf.histogram_summary("h", self.h)
        tf.histogram_summary("mu + sigma", self.mu + self.sigma)
def log_variable(variable, gradient=None):
    r'''
    Register TensorBoard summaries describing the current state of a variable.

    Scalar summaries are written for the mean, the standard deviation, the
    maximum and the minimum of ``variable``, plus a histogram of its values.
    When ``gradient`` is given, a histogram of the gradient values is added;
    for ``tf.IndexedSlices`` the ``values`` attribute is used.
    '''
    name = variable.name
    mean = tf.reduce_mean(variable)
    # NOTE(review): 'sttdev' looks like a typo for 'stddev'; it is kept
    # because the tag is part of the emitted summary data.
    scalars = (
        ('%s/mean' % name, mean),
        ('%s/sttdev' % name, tf.sqrt(tf.reduce_mean(tf.square(variable - mean)))),
        ('%s/max' % name, tf.reduce_max(variable)),
        ('%s/min' % name, tf.reduce_min(variable)),
    )
    for tag, value in scalars:
        tf.summary.scalar(name=tag, tensor=value)
    tf.summary.histogram(name=name, values=variable)

    if gradient is None:
        return
    grad_values = gradient.values if isinstance(gradient, tf.IndexedSlices) else gradient
    if grad_values is not None:
        tf.summary.histogram(name='%s/gradients' % name, values=grad_values)
def batchnorm(x, name, phase, updates, gamma=0.96):
    """Normalise ``x`` with batch statistics or running statistics.

    Args:
        x: input tensor; statistics are taken over axis 0 and the size of
            axis 1 fixes the parameter shape ``[1, k]``.
        name: prefix for the four variables created here.
        phase: selector handed to ``switch`` to choose between the training
            and the test output.
        updates: list that receives the two running-statistic assign ops.
        gamma: decay factor of the running statistics.

    Returns:
        The normalised tensor multiplied by a learned scaling plus a learned
        translation.
    """
    k = x.get_shape()[1]
    stat_shape = [1, k]
    runningmean = tf.get_variable(name + "/mean", shape=stat_shape,
                                  initializer=tf.constant_initializer(0.0), trainable=False)
    runningvar = tf.get_variable(name + "/var", shape=stat_shape,
                                 initializer=tf.constant_initializer(1e-4), trainable=False)
    testy = (x - runningmean) / tf.sqrt(runningvar)

    batch_mean = mean(x, axis=0, keepdims=True)
    # NOTE(review): this is the mean of x**2, i.e. the uncentered second
    # moment, not the variance around batch_mean - confirm this is intended.
    batch_sq_mean = mean(tf.square(x), axis=0, keepdims=True)
    trainy = (x - batch_mean) / tf.sqrt(batch_sq_mean)

    keep = gamma
    blend = 1 - gamma
    updates.append(tf.assign(runningmean, runningmean * keep + batch_mean * blend))
    updates.append(tf.assign(runningvar, runningvar * keep + batch_sq_mean * blend))

    y = switch(phase, trainy, testy)
    scaling = tf.get_variable(name + "/scaling", shape=[1, k],
                              initializer=tf.constant_initializer(1.0), trainable=True)
    translation = tf.get_variable(name + "/translation", shape=[1, k],
                                  initializer=tf.constant_initializer(0.0), trainable=True)
    return y * scaling + translation

# ================================================================
# Mathematical utils
# ================================================================
def __init__(self, embedding):
    """Create the session and graph, then L2-normalise ``embedding`` row-wise.

    Args:
        embedding: 2-D array; ``embedding.shape[1]`` fixes the width of both
            placeholders.

    Attributes set: ``sess``, ``inputs``, ``test_vec``, ``cos_distance``
    (product of ``inputs`` with the transposed ``test_vec``), ``normalized``
    (graph node) and ``embedding`` (the evaluated, normalised matrix).
    """
    self.sess = tf.Session()
    width = embedding.shape[1]
    self.inputs = tf.placeholder(tf.float32, [None, width], name='inputs')
    self.test_vec = tf.placeholder(tf.float32, [1, width], name='test_vec')
    self.cos_distance = tf.matmul(self.inputs, tf.transpose(self.test_vec))

    # Divide each row by its Euclidean norm. No epsilon is added, so an
    # all-zero row divides by zero.
    squared_row_sums = tf.reduce_sum(tf.square(self.inputs), axis=1, keep_dims=True)
    self.normalized = self.inputs / tf.sqrt(squared_row_sums)

    self.embedding = self.sess.run(self.normalized, feed_dict={self.inputs: embedding})

#---------------------------------------------------------------------------
def __call__(self, z):
    """Return the negative log of an equal-weight mixture of six components.

    Columns 0 and 1 of ``z`` are used as planar coordinates. The components
    are centred at (+-5, 0) and (+-2.5, +-2.5*sqrt(3)). Each one contributes
    ``exp(-0.5 * v**2) / sqrt(2*pi*0.25)``, where ``v`` is twice the
    Euclidean distance to its centre.
    """
    z1 = tf.reshape(tf.slice(z, [0, 0], [-1, 1]), [-1])
    z2 = tf.reshape(tf.slice(z, [0, 1], [-1, 1]), [-1])

    # (dx, dy) displacement of every point from each of the six centres,
    # listed in the order in which the densities are summed.
    displacements = [
        (z1 - 5, z2),
        (z1 + 5, z2),
        (z1 - 2.5, z2 - 2.5 * np.sqrt(3)),
        (z1 + 2.5, z2 + 2.5 * np.sqrt(3)),
        (z1 - 2.5, z2 + 2.5 * np.sqrt(3)),
        (z1 + 2.5, z2 - 2.5 * np.sqrt(3)),
    ]

    scaled_distances = [tf.sqrt(dx * dx + dy * dy) * 2 for dx, dy in displacements]

    mixture = None
    for v in scaled_distances:
        pdf = tf.exp(-0.5 * v * v) / tf.sqrt(2 * np.pi * 0.25)
        mixture = pdf if mixture is None else mixture + pdf
    return -tf.log(mixture / 6)
def batchnormalize(X, eps=1e-8, g=None, b=None):
    """Normalise a rank-2 or rank-4 tensor over all axes but the last.

    Args:
        X: tensor of static rank 2 or 4.
        eps: constant added to the variance before the square root.
        g: optional gain; applied only when ``b`` is given as well.
        b: optional bias; applied only when ``g`` is given as well.

    Returns:
        ``(X - mean) / sqrt(var + eps)``, then ``* g + b`` when both are set.

    Raises:
        NotImplementedError: for any other rank.
    """
    rank = X.get_shape().ndims
    if rank == 4:
        reduce_axes = [0, 1, 2]
        param_shape = [1, 1, 1, -1]
    elif rank == 2:
        reduce_axes = 0
        param_shape = [1, -1]
    else:
        raise NotImplementedError

    mean = tf.reduce_mean(X, reduce_axes)
    variance = tf.reduce_mean(tf.square(X - mean), reduce_axes)
    X = (X - mean) / tf.sqrt(variance + eps)

    if g is None or b is None:
        return X
    g = tf.reshape(g, param_shape)
    b = tf.reshape(b, param_shape)
    return X * g + b
def Grad_Penalty(real_data,fake_data,Discriminator,config):
    '''
    Gradient penalty evaluated on random interpolates of real and fake data.

    A point is drawn uniformly on the segment between each real/fake pair,
    the discriminator logits are differentiated with respect to that point,
    and the squared deviation of the gradient norm from 1 is averaged and
    scaled by ``config.lambda_W``. The original comment attributes this to
    "Improved training of Wasserstein".

    Args:
        real_data, fake_data: tensors combined as
            ``alpha*real + (1-alpha)*fake``.
        Discriminator: callable whose result is indexed with ``[1]`` to get
            the logits; it is called with ``reuse=True``.
        config: provides ``batch_size``, ``lambda_W`` and
            ``critic_hidden_size``.

    Returns:
        Tuple ``(grad_cost, slopes)``.
    '''
    batch_size = config.batch_size
    penalty_weight = config.lambda_W
    n_hidden = config.critic_hidden_size

    # One mixing coefficient per sample; this relies on a fixed batch size.
    mix = tf.random_uniform([batch_size, 1], 0., 1.)
    interpolates = mix * real_data + ((1 - mix) * fake_data)

    critic_out = Discriminator(interpolates, batch_size, n_hidden=n_hidden,
                               config=config, reuse=True)
    logits = critic_out[1]

    gradients = tf.gradients(logits, [interpolates])[0]
    squared_norm = tf.reduce_sum(tf.square(gradients), reduction_indices=[1])
    slopes = tf.sqrt(squared_norm)

    gradient_penalty = tf.reduce_mean((slopes - 1) ** 2)
    return penalty_weight * gradient_penalty, slopes
def l2_loss(tensor, weight=1.0, scope=None, normalize=False):
    """Define an L2 loss on ``tensor`` and add it to ``LOSSES_COLLECTION``.

    Args:
        tensor: tensor to regularize.
        weight: multiplier of the loss; only used when ``normalize`` is False.
        scope: optional scope for op_scope.
        normalize: when True the loss is
            ``sqrt(sqrt(tf.nn.l2_loss(tensor)) / size(tensor))`` and
            ``weight`` is not applied.

    Returns:
        The loss op, named 'value' inside the scope.
    """
    with tf.op_scope([tensor], scope, 'L2Loss'):
        weight = tf.convert_to_tensor(weight,
                                      dtype=tensor.dtype.base_dtype,
                                      name='loss_weight')
        if not normalize:
            loss = tf.mul(weight, tf.nn.l2_loss(tensor), name='value')
        else:
            # NOTE(review): `weight` is ignored on this path - confirm intent.
            root = tf.sqrt(tf.nn.l2_loss(tensor))
            element_count = tf.to_float(tf.size(tensor))
            loss = tf.sqrt(root / element_count, name='value')
        tf.add_to_collection(LOSSES_COLLECTION, loss)
        return loss
def recode_cost(self, inputs, variation, eps=1e-5, **kwargs):
    """Cost of a batch of inputs under the current parameters.

    Args:
        inputs: batch of samples.
        variation: noise that multiplies ``sqrt(exp(z_sig))`` when the latent
            sample is formed.
        eps: constant added to ``x_sig`` inside the logarithm.
        **kwargs: ignored.

    Returns:
        ``0.5 * mean(like_loss + lat_loss)``.
    """
    p = self.params

    # Encoder heads; z_sig is exponentiated below, so it acts as a
    # log-variance.
    h_enc = self.get_h_inputs(inputs)
    z_mu = tf.matmul(h_enc, p['Mhz']) + p['bMhz']
    z_sig = tf.matmul(h_enc, p['Shz']) + p['bShz']

    # Latent term (labelled a KL divergence by the original author).
    lat_loss = -tf.reduce_sum(1 + z_sig - z_mu**2 - tf.exp(z_sig), 1)

    z = z_mu + tf.sqrt(tf.exp(z_sig)) * variation

    # Decoder heads.
    h_dec = self.get_h_latents(z)
    x_mu = self.decoding(tf.matmul(h_dec, p['Mhx']) + p['bMhx'])
    x_sig = self.decoding(tf.matmul(h_dec, p['Shx']) + p['bShx'])

    # Decoding likelihood term.
    # NOTE(review): eps guards the log but not the division by x_sig.
    residual_sq = (inputs - x_mu)**2
    like_loss = tf.reduce_sum(tf.log(x_sig + eps) + residual_sq / x_sig, 1)

    return .5 * tf.reduce_mean(like_loss + lat_loss)
def Minibatch_Discriminator(input, num_kernels=100, dim_per_kernel=5, init=False, name='MD'):
    """Append minibatch-discrimination features to ``input``.

    Relies on the module-level names ``df_dim`` and ``batchsize``.

    Args:
        input: tensor that is reshaped to ``[batchsize, df_dim*4]``.
        num_kernels: number of kernels.
        dim_per_kernel: dimension of each kernel.
        init: not used by this function.
        name: prefix of the two variables created here.

    Returns:
        The reshaped input concatenated along axis 1 with the features.
    """
    num_inputs = df_dim * 4
    theta = tf.get_variable(name + "/theta", [num_inputs, num_kernels, dim_per_kernel],
                            initializer=tf.random_normal_initializer(stddev=0.05))
    log_weight_scale = tf.get_variable(name + "/lws", [num_kernels, dim_per_kernel],
                                       initializer=tf.constant_initializer(0.0))

    # Rescale theta by exp(log_weight_scale) / ||theta|| over axis 0.
    theta_norm = tf.sqrt(tf.reduce_sum(tf.square(theta), 0))
    gain = tf.expand_dims(tf.exp(log_weight_scale) / theta_norm, 0)
    W = tf.reshape(tf.mul(theta, gain), [-1, num_kernels * dim_per_kernel])

    x = tf.reshape(input, [batchsize, num_inputs])
    activation = tf.reshape(tf.matmul(x, W), [-1, num_kernels, dim_per_kernel])

    # Pairwise L1 distances between samples, with each sample's distance to
    # itself masked out by (1 - identity).
    lhs = tf.expand_dims(activation, 3)
    rhs = tf.expand_dims(tf.transpose(activation, [1, 2, 0]), 0)
    pairwise_l1 = tf.reduce_sum(tf.abs(tf.sub(lhs, rhs)), 2)
    off_diagonal = 1 - tf.expand_dims(tf.constant(np.eye(batchsize), dtype=np.float32), 1)
    abs_dif = tf.mul(pairwise_l1, off_diagonal)

    similarity = tf.exp(-abs_dif)
    f = tf.reduce_sum(similarity, 2) / tf.reduce_sum(similarity)

    print(f.get_shape())
    print(input.get_shape())
    return tf.concat(1, [x, f])
def inference(self):
    """Build the graph from the input ids to the classification logits.

    Steps performed here:
      1. look up the input embeddings and scale them by ``sqrt(d_model)``;
      2. add a learned ``[sequence_length, 1]`` variable (called the position
         embedding by the original author);
      3. run the ``Encoder`` stack with that tensor as both of its inputs;
      4. flatten the first encoder output per example and project it with
         ``self.W_projection`` and ``self.b_projection``.

    Returns:
        The logits tensor.
    """
    # 1. embedding of the encoder input, scaled by sqrt(d_model).
    embedded = tf.nn.embedding_lookup(self.Embedding, self.input_x)
    scale = tf.sqrt(tf.cast(self.d_model, dtype=tf.float32))
    embedded = tf.multiply(embedded, scale)

    # Learned additive term, broadcast over the batch.
    input_mask = tf.get_variable("input_mask", [self.sequence_length, 1], initializer=self.initializer)
    embedded = tf.add(embedded, input_mask)

    # 2. encoder
    encoder_class = Encoder(self.d_model, self.d_k, self.d_v, self.sequence_length,
                            self.h, self.batch_size, self.num_layer,
                            embedded, embedded,
                            dropout_keep_prob=self.dropout_keep_prob,
                            use_residual_conn=self.use_residual_conn)
    Q_encoded, K_encoded = encoder_class.encoder_fn()

    # Flatten everything after the batch axis before the projection.
    flat = tf.reshape(Q_encoded, shape=(self.batch_size, -1))
    with tf.variable_scope("output"):
        logits = tf.matmul(flat, self.W_projection) + self.b_projection
    print("logits:",logits)
    return logits
def yolo_loss(labels, predictions, mask):
    """Sum-of-squares detection loss over masked and unmasked cells.

    The index layout below is read off the slicing: predictions hold
    ``[confidence, x, y, w, h, classes...]`` on the last axis and labels hold
    ``[x, y, w, h, classes...]``.

    Args:
        labels: label tensor.
        predictions: prediction tensor.
        mask: boolean mask selecting the cells that contribute to the
            coordinate, size, confidence and class terms; the complement
            contributes the no-object term.

    Returns:
        ``5 * (xy_loss + wh_loss) + conf_loss + no_obj_loss + class_loss``.
    """
    def squared_sum(t):
        return tf.reduce_sum(t ** 2)

    obj_labels = tf.boolean_mask(labels, mask)
    obj_preds = tf.boolean_mask(predictions, mask)

    xy_loss = squared_sum(obj_labels[..., :2] - obj_preds[..., 1:3])
    # Sizes are compared in square-root space.
    wh_loss = squared_sum(tf.sqrt(obj_preds[..., 3:5]) - tf.sqrt(obj_labels[..., 2:4]))
    # Confidence target is the constant 1 for the masked cells.
    conf_loss = squared_sum(1 - obj_preds[..., 0])
    no_obj_loss = squared_sum(tf.boolean_mask(predictions, ~mask)[..., 0])
    class_loss = squared_sum(obj_preds[..., 5:] - obj_labels[..., 4:])

    coordinate_loss = xy_loss + wh_loss
    return 5 * coordinate_loss + conf_loss + no_obj_loss + class_loss
def shrink_soft_threshold(r,rvar,theta):
    """Soft-threshold shrinkage.

        xhat = sign(r) * max(0, abs(r) - lam) [* scale]
        lam  = max(theta0 * sqrt(rvar), 0)

    When ``theta`` has static rank > 0 and its shape is not ``(1,)``,
    ``theta[0]`` is the threshold factor and ``theta[1]`` the output scale.
    Otherwise ``theta`` itself is the threshold factor and no scaling is
    applied.

    Returns:
        Tuple ``(xhat, dxdr)`` where ``dxdr`` is the mean over axis 0 of the
        indicator ``abs(r) - lam > 0``, multiplied by the scale if present.
    """
    theta_shape = theta.get_shape()
    has_scale = len(theta_shape) > 0 and theta_shape != (1,)

    threshold_factor = theta[0] if has_scale else theta
    lam = tf.maximum(threshold_factor * tf.sqrt(rvar), 0)

    excess = tf.abs(r) - lam
    xhat = tf.sign(r) * tf.maximum(excess, 0)
    dxdr = tf.reduce_mean(tf.to_float(excess > 0), 0)

    if has_scale:
        scale = theta[1]
        xhat = xhat * scale
        dxdr = dxdr * scale
    return (xhat, dxdr)
def shrink_bgest(r,rvar,theta):
    """Bernoulli-Gaussian MMSE estimator.

    Computes E[x|r] for
        x   ~ BernoulliGaussian(lambda, xvar1)
        r|x ~ Normal(x, rvar)

    ``theta[..., 0]`` gives the variance of the non-zero entries,
    ``xvar1 = abs(theta[..., 0])``. ``theta[..., 1]`` is
    ``log(1/lambda - 1)``, i.e. ``lambda = 1 / (exp(theta[..., 1]) + 1)``.

    Returns:
        Tuple ``(xhat, dxdr)``; ``dxdr`` is averaged over axis 0.
    """
    nonzero_var = abs(theta[..., 0])
    log_odds = theta[..., 1]  # log(1/lambda - 1)

    beta = 1 / (1 + rvar / nonzero_var)
    r2scale = r * r * beta / rvar
    rho = tf.exp(log_odds - .5 * r2scale) * tf.sqrt(1 + nonzero_var / rvar)
    rho_plus_one = rho + 1

    xhat = beta * r / rho_plus_one
    derivative = beta * ((1 + rho * (1 + r2scale)) / tf.square(rho_plus_one))
    return (xhat, tf.reduce_mean(derivative, 0))
def pwlin_grid(r_,rvar_,theta_,dtheta = .75):
    """Piecewise-linear gain with a noise-adaptive grid.

    As implemented:
        q    = clip(abs(r) * dtheta / sqrt(rvar), 0, ntheta - 1)
        gain = sum_k theta[..., k] * max(0, 1 - abs(q - k))
        xhat = gain * r

    where ``ntheta`` is the static size of the last axis of ``theta_``. All
    but the last dimension of ``theta_`` must broadcast against ``r_``,
    e.g. ``r.shape = (500, 1000)`` with ``theta.shape = (500, 1, 7)``.

    Returns:
        Tuple ``(xhat, dxdr)`` with ``dxdr`` obtained from ``tf.gradients``.
    """
    knot_count = int(theta_.get_shape()[-1])

    # Grid position of abs(r), clipped to the range covered by the knots.
    inv_spacing_ = dtheta / tf.sqrt(rvar_)
    position_ = tf.expand_dims(tf.abs(r_) * inv_spacing_, -1)
    position_ = tf.clip_by_value(position_, 0.0, knot_count - 1.0)

    # Weight of each knot: 1 at the knot, falling linearly to 0 one grid
    # step away. The trailing axis indexes the knots.
    knots_ = tf.constant(np.arange(knot_count), dtype=tf.float32)
    knot_weight_ = tf.maximum(0., 1.0 - tf.abs(position_ - knots_))

    # Apply the (learnable) gain.
    gain_ = tf.reduce_sum(theta_ * knot_weight_, axis=-1)
    xhat_ = gain_ * r_
    dxdr_ = tf.gradients(xhat_, r_)[0]
    return (xhat_, dxdr_)
def shrink_spline(r,rvar,theta):
    """Spline-based shrinkage function.

    With ``a = abs(r / (theta[0] * sqrt(rvar)))`` the kernel is

        beta3(a) = 2/3 - a**2 + a**3 / 2    for a < 1
                   (2 - a)**3 / 6           for 1 <= a < 2
                   0                        otherwise

    and ``xhat = r * (theta[1] + theta[2] * beta3(a))``.

    Returns:
        Tuple ``(xhat, auto_gradients(xhat, r))``.
    """
    scale = theta[0] * tf.sqrt(rvar)
    a = tf.abs(r / scale)
    a_sq = tf.square(a)
    a_cu = a * a_sq

    # Indicator masks of the two polynomial pieces.
    inner = tf.to_float(a < 1)
    outer = tf.to_float(a < 2) - inner

    tail = 2 - a
    tail_cu = tail * tf.square(tail)

    inner_poly = 2. / 3 - a_sq + .5 * a_cu
    outer_poly = 1. / 6 * (tail_cu)
    beta3 = inner_poly * inner + outer_poly * outer

    xhat = r * (theta[1] + theta[2] * beta3)
    return (xhat, auto_gradients(xhat, r))
def show_shrinkage(shrink_func,theta,**kwargs):
    """Plot a shrinkage function against the identity line.

    A fresh default graph is built, ``shrink_func`` is evaluated on a ramp
    of inputs from 0 to ``sigmas`` standard deviations (noise variance
    fixed at 1e-4), and the result is shown with matplotlib.

    Args:
        shrink_func: callable ``(r, rvar, theta) -> (xhat, dxdr)`` operating
            on tensors.
        theta: parameters handed to ``shrink_func`` after conversion with
            ``tfcf``.
        **kwargs: optional settings:
            seed   - graph-level random seed (default 1)
            N, L   - shape of the input ramp (defaults 500 and 4)
            sigmas - upper end of the ramp in standard deviations
                     (default 10)
            title  - figure title, set only when the key is present
    """
    tf.reset_default_graph()
    tf.set_random_seed(kwargs.get('seed', 1))

    N = kwargs.get('N', 500)
    L = kwargs.get('L', 4)
    nsigmas = kwargs.get('sigmas', 10)
    shape = (N, L)
    rvar = 1e-4

    r = np.reshape(np.linspace(0, nsigmas, N * L) * math.sqrt(rvar), shape)
    r_ = tfcf(r)
    rvar_ = tfcf(np.ones(L) * rvar)

    # Only the estimate is plotted; the derivative output is not needed.
    xhat_, _ = shrink_func(r_, rvar_, tfcf(theta))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        xhat = sess.run(xhat_)

    import matplotlib.pyplot as plt
    plt.figure(1)
    plt.plot(r.reshape(-1), r.reshape(-1), 'y')
    plt.plot(r.reshape(-1), xhat.reshape(-1), 'b')
    # dict.has_key() does not exist on Python 3; `in` works on 2 and 3.
    if 'title' in kwargs:
        plt.suptitle(kwargs['title'])
    plt.show()
def set_input_shape(self, input_shape):
    """Create the kernels and bias, then record the layer's output shape.

    Args:
        input_shape: 4-tuple ``(batch_size, rows, cols, input_channels)``.

    Sets ``self.kernels`` (random-normal filters scaled to unit norm over
    their first three axes), ``self.b`` (zeros, one per output channel) and
    ``self.output_shape`` (taken from ``self.fprop`` applied to an all-zero
    batch of size 1, with the batch entry set to 1).
    """
    _batch, _rows, _cols, input_channels = input_shape
    kernel_shape = tuple(self.kernel_shape) + (input_channels,
                                               self.output_channels)
    assert len(kernel_shape) == 4
    assert all(isinstance(e, int) for e in kernel_shape), kernel_shape

    init = tf.random_normal(kernel_shape, dtype=tf.float32)
    filter_norm = tf.sqrt(1e-7 + tf.reduce_sum(tf.square(init), axis=(0, 1, 2)))
    self.kernels = tf.Variable(init / filter_norm)
    self.b = tf.Variable(
        np.zeros((self.output_channels,)).astype('float32'))

    # Probe the forward pass with one all-zero sample to get the shape.
    probe_shape = list(input_shape)
    probe_shape[0] = 1
    probe_output = self.fprop(tf.zeros(probe_shape))
    result_shape = [int(dim) for dim in probe_output.get_shape()]
    result_shape[0] = 1
    self.output_shape = tuple(result_shape)
def sub_sampling(data, word_counter, word_dict, sampling_rate):
    """Randomly drop frequent words from every sentence.

    A word with relative frequency ``f`` is discarded with probability
    ``max(0, 1 - sqrt(sampling_rate / f))``.

    Args:
        data: list of sentences; each sentence is a list of word ids.
        word_counter: word counts, either an iterable of ``(word, count)``
            pairs or a mapping such as ``collections.Counter``.
        word_dict: mapping from word to the id used in ``data``.
        sampling_rate: sub-sampling threshold.

    Returns:
        A new list of sentences holding the words that were kept. Sentences
        that lose all their words stay in the list as empty lists.

    Raises:
        KeyError: if a sentence holds an id that ``word_counter`` and
            ``word_dict`` do not cover.
    """
    total_words = sum(len(sentence) for sentence in data)

    # A mapping yields only its keys when iterated, so use items() for it.
    if hasattr(word_counter, 'items'):
        counts = word_counter.items()
    else:
        counts = word_counter

    prob_dict = {}
    for word, count in counts:
        # float() keeps this a true division on Python 2 as well.
        frequency = float(count) / total_words if total_words else 0.0
        if frequency > 0:
            drop_prob = max(0.0, 1 - math.sqrt(sampling_rate / frequency))
        else:
            # A word that never occurs has nothing to drop; this also
            # avoids dividing by zero.
            drop_prob = 0.0
        prob_dict[word_dict[word]] = drop_prob

    return [
        [word for word in sentence if random.random() > prob_dict[word]]
        for sentence in data
    ]
def negative_l2_distance(x1, x2, axis=1):
    """
    Negated Euclidean distance, usable as a similarity score.

    .. math::
        L = - \\sqrt{\\sum_i (x1_i - x2_i)^2}

    Args:
        x1: First term.
        x2: Second term.
        axis: Axis (or axes) the squared differences are summed over.

    Returns:
        Tensor holding the negated distance.
    """
    difference = x1 - x2
    squared_sum = tf.reduce_sum(tf.square(difference), axis=axis)
    return -tf.sqrt(squared_sum)
def adam_updates(params, cost_or_grads, lr=0.001, mom1=0.9, mom2=0.999):
    """Build the ops for one step of the Adam optimizer.

    Args:
        params: list of variables to update.
        cost_or_grads: either a cost tensor (gradients are then taken with
            ``tf.gradients``) or a list/tuple of gradients, one per entry of
            ``params``.
        lr: step size.
        mom1: decay of the first-moment average; a value ``<= 0`` disables
            the first moment and uses the raw gradient.
        mom2: decay of the second-moment average.

    Returns:
        One grouped op that updates the moment accumulators, the parameters
        and the step counter.
    """
    updates = []
    if isinstance(cost_or_grads, (list, tuple)):
        grads = list(cost_or_grads)
    else:
        grads = tf.gradients(cost_or_grads, params)

    # The second positional parameter of tf.Variable is `trainable`, not
    # `name`; passing a name string there marked the optimizer state as
    # trainable. The state is now explicitly non-trainable. No name is set,
    # so variable names (and checkpoints) stay as they were.
    t = tf.Variable(1., trainable=False)
    for p, g in zip(params, grads):
        mg = tf.Variable(tf.zeros(p.get_shape()), trainable=False)
        if mom1 > 0:
            v = tf.Variable(tf.zeros(p.get_shape()), trainable=False)
            v_t = mom1 * v + (1. - mom1) * g
            # Bias correction of the first moment.
            v_hat = v_t / (1. - tf.pow(mom1, t))
            updates.append(v.assign(v_t))
        else:
            v_hat = g
        mg_t = mom2 * mg + (1. - mom2) * tf.square(g)
        # Bias correction of the second moment.
        mg_hat = mg_t / (1. - tf.pow(mom2, t))
        g_t = v_hat / tf.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append(mg.assign(mg_t))
        updates.append(p.assign(p_t))
    updates.append(t.assign_add(1))
    return tf.group(*updates)
def add_param(self, spec, shape, name, **kwargs):
    """Create a parameter, optionally re-parameterised by weight normalisation.

    The parameter is created through ``self.add_param_plain``. When
    ``self.weight_normalization`` is set and ``name`` starts with "W", the
    parameter is treated as a weight tensor ``v`` and
    ``v * g / ||v||`` is returned instead. ``g`` is a new parameter
    initialised to ones with one entry per index of the last axis, and the
    norm is taken over all other axes.

    Raises:
        NotImplementedError: if such a weight is neither rank 2 nor rank 4.
    """
    param = self.add_param_plain(spec, shape, name, **kwargs)

    # Hacky: the name prefix is what identifies a weight tensor.
    is_weight = name is not None and name.startswith("W") and self.weight_normalization
    if not is_weight:
        return param

    rank = len(param.get_shape())
    if rank == 2:
        out_axis, norm_axes, gain_shape = 1, 0, (1, -1)
    elif rank == 4:
        out_axis, norm_axes, gain_shape = 3, [0, 1, 2], (1, 1, 1, -1)
    else:
        raise NotImplementedError

    v = param
    g = self.add_param_plain(tf.ones_initializer, (shape[out_axis],), name=name + "_wn/g")
    gain = tf.reshape(g, gain_shape)
    v_norm = tf.sqrt(tf.reduce_sum(tf.square(v), norm_axes, keep_dims=True))
    return v * (gain / v_norm)
def apply_ln(layer):
    """Return a layer-normalisation function bound to ``layer``.

    The returned callable ``_normalize(x, prefix)`` normalises ``x`` over
    axis 1 and applies a scale and a bias. Both parameters are created on
    first use through ``layer.add_param`` and cached in
    ``layer.norm_params`` under ``prefix + "_ln/scale"`` and
    ``prefix + "_ln/bias"``.
    """
    def _normalize(x, prefix):
        EPS = 1e-5
        dim = x.get_shape()[-1].value
        bias_name = prefix + "_ln/bias"
        scale_name = prefix + "_ln/scale"
        cache = layer.norm_params

        if bias_name not in cache:
            cache[bias_name] = layer.add_param(
                tf.zeros_initializer, (dim,), name=bias_name, regularizable=False)
        if scale_name not in cache:
            cache[scale_name] = layer.add_param(
                tf.ones_initializer, (dim,), name=scale_name)

        mean, var = tf.nn.moments(x, axes=[1], keep_dims=True)
        centered = x - mean
        x_normed = centered / tf.sqrt(var + EPS)
        return x_normed * cache[scale_name] + cache[bias_name]

    return _normalize
def __init__(self, name, shape, initial_stdev = 2.0, initial_prec_a = 5.0, initial_prec_b = 1.0, a0 = 1.0, b0 = 1.0, fixed_prec = False, mean_init_std = None):
    """Create the mean, log-variance and precision tensors of the variable.

    Args:
        name: variable scope that wraps everything created here.
        shape: shape of ``mean`` and ``logvar``; ``shape[-1]`` sizes the
            precision vectors.
        initial_stdev: ``logvar`` starts at ``log(initial_stdev**2)``.
        initial_prec_a, initial_prec_b: initial values of ``prec_a`` and
            ``prec_b``; ``prec = prec_a / prec_b``.
        a0, b0: stored unchanged on the instance.
        fixed_prec: when True ``prec_a`` and ``prec_b`` are constants,
            otherwise they are variables.
        mean_init_std: half-width of the uniform initialisation of ``mean``;
            defaults to ``1 / sqrt(shape[-1])``.
    """
    if mean_init_std is None:
        mean_init_std = 1.0 / np.sqrt(shape[-1])

    # Same call signature either way: (value, name=..., dtype=...).
    make_prec = tf.constant if fixed_prec else tf.Variable
    last_dim_ones = np.ones(shape[-1])

    with tf.variable_scope(name):
        self.mean = tf.Variable(tf.random_uniform(shape, minval=-mean_init_std, maxval=mean_init_std))
        self.logvar = tf.Variable(np.log(initial_stdev**2.0) * np.ones(shape), name = "logvar", dtype = tf.float32)
        self.prec_a = make_prec(initial_prec_a * last_dim_ones, name = "prec_a", dtype = tf.float32)
        self.prec_b = make_prec(initial_prec_b * last_dim_ones, name = "prec_b", dtype = tf.float32)
        self.prec = tf.div(self.prec_a, self.prec_b, name = "prec")
        self.var = tf.exp(self.logvar, name = "var")
        self.a0 = a0
        self.b0 = b0
        self.shape = shape
def __init__(self, name, shape, initial_stdev = 2.0, initial_prec = 5.0, a0 = 1.0, b0 = 1.0):
    """Create the mean and log-variance variables and a precision placeholder.

    Args:
        name: variable scope that wraps everything created here.
        shape: shape of ``mean`` and ``logvar``; ``shape[-1]`` sizes the
            precision.
        initial_stdev: ``logvar`` starts at ``log(initial_stdev**2)``.
        initial_prec: value repeated ``shape[-1]`` times to form the numpy
            array ``self.prec``.
        a0, b0: stored unchanged on the instance.
    """
    last_dim = shape[-1]
    # mean is initialised uniformly in +-1/sqrt(last_dim).
    bound = 1.0 / np.sqrt(last_dim)
    initial_logvar = np.log(initial_stdev**2.0) * np.ones(shape)

    with tf.variable_scope(name):
        self.mean = tf.Variable(tf.random_uniform(shape, minval=-bound, maxval=bound))
        self.logvar = tf.Variable(initial_logvar, name = "logvar", dtype = tf.float32)
        # The precision is held as a numpy array next to a placeholder
        # named "prec".
        self.prec = np.repeat(initial_prec, last_dim)
        self.prec_ph = tf.placeholder(shape=last_dim, name="prec", dtype = tf.float32)
        self.var = tf.exp(self.logvar, name = "var")
        self.a0 = a0
        self.b0 = b0
        self.shape = shape

# def prec_div(self):
#     return - tf.reduce_sum(gammaPrior(self.prec_a, self.prec_b, self.a0, self.b0))

## outputs E_q[ log N( x | 0, prec^-1) ] + Entropy(q(x))
## where x is the normally distributed variable
def gradient_penalty(self):
    """Return the weighted gradient penalty on random interpolates.

    A point between the reference input ``x`` and the generator sample ``g``
    is drawn per batch element, the discriminator is re-applied to it, and
    the mean squared deviation of the gradient norm (taken over axis 1) from
    1 is multiplied by ``config.gradient_penalty``.

    ``x`` is ``gan.inputs.gradient_penalty_label`` when that attribute is
    present and ``gan.inputs.x`` otherwise.
    """
    gan = self.gan
    penalty_weight = self.config.gradient_penalty

    # NOTE(review): `has_attr` is not the builtin `hasattr`; presumably a
    # project helper - verify it is in scope.
    if has_attr(gan.inputs, 'gradient_penalty_label'):
        x = gan.inputs.gradient_penalty_label
    else:
        x = gan.inputs.x

    generator = self.generator or gan.generator
    g = generator.sample
    discriminator = self.discriminator or gan.discriminator

    # Noise shape: batch size on axis 0 and 1 on every other axis, so the
    # same coefficient is broadcast across each sample.
    noise_shape = [1] * len(g.get_shape())
    noise_shape[0] = gan.batch_size()
    uniform_noise = tf.random_uniform(shape=noise_shape, minval=0., maxval=1.)
    print("[gradient penalty] applying x:", x, "g:", g, "noise:", uniform_noise)

    interpolates = x + uniform_noise * (g - x)
    reused_d = discriminator.reuse(interpolates)
    gradients = tf.gradients(reused_d, [interpolates])[0]

    grad_norm = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=1))
    penalty = tf.reduce_mean(tf.square(grad_norm - 1.))
    return float(penalty_weight) * penalty
def setup_param_noise(self, normalized_obs0):
    """Create the perturbed actor copies used for parameter-space noise.

    Two copies of ``self.actor`` are made. The first one (named
    'param_noise_actor') provides ``self.perturbed_actor_tf``. The second
    one (named 'adaptive_param_noise_actor') is used only to measure
    ``self.adaptive_policy_distance``, the root-mean-square difference
    between its output and ``self.actor_tf``.

    Args:
        normalized_obs0: observation tensor fed to both copies.
    """
    assert self.param_noise is not None

    # Perturbed actor.
    perturbed = copy(self.actor)
    perturbed.name = 'param_noise_actor'
    self.perturbed_actor_tf = perturbed(normalized_obs0)
    logger.info('setting up param noise')
    self.perturb_policy_ops = get_perturbed_actor_updates(
        self.actor, perturbed, self.param_noise_stddev)

    # Separate copy for the stddev adaptation.
    adaptive = copy(self.actor)
    adaptive.name = 'adaptive_param_noise_actor'
    adaptive_actor_tf = adaptive(normalized_obs0)
    self.perturb_adaptive_policy_ops = get_perturbed_actor_updates(
        self.actor, adaptive, self.param_noise_stddev)

    squared_gap = tf.square(self.actor_tf - adaptive_actor_tf)
    self.adaptive_policy_distance = tf.sqrt(tf.reduce_mean(squared_gap))
def weight_variable(shape, name, var_type='normal', const=1):
    """Create a TensorFlow weight variable.

    Args:
        shape: shape of the weight variable.
        name: name given to the variable.
        var_type: 'xavier' selects uniform Xavier initialisation (2-D shapes
            only); any other value selects a truncated normal with
            ``stddev = 1 / sqrt(shape[0])``.
        const: multiplier of the Xavier range.

    Returns:
        The ``tf.Variable`` holding the weights.
    """
    if var_type != 'xavier':
        stddev = 1.0 / math.sqrt(float(shape[0]))
        initial = tf.truncated_normal(shape, stddev=stddev, dtype=tf.float32)
        return tf.Variable(initial, name=name)

    # Xavier initialization of network weights. Taken from:
    # https://gist.github.com/blackecho/3a6e4d512d3aa8aa6cf9
    # https://stackoverflow.com/questions/33640581/how-to-do-xavier-initialization-on-tensorflow
    assert len(shape) == 2
    fan_in, fan_out = shape[0], shape[1]
    low = -const * np.sqrt(6.0 / (fan_in + fan_out))
    high = const * np.sqrt(6.0 / (fan_in + fan_out))
    initial = tf.random_uniform((fan_in, fan_out), minval=low, maxval=high)
    return tf.Variable(initial, name=name)
def testGetBackwardOpsChain(self):
    """Check ``get_backward_ops`` on the linear chain a -> b -> c.

    For every ordered selection of up to three of the tensors, the expected
    result is the list of ops from ``a`` up to the deepest selected tensor.
    The ``treat_as_inputs`` cases check that traversal stops at ``b``.
    """
    # a -> b -> c
    a = tf.placeholder(tf.float32)
    b = tf.sqrt(a)
    c = tf.square(b)
    chain = [a, b, c]
    chain_ops = [tensor.op for tensor in chain]

    for n in range(4):
        for seed_tensors in permutations(chain, n):
            # Depth of the deepest seed; 0 when there are no seeds.
            depth = 0
            for position, tensor in enumerate(chain, 1):
                if tensor in seed_tensors:
                    depth = position
            truth = chain_ops[:depth]
            self.assertEqual(get_backward_ops(seed_tensors), truth)

    treat_as_input_cases = (
        ([c], [c.op]),
        ([b, c], [c.op]),
        ([a, c], [a.op, c.op]),
    )
    for seeds, expected in treat_as_input_cases:
        self.assertEqual(
            get_backward_ops(seeds, treat_as_inputs=[b]), expected)
def tune(self, acceptance_rate, fresh_start):
    """Return the step size, adapting it while ``self.adapt_step_size`` holds.

    When adaptation is on, the step counter, ``h_bar`` and
    ``log_epsilon_bar`` are updated and ``exp(log_epsilon)`` is returned.
    Otherwise ``exp(log_epsilon_bar)`` is returned without any update.

    Args:
        acceptance_rate: observed acceptance rate; compared with
            ``self.delta``.
        fresh_start: value in {0, 1}; 1 discards the accumulated state
            (every carried-over term is multiplied by ``1 - fresh_start``).
    """
    def adapt_stepsize():
        carry = 1 - fresh_start

        step_now = tf.assign(self.step, carry * self.step + 1)
        h_rate = tf.div(1.0, step_now + self.t0)
        h_bar_now = tf.assign(
            self.h_bar,
            carry * (1 - h_rate) * self.h_bar +
            h_rate * (self.delta - acceptance_rate))

        log_epsilon = self.mu - tf.sqrt(step_now) / self.gamma * h_bar_now

        avg_rate = tf.pow(step_now, -self.kappa)
        log_epsilon_bar_now = tf.assign(
            self.log_epsilon_bar,
            avg_rate * log_epsilon +
            carry * (1 - avg_rate) * self.log_epsilon_bar)

        # The identity forces the averaged value to be written before the
        # fresh step size is returned.
        with tf.control_dependencies([log_epsilon_bar_now]):
            fresh_log_epsilon = tf.identity(log_epsilon)
        return tf.exp(fresh_log_epsilon)

    def frozen_stepsize():
        return tf.exp(self.log_epsilon_bar)

    return tf.cond(self.adapt_step_size, adapt_stepsize, frozen_stepsize)
def variable_summaries(var, name, collections=None):
    """Attach a set of TensorBoard summaries to a tensor.

    Writes scalar summaries for mean, element count, standard deviation,
    max, min and zero fraction, plus one histogram.

    Args:
        - var: Tensor to summarise.
        - name: Name scope wrapping all summaries.
        - collections: List of collections to save the summaries to.
    """
    with tf.name_scope(name):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean, collections)

        element_count = tf.reduce_prod(tf.shape(var))
        tf.summary.scalar('num_params', element_count, collections)

        with tf.name_scope('stddev'):
            deviation_sq = tf.square(var - mean)
            stddev = tf.sqrt(tf.reduce_mean(deviation_sq))
        tf.summary.scalar('stddev', stddev, collections)

        tf.summary.scalar('max', tf.reduce_max(var), collections)
        tf.summary.scalar('min', tf.reduce_min(var), collections)
        tf.summary.histogram('histogram', var, collections)
        tf.summary.scalar('sparsity', tf.nn.zero_fraction(var), collections)
def normal_map(tensor, shape):
    """
    Generate a tangent-space normal map.

    The x and y channels come from Sobel convolutions of the value map of
    ``tensor``; the z channel is derived from the magnitude of (x, y).

    :param Tensor tensor:
    :param list[int] shape: ``[height, width, channels]``
    :return: Tensor with the x, y and z channels stacked on axis 2
    """
    height, width, _channels = shape
    single_channel = [height, width, 1]

    reference = value_map(tensor, shape, keep_dims=True)

    x = normalize(1 - convolve(ConvKernel.sobel_x, reference, single_channel))
    y = normalize(convolve(ConvKernel.sobel_y, reference, single_channel))

    magnitude = normalize(tf.sqrt(x * x + y * y))
    z = 1 - tf.abs(magnitude * 2 - 1) * .5 + .5

    planes = [channel[:, :, 0] for channel in (x, y, z)]
    return tf.stack(planes, 2)
def euclidean_distance(self):
    """Distance between the network's peak location and the desired point.

    The peak coordinates are the argmax (over axis 1) of
    ``self.smoothed_sigm_network`` after max-reducing axis 1 for ``x`` and
    axis 2 for ``y``. The targets are ``self.desired_points[:, 1, :]`` for
    ``x`` and ``self.desired_points[:, 0, :]`` for ``y``.

    Returns:
        Tensor of Euclidean distances.
    """
    heatmap = self.smoothed_sigm_network
    peak_x = tf.cast(tf.argmax(tf.reduce_max(heatmap, 1), 1), tf.float32)
    peak_y = tf.cast(tf.argmax(tf.reduce_max(heatmap, 2), 1), tf.float32)

    # desired_points stores y at index 0 and x at index 1 of axis 1.
    target_y = tf.squeeze(self.desired_points[:, 0, :])
    target_x = tf.squeeze(self.desired_points[:, 1, :])

    squared_dx = tf.squared_difference(peak_x, target_x)
    squared_dy = tf.squared_difference(peak_y, target_y)
    return tf.sqrt(squared_dx + squared_dy)
def _decoder(self, z):
    """Define the p(x|z) network, with a class head on the latent mean.

    Args:
        z: encoder output that is reshaped to
            ``[-1, 2 * flags['hidden_size']]`` and split in two halves
            (mean and log-variance), or None to decode ``self.epsilon``
            directly.

    Returns:
        Tuple ``(decoded, mean, stddev, class_predictions, logits)``; the
        last four entries are None when ``z`` is None. Note that ``logits``
        holds the softmax of ``class_predictions``.
    """
    mean = stddev = logits = class_predictions = None
    input_sample = self.epsilon

    if z is not None:
        z = tf.reshape(z, [-1, self.flags['hidden_size'] * 2])
        # First half is the mean, second half the log-variance.
        mean, log_var = tf.split(1, 2, z)
        stddev = tf.sqrt(tf.exp(log_var))

        classifier = Layers(mean)
        classifier.fc(self.flags['num_classes'])
        class_predictions = classifier.get_output()
        logits = tf.nn.softmax(class_predictions)

        input_sample = mean + self.epsilon * stddev

    # Insert two singleton axes after the batch axis before deconvolving.
    decoder_input = tf.expand_dims(tf.expand_dims(input_sample, 1), 1)
    decoder = Layers(decoder_input)
    decoder.deconv2d(3, 128, padding='VALID')
    decoder.deconv2d(3, 64, padding='VALID', stride=2)
    decoder.deconv2d(3, 64, stride=2)
    decoder.deconv2d(5, 32, stride=2)
    decoder.deconv2d(7, 1, activation_fn=tf.nn.tanh, s_value=None)
    return decoder.get_output(), mean, stddev, class_predictions, logits
def _decoder(self, z):
    """Define the p(x|z) network.

    Args:
        z: encoder output that is reshaped to
            ``[-1, 2 * flags['hidden_size']]`` and split in two halves
            (mean and log-variance), or None to decode ``self.epsilon``
            directly.

    Returns:
        Tuple ``(decoded, mean, stddev)``; ``mean`` and ``stddev`` are None
        when ``z`` is None.
    """
    mean = None
    stddev = None
    input_sample = self.epsilon

    if z is not None:
        z = tf.reshape(z, [-1, self.flags['hidden_size'] * 2])
        print(z.get_shape())
        # First half is the mean, second half the log-variance.
        mean, log_var = tf.split(1, 2, z)
        stddev = tf.sqrt(tf.exp(log_var))
        input_sample = mean + self.epsilon * stddev

    # Insert two singleton axes after the batch axis before deconvolving.
    decoder_input = tf.expand_dims(tf.expand_dims(input_sample, 1), 1)
    decoder = Layers(decoder_input)
    decoder.deconv2d(3, 128, padding='VALID')
    decoder.deconv2d(3, 128, padding='VALID', stride=2)
    decoder.deconv2d(3, 64, stride=2)
    decoder.deconv2d(3, 64, stride=2)
    decoder.deconv2d(5, 1, activation_fn=tf.nn.tanh, s_value=None)
    return decoder.get_output(), mean, stddev
def ln(tensor, scope=None, epsilon=1e-5):
    """Layer-normalise a 2-D tensor along its second axis.

    Args:
        tensor: rank-2 tensor.
        scope: optional string prepended to 'layer_norm' to form the variable
            scope; any non-string value is treated as ''.
        epsilon: constant added to the variance before the square root.

    Returns:
        The normalised tensor multiplied by a learned 'scale' plus a learned
        'shift', each with one entry per column.
    """
    assert(len(tensor.get_shape()) == 2)
    mean, variance = tf.nn.moments(tensor, [1], keep_dims=True)

    prefix = scope if isinstance(scope, str) else ''
    with tf.variable_scope(prefix + 'layer_norm'):
        width = tensor.get_shape()[1]
        scale = tf.get_variable('scale',
                                shape=[width],
                                initializer=tf.constant_initializer(1))
        shift = tf.get_variable('shift',
                                shape=[width],
                                initializer=tf.constant_initializer(0))
    normalised = (tensor - mean) / tf.sqrt(variance + epsilon)
    return normalised * scale + shift
def build_model(user_indices, item_indices, rank, ratings, user_cnt, item_cnt, lr, lamb, mu, init_value):
    """Build a matrix-factorisation rating model and its training op.

    Every user and item gets a ``rank``-dimensional embedding. A column of
    ones is appended to the user matrix and prepended to the item matrix,
    and the prediction is the dot product of the two rows plus ``mu``.

    Args:
        user_indices, item_indices: indices looked up in the embeddings.
        rank: embedding size.
        ratings: target ratings.
        user_cnt, item_cnt: number of rows in each embedding matrix.
        lr: learning rate of the gradient-descent optimizer.
        lamb: weight of the L2 penalty on both embedding matrices.
        mu: constant added to every prediction.
        init_value: scale of the initial embeddings
            (stddev = ``init_value / sqrt(rank)``).

    Returns:
        Tuple ``(train_step, square_error, loss, merged_summary)``;
        ``square_error`` is the root of the mean squared difference.
    """
    init_stddev = init_value / math.sqrt(float(rank))
    W_user = tf.Variable(tf.truncated_normal([user_cnt, rank], stddev=init_stddev, mean=0),
                         name = 'user_embedding', dtype=tf.float32)
    W_item = tf.Variable(tf.truncated_normal([item_cnt, rank], stddev=init_stddev, mean=0),
                         name = 'item_embedding', dtype=tf.float32)

    # The ones column sits last for users and first for items.
    user_ones = tf.ones((user_cnt, 1), dtype=tf.float32)
    W_user_bias = tf.concat([W_user, user_ones], 1, name='user_embedding_bias')
    item_ones = tf.ones((item_cnt, 1), dtype=tf.float32)
    W_item_bias = tf.concat([item_ones, W_item], 1, name='item_embedding_bias')

    user_feature = tf.nn.embedding_lookup(W_user_bias, user_indices, name = 'user_feature')
    item_feature = tf.nn.embedding_lookup(W_item_bias, item_indices, name = 'item_feature')

    interaction = tf.reduce_sum(tf.multiply(user_feature, item_feature), 1)
    preds = tf.add(interaction, mu)

    square_error = tf.sqrt(tf.reduce_mean(tf.squared_difference(preds, ratings)))
    penalty = tf.reduce_mean(tf.nn.l2_loss(W_user)) + tf.reduce_mean(tf.nn.l2_loss(W_item))
    loss = square_error + lamb * penalty

    tf.summary.scalar('square_error', square_error)
    tf.summary.scalar('loss', loss)
    merged_summary = tf.summary.merge_all()

    train_step = tf.train.GradientDescentOptimizer(lr).minimize(loss)
    return train_step, square_error, loss, merged_summary
def gaussian_stochastic(self, input_tensor, num_maps, scope):
    """Draw one sample from a Gaussian parameterised by 1x1 convolutions.

    Args:
        input_tensor: Input tensor. If its rank is not 4, two singleton axes
            are inserted at position 1 first.
        num_maps: Number of output maps of the mean and variance
            convolutions.
        scope: Variable scope name, also used as the prefix of the names
            passed to ``self.split_labeled_unlabeled``.

    Returns:
        The single draw ``mean + sqrt(sigma2) * noise``. The mean, the
        variance and the draw are each also handed to
        ``self.split_labeled_unlabeled``.
    """
    with tf.variable_scope(scope):
        if len(input_tensor.get_shape()) != 4:
            input_tensor = tf.expand_dims(tf.expand_dims(input_tensor, 1), 1)

        hidden = slim.conv2d(input_tensor, self._hidden_size, [1, 1],
                             weights_initializer=self._initializer,
                             scope='conv1')
        mean = slim.conv2d(hidden, num_maps, [1, 1],
                           weights_initializer=self._initializer,
                           activation_fn=None, scope='mean')
        # Softplus keeps the variance positive.
        raw_sigma2 = slim.conv2d(hidden, num_maps, [1, 1],
                                 weights_initializer=self._initializer,
                                 activation_fn=None, scope='sigma2')
        sigma2 = tf.nn.softplus(raw_sigma2)

        # The sqrt op is created before the noise op, as in the original
        # expression, so the graph op order is unchanged.
        std = tf.sqrt(sigma2)
        noise = tf.random_normal(tf.shape(mean))
        rv_single_draw = mean + std * noise

        self.split_labeled_unlabeled(mean, '{}_mu'.format(scope))
        self.split_labeled_unlabeled(sigma2, '{}_sigma2'.format(scope))
        self.split_labeled_unlabeled(rv_single_draw, '{}_sample'.format(scope))
        return rv_single_draw
def linear_mapping_stupid(inputs, out_dim, in_dim=None, dropout=1.0, var_scope_name="linear_mapping"):
    """Apply a learned affine map to the last axis of a rank-3 tensor.

    Args:
        inputs: Tensor whose static shape has three dimensions (asserted).
        out_dim: Size of the last axis of the result.
        in_dim: Unused.
        dropout: Scales the weight initialiser; the initial stddev is
            ``sqrt(dropout / inputs.shape[-1])``.
        var_scope_name: Variable scope holding ``linear_mapping_w`` and
            ``linear_mapping_b``.

    Returns:
        Tensor reshaped to ``[tf.shape(inputs)[0], -1, out_dim]``.
    """
    with tf.variable_scope(var_scope_name):
        print('name', tf.get_variable_scope().name)
        dynamic_shape = tf.shape(inputs)             # runtime shape, never None
        static_shape = inputs.get_shape().as_list()  # may contain None
        print('input_shape', static_shape)
        assert len(static_shape) == 3

        fan_in = static_shape[-1]
        # Collapse all leading axes so a single matmul can be used.
        flat = tf.reshape(inputs, [-1, dynamic_shape[-1]])

        weight_init = tf.random_normal_initializer(
            mean=0, stddev=tf.sqrt(dropout * 1.0 / fan_in))
        weights = tf.get_variable("linear_mapping_w", [fan_in, out_dim],
                                  initializer=weight_init)
        bias = tf.get_variable("linear_mapping_b", [out_dim],
                               initializer=tf.zeros_initializer())

        projected = tf.matmul(flat, weights) + bias
        print('xxxxx_params', static_shape, out_dim)

        # The dynamic leading size is used because the static one may be None.
        return tf.reshape(projected, [dynamic_shape[0], -1, out_dim])
def linear_mapping_weightnorm(inputs, out_dim, in_dim=None, dropout=1.0, var_scope_name="linear_mapping"):
    """Weight-normalised affine map over the last axis of a rank-3 tensor.

    Uses the parameterisation ``w = g * v / ||v||`` (Salimans & Kingma,
    2016), applied as ``(x @ V) * (g / ||V||) + b``.

    Args:
        inputs: Tensor whose static shape has three dimensions (asserted
            after the variables have been created).
        out_dim: Size of the last axis of the result.
        in_dim: Unused.
        dropout: Scales the initialiser of ``V``; the initial stddev is
            ``sqrt(dropout / inputs.shape[-1])``.
        var_scope_name: Variable scope holding ``V``, ``g`` and ``b``.

    Returns:
        Tensor of shape ``[tf.shape(inputs)[0], -1, out_dim]``.
    """
    with tf.variable_scope(var_scope_name):
        static_shape = inputs.get_shape().as_list()  # may contain None
        dynamic_shape = tf.shape(inputs)
        in_features = int(static_shape[-1])

        direction_init = tf.random_normal_initializer(
            mean=0, stddev=tf.sqrt(dropout * 1.0 / in_features))
        direction = tf.get_variable('V', shape=[in_features, out_dim],
                                    dtype=tf.float32,
                                    initializer=direction_init,
                                    trainable=True)
        # Column norms of the initial V; g starts at these values.
        initial_norm = tf.norm(direction.initialized_value(), axis=0)
        gain = tf.get_variable('g', dtype=tf.float32,
                               initializer=initial_norm, trainable=True)
        # The bias starts at zero.
        bias = tf.get_variable('b', shape=[out_dim], dtype=tf.float32,
                               initializer=tf.zeros_initializer(),
                               trainable=True)

        assert len(static_shape) == 3
        flat = tf.reshape(inputs, [-1, in_features])
        projected = tf.matmul(flat, direction)
        projected = tf.reshape(projected, [dynamic_shape[0], -1, out_dim])

        # g / ||V|| per output column, broadcast over the leading axes.
        scaler = tf.div(gain, tf.norm(direction, axis=0))
        return (tf.reshape(scaler, [1, out_dim]) * projected
                + tf.reshape(bias, [1, out_dim]))
def make_attention(target_embed, encoder_output, decoder_hidden, layer_idx):
    """Compute one dot-product attention layer over the encoder output.

    Args:
        target_embed: Target embeddings; the size of the last axis is the
            width the decoder state is projected to.
        encoder_output: Object exposing ``outputs`` (used to score) and
            ``attention_values`` (used to build the context).
        decoder_hidden: Decoder hidden states; the result is projected back
            to the size of their last axis.
        layer_idx: Index appended to the ``attention_layer_`` scope name.

    Returns:
        The attention context after the ``linear_mapping_att_out``
        projection.
    """
    with tf.variable_scope("attention_layer_" + str(layer_idx)):
        embed_size = target_embed.get_shape().as_list()[-1]

        # Project the decoder state to the embedding width
        # (original author's shape note: M*N1*k1 --> M*N1*k).
        query = linear_mapping_weightnorm(
            decoder_hidden, embed_size,
            var_scope_name="linear_mapping_att_query")
        combined = query + target_embed
        dec_rep = combined * tf.sqrt(0.5)

        keys = encoder_output.outputs
        values = encoder_output.attention_values

        # Scores against the encoder outputs, softmax-normalised
        # (author's shape note: M*N1*K x M*N2*K --> M*N1*N2).
        att_score = tf.nn.softmax(tf.matmul(dec_rep, keys, transpose_b=True))

        length = tf.cast(tf.shape(values), tf.float32)
        # Multiplying by n and then by sqrt(1/n) scales the context by
        # sqrt(n), where n is the size of axis 1 of the attention values.
        context = tf.matmul(att_score, values)
        context = context * length[1]
        context = context * tf.sqrt(1.0 / length[1])

        out_width = decoder_hidden.get_shape().as_list()[-1]
        return linear_mapping_weightnorm(
            context, out_width, var_scope_name="linear_mapping_att_out")
def __init__(self, input_dim, hidden_dim, epoch=250, learning_rate = 0.001):
    """Build a single-hidden-layer autoencoder graph.

    Args:
        input_dim: Width of the input placeholder and of the reconstruction.
        hidden_dim: Width of the tanh-activated hidden encoding.
        epoch: Stored on the instance as ``self.epoch``.
        learning_rate: Learning rate passed to the Adam optimiser.

    Sets ``self.x`` (placeholder), ``self.encoded``, ``self.decoded``,
    ``self.loss`` (root-mean-square reconstruction error),
    ``self.train_op`` and ``self.saver``.
    """
    self.epoch = epoch
    self.learning_rate = learning_rate

    x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim])

    with tf.name_scope("encode"):
        weights = tf.Variable(tf.random_normal([input_dim, hidden_dim], dtype=tf.float32), name="weights")
        biases = tf.Variable(tf.zeros([hidden_dim]), name="biases")
        encoded = tf.nn.tanh(tf.matmul(x, weights) + biases)

    with tf.name_scope("decode"):
        weights = tf.Variable(tf.random_normal([hidden_dim, input_dim], dtype=tf.float32), name="weights")
        biases = tf.Variable(tf.zeros([input_dim]), name="biases")
        # Linear output layer: no activation on the reconstruction.
        decoded = tf.matmul(encoded, weights) + biases

    self.x = x
    self.encoded = encoded
    self.decoded = decoded

    # The `-` operator replaces tf.sub, which was removed in TensorFlow 1.0;
    # the operator form works on every version.
    self.loss = tf.sqrt(tf.reduce_mean(tf.square(self.x - self.decoded)))
    self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
    self.saver = tf.train.Saver()
def _norm(input, is_train, reuse=True, norm=None):
    """Optionally apply instance or batch normalization to ``input``.

    Args:
        input: Tensor to normalize. It is returned untouched when ``norm``
            is ``None``.
        is_train: Forwarded to batch normalization as ``is_training``.
        reuse: ``reuse`` flag of the variable scope that is opened.
        norm: One of ``'instance'``, ``'batch'`` or ``None`` (asserted).

    Returns:
        The normalized tensor, or ``input`` itself when ``norm`` is ``None``.
    """
    assert norm in ['instance', 'batch', None]

    if norm == 'instance':
        with tf.variable_scope('instance_norm', reuse=reuse):
            eps = 1e-5
            # Moments over axes 1 and 2 (presumably the spatial axes of an
            # NHWC tensor -- confirm against the callers).
            mean, variance = tf.nn.moments(input, [1, 2], keep_dims=True)
            centered = input - mean
            # eps is added to the standard deviation, not to the variance.
            # No learned scale or shift is applied.
            return centered / (tf.sqrt(variance) + eps)

    if norm == 'batch':
        with tf.variable_scope('batch_norm', reuse=reuse):
            return tf.contrib.layers.batch_norm(input,
                                                decay=0.99,
                                                center=True,
                                                scale=True,
                                                is_training=is_train,
                                                updates_collections=None)

    return input