The following 14 code examples, extracted from open-source Python projects, illustrate how to use chainer.functions.sqrt().
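chainer.functions.sqrt() computes the elementwise square root of a Variable and is differentiable, so it can sit inside a loss that is later backpropagated. A minimal, self-contained usage sketch before the project examples (the array values are illustrative only):

import numpy as np
import chainer.functions as F
from chainer import Variable

# Elementwise square root of a float32 Variable
x = Variable(np.array([[1.0, 4.0], [9.0, 16.0]], dtype=np.float32))
y = F.sqrt(x)  # y.data == [[1., 2.], [3., 4.]]

# A recurring pattern in the examples below: report RMSE derived from an MSE loss
pred = Variable(np.array([0.5, 1.5], dtype=np.float32))
target = np.array([1.0, 1.0], dtype=np.float32)
rmse = F.sqrt(F.mean_squared_error(pred, target))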
def __call__(self, loc, val, y, train=True):
    bs = val.data.shape[0]
    pred, kld0, kld1, kld2 = self.forward(loc, val, y, train=train)

    # Compute MSE loss
    mse = F.mean_squared_error(pred, y)
    rmse = F.sqrt(mse)  # Only used for reporting

    # Now compute the total KLD loss
    kldt = kld0 * self.lambda0 + kld1 * self.lambda1 + kld2 * self.lambda2

    # Total loss is MSE plus regularization losses
    loss = mse + kldt * (1.0 / self.total_nobs)

    # Log the errors
    logs = {'loss': loss, 'rmse': rmse, 'kld0': kld0, 'kld1': kld1,
            'kld2': kld2, 'kldt': kldt, 'bias': F.sum(self.bias_mu.b)}
    reporter.report(logs, self)
    return loss
def __call__(self, loc, val, y, train=True):
    bs = val.data.shape[0]
    ret = self.forward(loc, val, y, train=train)
    pred, kld0, kld1, kldg, kldi, hypg, hypi = ret

    # Compute MSE loss
    mse = F.mean_squared_error(pred, y)
    rmse = F.sqrt(mse)  # Only used for reporting

    # Now compute the total KLD loss
    kldt = kld0 * self.lambda0 + kld1 * self.lambda1
    kldt += kldg + kldi + hypg + hypi

    # Total loss is MSE plus regularization losses
    loss = mse + kldt * (1.0 / self.total_nobs)

    # Log the errors
    logs = {'loss': loss, 'rmse': rmse, 'kld0': kld0, 'kld1': kld1,
            'kldg': kldg, 'kldi': kldi, 'hypg': hypg, 'hypi': hypi,
            'hypglv': F.sum(self.hyper_feat_lv_vec.b),
            'hypilv': F.sum(self.hyper_feat_delta_lv.b),
            'kldt': kldt, 'bias': F.sum(self.bias_mu.b)}
    reporter.report(logs, self)
    return loss
def instance_norm(self, x, gamma=None, beta=None):
    mean = F.mean(x, axis=-1)
    mean = F.mean(mean, axis=-1)
    mean = F.broadcast_to(mean[Ellipsis, None, None], x.shape)
    var = F.squared_difference(x, mean)
    std = F.sqrt(var + 1e-5)
    x_hat = (x - mean) / std
    if gamma is not None:
        gamma = F.broadcast_to(gamma[None, Ellipsis, None, None], x.shape)
        beta = F.broadcast_to(beta[None, Ellipsis, None, None], x.shape)
        return gamma * x_hat + beta
    else:
        return x_hat
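The example above applies F.sqrt to the per-element squared difference plus a small epsilon. A textbook instance normalization instead averages the squared differences over the spatial axes before taking the square root. A minimal sketch of that variant, assuming x is an (N, C, H, W) Variable; instance_norm_ref is a hypothetical name, not part of the project above:

import chainer.functions as F

def instance_norm_ref(x, eps=1e-5):
    n, c, h, w = x.shape
    # per-instance, per-channel mean over the spatial axes
    mean = F.sum(x, axis=(2, 3)) / (h * w)
    mean = F.broadcast_to(F.reshape(mean, (n, c, 1, 1)), x.shape)
    # per-instance, per-channel variance, reduced before the square root
    var = F.sum(F.squared_difference(x, mean), axis=(2, 3)) / (h * w)
    var = F.broadcast_to(F.reshape(var, (n, c, 1, 1)), x.shape)
    return (x - mean) / F.sqrt(var + eps)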
def lifted_struct_loss(f_a, f_p, alpha=1.0):
    """Lifted struct loss function.

    Args:
        f_a (~chainer.Variable): Feature vectors as anchor examples.
            All examples must be of different classes from each other.
        f_p (~chainer.Variable): Positive examples corresponding to f_a.
            Each example must be of the same class as the corresponding
            example in f_a.
        alpha (~float): The margin parameter.

    Returns:
        ~chainer.Variable: Loss value.

    See: `Deep Metric Learning via Lifted Structured Feature Embedding \
        <http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/\
        Song_Deep_Metric_Learning_CVPR_2016_paper.pdf>`_
    """
    assert f_a.shape == f_p.shape, 'f_a and f_p must have same shape.'
    n = 2 * f_a.shape[0]  # use shape[0] since len(Variable) returns its size
    f = F.vstack((f_a, f_p))
    D_sq = squared_distance_matrix(f)

    pairs_p = np.arange(n).reshape(2, -1)  # indexes of positive pairs
    row = []
    col = []
    for i, j in pairs_p.T:
        row.append([i] * (n - 2) + [j] * (n - 2))
        col.append(np.tile(np.delete(np.arange(n), (i, j)), 2))
    row = np.ravel(row)
    col = np.ravel(col)
    pairs_n = np.vstack((row, col))

    distances_p = F.sqrt(D_sq[pairs_p[0], pairs_p[1]])
    distances_n = F.sqrt(D_sq[pairs_n[0], pairs_n[1]])
    distances_n = distances_n.reshape((n // 2, -1))

    loss_ij = F.logsumexp(alpha - distances_n, axis=1) + distances_p
    return F.sum(F.relu(loss_ij) ** 2) / n
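The lifted-struct example relies on a squared_distance_matrix helper that is not shown. One possible implementation, sketched under the assumption that f is an (n, d) Variable of embeddings and that the helper returns an (n, n) matrix of pairwise squared Euclidean distances:

import chainer.functions as F

def squared_distance_matrix(f):
    # f: (n, d) Variable of embeddings
    n = f.shape[0]
    sq = F.sum(f * f, axis=1)                       # (n,) squared norms
    dots = F.matmul(f, f, transb=True)              # (n, n) pairwise dot products
    sq_row = F.broadcast_to(F.expand_dims(sq, 0), (n, n))
    sq_col = F.broadcast_to(F.expand_dims(sq, 1), (n, n))
    d_sq = sq_col + sq_row - 2.0 * dots
    return F.relu(d_sq)  # clamp tiny negative values caused by rounding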
def __init__(self, n_features=None, n_dim=8, lossfun=F.mean_squared_error,
             lambda0=1, lambda1=1, lambda2=1, init_bias_mu=0.0,
             init_bias_lv=0.0, intx_term=True, total_nobs=1):
    self.n_dim = n_dim
    self.n_features = n_features
    self.lossfun = lossfun
    self.lambda0 = lambda0
    self.lambda1 = lambda1
    self.lambda2 = lambda2
    self.intx_term = intx_term
    self.total_nobs = total_nobs

    # In contrast to the FM model, the slopes and latent vectors
    # will have means (mu) and log variances (lv) for each component.
    super(VFM, self).__init__(bias_mu=L.Bias(shape=(1,)),
                              bias_lv=L.Bias(shape=(1,)),
                              slop_mu=L.Bias(shape=(1, 1)),
                              slop_lv=L.Bias(shape=(1, 1)),
                              slop_delta_mu=L.EmbedID(n_features, 1,
                                                      ignore_label=-1),
                              slop_delta_lv=L.EmbedID(n_features, 1,
                                                      ignore_label=-1),
                              feat_mu_vec=L.Bias(shape=(1, 1, n_dim)),
                              feat_lv_vec=L.Bias(shape=(1, 1, n_dim)),
                              feat_delta_mu=L.EmbedID(n_features, n_dim,
                                                      ignore_label=-1),
                              feat_delta_lv=L.EmbedID(n_features, n_dim,
                                                      ignore_label=-1))

    # Xavier initialize weights
    c = np.sqrt(n_features * n_dim) * 1e3
    d = np.sqrt(n_features) * 1e3
    self.feat_delta_mu.W.data[...] = np.random.randn(n_features, n_dim) / c
    self.feat_delta_lv.W.data[...] = np.random.randn(n_features, n_dim) / c
    self.slop_delta_mu.W.data[...] = np.random.randn(n_features, 1) / d
    self.slop_delta_lv.W.data[...] = np.random.randn(n_features, 1) / d
    self.bias_mu.b.data[...] *= 0.0
    self.bias_mu.b.data[...] += init_bias_mu
    self.bias_lv.b.data[...] *= 0.0
    self.bias_lv.b.data[...] += init_bias_lv
def __init__(self, n_features=None, n_dim=8, lossfun=F.mean_squared_error,
             lambda0=5e-3, lambda1=5e-3, lambda2=5e-3, init_bias=0.0,
             intx_term=True, total_nobs=1):
    self.n_dim = n_dim
    self.n_features = n_features
    self.lossfun = lossfun
    self.lambda0 = lambda0
    self.lambda1 = lambda1
    self.lambda2 = lambda2
    self.intx_term = intx_term
    self.total_nobs = total_nobs

    # These are all the learned weights corresponding
    # to the overall bias, slope per feature, and latent
    # interaction vector per feature
    super(FM, self).__init__(bias=L.Bias(shape=(1,)),
                             slope=L.EmbedID(n_features, 1),
                             latent=L.EmbedID(n_features, n_dim))

    # Xavier initialize weights
    c = np.sqrt(n_features * n_dim)
    self.latent.W.data[...] = np.random.randn(n_features, n_dim) / c
    d = np.sqrt(n_features)
    self.slope.W.data[...] = np.random.randn(n_features, 1) / d
    self.bias.b.data[...] *= 0.0
    self.bias.b.data[...] += init_bias
def average_loss(self, h, a, t):
    ## print F.reshape(t, (-1, 1)).data
    ## print (h - F.reshape(t, (-1, 1))).data
    self.loss = F.sum(abs(h - F.reshape(t, (-1, 1))))
    ## self.loss = F.sqrt(F.sum(F.square(h - F.reshape(t, (-1, 1)))))
    self.loss /= self.n_patches

    if self.n_images > 1:
        h = F.split_axis(h, self.n_images, 0)
        a = F.split_axis(a, self.n_images, 0)
    else:
        h, a = [h], [a]

    self.y = h
    self.a = a
def update_core(self):
    xp = self.gen.xp
    self._iter += 1

    opt_d = self.get_optimizer('dis')
    for i in range(self._dis_iter):
        d_fake = self.get_fake_image_batch()
        d_real = self.get_real_image_batch()

        y_fake = self.dis(Variable(d_fake), test=False)
        y_real = self.dis(Variable(d_real), test=False)

        w1 = F.average(y_fake - y_real)
        loss_dis = w1

        if self._mode == 'gp':
            eta = np.random.rand()
            c = (d_real * eta + (1.0 - eta) * d_fake).astype('f')
            y = self.dis(Variable(c), test=False, retain_forward=True)
            g = xp.ones_like(y.data)
            grad_c = self.dis.differentiable_backward(Variable(g))
            grad_c_l2 = F.sqrt(F.sum(grad_c ** 2, axis=(1, 2, 3)))
            loss_gp = loss_l2(grad_c_l2, 1.0)
            loss_dis += self._lambda_gp * loss_gp

        opt_d.zero_grads()
        loss_dis.backward()
        opt_d.update()

        if self._mode == 'clip':
            self.dis.clip()

    chainer.report({'loss': loss_dis, 'loss_w1': w1}, self.dis)

    z_in = self.get_latent_code_batch()
    x_out = self.gen(Variable(z_in), test=False)

    opt_g = self.get_optimizer('gen')
    y_fake = self.dis(x_out, test=False)
    loss_gen = -F.average(y_fake)
    chainer.report({'loss': loss_gen}, self.gen)
    opt_g.zero_grads()
    loss_gen.backward()
    opt_g.update()
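This updater and the three that follow share the same gradient-penalty pattern: F.sqrt over a per-sample sum of squared gradients gives the L2 norm, which is then pulled toward 1. A minimal, self-contained sketch of just that step (gradient_penalty is a hypothetical helper; loss_l2 in the examples is project-specific and is replaced here with an explicit mean of squared deviations):

import chainer.functions as F

def gradient_penalty(grad, target=1.0):
    # grad: (N, C, H, W) Variable holding the gradient of the critic output
    # with respect to its input (e.g. from differentiable_backward above).
    grad_l2 = F.sqrt(F.sum(grad * grad, axis=(1, 2, 3)))  # per-sample L2 norm
    # Penalize the deviation of each norm from the target value (usually 1.0).
    return F.average((grad_l2 - target) ** 2)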
def update_core(self):
    xp = self.gen.xp
    self._iter += 1

    opt_g = self.get_optimizer('gen')
    opt_d = self.get_optimizer('dis')

    data_z0 = self.get_latent_code_batch()
    x_fake0 = self.gen(Variable(data_z0))
    data_z1 = self.get_latent_code_batch()
    x_fake1 = self.gen(Variable(data_z1))
    data_x = self.get_real_image_batch()
    x_real = Variable(data_x)

    eta = np.random.rand()
    x_inter = Variable((data_x * eta + (1.0 - eta) * x_fake0.data).astype('f'))

    dis_x_fake0 = self.dis(x_fake0)
    dis_x_fake1 = self.dis(x_fake1)
    dis_x_real = self.dis(x_real)

    loss_gen = loss_l2_norm(dis_x_fake0, dis_x_real) + \
        loss_l2_norm(dis_x_fake1, dis_x_real) - \
        loss_l2_norm(dis_x_fake0, dis_x_fake1)
    #print(loss_gen.data)
    chainer.report({'loss': loss_gen}, self.gen)
    opt_g.zero_grads()
    loss_gen.backward()
    opt_g.update()

    x_fake0.unchain_backward()
    x_fake1.unchain_backward()

    loss_surrogate = loss_l2_norm(dis_x_fake0, dis_x_fake1) - \
        loss_l2_norm(dis_x_fake0, 0.0) + \
        loss_l2_norm(dis_x_real, 0.0) - \
        loss_l2_norm(dis_x_real, dis_x_fake1)

    dis_x_inter = self.dis(x_inter, retain_forward=True)
    g = xp.ones_like(dis_x_inter.data)

    t0 = dis_x_inter.data - dis_x_fake1.data
    t0_norm = xp.sum(t0 ** 2, axis=(1)) ** 0.5
    t1_norm = xp.sum(dis_x_inter.data ** 2, axis=(1)) ** 0.5
    t_g = ((t0.transpose() / t0_norm) -
           (dis_x_inter.data.transpose()) / t1_norm).transpose()
    g = g * t_g

    grad = self.dis.differentiable_backward(Variable(g))
    grad_l2 = F.sqrt(F.sum(grad ** 2, axis=(1, 2, 3)))
    loss_gp = self._lambda_gp * loss_l2(grad_l2, 1.0)

    loss_dis = loss_surrogate + loss_gp

    opt_d.zero_grads()
    loss_dis.backward()
    opt_d.update()

    chainer.report({'loss': loss_dis, 'loss_gp': loss_gp}, self.dis)
def update_core(self):
    xp = self.gen.xp
    self._iter += 1

    opt_g = self.get_optimizer('gen')
    opt_d = self.get_optimizer('dis')

    data_z = self.get_latent_code_batch()
    data_x = self.get_real_image_batch()

    x_fake = self.gen(Variable(data_z))
    dis_fake = self.dis(x_fake)

    loss_gen = loss_func_dcgan_dis_real(dis_fake)
    chainer.report({'loss': loss_gen}, self.gen)
    opt_g.zero_grads()
    loss_gen.backward()
    opt_g.update()

    x_fake.unchain_backward()

    std_data_x = xp.std(data_x, axis=0, keepdims=True)
    rnd_x = xp.random.uniform(0, 1, data_x.shape).astype("f")
    x_perturbed = Variable(data_x + 0.5 * rnd_x * std_data_x)

    x_real = Variable(data_x)
    dis_real = self.dis(x_real)
    dis_perturbed = self.dis(x_perturbed, retain_forward=True)

    g = Variable(xp.ones_like(dis_perturbed.data))
    grad = self.dis.differentiable_backward(g)
    grad_l2 = F.sqrt(F.sum(grad ** 2, axis=(1, 2, 3)))
    loss_gp = self._lambda_gp * loss_l2(grad_l2, 1.0)

    loss_dis = loss_func_dcgan_dis_real(dis_real) + \
        loss_func_dcgan_dis_fake(dis_fake) + \
        loss_gp

    opt_d.zero_grads()
    loss_dis.backward()
    opt_d.update()

    chainer.report({'loss': loss_dis, 'loss_gp': loss_gp}, self.dis)
def update_core(self):
    xp = self.gen.xp
    self._iter += 1

    opt_g = self.get_optimizer('gen')
    opt_d = self.get_optimizer('dis')

    data_z = self.get_latent_code_batch()
    data_tag = self.get_fake_tag_batch()
    data_x, data_real_tag = self.get_real_image_batch()

    x_fake = self.gen(F.concat([Variable(data_z), Variable(data_tag)]))
    dis_fake, dis_g_class = self.dis(x_fake)

    data_tag[data_tag < 0] = 0.0
    loss_g_class = loss_sigmoid_cross_entropy_with_logits(dis_g_class, data_tag)
    #print(loss_g_class.data)
    loss_gen = self._lambda_adv * loss_func_dcgan_dis_real(dis_fake) + loss_g_class
    chainer.report({'loss': loss_gen, 'loss_c': loss_g_class}, self.gen)
    opt_g.zero_grads()
    loss_gen.backward()
    opt_g.update()

    x_fake.unchain_backward()

    std_data_x = xp.std(data_x, axis=0, keepdims=True)
    rnd_x = xp.random.uniform(0, 1, data_x.shape).astype("f")
    x_perturbed = Variable(data_x + 0.5 * rnd_x * std_data_x)

    x_real = Variable(data_x)
    dis_real, dis_d_class = self.dis(x_real)
    dis_perturbed, _ = self.dis(x_perturbed, retain_forward=True)

    g = Variable(xp.ones_like(dis_perturbed.data))
    grad = self.dis.differentiable_backward(g)
    grad_l2 = F.sqrt(F.sum(grad ** 2, axis=(1, 2, 3)))
    loss_gp = self._lambda_gp * loss_l2(grad_l2, 1.0)

    loss_d_class = loss_sigmoid_cross_entropy_with_logits(dis_d_class,
                                                          data_real_tag)

    loss_dis = self._lambda_adv * (loss_func_dcgan_dis_real(dis_real) +
                                   loss_func_dcgan_dis_fake(dis_fake)) + \
        loss_d_class + \
        loss_gp

    opt_d.zero_grads()
    loss_dis.backward()
    opt_d.update()

    chainer.report({'loss': loss_dis, 'loss_gp': loss_gp,
                    'loss_c': loss_d_class}, self.dis)
def compute_distance_of_cluster_heads(self):
    # list all possible combinations of two cluster heads
    num_combination = self.nCr(self.ndim_y, 2)

    # a_labels
    # [0, 1, 0, 0]
    # [0, 0, 1, 0]
    # [0, 0, 1, 0]
    # [0, 0, 0, 1]
    # [0, 0, 0, 1]
    # [0, 0, 0, 1]
    a_labels = np.zeros((num_combination, self.ndim_y), dtype=np.float32)
    for i in range(1, self.ndim_y):
        for n in range(i):
            j = int(0.5 * i * (i - 1) + n)
            a_labels[j, i] = 1

    # b_labels
    # [1, 0, 0, 0]
    # [1, 0, 0, 0]
    # [0, 1, 0, 0]
    # [1, 0, 0, 0]
    # [0, 1, 0, 0]
    # [0, 0, 1, 0]
    b_labels = np.zeros((num_combination, self.ndim_y), dtype=np.float32)
    for i in range(1, self.ndim_y):
        for n in range(i):
            j = int(0.5 * i * (i - 1) + n)
            b_labels[j, n] = 1

    xp = self.xp
    if xp is not np:
        a_labels = cuda.to_gpu(a_labels)
        b_labels = cuda.to_gpu(b_labels)

    a_vector = self.cluster_head(a_labels)
    b_vector = self.cluster_head(b_labels)
    distance = functions.sqrt(functions.sum((a_vector - b_vector) ** 2, axis=1))

    # clip
    distance = functions.clip(distance, 0.0,
                              float(self.cluster_head_distance_threshold))

    return distance
def __init__(self, n_features=None, n_dim=8, lossfun=F.mean_squared_error,
             lambda0=1, lambda1=1, lambda2=1, init_bias_mu=0.0,
             init_bias_lv=0.0, intx_term=True, total_nobs=1):
    self.n_dim = n_dim
    self.n_features = n_features
    self.lossfun = lossfun
    self.lambda0 = lambda0
    self.lambda1 = lambda1
    self.lambda2 = lambda2
    self.intx_term = intx_term
    self.total_nobs = total_nobs

    # In contrast to the FM model, the slopes and latent vectors
    # will have means (mu) and log variances (lv) for each component.
    ones_3d = (1, 1, 1)
    super(AutoVFM, self).__init__(bias_mu=L.Bias(shape=(1,)),
                                  bias_lv=L.Bias(shape=(1,)),
                                  slop_mu=L.Bias(shape=(1, 1)),
                                  slop_lv=L.Bias(shape=(1, 1)),
                                  slop_delta_mu=L.EmbedID(n_features, 1,
                                                          ignore_label=-1),
                                  slop_delta_lv=L.EmbedID(n_features, 1,
                                                          ignore_label=-1),
                                  feat_mu_vec=L.Bias(shape=(1, 1, n_dim)),
                                  feat_lv_vec=L.Bias(shape=(1, 1, n_dim)),
                                  hyper_feat_lv_vec=L.Bias(shape=ones_3d),
                                  feat_delta_mu=L.EmbedID(n_features, n_dim,
                                                          ignore_label=-1),
                                  feat_delta_lv=L.EmbedID(n_features, n_dim,
                                                          ignore_label=-1),
                                  hyper_feat_delta_lv=L.Bias(shape=ones_3d))

    # Xavier initialize weights
    c = np.sqrt(n_features * n_dim) * 1e3
    d = np.sqrt(n_features) * 1e3
    self.feat_delta_mu.W.data[...] = np.random.randn(n_features, n_dim) / c
    self.feat_delta_lv.W.data[...] = np.random.randn(n_features, n_dim) / c
    self.slop_delta_mu.W.data[...] = np.random.randn(n_features, 1) / d
    self.slop_delta_lv.W.data[...] = np.random.randn(n_features, 1) / d
    self.bias_mu.b.data[...] *= 0.0
    self.bias_mu.b.data[...] += init_bias_mu
    self.bias_lv.b.data[...] *= 0.0
    self.bias_lv.b.data[...] += init_bias_lv