The following 50 code examples, extracted from open source Python projects, illustrate how to use torch.mean().
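Before the project-sourced examples, here is a minimal sketch of the basic call patterns with a recent PyTorch (the tensor below is a made-up illustration):

import torch

x = torch.rand(4, 5)

# Mean over every element -> a single-value tensor
overall = torch.mean(x)

# Mean along one dimension; keepdim=True keeps the reduced axis with size 1
row_means = torch.mean(x, dim=1)                    # shape: (4,)
row_means_kd = torch.mean(x, dim=1, keepdim=True)   # shape: (4, 1)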
def reorder_bpr_loss(re_x, his_x, dynamic_user, item_embedding, config):
    '''
    loss function for reorder prediction
    re_x padded reorder baskets
    his_x padded history bought items
    '''
    nll = 0
    ub_seqs = []
    for u, h, du in zip(re_x, his_x, dynamic_user):
        du_p_product = torch.mm(du, item_embedding.t())  # shape: max_len, num_item
        nll_u = []  # nll for user
        for t, basket_t in enumerate(u):
            if basket_t[0] != 0:
                pos_idx = torch.cuda.LongTensor(basket_t) if config.cuda else torch.LongTensor(basket_t)
                # Sample negative products
                neg = [random.choice(h[t]) for _ in range(len(basket_t))]  # replacement
                # neg = random.sample(range(1, config.num_product), len(basket_t))  # without replacement
                neg_idx = torch.cuda.LongTensor(neg) if config.cuda else torch.LongTensor(neg)
                # Score p(u, t, v > v')
                score = du_p_product[t - 1][pos_idx] - du_p_product[t - 1][neg_idx]
                # Average negative log likelihood for basket_t
                nll_u.append(-torch.mean(torch.nn.LogSigmoid()(score)))
        nll += torch.mean(torch.cat(nll_u))
    return nll
def _train_nn(self):
    # neural network part
    self.optimizer.zero_grad()
    batch_state_before, batch_action, batch_reward, batch_state_after, batch_done = self.get_batch()
    target = self.agent.estimate_value(batch_reward, batch_state_after, batch_done)
    q_value = self.agent.q_value(batch_state_before, batch_action)
    loss = self.agent.net.loss(q_value, target)
    if self._step % self.gradient_update_freq == 0:
        loss.backward()
        self.optimizer.step()
    if self._step % self.log_freq_by_step == 0:
        self._writer.add_scalar("epsilon", self.agent.epsilon, self._step)
        self._writer.add_scalar("q_net-target", (q_value.data - target.data).mean(), self._step)
        self._writer.add_scalar("loss", loss.data.cpu()[0], self._step)
    return loss.data[0]
def bn_hat_z_layers(self, hat_z_layers, z_pre_layers):
    # TODO: Calculate batchnorm using GPU Tensors.
    assert len(hat_z_layers) == len(z_pre_layers)
    hat_z_layers_normalized = []
    for i, (hat_z, z_pre) in enumerate(zip(hat_z_layers, z_pre_layers)):
        if self.use_cuda:
            ones = Variable(torch.ones(z_pre.size()[0], 1).cuda())
        else:
            ones = Variable(torch.ones(z_pre.size()[0], 1))
        mean = torch.mean(z_pre, 0)
        noise_var = np.random.normal(loc=0.0, scale=1 - 1e-10, size=z_pre.size())
        if self.use_cuda:
            var = np.var(z_pre.data.cpu().numpy() + noise_var, axis=0).reshape(1, z_pre.size()[1])
        else:
            var = np.var(z_pre.data.numpy() + noise_var, axis=0).reshape(1, z_pre.size()[1])
        var = Variable(torch.FloatTensor(var))
        if self.use_cuda:
            hat_z = hat_z.cpu()
            ones = ones.cpu()
            mean = mean.cpu()
        hat_z_normalized = torch.div(hat_z - ones.mm(mean), ones.mm(torch.sqrt(var + 1e-10)))
        if self.use_cuda:
            hat_z_normalized = hat_z_normalized.cuda()
        hat_z_layers_normalized.append(hat_z_normalized)
    return hat_z_layers_normalized
def train_ae(self, train_X, optimizer, epochs, verbose=True):
    N = train_X.data.size()[0]
    num_batches = N // self.batch_size  # integer division so range() receives an int
    for e in range(epochs):
        agg_cost = 0.
        for k in range(num_batches):
            start, end = k * self.batch_size, (k + 1) * self.batch_size
            bX = train_X[start:end]
            optimizer.zero_grad()
            Z = self.forward(bX)
            Z = self.decode(Z)
            loss = -torch.sum(bX * torch.log(Z) + (1.0 - bX) * torch.log(1.0 - Z), 1)
            cost = torch.mean(loss)
            cost.backward()
            optimizer.step()
            agg_cost += cost
        agg_cost /= num_batches
        if verbose:
            print("Epoch:", e, "cost:", agg_cost.data[0])
def test_Dropout(self):
    p = 0.2
    input = torch.Tensor(1000).fill_(1-p)
    module = nn.Dropout(p)
    output = module.forward(input)
    self.assertLess(abs(output.mean() - (1-p)), 0.05)
    gradInput = module.backward(input, input)
    self.assertLess(abs(gradInput.mean() - (1-p)), 0.05)

    module = nn.Dropout(p, True)
    output = module.forward(input.clone())
    self.assertLess(abs(output.mean() - (1-p)), 0.05)
    gradInput = module.backward(input.clone(), input.clone())
    self.assertLess(abs(gradInput.mean() - (1-p)), 0.05)

    # Check that these don't raise errors
    module.__repr__()
    str(module)
def test_SpatialDropout(self):
    p = 0.2
    b = random.randint(1, 5)
    w = random.randint(1, 5)
    h = random.randint(1, 5)
    nfeats = 1000
    input = torch.Tensor(b, nfeats, w, h).fill_(1)
    module = nn.SpatialDropout(p)
    module.training()
    output = module.forward(input)
    self.assertLess(abs(output.mean() - (1-p)), 0.05)
    gradInput = module.backward(input, input)
    self.assertLess(abs(gradInput.mean() - (1-p)), 0.05)

    # Check that these don't raise errors
    module.__repr__()
    str(module)
def test_VolumetricDropout(self):
    p = 0.2
    bsz = random.randint(1, 5)
    t = random.randint(1, 5)
    w = random.randint(1, 5)
    h = random.randint(1, 5)
    nfeats = 1000
    input = torch.Tensor(bsz, nfeats, t, w, h).fill_(1)
    module = nn.VolumetricDropout(p)
    module.training()
    output = module.forward(input)
    self.assertLess(abs(output.mean() - (1-p)), 0.05)
    gradInput = module.backward(input, input)
    self.assertLess(abs(gradInput.mean() - (1-p)), 0.05)

    # Check that these don't raise errors
    module.__repr__()
    str(module)
def _gaussian(self, enc_output):
    def latent_loss(mu, sigma):
        pow_mu = mu * mu
        pow_sigma = sigma * sigma
        return 0.5 * torch.mean(pow_mu + pow_sigma - torch.log(pow_sigma) - 1)

    mu = self._enc_mu(enc_output)
    sigma = torch.exp(.5 * self._enc_log_sigma(enc_output))
    self.latent_loss = latent_loss(mu, sigma)

    weight = next(self.parameters()).data
    std_z = Variable(weight.new(*sigma.size()), requires_grad=False)
    std_z.data.copy_(torch.from_numpy(
        np.random.normal(size=sigma.size())))

    return mu + sigma * std_z
def _layer_BatchNorm(self):
    self.add_body(0, """
    @staticmethod
    def __batch_normalization(dim, name, **kwargs):
        if dim == 1:
            layer = nn.BatchNorm1d(**kwargs)
        elif dim == 2:
            layer = nn.BatchNorm2d(**kwargs)
        elif dim == 3:
            layer = nn.BatchNorm3d(**kwargs)
        else:
            raise NotImplementedError()

        if 'scale' in __weights_dict[name]:
            layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['scale']))
        else:
            layer.weight.data.fill_(1)

        if 'bias' in __weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias']))
        else:
            layer.bias.data.fill_(0)

        layer.state_dict()['running_mean'].copy_(torch.from_numpy(__weights_dict[name]['mean']))
        layer.state_dict()['running_var'].copy_(torch.from_numpy(__weights_dict[name]['var']))
        return layer""")
def test_Dropout(self):
    p = 0.2
    input = torch.Tensor(1000).fill_(1 - p)
    module = nn.Dropout(p)
    output = module.forward(input)
    self.assertLess(abs(output.mean() - (1 - p)), 0.05)
    gradInput = module.backward(input, input)
    self.assertLess(abs(gradInput.mean() - (1 - p)), 0.05)

    module = nn.Dropout(p, True)
    output = module.forward(input.clone())
    self.assertLess(abs(output.mean() - (1 - p)), 0.05)
    gradInput = module.backward(input.clone(), input.clone())
    self.assertLess(abs(gradInput.mean() - (1 - p)), 0.05)

    # Check that these don't raise errors
    module.__repr__()
    str(module)
def test_SpatialDropout(self):
    p = 0.2
    b = random.randint(1, 5)
    w = random.randint(1, 5)
    h = random.randint(1, 5)
    nfeats = 1000
    input = torch.Tensor(b, nfeats, w, h).fill_(1)
    module = nn.SpatialDropout(p)
    module.training()
    output = module.forward(input)
    self.assertLess(abs(output.mean() - (1 - p)), 0.05)
    gradInput = module.backward(input, input)
    self.assertLess(abs(gradInput.mean() - (1 - p)), 0.05)

    # Check that these don't raise errors
    module.__repr__()
    str(module)
def test_VolumetricDropout(self):
    p = 0.2
    bsz = random.randint(1, 5)
    t = random.randint(1, 5)
    w = random.randint(1, 5)
    h = random.randint(1, 5)
    nfeats = 1000
    input = torch.Tensor(bsz, nfeats, t, w, h).fill_(1)
    module = nn.VolumetricDropout(p)
    module.training()
    output = module.forward(input)
    self.assertLess(abs(output.mean() - (1 - p)), 0.05)
    gradInput = module.backward(input, input)
    self.assertLess(abs(gradInput.mean() - (1 - p)), 0.05)

    # Check that these don't raise errors
    module.__repr__()
    str(module)
def forward(self, title, content):
    title_em = self.encoder(title)
    content_em = self.encoder(content)
    title_size = title_em.size()
    content_size = content_em.size()

    title_2 = self.pre1(title_em.contiguous().view(-1, 256)).view(title_size[0], title_size[1], -1)
    content_2 = self.pre2(content_em.contiguous().view(-1, 256)).view(content_size[0], content_size[1], -1)

    title_ = t.mean(title_2, dim=1)
    content_ = t.mean(content_2, dim=1)
    inputs = t.cat((title_.squeeze(), content_.squeeze()), 1)
    out = self.fc(inputs)
    # content_out = self.content_fc(content.view(content.size(0), -1))
    # out = torch.cat((title_out, content_out), 1)
    # out = self.fc(out)
    return out
def forward(self, title, content):
    title_em = self.encoder(title)
    content_em = self.encoder(content)
    title_size = title_em.size()
    content_size = content_em.size()

    title_2 = t.nn.functional.relu(self.bn(self.pre_fc(title_em.view(-1, 256)).view(title_em.size(0), title_em.size(1), -1).transpose(1, 2).contiguous()))
    content_2 = t.nn.functional.relu(self.bn2(self.pre_fc2(content_em.view(-1, 256)).view(content_em.size(0), content_em.size(1), -1).transpose(1, 2)).contiguous())
    # title_2 = self.pre(title_em.contiguous().view(-1, 256)).view(title_size)
    # content_2 = self.pre(content_em.contiguous().view(-1, 256)).view(content_size)

    title_ = t.mean(title_2, dim=2)
    content_ = t.mean(content_2, dim=2)
    inputs = t.cat((title_.squeeze(), content_.squeeze()), 1)
    out = self.fc(inputs.view(inputs.size(0), -1))
    # content_out = self.content_fc(content.view(content.size(0), -1))
    # out = torch.cat((title_out, content_out), 1)
    # out = self.fc(out)
    return out
def compare_fits(x):
    shape, scale = fit(x)
    app_shape, app_scale = x.mean() / x.std(), x.mean()
    # _, np_shape, _, np_scale = exponweib.fit(x, floc=0)

    # # Plot
    # def weib(x, n, a):  # a == shape
    #     return (a / n) * (x / n)**(a - 1) * np.exp(-(x / n)**a)
    #
    # count, _, _ = plt.hist(x, 100)
    # xx = np.linspace(x.min(), x.max(), 10000)
    # yy = weib(xx, scale, shape)
    # yy_app = weib(xx, app_scale, app_shape)
    # yy_np = weib(xx, np_scale, np_shape)
    # plt.plot(xx, yy * (count.max() / yy.max()), label='MLE')
    # plt.plot(xx, yy_app * (count.max() / yy_app.max()), label='App')
    # plt.plot(xx, yy_np * (count.max() / yy_np.max()), label='Scipy')
    # plt.legend()
    # plt.show()

    return (shape, scale), (app_shape, app_scale)
def forward(self, x):
    arr = list()
    for sentence in x:
        # Sentence embedding
        sent_emb = list()
        for word in sentence:
            word = np.array(word)
            word = torch.from_numpy(word)
            word = Variable(word)
            # Gets the embedding for each character in the word
            char_emb = self.embedding(word)
            # Computes the mean between all character level embeddings. MxN -> 1xN
            char_emb = torch.mean(char_emb, 0)
            sent_emb.append(char_emb)
        arr.append(sent_emb)
    return arr
def change_key_names(old_params, in_channels):
    new_params = collections.OrderedDict()
    layer_count = 0
    allKeyList = old_params.keys()
    for layer_key in allKeyList:
        if layer_count >= len(allKeyList) - 2:
            # exclude fc layers
            continue
        else:
            if layer_count == 0:
                rgb_weight = old_params[layer_key]
                # print(type(rgb_weight))
                rgb_weight_mean = torch.mean(rgb_weight, dim=1)
                # TODO: ugly fix here, why torch.mean() turn tensor to Variable
                # print(type(rgb_weight_mean))
                flow_weight = rgb_weight_mean.unsqueeze(1).repeat(1, in_channels, 1, 1)
                new_params[layer_key] = flow_weight
                layer_count += 1
                # print(layer_key, new_params[layer_key].size(), type(new_params[layer_key]))
            else:
                new_params[layer_key] = old_params[layer_key]
                layer_count += 1
                # print(layer_key, new_params[layer_key].size(), type(new_params[layer_key]))
    return new_params
def change_key_names(old_params, in_channels):
    new_params = collections.OrderedDict()
    layer_count = 0
    for layer_key in old_params.keys():
        if layer_count < 26:
            if layer_count == 0:
                rgb_weight = old_params[layer_key]
                rgb_weight_mean = torch.mean(rgb_weight, dim=1)
                flow_weight = rgb_weight_mean.repeat(1, in_channels, 1, 1)
                new_params[layer_key] = flow_weight
                layer_count += 1
                # print(layer_key, new_params[layer_key].size())
            else:
                new_params[layer_key] = old_params[layer_key]
                layer_count += 1
                # print(layer_key, new_params[layer_key].size())
    return new_params
def sample_gaussian_2d_train(mux, muy, sx, sy, corr, nodesPresent):
    o_mux, o_muy, o_sx, o_sy, o_corr = mux, muy, sx, sy, corr

    numNodes = mux.size()[0]
    next_x = torch.zeros(numNodes)
    next_y = torch.zeros(numNodes)
    for node in range(numNodes):
        if node not in nodesPresent:
            continue
        mean = [o_mux[node], o_muy[node]]
        cov = [[o_sx[node] * o_sx[node], o_corr[node] * o_sx[node] * o_sy[node]],
               [o_corr[node] * o_sx[node] * o_sy[node], o_sy[node] * o_sy[node]]]

        next_values = np.random.multivariate_normal(mean, cov, 1)
        next_x[node] = next_values[0][0]
        next_y[node] = next_values[0][1]

    return next_x, next_y
def stats(criterion, a, y, mask):
    if mask is not None:
        _, preds = t.max(a.data, 2)
        batch, sLen, c = a.size()
        loss = criterion(a.view(-1, c), y.view(-1))
        m = t.sum(mask)
        mask = _sequence_mask(mask, sLen)
        acc = t.sum(mask.data.float() * (y.data == preds).float()) / float(m.data[0])
        # loss = criterion(a.view(-1, c), y.view(-1))
    else:
        _, preds = t.max(a.data, 1)
        loss = criterion(a, y)
        acc = t.mean((y.data == preds).float())
    return loss, acc
def test_kissgp_classification_error():
    model = GPClassificationModel()

    # Find optimal model hyperparameters
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=0.15)
    optimizer.n_iter = 0
    for i in range(20):
        optimizer.zero_grad()
        output = model.forward(train_x)
        loss = -model.marginal_log_likelihood(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Set back to eval mode
    model.eval()
    test_preds = model(train_x).mean().ge(0.5).float().mul(2).sub(1).squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    assert(mean_abs_error.data.squeeze()[0] < 1e-5)
def test_kissgp_classification_error():
    model = GPClassificationModel()

    # Find optimal model hyperparameters
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=0.15)
    optimizer.n_iter = 0
    for i in range(200):
        optimizer.zero_grad()
        output = model.forward(train_x)
        loss = -model.marginal_log_likelihood(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Set back to eval mode
    model.eval()
    test_preds = model(train_x).mean().ge(0.5).float().mul(2).sub(1).squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    assert(mean_abs_error.data.squeeze()[0] < 1e-5)
def test_kissgp_classification_error():
    train_x, train_y = train_data()
    model = GPClassificationModel(train_x.data)

    # Find optimal model hyperparameters
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=0.1)
    optimizer.n_iter = 0
    for i in range(50):
        optimizer.zero_grad()
        output = model.forward(train_x)
        loss = -model.marginal_log_likelihood(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Set back to eval mode
    model.eval()
    test_preds = model(train_x).mean().ge(0.5).float().mul(2).sub(1).squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    assert(mean_abs_error.data.squeeze()[0] < 1e-5)
def test_kissgp_classification_error_cuda():
    if torch.cuda.is_available():
        train_x, train_y = train_data(cuda=True)
        model = GPClassificationModel(train_x.data).cuda()
        model.condition(train_x, train_y)

        # Find optimal model hyperparameters
        model.train()
        optimizer = optim.Adam(model.parameters(), lr=0.1)
        optimizer.n_iter = 0
        for i in range(50):
            optimizer.zero_grad()
            output = model.forward(train_x)
            loss = -model.marginal_log_likelihood(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        # Set back to eval mode
        model.eval()
        test_preds = model(train_x).mean().ge(0.5).float().mul(2).sub(1).squeeze()
        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
        assert(mean_abs_error.data.squeeze()[0] < 1e-5)
def test_kissgp_classification_error():
    gpytorch.functions.use_toeplitz = False
    model = GPClassificationModel()

    # Find optimal model hyperparameters
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=0.15)
    optimizer.n_iter = 0
    for i in range(100):
        optimizer.zero_grad()
        output = model.forward(train_x)
        loss = -model.marginal_log_likelihood(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Set back to eval mode
    model.eval()
    test_preds = model(train_x).mean().ge(0.5).float().mul(2).sub(1).squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    gpytorch.functions.use_toeplitz = True
    assert(mean_abs_error.data.squeeze()[0] < 5e-2)
def test_spectral_mixture_gp_mean_abs_error():
    gp_model = SpectralMixtureGPModel()

    # Optimize the model
    gp_model.train()
    optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
    optimizer.n_iter = 0

    gpytorch.functions.fastest = False
    for i in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x)
        loss = -gp_model.marginal_log_likelihood(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Test the model
    gp_model.eval()
    gp_model.condition(train_x, train_y)
    test_preds = gp_model(test_x).mean()
    mean_abs_error = torch.mean(torch.abs(test_y - test_preds))

    # The spectral mixture kernel should be trivially able to extrapolate the sine function.
    assert(mean_abs_error.data.squeeze()[0] < 0.05)
def test_kissgp_gp_mean_abs_error():
    gp_model = GPRegressionModel()

    # Optimize the model
    gp_model.train()
    optimizer = optim.Adam(gp_model.parameters(), lr=0.2)
    optimizer.n_iter = 0
    for i in range(20):
        optimizer.zero_grad()
        output = gp_model(train_x)
        loss = -gp_model.marginal_log_likelihood(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Test the model
    gp_model.eval()
    gp_model.condition(train_x, train_y)
    test_preds = gp_model(test_x).mean()
    mean_abs_error = torch.mean(torch.abs(test_y - test_preds))
    assert(mean_abs_error.data.squeeze()[0] < 0.1)
def test_kissgp_gp_mean_abs_error():
    train_x, train_y, test_x, test_y = make_data()
    gp_model = GPRegressionModel()

    # Optimize the model
    gp_model.train()
    optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
    optimizer.n_iter = 0
    for i in range(25):
        optimizer.zero_grad()
        output = gp_model(train_x)
        loss = -gp_model.marginal_log_likelihood(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Test the model
    gp_model.eval()
    gp_model.condition(train_x, train_y)
    test_preds = gp_model(test_x).mean()
    mean_abs_error = torch.mean(torch.abs(test_y - test_preds))
    assert(mean_abs_error.data.squeeze()[0] < 0.05)
def test_kissgp_gp_mean_abs_error_cuda():
    if torch.cuda.is_available():
        train_x, train_y, test_x, test_y = make_data(cuda=True)
        gp_model = GPRegressionModel().cuda()

        # Optimize the model
        gp_model.train()
        optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
        optimizer.n_iter = 0
        for i in range(25):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -gp_model.marginal_log_likelihood(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        # Test the model
        gp_model.eval()
        gp_model.condition(train_x, train_y)
        test_preds = gp_model(test_x).mean()
        mean_abs_error = torch.mean(torch.abs(test_y - test_preds))
        assert(mean_abs_error.data.squeeze()[0] < 0.02)
def th_corrcoef(x):
    """
    mimics np.corrcoef
    """
    # calculate covariance matrix of rows
    mean_x = th.mean(x, 1)
    xm = x.sub(mean_x.expand_as(x))
    c = xm.mm(xm.t())
    c = c / (x.size(1) - 1)

    # normalize covariance matrix
    d = th.diag(c)
    stddev = th.pow(d, 0.5)
    c = c.div(stddev.expand_as(c))
    c = c.div(stddev.expand_as(c).t())

    # clamp between -1 and 1
    c = th.clamp(c, -1.0, 1.0)

    return c
def th_matrixcorr(x, y):
    """
    return a correlation matrix between
    columns of x and columns of y.

    So, if X.size() == (1000,4) and Y.size() == (1000,5),
    then the result will be of size (4,5) with the
    (i,j) value equal to the pearsonr correlation coeff
    between column i in X and column j in Y
    """
    mean_x = th.mean(x, 0)
    mean_y = th.mean(y, 0)
    xm = x.sub(mean_x.expand_as(x))
    ym = y.sub(mean_y.expand_as(y))
    r_num = xm.t().mm(ym)
    r_den1 = th.norm(xm, 2, 0)
    r_den2 = th.norm(ym, 2, 0)
    r_den = r_den1.t().mm(r_den2)
    r_mat = r_num.div(r_den)
    return r_mat
def normalized_cross_correlation(self):
    w = self.weight.view(self.weight.size(0), -1)
    t_norm = torch.norm(w, p=2, dim=1)

    if self.in_channels == 1 and sum(self.kernel_size) == 1:
        ncc = w.squeeze() / torch.norm(self.t0_norm, p=2)
        ncc = ncc - self.start_ncc
        return ncc

    # mean = torch.mean(w, dim=1).unsqueeze(1).expand_as(w)
    mean = torch.mean(w, dim=1).unsqueeze(1)  # 0.2 broadcasting
    t_factor = w - mean
    h_product = self.t0_factor * t_factor
    cov = torch.sum(h_product, dim=1)  # (w.size(1) - 1)

    # had normalization code commented out
    denom = self.t0_norm * t_norm

    ncc = cov / denom
    ncc = ncc - self.start_ncc
    return ncc
def forward(self, y, weights, mean, std):
    """
    Presents a maximum a-priori objective for a set of predicted means, mixture components,
    and standard deviations to model a given ground-truth 'y'. Modeled using negative log likelihood.

    :param y: Non-linear target.
    :param weights: Predicted mixture components.
    :param mean: Predicted mixture means.
    :param std: Predicted mixture standard deviations.
    :return:
    """
    normalization = 1.0 / ((2.0 * math.pi) ** 0.5)
    gaussian_sample = (y.expand_as(mean) - mean) * torch.reciprocal(std)
    gaussian_sample = normalization * torch.reciprocal(std) * torch.exp(-0.5 * gaussian_sample ** 2)
    return -torch.mean(torch.log(torch.sum(weights * gaussian_sample, dim=1)))
def test_model_custom_loss():
    x = torch.rand(20, 4)
    y = torch.rand(20, 10)

    model = Model(
        Dense(10, input_dim=x.size()[-1]),
        Activation('relu'),
        Dense(5),
        Activation('relu'),
        Dense(y.size()[-1])
    )
    opt = SGD(lr=0.01, momentum=0.9)

    def mae(y_true, y_pred):
        return torch.mean(torch.abs(y_true - y_pred))

    history = model.fit(x, y, loss=mae, optimizer=opt, epochs=10)
    assert len(history['loss']) == 10
    assert all(type(v) is float for v in history['loss'])
    assert history['loss'] == sorted(history['loss'], reverse=True)
def forward(self, anchor, positive, negative):
    # euclidean distance
    # dist = torch.sum((anchor - positive) ** 2 - (anchor - negative) ** 2, dim=1) + self.margin
    if self.dist_type == 0:
        dist_p = F.pairwise_distance(anchor, positive)
        dist_n = F.pairwise_distance(anchor, negative)
    if self.dist_type == 1:
        dist_p = cosine_similarity(anchor, positive)
        dist_n = cosine_similarity(anchor, negative)

    dist_hinge = torch.clamp(dist_p - dist_n + self.margin, min=0.0)
    if self.use_ohem:
        v, idx = torch.sort(dist_hinge, descending=True)
        loss = torch.mean(v[0:self.ohem_bs])
    else:
        loss = torch.mean(dist_hinge)
    return loss
def mean(x, axis=None, keepdims=False):
    def _mean(x, axis=axis, keepdims=keepdims):
        y = torch.mean(x, axis)
        # Since keepdims argument of torch not functional
        return y if keepdims else torch.squeeze(y, axis)

    def _compute_output_shape(x, axis=axis, keepdims=keepdims):
        if axis is None:
            return ()
        shape = list(_get_shape(x))
        if keepdims:
            shape[axis] = 1
        else:
            del shape[axis]
        return tuple(shape)

    return get_op(_mean, output_shape=_compute_output_shape, arguments=[axis, keepdims])(x)
def loss(self, x):
    self.forward(x)
    criterion = nn.BCELoss()
    x_recons = self.sigmoid(self.cs[-1])
    Lx = criterion(x_recons, x) * self.A * self.B
    Lz = 0
    kl_terms = [0] * self.T
    for t in range(self.T):
        mu_2 = self.mus[t] * self.mus[t]
        sigma_2 = self.sigmas[t] * self.sigmas[t]
        logsigma = self.logsigmas[t]
        # Lz += (0.5 * (mu_2 + sigma_2 - 2 * logsigma))    # 11
        kl_terms[t] = 0.5 * torch.sum(mu_2 + sigma_2 - 2 * logsigma, 1) - self.T * 0.5
        Lz += kl_terms[t]
    # Lz -= self.T / 2
    Lz = torch.mean(Lz)
    loss = Lz + Lx    # 12
    return loss
def compute_loss(self, input, e, b, clusters, it=0):
    Loss = Variable(torch.zeros((self.batch_size))).type(dtype)
    Ls = Variable(torch.zeros((self.batch_size))).type(dtype)
    for cl in range(clusters // 2):
        L, m1, m2 = self.compute_diameter(input, e, cl, it=it)
        mask = ((e / 2).type(dtype_l) == cl).type(dtype)
        # print('mask', mask[0])
        n = mask.sum(1).squeeze()
        n += (n == 0).type(dtype)
        # print('mask', mask[0])
        log_probs = torch.log((1 - b) * m1 + b * m2 + (1 - mask) + 1e-8)
        Loss += L * log_probs.sum(1) / n
        Ls += L
    Ls = Ls.mean(0)
    Loss = Loss.mean(0)
    return Loss, Ls

###########################################################################
#                             Split Phase                                 #
###########################################################################
def GAN_loss(self, x):
    x = x.view(x.size(0), -1)
    if isinstance(x, torch.cuda.FloatTensor):
        eps = torch.cuda.FloatTensor(x.size(0), self.nz).normal_()
    else:
        eps = torch.FloatTensor(x.size(0), self.nz).normal_()
    alpha = torch.FloatTensor(x.size(0), 1).uniform_(0, 1)
    alpha = alpha.expand(x.size(0), x.size(1))

    recon_pz = self.decode(Variable(eps))

    interpolates = alpha * x.data + (1 - alpha) * recon_pz.data
    interpolates = Variable(interpolates, requires_grad=True)
    D_interpolates = self.D(interpolates)
    gradients = grad(D_interpolates, interpolates, create_graph=True)[0]
    slopes = torch.sum(gradients ** 2, 1).sqrt()
    gradient_penalty = (torch.mean(slopes - 1.) ** 2)

    return self.D(x) - self.D(recon_pz) - 10 * gradient_penalty
def bpr_loss(x, dynamic_user, item_embedding, config):
    '''
    bayesian personalized ranking loss for implicit feedback
    parameters:
    - x: batch of users' baskets
    - dynamic_user: batch of users' dynamic representations
    - item_embedding: item_embedding matrix
    - config: model configuration
    '''
    nll = 0
    ub_seqs = []
    for u, du in zip(x, dynamic_user):
        du_p_product = torch.mm(du, item_embedding.t())  # shape: max_len, num_item
        nll_u = []  # nll for user
        for t, basket_t in enumerate(u):
            if basket_t[0] != 0 and t != 0:
                pos_idx = torch.cuda.LongTensor(basket_t) if config.cuda else torch.LongTensor(basket_t)
                # Sample negative products
                neg = [random.choice(range(1, config.num_product)) for _ in range(len(basket_t))]  # replacement
                # neg = random.sample(range(1, config.num_product), len(basket_t))  # without replacement
                neg_idx = torch.cuda.LongTensor(neg) if config.cuda else torch.LongTensor(neg)
                # Score p(u, t, v > v')
                score = du_p_product[t - 1][pos_idx] - du_p_product[t - 1][neg_idx]
                # Average negative log likelihood for basket_t
                nll_u.append(-torch.mean(torch.nn.LogSigmoid()(score)))
        nll += torch.mean(torch.cat(nll_u))
    return nll
def pool_avg(tensor, dim):
    return torch.mean(tensor, dim)