The following 50 code examples, extracted from open-source Python projects, illustrate how to use torch.sum().
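Before the project examples, here is a minimal, self-contained sketch of the call patterns that recur below (a full reduction, a reduction along a dimension, and keepdim). This snippet is illustrative only and is not taken from any of the projects.

import torch

x = torch.tensor([[1.0, 2.0, 3.0],
                  [4.0, 5.0, 6.0]])

total = torch.sum(x)                           # scalar tensor: 21.0
col_sums = torch.sum(x, dim=0)                 # shape (3,): [5., 7., 9.]
row_sums = torch.sum(x, dim=1, keepdim=True)   # shape (2, 1): [[6.], [15.]]
print(total, col_sums, row_sums)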
def evaluate(): """evaluate the model while training""" model.eval() # turn on the eval() switch to disable dropout total_loss = 0 total_correct = 0 for batch, i in enumerate(range(0, len(data_val), args.batch_size)): data, targets = package(data_val[i:min(len(data_val), i+args.batch_size)], volatile=True) if args.cuda: data = data.cuda() targets = targets.cuda() hidden = model.init_hidden(data.size(1)) output, attention = model.forward(data, hidden) output_flat = output.view(data.size(1), -1) total_loss += criterion(output_flat, targets).data prediction = torch.max(output_flat, 1)[1] total_correct += torch.sum((prediction == targets).float()) return total_loss[0] / (len(data_val) // args.batch_size), total_correct.data[0] / len(data_val)
def train_ae(self, train_X, optimizer, epochs, verbose=True):
    N = train_X.data.size()[0]
    num_batches = N / self.batch_size
    for e in range(epochs):
        agg_cost = 0.
        for k in range(num_batches):
            start, end = k * self.batch_size, (k + 1) * self.batch_size
            bX = train_X[start:end]
            optimizer.zero_grad()
            Z = self.forward(bX)
            Z = self.decode(Z)
            loss = -torch.sum(bX * torch.log(Z) + (1.0 - bX) * torch.log(1.0 - Z), 1)
            cost = torch.mean(loss)
            cost.backward()
            optimizer.step()
            agg_cost += cost
        agg_cost /= num_batches
        if verbose:
            print("Epoch:", e, "cost:", agg_cost.data[0])
def node_forward(self, inputs, child_c, child_h):
    child_h_sum = torch.sum(child_h, dim=0, keepdim=True)

    iou = self.ioux(inputs) + self.iouh(child_h_sum)
    i, o, u = torch.split(iou, iou.size(1) // 3, dim=1)
    i, o, u = F.sigmoid(i), F.sigmoid(o), F.tanh(u)

    f = F.sigmoid(
        self.fh(child_h) +
        self.fx(inputs).repeat(len(child_h), 1)
    )
    fc = torch.mul(f, child_c)

    c = torch.mul(i, u) + torch.sum(fc, dim=0, keepdim=True)
    h = torch.mul(o, F.tanh(c))
    return c, h
def forward(self, input, target):
    buffer = input.new()
    buffer.resize_as_(input).copy_(input)
    buffer[torch.eq(target, -1.)] = 0
    output = buffer.sum()

    buffer.fill_(self.margin).add_(-1, input)
    buffer.cmax_(0)
    buffer[torch.eq(target, 1.)] = 0
    output += buffer.sum()

    if self.size_average:
        output = output / input.nelement()

    self.save_for_backward(input, target)
    return input.new((output,))
def updateOutput(self, input):
    self._assertInput(input)

    # set up buffer:
    self.buff2 = self.buff2 or input[0].new()
    self.buff2.resize_as_(input[1])

    # compute output scores:
    self.output.resize_(input[0].size(0), self.weight.size(0))
    for k in range(self.weight.size(0)):
        torch.mm(self.buff2, input[0], self.weight[k])
        self.buff2.mul_(input[1])
        torch.sum(self.output.narrow(1, k, 1), self.buff2, 1)

    if self.bias:
        self.output.add_(self.bias.view(1, self.bias.nelement()).expand_as(self.output))

    return self.output
def sym_distance_matrix(A, B, eps=1e-18, self_similarity=False):
    """
    Defines the symbolic matrix that contains the distances between the vectors of A and B
    :param A: the first data matrix
    :param B: the second data matrix
    :param self_similarity: zeros the diagonal to improve the stability
    :param eps: the minimum distance between two vectors (set to a very small number to improve stability)
    :return:
    """
    # Compute the squared distances
    AA = torch.sum(A * A, 1).view(-1, 1)
    BB = torch.sum(B * B, 1).view(1, -1)
    AB = torch.mm(A, B.transpose(0, 1))
    D = AA + BB - 2 * AB

    # Zero the diagonal
    if self_similarity:
        D = D.view(-1)
        D[::B.size(0) + 1] = 0
        D = D.view(A.size(0), B.size(0))

    # Return the square root
    D = torch.sqrt(torch.clamp(D, min=eps))
    return D
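A minimal usage sketch for the function above (my own example, assuming the function is in scope): the row norms from torch.sum broadcast against the cross term to give all pairwise Euclidean distances at once.

import torch

A = torch.rand(4, 3)  # 4 vectors of dimension 3
B = torch.rand(5, 3)  # 5 vectors of dimension 3
D = sym_distance_matrix(A, B)  # 4 x 5 matrix of pairwise distances
print(D.shape)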
def test_forward_works_without_mask(self):
    log_likelihood = self.crf(self.logits, self.tags).data[0]

    # Now compute the log-likelihood manually
    manual_log_likelihood = 0.0

    # For each instance, manually compute the numerator
    # (which is just the score for the logits and actual tags)
    # and the denominator
    # (which is the log-sum-exp of the scores for the logits across all possible tags)
    for logits_i, tags_i in zip(self.logits, self.tags):
        numerator = self.score(logits_i.data, tags_i.data)
        all_scores = [self.score(logits_i.data, tags_j)
                      for tags_j in itertools.product(range(5), repeat=3)]
        denominator = math.log(sum(math.exp(score) for score in all_scores))
        # And include them in the manual calculation.
        manual_log_likelihood += numerator - denominator

    # The manually computed log likelihood should equal the result of crf.forward.
    assert manual_log_likelihood == approx(log_likelihood)
def test_contrastive_loss_value(self):
    x0_val = Variable(self.x0)
    x1_val = Variable(self.x1)
    t_val = Variable(self.t)
    tml = ContrastiveLoss(margin=self.margin)
    loss = tml.forward(x0_val, x1_val, t_val)
    self.assertEqual(loss.data.numpy().shape, (1, ))
    self.assertEqual(loss.data.numpy().dtype, np.float32)
    loss_value = float(loss.data.numpy())

    # Compute expected value
    loss_expect = 0
    for i in range(self.x0.size()[0]):
        x0d, x1d, td = self.x0[i], self.x1[i], self.t[i]
        d = torch.sum(torch.pow(x0d - x1d, 2))
        if td == 1:  # similar pair
            loss_expect += d
        elif td == 0:  # dissimilar pair
            loss_expect += max(1 - np.sqrt(d), 0)**2
    loss_expect /= 2.0 * self.t.size()[0]
    print("expected %s got %s" % (loss_expect, loss_value))
    self.assertAlmostEqual(loss_expect, loss_value, places=5)
def update_memories_with_extra_features_(self, memory_lengths, memories):
    memory_lengths = memory_lengths.data
    memories = memories.data
    if self.extra_features_slots > 0:
        num_nonempty_memories = memory_lengths.ne(0).sum()
        updated_memories = memories.new(memories.numel() + num_nonempty_memories * self.extra_features_slots)
        src_offset = 0
        dst_offset = 0
        for i in range(memory_lengths.size(0)):
            for j in range(self.opt['mem_size']):
                length = memory_lengths[i, j]
                if length > 0:
                    if self.opt['time_features']:
                        updated_memories[dst_offset] = self.time_feature(j)
                        dst_offset += 1
                    updated_memories[dst_offset:dst_offset + length] = memories[src_offset:src_offset + length]
                    src_offset += length
                    dst_offset += length
        memory_lengths += memory_lengths.ne(0).long() * self.extra_features_slots
        memories.set_(updated_memories)
def get_accuracy(data_loader, classifier_fn, batch_size):
    """
    compute the accuracy over the supervised training set or the testing set
    """
    predictions, actuals = [], []

    # use the appropriate data loader
    for (xs, ys) in data_loader:
        # use classification function to compute all predictions for each batch
        xs, ys = Variable(xs), Variable(ys)
        predictions.append(classifier_fn(xs))
        actuals.append(ys)

    # compute the number of accurate predictions
    accurate_preds = 0
    for pred, act in zip(predictions, actuals):
        for i in range(pred.size(0)):
            v = torch.sum(pred[i] == act[i])
            accurate_preds += (v.data[0] == 10)

    # calculate the accuracy between 0 and 1
    accuracy = (accurate_preds * 1.0) / (len(predictions) * batch_size)
    return accuracy
def setUp(self):
    # simple Gaussian-emission HMM
    def model():
        p_latent = pyro.param("p1", Variable(torch.Tensor([[0.7], [0.3]])))
        p_obs = pyro.param("p2", Variable(torch.Tensor([[0.9], [0.1]])))

        latents = [Variable(torch.ones(1, 1))]
        observes = []
        for t in range(self.model_steps):
            latents.append(
                pyro.sample("latent_{}".format(str(t)),
                            Bernoulli(torch.index_select(p_latent, 0, latents[-1].view(-1).long()))))

            observes.append(
                pyro.observe("observe_{}".format(str(t)),
                             Bernoulli(torch.index_select(p_obs, 0, latents[-1].view(-1).long())),
                             self.data[t]))
        return torch.sum(torch.cat(latents))

    self.model_steps = 3
    self.data = [pyro.ones(1, 1) for _ in range(self.model_steps)]
    self.model = model
def setUp(self):
    # lognormal-normal model
    # putting some of the parameters inside of a torch module to
    # make sure that that functionality is ok (XXX: do this somewhere else in the future)
    self.mu0 = Variable(torch.Tensor([1.0]))  # normal prior hyperparameter
    # normal prior hyperparameter
    self.tau0 = Variable(torch.Tensor([1.0]))
    # known precision for observation likelihood
    self.tau = Variable(torch.Tensor([2.5]))
    self.n_data = 2
    self.data = Variable(torch.Tensor([[1.5], [2.2]]))  # two observations
    self.tau_n = self.tau0 + \
        Variable(torch.Tensor([self.n_data])) * self.tau  # posterior tau
    mu_numerator = self.mu0 * self.tau0 + \
        self.tau * torch.sum(torch.log(self.data))
    self.mu_n = mu_numerator / self.tau_n  # posterior mu
    self.log_mu_n = torch.log(self.mu_n)
    self.log_tau_n = torch.log(self.tau_n)
def _test_jacobian(self, input_dim, hidden_dim, multiplier):
    jacobian = torch.zeros(input_dim, input_dim)
    arn = AutoRegressiveNN(input_dim, hidden_dim, multiplier)

    def nonzero(x):
        return torch.sign(torch.abs(x))

    for output_index in range(multiplier):
        for j in range(input_dim):
            for k in range(input_dim):
                x = Variable(torch.randn(1, input_dim))
                epsilon_vector = torch.zeros(1, input_dim)
                epsilon_vector[0, j] = self.epsilon
                delta = (arn(x + Variable(epsilon_vector)) - arn(x)) / self.epsilon
                jacobian[j, k] = float(delta[0, k + output_index * input_dim].data.cpu().numpy()[0])

        permutation = arn.get_permutation()
        permuted_jacobian = jacobian.clone()
        for j in range(input_dim):
            for k in range(input_dim):
                permuted_jacobian[j, k] = jacobian[permutation[j], permutation[k]]

        lower_sum = torch.sum(torch.tril(nonzero(permuted_jacobian), diagonal=0))
        self.assertTrue(lower_sum == float(0.0))
def setUp(self):
    # lognormal-normal model
    # putting some of the parameters inside of a torch module to
    # make sure that that functionality is ok (XXX: do this somewhere else in the future)
    self.mu0 = Variable(torch.Tensor([1.0]))  # normal prior hyperparameter
    # normal prior hyperparameter
    self.tau0 = Variable(torch.Tensor([1.0]))
    # known precision for observation likelihood
    self.tau = Variable(torch.Tensor([2.5]))
    self.n_data = 2
    self.data = Variable(torch.Tensor([[1.5], [2.2]]))  # two observations
    self.tau_n = self.tau0 + \
        Variable(torch.Tensor([self.n_data])) * self.tau  # posterior tau
    mu_numerator = self.mu0 * self.tau0 + \
        self.tau * torch.sum(torch.log(self.data))
    self.mu_n = mu_numerator / self.tau_n  # posterior mu
    self.log_mu_n = torch.log(self.mu_n)
    self.log_tau_n = torch.log(self.tau_n)
    self.verbose = True
def log_beta(t):
    """
    Computes log Beta function.

    :param t:
    :type t: torch.autograd.Variable of dimension 1 or 2
    :rtype: torch.autograd.Variable of float (if t.dim() == 1)
            or torch.Tensor (if t.dim() == 2)
    """
    assert t.dim() in (1, 2)
    if t.dim() == 1:
        numer = torch.sum(log_gamma(t))
        denom = log_gamma(torch.sum(t))
    else:
        numer = torch.sum(log_gamma(t), 1)
        denom = log_gamma(torch.sum(t, 1))
    return numer - denom
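For reference, the identity computed above is log B(t_1, ..., t_n) = sum_i log Gamma(t_i) - log Gamma(sum_i t_i). A minimal sketch of the same computation using torch.sum together with the built-in torch.lgamma in place of the project's log_gamma helper (that substitution is my assumption, not part of the original code):

import torch

t = torch.tensor([2.0, 3.0])
log_beta_t = torch.sum(torch.lgamma(t)) - torch.lgamma(torch.sum(t))
# Beta(2, 3) = 1/12, so log_beta_t should be close to log(1/12)
print(log_beta_t, torch.log(torch.tensor(1.0 / 12.0)))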
def softmax(x, dim=-1):
    """
    TODO: change to use the default pyTorch implementation when available
    Source: https://discuss.pytorch.org/t/why-softmax-function-cant-specify-the-dimension-to-operate/2637
    :param x: tensor
    :param dim: dimension to apply the softmax function to; the elements of the tensor along this
        dimension are rescaled so that they sum to 1
    :return: tensor having the same dimension as `x` rescaled along dim
    """
    input_size = x.size()

    trans_input = x.transpose(dim, len(input_size) - 1)
    trans_size = trans_input.size()

    input_2d = trans_input.contiguous().view(-1, trans_size[-1])

    try:
        soft_max_2d = F.softmax(input_2d, 1)
    except TypeError:
        # Support older pytorch 0.2 release.
        soft_max_2d = F.softmax(input_2d)

    soft_max_nd = soft_max_2d.view(*trans_size)
    return soft_max_nd.transpose(dim, len(input_size) - 1)
def __call__(self, x, index=None):
    output = self.pretrained_model(x)

    if index is None:
        index = np.argmax(output.data.cpu().numpy())

    one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
    one_hot[0][index] = 1
    if self.cuda:
        one_hot = Variable(torch.from_numpy(one_hot).cuda(), requires_grad=True)
    else:
        one_hot = Variable(torch.from_numpy(one_hot), requires_grad=True)
    one_hot = torch.sum(one_hot * output)
    one_hot.backward(retain_variables=True)

    grad = x.grad.data.cpu().numpy()
    grad = grad[0, :, :, :]

    return grad
def build_loss(self, rpn_cls_score_reshape, rpn_bbox_pred, rpn_data):
    # classification loss
    rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(-1, 2)
    rpn_label = rpn_data[0].view(-1)

    rpn_keep = Variable(rpn_label.data.ne(-1).nonzero().squeeze()).cuda()
    rpn_cls_score = torch.index_select(rpn_cls_score, 0, rpn_keep)
    rpn_label = torch.index_select(rpn_label, 0, rpn_keep)

    fg_cnt = torch.sum(rpn_label.data.ne(0))

    rpn_cross_entropy = F.cross_entropy(rpn_cls_score, rpn_label)

    # box loss
    rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
    rpn_bbox_targets = torch.mul(rpn_bbox_targets, rpn_bbox_inside_weights)
    rpn_bbox_pred = torch.mul(rpn_bbox_pred, rpn_bbox_inside_weights)

    rpn_loss_box = F.smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, size_average=False) / (fg_cnt + 1e-4)

    return rpn_cross_entropy, rpn_loss_box
def preProc2(x):
    # Access the global variables
    global P, expP, negExpP
    P = P.type_as(x)
    expP = expP.type_as(x)
    negExpP = negExpP.type_as(x)

    # Create a variable filled with -1. Second part of the condition
    z = Variable(torch.zeros(x.size())).type_as(x)
    absX = torch.abs(x)
    cond1 = torch.gt(absX, negExpP)
    cond2 = torch.le(absX, negExpP)
    if (torch.sum(cond1) > 0).data.all():
        x1 = torch.sign(x[cond1])
        z[cond1] = x1
    if (torch.sum(cond2) > 0).data.all():
        x2 = x[cond2] * expP
        z[cond2] = x2
    return z
def _forward_alg(self, feats):
    # calculate in log domain
    # feats is len(sentence) * tagset_size
    # initialize alpha with a Tensor with values all equal to -10000.
    init_alphas = torch.Tensor(1, self.tagset_size).fill_(-10000.)
    init_alphas[0][self.tag_to_ix[START_TAG]] = 0.
    forward_var = autograd.Variable(init_alphas)
    if self.use_gpu:
        forward_var = forward_var.cuda()
    for feat in feats:
        emit_score = feat.view(-1, 1)
        tag_var = forward_var + self.transitions + emit_score
        max_tag_var, _ = torch.max(tag_var, dim=1)
        tag_var = tag_var - max_tag_var.view(-1, 1)
        forward_var = max_tag_var + torch.log(torch.sum(torch.exp(tag_var), dim=1)).view(1, -1)  # ).view(1, -1)
    terminal_var = (forward_var + self.transitions[self.tag_to_ix[STOP_TAG]]).view(1, -1)
    alpha = log_sum_exp(terminal_var)
    # Z(x)
    return alpha
def forward(self, pos_u, pos_v, neg_u, neg_v):
    losses = []
    emb_u = []
    for i in range(len(pos_u)):
        emb_ui = self.u_embeddings(Variable(torch.LongTensor(pos_u[i])))
        emb_u.append(np.sum(emb_ui.data.numpy(), axis=0).tolist())
    emb_u = Variable(torch.FloatTensor(emb_u))
    emb_v = self.v_embeddings(Variable(torch.LongTensor(pos_v)))
    score = torch.mul(emb_u, emb_v)
    score = torch.sum(score, dim=1)
    score = F.logsigmoid(score)
    losses.append(sum(score))

    neg_emb_u = []
    for i in range(len(neg_u)):
        neg_emb_ui = self.u_embeddings(Variable(torch.LongTensor(neg_u[i])))
        neg_emb_u.append(np.sum(neg_emb_ui.data.numpy(), axis=0).tolist())
    neg_emb_u = Variable(torch.FloatTensor(neg_emb_u))
    neg_emb_v = self.v_embeddings(Variable(torch.LongTensor(neg_v)))
    neg_score = torch.mul(neg_emb_u, neg_emb_v)
    neg_score = torch.sum(neg_score, dim=1)
    neg_score = F.logsigmoid(-1 * neg_score)
    losses.append(sum(neg_score))

    return -1 * sum(losses)
def _forward(self, batch):
    _, questions, passages, answers, _ = batch
    batch_num = questions.tensor.size(0)

    questions.variable()
    passages.variable()
    begin_, end_ = self.model(questions, passages)  # batch x seq
    assert begin_.size(0) == batch_num

    answers = Variable(answers)
    if torch.cuda.is_available():
        answers = answers.cuda()
    begin, end = answers[:, 0], answers[:, 1]

    loss = self.loss_fn(begin_, begin) + self.loss_fn(end_, end)

    _, pred_begin = torch.max(begin_, 1)
    _, pred_end = torch.max(end_, 1)

    exact_correct_num = torch.sum(
        (pred_begin == begin) * (pred_end == end))
    em = exact_correct_num.data[0] / batch_num

    return loss, em
def build_loss_objectiveness(self, region_objectiveness, targets):
    loss_objectiveness = F.cross_entropy(region_objectiveness, targets)
    maxv, predict = region_objectiveness.data.max(1)
    labels = targets.squeeze()
    fg_cnt = torch.sum(labels.data.ne(0))
    bg_cnt = labels.data.numel() - fg_cnt
    if fg_cnt > 0:
        self.tp_reg = torch.sum(predict[:fg_cnt].eq(labels.data[:fg_cnt]))
    else:
        self.tp_reg = 0.
    if bg_cnt > 0:
        self.tf_reg = torch.sum(predict[fg_cnt:].eq(labels.data[fg_cnt:]))
    else:
        self.tf_reg = 0.  # the original code assigned tp_reg here, which looks like a typo
    self.fg_cnt_reg = fg_cnt
    self.bg_cnt_reg = bg_cnt
    return loss_objectiveness
def _loss(self, output, target, dist, scale_const):
    # compute the probability of the label class versus the maximum other
    real = (target * output).sum(1)
    other = ((1. - target) * output - target * 10000.).max(1)[0]
    if self.targeted:
        # if targeted, optimize for making the other class most likely
        loss1 = torch.clamp(other - real + self.confidence, min=0.)  # equiv to max(..., 0.)
    else:
        # if non-targeted, optimize for making this class least likely.
        loss1 = torch.clamp(real - other + self.confidence, min=0.)  # equiv to max(..., 0.)
    loss1 = torch.sum(scale_const * loss1)

    loss2 = dist.sum()

    loss = loss1 + loss2
    return loss
def forward(self, input):
    batch_size = input.size(0)
    num_channels = input.size(1)
    h = input.size(2)
    w = input.size(3)

    n = h * w  # number of regions

    kmax = self.get_positive_k(self.kmax, n)
    kmin = self.get_positive_k(self.kmin, n)

    sorted, indices = input.new(), input.new().long()
    torch.sort(input.view(batch_size, num_channels, n), dim=2, descending=True, out=(sorted, indices))

    self.indices_max = indices.narrow(2, 0, kmax)
    output = sorted.narrow(2, 0, kmax).sum(2).div_(kmax)

    if kmin > 0 and self.alpha != 0:
        self.indices_min = indices.narrow(2, n - kmin, kmin)
        output.add_(sorted.narrow(2, n - kmin, kmin).sum(2).mul_(self.alpha / kmin)).div_(2)

    self.save_for_backward(input)
    return output.view(batch_size, num_channels)
def KLDGaussian(Q, N, eps=1e-8):
    """KL Divergence between two Gaussians
        Assuming Q ~ N(mu0, A\sigma_0A') where A = I + vr^{T}
        and N ~ N(mu1, \sigma_1)
    """
    sum = lambda x: torch.sum(x, dim=1)
    k = float(Q.mu.size()[1])  # dimension of distribution
    mu0, v, r, mu1 = Q.mu, Q.v, Q.r, N.mu
    s02, s12 = (Q.sigma).pow(2) + eps, (N.sigma).pow(2) + eps
    a = sum(s02 * (1. + 2. * v * r) / s12) + sum(v.pow(2) / s12) * sum(r.pow(2) * s02)  # trace term
    b = sum((mu1 - mu0).pow(2) / s12)  # difference-of-means term
    c = 2. * (sum(N.logsigma - Q.logsigma) - torch.log(1. + sum(v * r) + eps))  # ratio-of-determinants term.

    # print('trace: %s' % a)
    # print('mu_diff: %s' % b)
    # print('k: %s' % k)
    # print('det: %s' % c)

    return 0.5 * (a + b - k + c)
def compute_loss(x_dec, x_next_pred_dec, x, x_next, Qz, Qz_next_pred, Qz_next):
    # Reconstruction losses
    if False:
        x_reconst_loss = (x_dec - x_next).pow(2).sum(dim=1)
        x_next_reconst_loss = (x_next_pred_dec - x_next).pow(2).sum(dim=1)
    else:
        x_reconst_loss = -binary_crossentropy(x, x_dec).sum(dim=1)
        x_next_reconst_loss = -binary_crossentropy(x_next, x_next_pred_dec).sum(dim=1)

    logvar = Qz.logsigma.mul(2)
    KLD_element = Qz.mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
    KLD = torch.sum(KLD_element, dim=1).mul(-0.5)

    # ELBO
    bound_loss = x_reconst_loss.add(x_next_reconst_loss).add(KLD)
    kl = KLDGaussian(Qz_next_pred, Qz_next)
    return bound_loss.mean(), kl.mean()
def level_curves(fname, npoints=200, smoothing=10, level=0.5):
    "Loads regularly sampled curves from a .PNG image."
    # Find the contour lines
    img = misc.imread(fname, flatten=True)  # Grayscale
    img = (img.T[:, ::-1]) / 255.
    img = gaussian_filter(img, smoothing, mode='nearest')
    lines = find_contours(img, level)

    # Compute the sampling ratio for every contour line
    lengths = np.array([arclength(line) for line in lines])
    points_per_line = np.ceil(npoints * lengths / np.sum(lengths))

    # Interpolate accordingly
    points = []
    connec = []
    index_offset = 0
    for ppl, line in zip(points_per_line, lines):
        (p, c) = resample(line, ppl)
        points.append(p)
        connec.append(c + index_offset)
        index_offset += len(p)

    size = np.maximum(img.shape[0], img.shape[1])
    points = np.vstack(points) / size
    connec = np.vstack(connec)
    return Curve(points, connec)

# Pyplot Output =================================================================================
def _kernel_matching(q1_x, q1_mu, xt_x, xt_mu, radius):
    """
    Given two measures q1 and xt represented by locations/weights arrays,
    outputs a kernel-fidelity term and an empty 'info' array.
    """
    K_qq, K_qx, K_xx = _cross_kernels(q1_x, xt_x, radius)
    cost = .5 * (torch.sum(K_qq * torch.ger(q1_mu, q1_mu))
                 + torch.sum(K_xx * torch.ger(xt_mu, xt_mu))
                 - 2 * torch.sum(K_qx * torch.ger(q1_mu, xt_mu)))

    # Info = the 2D graph of the blurred distance function
    # Increase res if you want to get nice smooth pictures...
    res = 10
    ticks = np.linspace(0, 1, res + 1)[:-1] + 1 / (2 * res)
    X, Y = np.meshgrid(ticks, ticks)
    points = Variable(torch.from_numpy(np.vstack((X.ravel(), Y.ravel())).T).type(dtype), requires_grad=False)

    info = _k(points, q1_x, radius) @ q1_mu \
        - _k(points, xt_x, radius) @ xt_mu
    return [cost, info.view((res, res))]
def forward(self, input, target):
    buffer = input.new()
    buffer.resize_as_(input).copy_(input)
    buffer[torch.eq(target, -1.)] = 0
    output = buffer.sum()

    buffer.fill_(self.margin).add_(-1, input)
    buffer.clamp_(min=0)
    buffer[torch.eq(target, 1.)] = 0
    output += buffer.sum()

    if self.size_average:
        output = output / input.nelement()

    self.save_for_backward(input, target)
    return input.new((output,))
def forward(self, input1, input2, weight, bias=None):
    self.save_for_backward(input1, input2, weight, bias)

    output = input1.new(input1.size(0), weight.size(0))
    buff = input1.new()

    # compute output scores:
    for k, w in enumerate(weight):
        torch.mm(input1, w, out=buff)
        buff.mul_(input2)
        torch.sum(buff, 1, out=output.narrow(1, k, 1))

    if bias is not None:
        output.add_(bias.expand_as(output))

    return output
def _test_btrisolve(self, cast):
    a = torch.FloatTensor((((1.3722, -0.9020),
                            (1.8849, 1.9169)),
                           ((0.7187, -1.1695),
                            (-0.0139, 1.3572)),
                           ((-1.6181, 0.7148),
                            (1.3728, 0.1319))))
    b = torch.FloatTensor(((4.02, 6.19),
                           (-1.56, 4.00),
                           (9.81, -4.09)))
    a, b = cast(a), cast(b)
    info = cast(torch.IntTensor())
    LU_data, pivots = a.btrifact(info=info)
    self.assertEqual(info.abs().sum(), 0)
    x = torch.btrisolve(b, LU_data, pivots)
    b_ = torch.bmm(a, x.unsqueeze(2)).squeeze()
    self.assertEqual(b_, b)
def test_bernoulli(self):
    t = torch.ByteTensor(10, 10)

    def isBinary(t):
        return torch.ne(t, 0).mul_(torch.ne(t, 1)).sum() == 0

    p = 0.5
    t.bernoulli_(p)
    self.assertTrue(isBinary(t))

    p = torch.rand(SIZE)
    t.bernoulli_(p)
    self.assertTrue(isBinary(t))

    q = torch.rand(5, 5)
    self.assertTrue(isBinary(q.bernoulli()))
def cost_matrix(x, y, p=2):
    "Returns the matrix of $|x_i-y_j|^p$."
    x_col = x.unsqueeze(1)
    y_lin = y.unsqueeze(0)
    c = torch.sum((torch.abs(x_col - y_lin)) ** p, 2)
    return c
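A quick usage sketch (my own example, not from the original project): with x of shape (n, d) and y of shape (m, d), the two unsqueezed views broadcast to (n, m, d), and torch.sum over the last dimension yields the (n, m) cost matrix.

import torch

x = torch.rand(4, 3)
y = torch.rand(5, 3)
C = cost_matrix(x, y, p=2)  # shape (4, 5); C[i, j] = sum_k |x[i, k] - y[j, k]|^2
print(C.shape)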
def gauss_log_prob(means, logstds, x):
    # log N(x; mean, std) = -(x - mean)^2 / (2 * var) - 0.5 * log(2*pi) - log(std)
    var = th.exp(2 * logstds)
    top = (-(x - means)**2)
    bottom = (2 * var)
    gp = top / bottom - 0.5 * LOG2PI - logstds
    return th.sum(gp, dim=1)
def dot_not_flat(A, B):
    """Equivalent of flattening matrices A, B and doing a vector product."""
    return sum([th.sum(a * b) for a, b in zip(A, B)])
def Frobenius(mat):
    size = mat.size()
    if len(size) == 3:  # batched matrix
        ret = (torch.sum(torch.sum((mat ** 2), 1), 2).squeeze() + 1e-10) ** 0.5
        return torch.sum(ret) / size[0]
    else:
        raise Exception('matrix for computing Frobenius norm should be with 3 dims')
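For comparison, a minimal sketch of the same quantity on a single (non-batched) matrix, using only standard torch calls; this is my own illustration, not code from the project above:

import torch

mat = torch.rand(3, 4)
frob = torch.sqrt(torch.sum(mat ** 2))  # Frobenius norm via torch.sum
print(frob, torch.norm(mat))            # torch.norm defaults to the same Frobenius norm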
def grad_variance(self):
    global_state = self._global_state
    beta = self._beta
    self._grad_var = np.array(0.0, dtype=np.float32)
    for group_id, group in enumerate(self._optimizer.param_groups):
        for p_id, p in enumerate(group['params']):
            if p.grad is None:
                continue
            grad = p.grad.data
            state = self._optimizer.state[p]
            if self._iter == 0:
                state["grad_avg"] = grad.new().resize_as_(grad).zero_()
                state["grad_avg_squared"] = 0.0
            state["grad_avg"].mul_(beta).add_(1 - beta, grad)
            self._grad_var += torch.sum(state["grad_avg"] * state["grad_avg"])

    if self._zero_debias:
        debias_factor = self.zero_debias_factor()
    else:
        debias_factor = 1.0

    self._grad_var /= -(debias_factor**2)
    self._grad_var += global_state['grad_norm_squared_avg'] / debias_factor
    # in case of negative variance: the two terms use different debias factors
    self._grad_var = max(self._grad_var, eps)
    if self._sparsity_debias:
        self._grad_var *= self._sparsity_avg
    return
def attention_mul(rnn_outputs, att_weights):
    attn_vectors = None
    for i in range(rnn_outputs.size(0)):
        h_i = rnn_outputs[i]
        a_i = att_weights[i].unsqueeze(1).expand_as(h_i)
        h_i = a_i * h_i
        h_i = h_i.unsqueeze(0)
        if attn_vectors is None:
            attn_vectors = h_i
        else:
            attn_vectors = torch.cat((attn_vectors, h_i), 0)
    return torch.sum(attn_vectors, 0)
def test_accuracy_mini_batch(tokens, features, labels, word_attn, sent_attn):
    y_pred = get_predictions(tokens, features, word_attn, sent_attn)
    y_pred = torch.gt(y_pred, 0.5)
    correct = np.ndarray.flatten(y_pred.data.cpu().numpy())
    labels = torch.gt(labels, 0.5)
    labels = np.ndarray.flatten(labels.data.cpu().numpy())
    num_correct = sum(correct == labels)
    return float(num_correct) / len(correct)
def weights_normal_init(model, dev=0.01):
    if isinstance(model, list):
        for m in model:
            weights_normal_init(m, dev)
    else:
        for m in model.modules():
            if isinstance(m, nn.Conv2d):
                # print torch.sum(m.weight)
                m.weight.data.normal_(0.0, dev)
                if m.bias is not None:
                    m.bias.data.fill_(0.0)
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0.0, dev)
def pretrain(self, x, pt_epochs, verbose=True):
    n = x.data.size()[0]
    num_batches = n / self.batch_size
    t = x

    # Pre-train 1 autoencoder at a time
    for i, ae_re in enumerate(self.autoencoders_ref):
        # Get the current autoencoder
        ae = getattr(self.sequential, ae_re)

        # Getting encoded output from the previous autoencoder
        if i > 0:
            # Set the requires_grad to False so that backprop doesn't
            # travel all the way back to the previous autoencoder
            temp = Variable(torch.FloatTensor(n, ae.d_in), requires_grad=False)
            for k in range(num_batches):
                start, end = k * self.batch_size, (k + 1) * self.batch_size
                prev_ae = getattr(self.sequential, self.autoencoders_ref[i - 1])
                temp.data[start:end] = prev_ae.encode(t[start:end], add_noise=False).data
            t = temp
        optimizer = SGD(ae.parameters(), lr=self.pre_lr)

        # Pre-training
        print("Pre-training Autoencoder:", i)
        for ep in range(pt_epochs):
            agg_cost = 0.
            for k in range(num_batches):
                start, end = k * self.batch_size, (k + 1) * self.batch_size
                bt = t[start:end]
                optimizer.zero_grad()
                z = ae.encode(bt, add_noise=True)
                z = ae.decode(z)
                loss = -torch.sum(bt * torch.log(z) + (1.0 - bt) * torch.log(1.0 - z), 1)
                cost = torch.mean(loss)
                cost.backward()
                optimizer.step()
                agg_cost += cost
            agg_cost /= num_batches
            if verbose:
                print("Pre-training Autoencoder:", i, "Epoch:", ep, "Cost:", agg_cost.data[0])
def forward(self, input1, input2, y):
    _output = input1.clone()
    _output.add_(-1, input2)
    _output.mul_(-1).mul_(y)
    _output.add_(self.margin)
    _output.cmax_(0)

    output = _output.sum()

    if self.size_average:
        output = output / y.size(0)

    self.save_for_backward(input1, input2, y)
    return input1.new((output,))
def updateOutput(self, input):
    assert input.dim() == 2
    input_size = input.size()

    self._output = self._output or input.new()
    self.norm = self.norm or input.new()
    self.buffer = self.buffer or input.new()

    self._output.resize_as_(input)

    # specialization for the infinity norm
    if self.p == float('inf'):
        if not self._indices:
            self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \
                else torch.LongTensor()

        torch.abs(self.buffer, input)
        torch.max(self.norm, self._indices, self.buffer, 1)
        self.norm.add_(self.eps)
    else:
        self.normp = self.normp or input.new()
        if self.p % 2 != 0:
            torch.abs(self.buffer, input).pow_(self.p)
        else:
            torch.pow(self.buffer, input, self.p)

        torch.sum(self.normp, self.buffer, 1).add_(self.eps)
        torch.pow(self.norm, self.normp, 1. / self.p)

    torch.div(self._output, input, self.norm.view(-1, 1).expand_as(input))

    self.output = self._output.view(input_size)
    return self.output
def updateGradInput(self, input, gradOutput):
    self.gradInput.resize_as_(input).zero_()
    size = list(input.size())
    size.insert(self.dim, 1)
    gradInput = self.gradInput.view(*size)
    torch.sum(gradInput, gradOutput, self.dim)
    return self.gradInput
def accGradParameters(self, input, gradOutput, scale=1):
    if self._input is None:
        self._input = input.new()
        self._gradWeight = input.new()
        self._sum = input.new()

    batchSize = input.size(0)
    contiguousView(self._input, input, batchSize, -1)
    contiguousView(self._gradOutput, gradOutput, batchSize, -1)
    self._gradWeight = self.gradWeight.view(1, -1)

    torch.mul(self._repeat, self._input, self._gradOutput)
    torch.sum(self._sum, self._repeat, 0)
    self._gradWeight.add_(scale, self._sum)
def updateOutput(self, input, target):
    # - log(input) * target - log(1 - input) * (1 - target)
    if input.nelement() != target.nelement():
        raise RuntimeError("input and target size mismatch")

    self.buffer = self.buffer or input.new()

    buffer = self.buffer
    weights = self.weights

    buffer.resize_as_(input)

    if weights is not None and target.dim() != 1:
        weights = self.weights.view(1, target.size(1)).expand_as(target)

    # log(input) * target
    torch.add(buffer, input, self.eps).log_()
    if weights is not None:
        buffer.mul_(weights)

    output = torch.dot(target, buffer)

    # log(1 - input) * (1 - target)
    torch.mul(buffer, input, -1).add_(1 + self.eps).log_()
    if weights is not None:
        buffer.mul_(weights)

    output = output + torch.sum(buffer)
    output = output - torch.dot(target, buffer)

    if self.sizeAverage:
        output = output / input.nelement()

    self.output = - output

    return self.output