The following 50 code examples, extracted from open-source Python projects, illustrate how to use torch.log().
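For orientation, here is a minimal sketch (not taken from any of the projects below) of what torch.log() does: it applies the natural logarithm elementwise, and in practice it is usually combined with a small epsilon or clamping so that zero-valued inputs do not produce -inf, a pattern that recurs throughout the examples below.

import torch

x = torch.tensor([0.5, 1.0, 2.0])
y = torch.log(x)                      # elementwise natural logarithm: [-0.6931, 0.0, 0.6931]
eps = 1e-8                            # small constant to avoid log(0), as in many examples below
y_safe = torch.log(x.clamp(min=eps))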
def train_ae(self, train_X, optimizer, epochs, verbose=True):
    N = train_X.data.size()[0]
    num_batches = N // self.batch_size  # integer number of batches
    for e in range(epochs):
        agg_cost = 0.
        for k in range(num_batches):
            start, end = k * self.batch_size, (k + 1) * self.batch_size
            bX = train_X[start:end]
            optimizer.zero_grad()
            Z = self.forward(bX)
            Z = self.decode(Z)
            # binary cross-entropy reconstruction loss
            loss = -torch.sum(bX * torch.log(Z) +
                              (1.0 - bX) * torch.log(1.0 - Z), 1)
            cost = torch.mean(loss)
            cost.backward()
            optimizer.step()
            agg_cost += cost
        agg_cost /= num_batches
        if verbose:
            print("Epoch:", e, "cost:", agg_cost.data[0])
def encode(matched, priors, variances):
    """Encode the variances from the priorbox layers into the ground truth boxes
    we have matched (based on jaccard overlap) with the prior boxes.
    Args:
        matched: (tensor) Coords of ground truth for each prior in point-form
            Shape: [num_priors, 4].
        priors: (tensor) Prior boxes in center-offset form
            Shape: [num_priors, 4].
        variances: (list[float]) Variances of priorboxes
    Return:
        encoded boxes (tensor), Shape: [num_priors, 4]
    """
    # dist b/t match center and prior's center
    g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2]
    # encode variance
    g_cxcy /= (variances[0] * priors[:, 2:])
    # match wh / prior wh
    g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
    g_wh = torch.log(g_wh) / variances[1]
    # return target for smooth_l1_loss
    return torch.cat([g_cxcy, g_wh], 1)  # [num_priors, 4]


# Adapted from https://github.com/Hakuyume/chainer-ssd
def _gaussian(self, enc_output):
    def latent_loss(mu, sigma):
        pow_mu = mu * mu
        pow_sigma = sigma * sigma
        return 0.5 * torch.mean(pow_mu + pow_sigma - torch.log(pow_sigma) - 1)

    mu = self._enc_mu(enc_output)
    sigma = torch.exp(.5 * self._enc_log_sigma(enc_output))
    self.latent_loss = latent_loss(mu, sigma)

    weight = next(self.parameters()).data
    std_z = Variable(weight.new(*sigma.size()), requires_grad=False)
    std_z.data.copy_(torch.from_numpy(
        np.random.normal(size=sigma.size())))

    return mu + sigma * std_z
def setUp(self):
    # normal-normal; known covariance
    self.lam0 = Variable(torch.Tensor([0.1, 0.1]))  # precision of prior
    self.mu0 = Variable(torch.Tensor([0.0, 0.5]))   # prior mean
    # known precision of observation noise
    self.lam = Variable(torch.Tensor([6.0, 4.0]))
    self.data = []
    self.data.append(Variable(torch.Tensor([-0.1, 0.3])))
    self.data.append(Variable(torch.Tensor([0.00, 0.4])))
    self.data.append(Variable(torch.Tensor([0.20, 0.5])))
    self.data.append(Variable(torch.Tensor([0.10, 0.7])))
    self.n_data = Variable(torch.Tensor([len(self.data)]))
    self.sum_data = self.data[0] + \
        self.data[1] + self.data[2] + self.data[3]
    self.analytic_lam_n = self.lam0 + \
        self.n_data.expand_as(self.lam) * self.lam
    self.analytic_log_sig_n = -0.5 * torch.log(self.analytic_lam_n)
    self.analytic_mu_n = self.sum_data * (self.lam / self.analytic_lam_n) + \
        self.mu0 * (self.lam0 / self.analytic_lam_n)
    self.batch_size = 4
def setUp(self):
    # poisson-gamma model
    # gamma prior hyperparameter
    self.alpha0 = Variable(torch.Tensor([1.0]))
    # gamma prior hyperparameter
    self.beta0 = Variable(torch.Tensor([1.0]))
    self.data = []
    self.data.append(Variable(torch.Tensor([1.0])))
    self.data.append(Variable(torch.Tensor([2.0])))
    self.data.append(Variable(torch.Tensor([3.0])))
    self.n_data = len(self.data)
    sum_data = self.data[0] + self.data[1] + self.data[2]
    self.alpha_n = self.alpha0 + sum_data  # posterior alpha
    self.beta_n = self.beta0 + \
        Variable(torch.Tensor([self.n_data]))  # posterior beta
    self.log_alpha_n = torch.log(self.alpha_n)
    self.log_beta_n = torch.log(self.beta_n)
def setUp(self):
    # bernoulli-beta model
    # beta prior hyperparameter
    self.alpha0 = Variable(torch.Tensor([1.0]))
    self.beta0 = Variable(torch.Tensor([1.0]))  # beta prior hyperparameter
    self.data = []
    self.data.append(Variable(torch.Tensor([0.0])))
    self.data.append(Variable(torch.Tensor([1.0])))
    self.data.append(Variable(torch.Tensor([1.0])))
    self.data.append(Variable(torch.Tensor([1.0])))
    self.n_data = len(self.data)
    self.batch_size = None
    data_sum = self.data[0] + self.data[1] + self.data[2] + self.data[3]
    self.alpha_n = self.alpha0 + data_sum  # posterior alpha
    self.beta_n = self.beta0 - data_sum + \
        Variable(torch.Tensor([self.n_data]))  # posterior beta
    self.log_alpha_n = torch.log(self.alpha_n)
    self.log_beta_n = torch.log(self.beta_n)
def setUp(self):
    # lognormal-normal model
    # putting some of the parameters inside of a torch module to
    # make sure that functionality is ok (XXX: do this somewhere else in the future)
    self.mu0 = Variable(torch.Tensor([1.0]))  # normal prior hyperparameter
    # normal prior hyperparameter
    self.tau0 = Variable(torch.Tensor([1.0]))
    # known precision for observation likelihood
    self.tau = Variable(torch.Tensor([2.5]))
    self.n_data = 2
    self.data = Variable(torch.Tensor([[1.5], [2.2]]))  # two observations
    self.tau_n = self.tau0 + \
        Variable(torch.Tensor([self.n_data])) * self.tau  # posterior tau
    mu_numerator = self.mu0 * self.tau0 + \
        self.tau * torch.sum(torch.log(self.data))
    self.mu_n = mu_numerator / self.tau_n  # posterior mu
    self.log_mu_n = torch.log(self.mu_n)
    self.log_tau_n = torch.log(self.tau_n)
def setUp(self):
    # normal-normal; known covariance
    self.lam0 = Variable(torch.Tensor([0.1, 0.1]))  # precision of prior
    self.mu0 = Variable(torch.Tensor([0.0, 0.5]))   # prior mean
    # known precision of observation noise
    self.lam = Variable(torch.Tensor([6.0, 4.0]))
    self.data = []
    self.data.append(Variable(torch.Tensor([-0.1, 0.3])))
    self.data.append(Variable(torch.Tensor([0.00, 0.4])))
    self.data.append(Variable(torch.Tensor([0.20, 0.5])))
    self.data.append(Variable(torch.Tensor([0.10, 0.7])))
    self.n_data = Variable(torch.Tensor([len(self.data)]))
    self.sum_data = self.data[0] + \
        self.data[1] + self.data[2] + self.data[3]
    self.analytic_lam_n = self.lam0 + \
        self.n_data.expand_as(self.lam) * self.lam
    self.analytic_log_sig_n = -0.5 * torch.log(self.analytic_lam_n)
    self.analytic_mu_n = self.sum_data * (self.lam / self.analytic_lam_n) + \
        self.mu0 * (self.lam0 / self.analytic_lam_n)
    self.verbose = True
def setUp(self):
    # normal-normal-normal; known covariance
    self.lam0 = Variable(torch.Tensor([0.1, 0.1]))  # precision of prior
    self.mu0 = Variable(torch.Tensor([0.0, 0.5]))   # prior mean
    # known precision of observation noise
    self.lam = Variable(torch.Tensor([6.0, 4.0]))
    self.data = []
    self.data.append(Variable(torch.Tensor([-0.1, 0.3])))
    self.data.append(Variable(torch.Tensor([0.00, 0.4])))
    self.data.append(Variable(torch.Tensor([0.20, 0.5])))
    self.data.append(Variable(torch.Tensor([0.10, 0.7])))
    self.n_data = Variable(torch.Tensor([len(self.data)]))
    self.sum_data = self.data[0] + \
        self.data[1] + self.data[2] + self.data[3]
    self.analytic_lam_n = self.lam0 + \
        self.n_data.expand_as(self.lam) * self.lam
    self.analytic_log_sig_n = -0.5 * torch.log(self.analytic_lam_n)
    self.analytic_mu_n = self.sum_data * (self.lam / self.analytic_lam_n) + \
        self.mu0 * (self.lam0 / self.analytic_lam_n)
    self.verbose = True
def setUp(self):
    # bernoulli-beta model
    # beta prior hyperparameter
    self.alpha0 = Variable(torch.Tensor([1.0]))
    self.beta0 = Variable(torch.Tensor([1.0]))  # beta prior hyperparameter
    self.data = []
    self.data.append(Variable(torch.Tensor([0.0])))
    self.data.append(Variable(torch.Tensor([1.0])))
    self.data.append(Variable(torch.Tensor([1.0])))
    self.data.append(Variable(torch.Tensor([1.0])))
    self.n_data = len(self.data)
    data_sum = self.data[0] + self.data[1] + self.data[2] + self.data[3]
    self.alpha_n = self.alpha0 + data_sum  # posterior alpha
    self.beta_n = self.beta0 - data_sum + \
        Variable(torch.Tensor([self.n_data]))  # posterior beta
    self.log_alpha_n = torch.log(self.alpha_n)
    self.log_beta_n = torch.log(self.beta_n)
    self.verbose = True
def setUp(self):
    # lognormal-normal model
    # putting some of the parameters inside of a torch module to
    # make sure that functionality is ok (XXX: do this somewhere else in the future)
    self.mu0 = Variable(torch.Tensor([1.0]))  # normal prior hyperparameter
    # normal prior hyperparameter
    self.tau0 = Variable(torch.Tensor([1.0]))
    # known precision for observation likelihood
    self.tau = Variable(torch.Tensor([2.5]))
    self.n_data = 2
    self.data = Variable(torch.Tensor([[1.5], [2.2]]))  # two observations
    self.tau_n = self.tau0 + \
        Variable(torch.Tensor([self.n_data])) * self.tau  # posterior tau
    mu_numerator = self.mu0 * self.tau0 + \
        self.tau * torch.sum(torch.log(self.data))
    self.mu_n = mu_numerator / self.tau_n  # posterior mu
    self.log_mu_n = torch.log(self.mu_n)
    self.log_tau_n = torch.log(self.tau_n)
    self.verbose = True
def setUp(self):
    # normal-normal; known covariance
    self.lam0 = Variable(torch.Tensor([0.1, 0.1]))  # precision of prior
    self.mu0 = Variable(torch.Tensor([0.0, 0.5]))   # prior mean
    # known precision of observation noise
    self.lam = Variable(torch.Tensor([6.0, 4.0]))
    self.n_outer = 3
    self.n_inner = 3
    self.n_data = Variable(torch.Tensor([self.n_outer * self.n_inner]))
    self.data = []
    self.sum_data = ng_zeros(2)
    for _out in range(self.n_outer):
        data_in = []
        for _in in range(self.n_inner):
            data_in.append(Variable(torch.Tensor([-0.1, 0.3]) +
                                    torch.randn(2) / torch.sqrt(self.lam.data)))
            self.sum_data += data_in[-1]
        self.data.append(data_in)
    self.analytic_lam_n = self.lam0 + self.n_data.expand_as(self.lam) * self.lam
    self.analytic_log_sig_n = -0.5 * torch.log(self.analytic_lam_n)
    self.analytic_mu_n = self.sum_data * (self.lam / self.analytic_lam_n) + \
        self.mu0 * (self.lam0 / self.analytic_lam_n)
    self.verbose = True

# this tests rao-blackwellization in elbo for nested list map_datas
def batch_log_pdf(self, x):
    """
    Diagonal Normal log-likelihood
    Ref: :py:meth:`pyro.distributions.distribution.Distribution.batch_log_pdf`
    """
    # expand to patch size of input
    mu = self.mu.expand(self.shape(x))
    sigma = self.sigma.expand(self.shape(x))
    log_pxs = -1 * (torch.log(sigma) + 0.5 * np.log(2.0 * np.pi) +
                    0.5 * torch.pow((x - mu) / sigma, 2))
    # XXX this allows for the user to mask out certain parts of the score, for example
    # when the data is a ragged tensor. also useful for KL annealing. this entire logic
    # will likely be done in a better/cleaner way in the future
    if self.log_pdf_mask is not None:
        log_pxs = log_pxs * self.log_pdf_mask
    batch_log_pdf = torch.sum(log_pxs, -1)
    batch_log_pdf_shape = self.batch_shape(x) + (1,)
    return batch_log_pdf.contiguous().view(batch_log_pdf_shape)
def log_pdf(self, y, *args, **kwargs):
    """
    :param y: a value sampled from the transformed distribution
    :type y: torch.autograd.Variable

    :returns: the score (the log pdf) of y
    :rtype: torch.autograd.Variable

    Scores the sample by inverting the bijector(s) and computing the score
    using the score of the base distribution and the log det jacobian
    """
    inverses = []
    next_to_invert = y
    for bijector in reversed(self.bijectors):
        inverse = bijector.inverse(next_to_invert)
        inverses.append(inverse)
        next_to_invert = inverse
    log_pdf_base = self.base_dist.log_pdf(inverses[-1], *args, **kwargs)
    log_det_jacobian = self.bijectors[-1].log_det_jacobian(y, *args, **kwargs)
    for bijector, inverse in zip(list(reversed(self.bijectors))[1:], inverses[:-1]):
        log_det_jacobian += bijector.log_det_jacobian(inverse, *args, **kwargs)
    return log_pdf_base - log_det_jacobian
def log_gamma(xx):
    if isinstance(xx, torch.Tensor):
        xx = Variable(xx)
    ttype = xx.data.type()
    gamma_coeff = [
        76.18009172947146,
        -86.50532032941677,
        24.01409824083091,
        -1.231739572450155,
        0.1208650973866179e-2,
        -0.5395239384953e-5,
    ]
    magic1 = 1.000000000190015
    magic2 = 2.5066282746310005
    x = xx - 1.0
    t = x + 5.5
    t = t - (x + 0.5) * torch.log(t)
    ser = Variable(torch.ones(x.size()).type(ttype)) * magic1
    for c in gamma_coeff:
        x = x + 1.0
        ser = ser + torch.pow(x / c, -1)
    return torch.log(ser * magic2) - t
def log_beta(t):
    """
    Computes log Beta function.

    :param t:
    :type t: torch.autograd.Variable of dimension 1 or 2
    :rtype: torch.autograd.Variable of float (if t.dim() == 1)
            or torch.Tensor (if t.dim() == 2)
    """
    assert t.dim() in (1, 2)
    if t.dim() == 1:
        numer = torch.sum(log_gamma(t))
        denom = log_gamma(torch.sum(t))
    else:
        numer = torch.sum(log_gamma(t), 1)
        denom = log_gamma(torch.sum(t, 1))
    return numer - denom
def batch_log_pdf(self, x):
    """
    Evaluates log probability density over one or a batch of samples.

    Each of alpha and x can be either a single value or a batch of values batched
    along dimension 0. If they are both batches, their batch sizes must agree.
    In any case, the rightmost size must agree.

    :param torch.autograd.Variable x: A value (if x.dim() == 1) or a batch of values
        (if x.dim() == 2).
    :param alpha: A vector of concentration parameters.
    :type alpha: torch.autograd.Variable or None.
    :return: log probability densities of each element in the batch.
    :rtype: torch.autograd.Variable of torch.Tensor of dimension 1.
    """
    alpha = self.alpha.expand(self.shape(x))
    x_sum = torch.sum(torch.mul(alpha - 1, torch.log(x)), -1)
    beta = log_beta(alpha)
    batch_log_pdf_shape = self.batch_shape(x) + (1,)
    return (x_sum - beta).contiguous().view(batch_log_pdf_shape)
def test_forward_backward(self):
    import torch
    import torch.nn.functional as F
    from torch.autograd import Variable
    from reid.loss import OIMLoss
    criterion = OIMLoss(3, 3, scalar=1.0, size_average=False)
    criterion.lut = torch.eye(3)
    x = Variable(torch.randn(3, 3), requires_grad=True)
    y = Variable(torch.range(0, 2).long())
    loss = criterion(x, y)
    loss.backward()
    probs = F.softmax(x)
    grads = probs.data - torch.eye(3)
    abs_diff = torch.abs(grads - x.grad.data)
    self.assertEquals(torch.log(probs).diag().sum(), -loss)
    self.assertTrue(torch.max(abs_diff) < 1e-6)
def _forward_alg(self, feats):
    # calculate in log domain
    # feats is len(sentence) * tagset_size
    # initialize alpha with a Tensor with values all equal to -10000.
    init_alphas = torch.Tensor(1, self.tagset_size).fill_(-10000.)
    init_alphas[0][self.tag_to_ix[START_TAG]] = 0.
    forward_var = autograd.Variable(init_alphas)
    if self.use_gpu:
        forward_var = forward_var.cuda()
    for feat in feats:
        emit_score = feat.view(-1, 1)
        tag_var = forward_var + self.transitions + emit_score
        max_tag_var, _ = torch.max(tag_var, dim=1)
        tag_var = tag_var - max_tag_var.view(-1, 1)
        forward_var = max_tag_var + \
            torch.log(torch.sum(torch.exp(tag_var), dim=1)).view(1, -1)
    terminal_var = (forward_var + self.transitions[self.tag_to_ix[STOP_TAG]]).view(1, -1)
    alpha = log_sum_exp(terminal_var)
    # Z(x)
    return alpha
def __init__(self, traces, sort=True):
    self.batch = traces
    self.length = len(traces)
    self.traces_lengths = []
    self.traces_max_length = 0
    self.observes_max_length = 0
    sb = {}
    for trace in traces:
        if trace.length is None:
            util.logger.log('Batch: Received a trace of length zero.')
        if trace.length > self.traces_max_length:
            self.traces_max_length = trace.length
        if trace.observes_tensor.size(0) > self.observes_max_length:
            self.observes_max_length = trace.observes_tensor.size(0)
        h = hash(trace.addresses_suffixed())
        if h not in sb:
            sb[h] = []
        sb[h].append(trace)
    self.sub_batches = []
    for _, t in sb.items():
        self.sub_batches.append(t)
    if sort:
        # Sort the batch in decreasing trace length.
        self.batch = sorted(self.batch, reverse=True, key=lambda t: t.length)
    self.traces_lengths = [t.length for t in self.batch]
def loss(self, x, samples):
    _, proposal_output = self.forward(x, samples)
    batch_size = len(samples)
    means = proposal_output[:, 0]
    stds = proposal_output[:, 1]
    two_std_squares = 2 * stds * stds + util.epsilon
    two_pi_std_squares = math.pi * two_std_squares
    half_log_two_pi_std_squares = 0.5 * torch.log(two_pi_std_squares + util.epsilon)
    l = 0
    for b in range(batch_size):
        value = samples[b].value[0]
        mean = means[b]
        two_std_square = two_std_squares[b]
        half_log_two_pi_std_square = half_log_two_pi_std_squares[b]
        l += half_log_two_pi_std_square + ((value - mean) ** 2) / two_std_square
    return l
def loss(self, x, samples):
    _, proposal_output = self.forward(x, samples)
    prior_mins = Variable(util.Tensor([s.distribution.prior_min for s in samples]), requires_grad=False)
    prior_maxs = Variable(util.Tensor([s.distribution.prior_max for s in samples]), requires_grad=False)
    batch_size = len(samples)
    modes = (proposal_output[:, 0] - prior_mins) / (prior_maxs - prior_mins)
    certainties = proposal_output[:, 1] + 2
    alphas = modes * (certainties - 2) + 1
    betas = (1 - modes) * (certainties - 2) + 1
    beta_funs = util.beta(alphas, betas)
    l = 0
    for b in range(batch_size):
        value = samples[b].value[0]
        prior_min = samples[b].distribution.prior_min
        prior_max = samples[b].distribution.prior_max
        normalized_value = (value - prior_min) / (prior_max - prior_min)
        alpha = alphas[b]
        beta = betas[b]
        beta_fun = beta_funs[b]
        l -= ((alpha - 1) * np.log(normalized_value + util.epsilon)
              + (beta - 1) * np.log(1 - normalized_value + util.epsilon)
              - torch.log(beta_fun + util.epsilon)
              - np.log(prior_max - prior_min + util.epsilon))
    return l
def loss(self, x, samples):
    _, proposal_output = self.forward(x, samples)
    batch_size = len(samples)
    means = proposal_output[:, 0:self.mixture_components]
    stds = proposal_output[:, self.mixture_components:2 * self.mixture_components]
    coeffs = proposal_output[:, 2 * self.mixture_components:3 * self.mixture_components]
    l = 0
    for b in range(batch_size):
        value = samples[b].value[0]
        prior_min = samples[b].distribution.prior_min
        prior_max = samples[b].distribution.prior_max
        ll = 0
        for c in range(self.mixture_components):
            mean = means[b, c]
            std = stds[b, c]
            coeff = coeffs[b, c]
            xi = (value - mean) / std
            phi_min = 0.5 * (1 + util.erf(((prior_min - mean) / std) * util.one_over_sqrt_two))
            phi_max = 0.5 * (1 + util.erf(((prior_max - mean) / std) * util.one_over_sqrt_two))
            ll += coeff * util.one_over_sqrt_two_pi * torch.exp(-0.5 * xi * xi) / (std * (phi_max - phi_min))
        l -= torch.log(ll + util.epsilon)
    return l
def loss(self, x, samples):
    # FoldedNormal logpdf
    # https://en.wikipedia.org/wiki/Folded_normal_distribution
    _, proposal_output = self.forward(x, samples)
    batch_size = len(samples)
    locations = proposal_output[:, 0]
    scales = proposal_output[:, 1]
    two_scales = 2 * scales + util.epsilon
    half_log_two_pi_scales = 0.5 * torch.log(math.pi * two_scales + util.epsilon)
    l = 0
    for b in range(batch_size):
        value = samples[b].value[0]
        if value < 0:
            l -= 0
        else:
            location = locations[b]
            two_scale = two_scales[b]
            half_log_two_pi_scale = half_log_two_pi_scales[b]
            logpdf_1 = -half_log_two_pi_scale - ((value - location) ** 2) / two_scale
            logpdf_2 = -half_log_two_pi_scale - ((value + location) ** 2) / two_scale
            l -= util.logsumexp(torch.cat([logpdf_1, logpdf_2]))
    return l
def loss(self, x, samples):
    _, proposal_output = self.forward(x, samples)
    batch_size = len(samples)
    modes = proposal_output[:, 0]
    certainties = proposal_output[:, 1] + 2
    alphas = modes * (certainties - 2) + 1
    betas = (1 - modes) * (certainties - 2) + 1
    beta_funs = util.beta(alphas, betas)
    l = 0
    for b in range(batch_size):
        value = samples[b].value[0]
        alpha = alphas[b]
        beta = betas[b]
        beta_fun = beta_funs[b]
        l -= ((alpha - 1) * np.log(value + util.epsilon)
              + (beta - 1) * np.log(1 - value + util.epsilon)
              - torch.log(beta_fun + util.epsilon))
    return l
def __init__(self, input_example_non_batch, output_dim, reshape=None, dropout=0):
    super(ObserveEmbeddingCNN2D6C, self).__init__()
    self.reshape = reshape
    if self.reshape is not None:
        input_example_non_batch = input_example_non_batch.view(self.reshape)
        self.reshape.insert(0, -1)  # For correct handling of the batch dimension in self.forward
    if input_example_non_batch.dim() == 2:
        self.input_sample = input_example_non_batch.unsqueeze(0).cpu()
    elif input_example_non_batch.dim() == 3:
        self.input_sample = input_example_non_batch.cpu()
    else:
        util.logger.log('ObserveEmbeddingCNN2D6C: Expecting a 3d input_example_non_batch (num_channels x height x width) or a 2d input_example_non_batch (height x width). Received: {0}'.format(input_example_non_batch.size()))
    self.input_channels = self.input_sample.size(0)
    self.output_dim = output_dim
    self.conv1 = nn.Conv2d(self.input_channels, 64, 3)
    self.conv2 = nn.Conv2d(64, 64, 3)
    self.conv3 = nn.Conv2d(64, 128, 3)
    self.conv4 = nn.Conv2d(128, 128, 3)
    self.conv5 = nn.Conv2d(128, 128, 3)
    self.conv6 = nn.Conv2d(128, 128, 3)
    self.drop = nn.Dropout(dropout)
def __init__(self, input_example_non_batch, output_dim, reshape=None, dropout=0):
    super(ObserveEmbeddingCNN3D4C, self).__init__()
    self.reshape = reshape
    if self.reshape is not None:
        input_example_non_batch = input_example_non_batch.view(self.reshape)
        self.reshape.insert(0, -1)  # For correct handling of the batch dimension in self.forward
    if input_example_non_batch.dim() == 3:
        self.input_sample = input_example_non_batch.unsqueeze(0).cpu()
    elif input_example_non_batch.dim() == 4:
        self.input_sample = input_example_non_batch.cpu()
    else:
        util.logger.log('ObserveEmbeddingCNN3D4C: Expecting a 4d input_example_non_batch (num_channels x depth x height x width) or a 3d input_example_non_batch (depth x height x width). Received: {0}'.format(input_example_non_batch.size()))
    self.input_channels = self.input_sample.size(0)
    self.output_dim = output_dim
    self.conv1 = nn.Conv3d(self.input_channels, 64, 3)
    self.conv2 = nn.Conv3d(64, 64, 3)
    self.conv3 = nn.Conv3d(64, 128, 3)
    self.conv4 = nn.Conv3d(128, 128, 3)
    self.drop = nn.Dropout(dropout)
def set_observe_embedding(self, example_observes, obs_emb, obs_emb_dim, obs_reshape=None):
    self.obs_emb = obs_emb
    self.obs_emb_dim = obs_emb_dim
    if obs_emb == 'fc':
        observe_layer = ObserveEmbeddingFC(Variable(example_observes), obs_emb_dim, dropout=self.dropout)
    elif obs_emb == 'cnn1d2c':
        observe_layer = ObserveEmbeddingCNN1D2C(Variable(example_observes), obs_emb_dim, dropout=self.dropout)
        observe_layer.configure()
    elif obs_emb == 'cnn2d6c':
        observe_layer = ObserveEmbeddingCNN2D6C(Variable(example_observes), obs_emb_dim, obs_reshape, dropout=self.dropout)
        observe_layer.configure()
    elif obs_emb == 'cnn3d4c':
        observe_layer = ObserveEmbeddingCNN3D4C(Variable(example_observes), obs_emb_dim, obs_reshape, dropout=self.dropout)
        observe_layer.configure()
    elif obs_emb == 'lstm':
        observe_layer = ObserveEmbeddingLSTM(Variable(example_observes), obs_emb_dim, dropout=self.dropout)
    else:
        util.logger.log('set_observe_embedding: Unsupported observation embedding: ' + obs_emb)
    self.observe_layer = observe_layer
def KLDGaussian(Q, N, eps=1e-8):
    """KL Divergence between two Gaussians
    Assuming Q ~ N(mu0, A\sigma_0A') where A = I + vr^{T}
    and      N ~ N(mu1, \sigma_1)
    """
    sum = lambda x: torch.sum(x, dim=1)
    k = float(Q.mu.size()[1])  # dimension of distribution
    mu0, v, r, mu1 = Q.mu, Q.v, Q.r, N.mu
    s02, s12 = (Q.sigma).pow(2) + eps, (N.sigma).pow(2) + eps
    a = sum(s02 * (1. + 2. * v * r) / s12) + sum(v.pow(2) / s12) * sum(r.pow(2) * s02)  # trace term
    b = sum((mu1 - mu0).pow(2) / s12)  # difference-of-means term
    c = 2. * (sum(N.logsigma - Q.logsigma) - torch.log(1. + sum(v * r) + eps))  # ratio-of-determinants term

    # print('trace: %s' % a)
    # print('mu_diff: %s' % b)
    # print('k: %s' % k)
    # print('det: %s' % c)

    return 0.5 * (a + b - k + c)
def _boxes2delta(self, box, anchor):
    """
    box: (x_min, y_min, x_max, y_max)
    anchor: (cx, cy, w, h)
    """
    # change (x_min, y_min, x_max, y_max) to (cx, cy, w, h)
    box_wh = box.clone()
    box_wh[:2] = (box[:2] + box[2:]) / 2
    box_wh[2:] = box[2:] - box[:2]
    box_wh[0::2] *= self.W
    box_wh[1::2] *= self.H
    # calc (dcx, dcy, dw, dh)
    box_delta = box.clone().fill_(0)
    box_delta[:2] = box_wh[:2] - anchor[:2]
    box_delta[2:] = torch.log(box_wh[2:] / anchor[2:])
    return box_delta
def poisson_loss(observed_ratings, predicted_ratings):
    """
    Poisson loss.

    Parameters
    ----------
    observed_ratings: tensor
        Tensor containing observed ratings.
    predicted_ratings: tensor
        Tensor containing rating predictions.

    Returns
    -------
    loss, float
        The mean value of the loss function.
    """
    assert_no_grad(observed_ratings)
    return (predicted_ratings - observed_ratings * torch.log(predicted_ratings)).mean()
def backward(self, grad_output):
    z, log_phi_z = self.saved_tensors
    log_phi_z_grad = z.new().resize_as_(z).zero_()

    z_is_small = z.lt(-1)
    z_is_not_small = 1 - z_is_small

    if z_is_small.sum() > 0:
        log_phi_z_grad[z_is_small] = torch.abs(self.denominator.div(self.numerator)).mul(math.sqrt(2 / math.pi))

    exp = z[z_is_not_small].pow(2) \
        .div(-2) \
        .sub(log_phi_z[z_is_not_small]) \
        .add(math.log(0.5))

    log_phi_z_grad[z_is_not_small] = torch.exp(exp).mul(math.sqrt(2 / math.pi))

    return log_phi_z_grad.mul(grad_output)
def logsumexp(x, dim=None):
    """
    Args:
        x: A pytorch tensor (any dimension will do)
        dim: int or None, over which to perform the summation. `None`, the
            default, performs over all axes.

    Returns: The result of the log(sum(exp(...))) operation.
    """
    if dim is None:
        xmax = x.max()
        xmax_ = x.max()
        return xmax_ + numpy.log(torch.exp(x - xmax).sum())
    else:
        xmax, _ = x.max(dim, keepdim=True)
        xmax_, _ = x.max(dim)
        return xmax_ + torch.log(torch.exp(x - xmax).sum(dim))
def bbox_transform(ex_rois, gt_rois):
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights

    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = torch.log(gt_widths / ex_widths)
    targets_dh = torch.log(gt_heights / ex_heights)

    targets = torch.stack(
        (targets_dx, targets_dy, targets_dw, targets_dh), 1)
    return targets
def forward(self, y, weights, mean, std):
    """
    Presents a maximum a-priori objective for a set of predicted means, mixture
    components, and standard deviations to model a given ground-truth 'y'.
    Modeled using negative log likelihood.

    :param y: Non-linear target.
    :param weights: Predicted mixture components.
    :param mean: Predicted mixture means.
    :param std: Predicted mixture standard deviations.
    :return:
    """
    normalization = 1.0 / ((2.0 * math.pi) ** 0.5)
    gaussian_sample = (y.expand_as(mean) - mean) * torch.reciprocal(std)
    gaussian_sample = normalization * torch.reciprocal(std) * torch.exp(-0.5 * gaussian_sample ** 2)
    return -torch.mean(torch.log(torch.sum(weights * gaussian_sample, dim=1)))
def experiments_randseeds(opt, start=0, end=5):
    random_seeds = [1, 101, 512, 1001, 10001]
    original_exp = opt.experiment
    file_name = '{0}_{1}_{2}_{3}_{4}_experiments.csv'.format(opt.dataset, opt.D, opt.A, opt.H, opt.critic_last_layer)
    csv_file = os.path.join(opt.experiment, file_name)
    with open(csv_file, 'a') as out:
        max_logprob = 0
        best_config = ''
        for i in range(start, end):
            rand_seed = random_seeds[i]
            opt.manualSeed = rand_seed
            try:
                opt.experiment = os.path.join(original_exp, '{0}_{1}_{2}_{3}_{4}_{5}'.format(opt.dataset, opt.D, opt.A, opt.H, opt.manualSeed, opt.critic_last_layer))
                if not os.path.exists(opt.experiment):
                    os.makedirs(opt.experiment)
                logprob = train(opt=opt, log_file_path=os.path.join(opt.experiment, '{0}_{1}_{2}_{3}_{4}_{5}_experiments.log'.format(opt.dataset, opt.D, opt.A, opt.H, opt.manualSeed, opt.critic_last_layer)))
                config = '{0},{1}\n'.format(rand_seed, logprob)
                if max_logprob == 0 or logprob > max_logprob:
                    max_logprob = logprob
                    best_config = config
                out.write(config)
                out.flush()
                print('best %s ' % best_config)
            except:
                traceback.print_exc()
def accumulate_gradient(self, batch_sz, states, actions, rewards, next_states, mask):
    """ Compute the difference between the return distributions of Q(s,a)
        and TQ(s_,a).
    """
    states = Variable(states)
    actions = Variable(actions)
    next_states = Variable(next_states, volatile=True)

    # Compute probabilities of Q(s,a*)
    q_probs = self.policy(states)
    actions = actions.view(batch_sz, 1, 1)
    action_mask = actions.expand(batch_sz, 1, self.atoms_no)
    qa_probs = q_probs.gather(1, action_mask).squeeze()

    # Compute distribution of Q(s_,a)
    target_qa_probs = self._get_categorical(next_states, rewards, mask)

    # Compute the cross-entropy of phi(TZ(x_,a)) || Z(x,a)
    qa_probs.data.clamp_(0.01, 0.99)  # Tudor's trick for avoiding nans
    loss = -torch.sum(target_qa_probs * torch.log(qa_probs))

    # Accumulate gradients
    loss.backward()
def logsumexp(x, axis=None, keepdims=False):
    def _logsumexp(x, axis=axis, keepdims=keepdims):
        y = torch.log(torch.sum(torch.exp(x), axis))
        return y if keepdims else torch.squeeze(y, axis)

    def _compute_output_shape(x, axis=axis, keepdims=keepdims):
        if axis is None:
            return ()
        shape = list(_get_shape(x))
        if keepdims:
            shape[axis] = 1
        else:
            del shape[axis]
        return tuple(shape)

    return get_op(_logsumexp, output_shape=_compute_output_shape, arguments=[axis, keepdims])(x)
def compute_loss(self, input, e, b, clusters, it=0):
    Loss = Variable(torch.zeros((self.batch_size))).type(dtype)
    Ls = Variable(torch.zeros((self.batch_size))).type(dtype)
    for cl in range(clusters // 2):
        L, m1, m2 = self.compute_diameter(input, e, cl, it=it)
        mask = ((e / 2).type(dtype_l) == cl).type(dtype)
        # print('mask', mask[0])
        n = mask.sum(1).squeeze()
        n += (n == 0).type(dtype)
        # print('mask', mask[0])
        log_probs = torch.log((1 - b) * m1 + b * m2 + (1 - mask) + 1e-8)
        Loss += L * log_probs.sum(1) / n
        Ls += L
    Ls = Ls.mean(0)
    Loss = Loss.mean(0)
    return Loss, Ls

###########################################################################
#                              Split Phase                                #
###########################################################################
def logaddexp(x1: T.FloatTensor, x2: T.FloatTensor) -> T.FloatTensor:
    """
    Elementwise logaddexp function: log(exp(x1) + exp(x2))

    Args:
        x1: A tensor.
        x2: A tensor.

    Returns:
        tensor: Elementwise logaddexp.
    """
    # log(exp(x1) + exp(x2))
    # = log( exp(x1) (1 + exp(x2 - x1))) = x1 + log(1 + exp(x2 - x1))
    # = log( exp(x2) (exp(x1 - x2) + 1)) = x2 + log(1 + exp(x1 - x2))
    diff = torch.min(x2 - x1, x1 - x2)
    return torch.max(x1, x2) + torch.log1p(exp(diff))
def cross_entropy_loss(self, x, y):
    '''Cross entropy loss w/o averaging across all samples.

    Args:
      x: (tensor) sized [N,D].
      y: (tensor) sized [N,].

    Return:
      (tensor) cross entropy loss, sized [N,].
    '''
    # print(x.size())       # [8732, 16]
    xmax = x.data.max()
    # print(x.data.size())  # [8732, 16]
    # print(xmax.size())    # max -> float object
    log_sum_exp = torch.log(torch.sum(torch.exp(x - xmax), 1)) + xmax
    # print(log_sum_exp.size())                # [8732,]
    # print(x.gather(1, y.view(-1,1)).size())  # [8732, 1]
    return log_sum_exp.view(-1, 1) - x.gather(1, y.view(-1, 1))
def bbox_transform(ex_rois, gt_rois):
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights

    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = torch.log(gt_widths / ex_widths)
    targets_dh = torch.log(gt_heights / ex_heights)

    targets = torch.stack(
        (targets_dx, targets_dy, targets_dw, targets_dh), 1)
    return targets
def logp(x, mean, std):
    out = 0.5 * ((x - mean) / (std)) ** 2 + 0.5 * LOG2PI + th.log(std)
    return -out
def EntropicConfusion(features):
    batch_size = features.size(0)
    return torch.mul(features, torch.log(features)).sum() * (1.0 / batch_size)
def forward(self, output, target):
    cross_entropy = F.cross_entropy(output, target)
    cross_entropy_log = torch.log(cross_entropy)
    focal_loss = -((1 - cross_entropy) ** self.focusing_param) * cross_entropy_log
    balanced_focal_loss = self.balance_param * focal_loss
    return balanced_focal_loss
def pretrain(self, x, pt_epochs, verbose=True):
    n = x.data.size()[0]
    num_batches = n // self.batch_size  # integer number of batches
    t = x

    # Pre-train 1 autoencoder at a time
    for i, ae_re in enumerate(self.autoencoders_ref):
        # Get the current autoencoder
        ae = getattr(self.sequential, ae_re)

        # Getting encoded output from the previous autoencoder
        if i > 0:
            # Set the requires_grad to False so that backprop doesn't
            # travel all the way back to the previous autoencoder
            temp = Variable(torch.FloatTensor(n, ae.d_in), requires_grad=False)
            for k in range(num_batches):
                start, end = k * self.batch_size, (k + 1) * self.batch_size
                prev_ae = getattr(self.sequential, self.autoencoders_ref[i - 1])
                temp.data[start:end] = prev_ae.encode(t[start:end], add_noise=False).data
            t = temp
        optimizer = SGD(ae.parameters(), lr=self.pre_lr)

        # Pre-training
        print("Pre-training Autoencoder:", i)
        for ep in range(pt_epochs):
            agg_cost = 0.
            for k in range(num_batches):
                start, end = k * self.batch_size, (k + 1) * self.batch_size
                bt = t[start:end]
                optimizer.zero_grad()
                z = ae.encode(bt, add_noise=True)
                z = ae.decode(z)
                # binary cross-entropy reconstruction loss
                loss = -torch.sum(bt * torch.log(z) +
                                  (1.0 - bt) * torch.log(1.0 - z), 1)
                cost = torch.mean(loss)
                cost.backward()
                optimizer.step()
                agg_cost += cost
            agg_cost /= num_batches
            if verbose:
                print("Pre-training Autoencoder:", i, "Epoch:", ep, "Cost:", agg_cost.data[0])
def test_log(self):
    self._testMath(torch.log, lambda x: math.log(x) if x > 0 else float('nan'))