The following 50 code examples, extracted from open-source Python projects, illustrate how to use torch.autograd().
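Before the project code, here is a minimal sketch of the core call most of these snippets build on: computing gradients with torch.autograd.grad. It uses the pre-0.4 Variable wrapper that the examples below assume; the variable names are illustrative only.

import torch
from torch.autograd import Variable

# Minimal sketch (assumes the pre-0.4 Variable API used throughout these examples).
x = Variable(torch.ones(3), requires_grad=True)
y = (x * x).sum()                                  # scalar output y = sum(x_i ** 2)
grads = torch.autograd.grad(outputs=y, inputs=x)   # returns a tuple of gradients
print(grads[0])                                    # dy/dx = 2 * x -> a tensor of 2s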
def calc_gradient_penalty(self, netD, real_data, fake_data):
    alpha = torch.rand(1, 1)
    alpha = alpha.expand(real_data.size())
    alpha = alpha.cuda()

    interpolates = alpha * real_data + ((1 - alpha) * fake_data)
    interpolates = interpolates.cuda()
    interpolates = Variable(interpolates, requires_grad=True)

    disc_interpolates = netD.forward(interpolates)

    gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                              grad_outputs=torch.ones(disc_interpolates.size()).cuda(),
                              create_graph=True, retain_graph=True, only_inputs=True)[0]

    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * self.LAMBDA
    return gradient_penalty
def xavier_uniform(tensor, gain=1):
    """Fills the input Tensor or Variable with values according to the method
    described in "Understanding the difficulty of training deep feedforward
    neural networks" - Glorot, X. & Bengio, Y. (2010), using a uniform
    distribution. The resulting tensor will have values sampled from
    :math:`U(-a, a)` where
    :math:`a = gain \\times \sqrt{2 / (fan\_in + fan\_out)} \\times \sqrt{3}`.
    Also known as Glorot initialisation.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable
        gain: an optional scaling factor

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.xavier_uniform(w, gain=nn.init.calculate_gain('relu'))
    """
    if isinstance(tensor, Variable):
        xavier_uniform(tensor.data, gain=gain)
        return tensor

    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    std = gain * math.sqrt(2.0 / (fan_in + fan_out))
    a = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation
    return tensor.uniform_(-a, a)
def xavier_normal(tensor, gain=1):
    """Fills the input Tensor or Variable with values according to the method
    described in "Understanding the difficulty of training deep feedforward
    neural networks" - Glorot, X. & Bengio, Y. (2010), using a normal
    distribution. The resulting tensor will have values sampled from
    :math:`N(0, std)` where
    :math:`std = gain \\times \sqrt{2 / (fan\_in + fan\_out)}`.
    Also known as Glorot initialisation.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable
        gain: an optional scaling factor

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.xavier_normal(w)
    """
    if isinstance(tensor, Variable):
        xavier_normal(tensor.data, gain=gain)
        return tensor

    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    std = gain * math.sqrt(2.0 / (fan_in + fan_out))
    return tensor.normal_(0, std)
def forward(self, x, lengths, hidden):
    # Basket Encoding
    ub_seqs = []  # users' basket sequence
    for user in x:  # x shape (batch of user, time_step, indice of product) nested lists
        embed_baskets = []
        for basket in user:
            basket = torch.LongTensor(basket).resize_(1, len(basket))
            basket = basket.cuda() if self.config.cuda else basket  # use cuda for acceleration
            basket = self.encode(torch.autograd.Variable(basket))  # shape: 1, len(basket), embedding_dim
            embed_baskets.append(self.pool(basket, dim=1))
        # concat current user's all baskets and append it to users' basket sequence
        ub_seqs.append(torch.cat(embed_baskets, 1))  # shape: 1, num_basket, embedding_dim

    # Input for rnn
    ub_seqs = torch.cat(ub_seqs, 0).cuda() if self.config.cuda else torch.cat(ub_seqs, 0)  # shape: batch_size, max_len, embedding_dim
    packed_ub_seqs = torch.nn.utils.rnn.pack_padded_sequence(ub_seqs, lengths, batch_first=True)  # packed sequence as required by pytorch

    # RNN
    output, h_u = self.rnn(packed_ub_seqs, hidden)
    dynamic_user, _ = torch.nn.utils.rnn.pad_packed_sequence(output, batch_first=True)  # shape: batch_size, max_len, embedding_dim
    return dynamic_user, h_u
def test_forward_works_even_with_empty_sequences(self):
    lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=11, batch_first=True)
    encoder = PytorchSeq2VecWrapper(lstm)

    tensor = torch.autograd.Variable(torch.rand([5, 7, 3]))
    tensor[1, 6:, :] = 0
    tensor[2, :, :] = 0
    tensor[3, 2:, :] = 0
    tensor[4, :, :] = 0
    mask = torch.autograd.Variable(torch.ones(5, 7))
    mask[1, 6:] = 0
    mask[2, :] = 0
    mask[3, 2:] = 0
    mask[4, :] = 0

    results = encoder(tensor, mask)

    for i in (0, 1, 3):
        assert not (results[i] == 0.).data.all()
    for i in (2, 4):
        assert (results[i] == 0.).data.all()
def test_forward_works_even_with_empty_sequences(self):
    lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
    encoder = PytorchSeq2SeqWrapper(lstm)

    tensor = torch.autograd.Variable(torch.rand([5, 7, 3]))
    tensor[1, 6:, :] = 0
    tensor[2, :, :] = 0
    tensor[3, 2:, :] = 0
    tensor[4, :, :] = 0
    mask = torch.autograd.Variable(torch.ones(5, 7))
    mask[1, 6:] = 0
    mask[2, :] = 0
    mask[3, 2:] = 0
    mask[4, :] = 0

    results = encoder(tensor, mask)

    for i in (0, 1, 3):
        assert not (results[i] == 0.).data.all()
    for i in (2, 4):
        assert (results[i] == 0.).data.all()
def _initializer_wrapper(init_function: Callable[..., None]) -> Type[Initializer]:
    class Init(Initializer):
        def __init__(self, **kwargs):
            self._init_function = init_function
            self._kwargs = kwargs

        def __call__(self, tensor: torch.autograd.Variable) -> None:
            self._init_function(tensor, **self._kwargs)

        def __repr__(self):
            return 'Init: %s, with params: %s' % (self._init_function, self._kwargs)

        @classmethod
        def from_params(cls, params: Params):
            return cls(**params.as_dict())
    return Init


# There are no classes to decorate, so we hack these into Registrable._registry
def get_dropout_mask(dropout_probability: float, tensor_for_masking: torch.autograd.Variable):
    """
    Computes and returns an element-wise dropout mask for a given tensor, where
    each element in the mask is dropped out with probability dropout_probability.
    Note that the mask is NOT applied to the tensor - the tensor is passed to retain
    the correct CUDA tensor type for the mask.

    Parameters
    ----------
    dropout_probability : float, required.
        Probability of dropping a dimension of the input.
    tensor_for_masking : torch.Variable, required.

    Returns
    -------
    A torch.FloatTensor consisting of the binary mask scaled by 1/ (1 - dropout_probability).
    This scaling ensures expected values and variances of the output of applying this mask
    and the original tensor are the same.
    """
    binary_mask = tensor_for_masking.clone()
    binary_mask.data.copy_(torch.rand(tensor_for_masking.size()) > dropout_probability)
    # Scale mask by 1/keep_prob to preserve output statistics.
    dropout_mask = binary_mask.float().div(1.0 - dropout_probability)
    return dropout_mask
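The docstring above notes that the mask is not applied for you; the caller multiplies it in. A hedged usage sketch, assuming the get_dropout_mask defined above is in scope and with an illustrative hidden-state shape:

# Usage sketch (get_dropout_mask as defined above; shapes are illustrative).
hidden = torch.autograd.Variable(torch.randn(4, 11))  # e.g. (batch_size, hidden_dim)
mask = get_dropout_mask(0.3, hidden)                  # binary mask scaled by 1 / (1 - 0.3)
dropped_hidden = hidden * mask                        # the caller applies the mask explicitly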
def mlpg(means, variances, windows):
    """Maximum Likelihood Parameter Generation (MLPG).

    The parameters are almost the same as :func:`nnmnkwii.paramgen.mlpg` expects.
    The differences are:

    - The function assumes ``means`` as :obj:`torch.autograd.Variable` instead
      of :obj:`numpy.ndarray`.
    - The function assumes ``variances_frames`` as :obj:`torch.FloatTensor`
      instead of :obj:`numpy.ndarray`.

    Args:
        means (torch.autograd.Variable): Means
        variances (torch.FloatTensor): Variances
        windows (list): A sequence of window specification

    See also:
        :obj:`nnmnkwii.autograd.MLPG`, :func:`nnmnkwii.paramgen.mlpg`
    """
    T, D = means.size()
    if variances.dim() == 1 and variances.shape[0] == D:
        variances = variances.expand(T, D)
    assert means.size() == variances.size()
    return MLPG(variances, windows)(means)
def unit_variance_mlpg(R, means):
    """Special case of MLPG assuming data is normalized to have unit variance.

    Args:
        means (torch.autograd.Variable): Means, of shape (``T x D``) or
          (``T*num_windows x static_dim``). See
          :func:`nnmnkwii.paramgen.reshape_means` to reshape means from
          (``T x D``) to (``T*num_windows x static_dim``).
        R (torch.FloatTensor): MLPG matrix.

    See also:
        :obj:`nnmnkwii.autograd.UnitVarianceMLPG`,
        :func:`nnmnkwii.paramgen.unit_variance_mlpg_matrix`,
        :func:`reshape_means`.
    """
    return UnitVarianceMLPG(R)(means)
def _eq(x, y):
    """
    Equality comparison for nested data structures with tensors.
    """
    if type(x) is not type(y):
        return False
    elif isinstance(x, dict):
        if set(x.keys()) != set(y.keys()):
            return False
        return all(_eq(x_val, y[key]) for key, x_val in x.items())
    elif isinstance(x, (np.ndarray, torch.Tensor)):
        return (x == y).all()
    elif isinstance(x, torch.autograd.Variable):
        return (x.data == y.data).all()
    else:
        return x == y
def _dist_and_values(self, *args, **kwargs):
    # XXX currently this whole object is very inefficient
    values, logits = [], []
    for value, logit in self._gen_weighted_samples(*args, **kwargs):
        ix = _index(values, value)
        if ix == -1:
            # Value is new.
            values.append(value)
            logits.append(logit)
        else:
            # Value has already been seen.
            logits[ix] = util.log_sum_exp(torch.stack([logits[ix], logit]).squeeze())

    logits = torch.stack(logits).squeeze()
    logits -= util.log_sum_exp(logits)
    if not isinstance(logits, torch.autograd.Variable):
        logits = Variable(logits)
    logits = logits - util.log_sum_exp(logits)
    d = dist.Categorical(logits=logits, one_hot=False)
    return d, values
def enumerate_support(self):
    """
    Returns the Bernoulli distribution's support, as a tensor along the first dimension.

    Note that this returns support values of all the batched RVs in lock-step, rather
    than the full cartesian product. To iterate over the cartesian product, you must
    construct univariate Bernoullis and use itertools.product() over all univariate
    variables (may be expensive).

    :return: torch variable enumerating the support of the Bernoulli distribution.
        Each item in the return value, when enumerated along the first dimensions, yields a
        value from the distribution's support which has the same dimension as would be returned by
        sample.
    :rtype: torch.autograd.Variable.
    """
    return Variable(torch.stack([torch.Tensor([t]).expand_as(self.ps) for t in [0, 1]]))
def log_pdf(self, y, *args, **kwargs):
    """
    :param y: a value sampled from the transformed distribution
    :type y: torch.autograd.Variable

    :returns: the score (the log pdf) of y
    :rtype: torch.autograd.Variable

    Scores the sample by inverting the bijector(s) and computing the score using the score
    of the base distribution and the log det jacobian
    """
    inverses = []
    next_to_invert = y
    for bijector in reversed(self.bijectors):
        inverse = bijector.inverse(next_to_invert)
        inverses.append(inverse)
        next_to_invert = inverse
    log_pdf_base = self.base_dist.log_pdf(inverses[-1], *args, **kwargs)
    log_det_jacobian = self.bijectors[-1].log_det_jacobian(y, *args, **kwargs)
    for bijector, inverse in zip(list(reversed(self.bijectors))[1:], inverses[:-1]):
        log_det_jacobian += bijector.log_det_jacobian(inverse, *args, **kwargs)
    return log_pdf_base - log_det_jacobian
def log_beta(t):
    """
    Computes log Beta function.

    :param t:
    :type t: torch.autograd.Variable of dimension 1 or 2
    :rtype: torch.autograd.Variable of float (if t.dim() == 1)
        or torch.Tensor (if t.dim() == 2)
    """
    assert t.dim() in (1, 2)
    if t.dim() == 1:
        numer = torch.sum(log_gamma(t))
        denom = log_gamma(torch.sum(t))
    else:
        numer = torch.sum(log_gamma(t), 1)
        denom = log_gamma(torch.sum(t, 1))
    return numer - denom
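The function above relies on the identity log B(α) = Σ_i log Γ(α_i) − log Γ(Σ_i α_i). A small hedged sanity check of that identity, using only the standard library (the concrete values are illustrative):

import math

# log B(a, b) = log Gamma(a) + log Gamma(b) - log Gamma(a + b)
a, b = 2.0, 3.0
log_beta_direct = math.lgamma(a) + math.lgamma(b) - math.lgamma(a + b)
# B(2, 3) = 1/12, so the log should equal -log(12).
assert abs(log_beta_direct - (-math.log(12.0))) < 1e-12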
def sample(self):
    """
    Draws either a single sample (if alpha.dim() == 1), or one sample per param (if alpha.dim() == 2).

    (Un-reparameterized).

    :param torch.autograd.Variable alpha:
    """
    alpha_np = self.alpha.data.cpu().numpy()
    if self.alpha.dim() == 1:
        x_np = spr.dirichlet.rvs(alpha_np)[0]
    else:
        x_np = np.empty_like(alpha_np)
        for i in range(alpha_np.shape[0]):
            x_np[i, :] = spr.dirichlet.rvs(alpha_np[i, :])[0]
    x = Variable(type(self.alpha.data)(x_np))
    return x
def batch_log_pdf(self, x):
    """
    Evaluates log probability density over one or a batch of samples.

    Each of alpha and x can be either a single value or a batch of values batched along dimension 0.
    If they are both batches, their batch sizes must agree.
    In any case, the rightmost size must agree.

    :param torch.autograd.Variable x: A value (if x.dim() == 1) or a batch of values (if x.dim() == 2).
    :param alpha: A vector of concentration parameters.
    :type alpha: torch.autograd.Variable or None.
    :return: log probability densities of each element in the batch.
    :rtype: torch.autograd.Variable of torch.Tensor of dimension 1.
    """
    alpha = self.alpha.expand(self.shape(x))
    x_sum = torch.sum(torch.mul(alpha - 1, torch.log(x)), -1)
    beta = log_beta(alpha)
    batch_log_pdf_shape = self.batch_shape(x) + (1,)
    return (x_sum - beta).contiguous().view(batch_log_pdf_shape)
def test_multi_gpu(self):
    import torch
    from torch.autograd import Variable
    import torch.nn as nn
    from torch.nn.parallel.data_parallel import data_parallel
    from inferno.extensions.containers.graph import Graph

    input_shape = [8, 1, 3, 128, 128]
    model = Graph() \
        .add_input_node('input') \
        .add_node('conv0', nn.Conv3d(1, 10, 3, padding=1), previous='input') \
        .add_node('conv1', nn.Conv3d(10, 1, 3, padding=1), previous='conv0') \
        .add_output_node('output', previous='conv1')
    model.cuda()
    input = Variable(torch.rand(*input_shape).cuda())
    output = data_parallel(model, input, device_ids=[0, 1, 2, 3])
def test_forward(self):
    import torch
    from torch.autograd import Variable
    from reid.models.inception import InceptionNet

    # model = Inception(num_classes=5, num_features=256, dropout=0.5)
    # x = Variable(torch.randn(10, 3, 144, 56), requires_grad=False)
    # y = model(x)
    # self.assertEquals(y.size(), (10, 5))

    model = InceptionNet(num_features=8, norm=True, dropout=0)
    x = Variable(torch.randn(10, 3, 144, 56), requires_grad=False)
    y = model(x)
    self.assertEquals(y.size(), (10, 8))
    self.assertEquals(y.norm(2, 1).max(), 1)
    self.assertEquals(y.norm(2, 1).min(), 1)
def test_forward_backward(self):
    import torch
    import torch.nn.functional as F
    from torch.autograd import Variable
    from reid.loss import OIMLoss

    criterion = OIMLoss(3, 3, scalar=1.0, size_average=False)
    criterion.lut = torch.eye(3)
    x = Variable(torch.randn(3, 3), requires_grad=True)
    y = Variable(torch.range(0, 2).long())
    loss = criterion(x, y)
    loss.backward()
    probs = F.softmax(x)
    grads = probs.data - torch.eye(3)
    abs_diff = torch.abs(grads - x.grad.data)
    self.assertEquals(torch.log(probs).diag().sum(), -loss)
    self.assertTrue(torch.max(abs_diff) < 1e-6)
def _forward_alg(self, feats):
    # calculate in log domain
    # feats is len(sentence) * tagset_size
    # initialize alpha with a Tensor with values all equal to -10000.
    init_alphas = torch.Tensor(1, self.tagset_size).fill_(-10000.)
    init_alphas[0][self.tag_to_ix[START_TAG]] = 0.
    forward_var = autograd.Variable(init_alphas)
    if self.use_gpu:
        forward_var = forward_var.cuda()
    for feat in feats:
        emit_score = feat.view(-1, 1)
        tag_var = forward_var + self.transitions + emit_score
        max_tag_var, _ = torch.max(tag_var, dim=1)
        tag_var = tag_var - max_tag_var.view(-1, 1)
        forward_var = max_tag_var + torch.log(torch.sum(torch.exp(tag_var), dim=1)).view(1, -1)  # ).view(1, -1)
    terminal_var = (forward_var + self.transitions[self.tag_to_ix[STOP_TAG]]).view(1, -1)
    alpha = log_sum_exp(terminal_var)  # Z(x)
    return alpha
def reinforce_backward(self, reward, output_mask=None):
    """
    If output_mask is not None, then it should be a FloatTensor of shape (N, T)
    giving a multiplier to the output.
    """
    assert self.multinomial_outputs is not None, 'Must call reinforce_sample first'
    grad_output = []

    def gen_hook(mask):
        def hook(grad):
            return grad * mask.contiguous().view(-1, 1).expand_as(grad)
        return hook

    if output_mask is not None:
        for t, probs in enumerate(self.multinomial_probs):
            mask = Variable(output_mask[:, t])
            probs.register_hook(gen_hook(mask))

    for sampled_output in self.multinomial_outputs:
        sampled_output.reinforce(reward)
        grad_output.append(None)
    torch.autograd.backward(self.multinomial_outputs, grad_output, retain_variables=True)
def uniform(tensor, a=0, b=1):
    """Fills the input Tensor or Variable with values drawn from the uniform
    distribution :math:`U(a, b)`.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable
        a: the lower bound of the uniform distribution
        b: the upper bound of the uniform distribution

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.uniform(w)
    """
    if isinstance(tensor, Variable):
        uniform(tensor.data, a=a, b=b)
        return tensor
    return tensor.uniform_(a, b)
def normal(tensor, mean=0, std=1):
    """Fills the input Tensor or Variable with values drawn from the normal
    distribution :math:`N(mean, std)`.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.normal(w)
    """
    if isinstance(tensor, Variable):
        normal(tensor.data, mean=mean, std=std)
        return tensor
    return tensor.normal_(mean, std)
def constant(tensor, val):
    """Fills the input Tensor or Variable with the value `val`.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable
        val: the value to fill the tensor with

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.constant(w)
    """
    if isinstance(tensor, Variable):
        constant(tensor.data, val)
        return tensor
    return tensor.fill_(val)
def eye(tensor):
    """Fills the 2-dimensional input Tensor or Variable with the identity
    matrix. Preserves the identity of the inputs in Linear layers, where as
    many inputs are preserved as possible.

    Args:
        tensor: a 2-dimensional torch.Tensor or autograd.Variable

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.eye(w)
    """
    if tensor.ndimension() != 2:
        raise ValueError("Only tensors with 2 dimensions are supported")

    if isinstance(tensor, Variable):
        eye(tensor.data)
        return tensor
    return tensor.copy_(torch.eye(tensor.size(0), tensor.size(1)))
def kaiming_uniform(tensor, a=0, mode='fan_in'):
    """Fills the input Tensor or Variable with values according to the method
    described in "Delving deep into rectifiers: Surpassing human-level
    performance on ImageNet classification" - He, K. et al. (2015), using a
    uniform distribution. The resulting tensor will have values sampled from
    :math:`U(-bound, bound)` where
    :math:`bound = \sqrt{2 / ((1 + a^2) \\times fan\_in)} \\times \sqrt{3}`.
    Also known as He initialisation.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable
        a: the negative slope of the rectifier used after this layer (0 for ReLU
            by default)
        mode: either 'fan_in' (default) or 'fan_out'. Choosing `fan_in`
            preserves the magnitude of the variance of the weights in the
            forward pass. Choosing `fan_out` preserves the magnitudes in the
            backwards pass.

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.kaiming_uniform(w, mode='fan_in')
    """
    if isinstance(tensor, Variable):
        kaiming_uniform(tensor.data, a=a, mode=mode)
        return tensor

    fan = _calculate_correct_fan(tensor, mode)
    gain = calculate_gain('leaky_relu', a)
    std = gain / math.sqrt(fan)
    bound = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation
    return tensor.uniform_(-bound, bound)
def kaiming_normal(tensor, a=0, mode='fan_in'):
    """Fills the input Tensor or Variable with values according to the method
    described in "Delving deep into rectifiers: Surpassing human-level
    performance on ImageNet classification" - He, K. et al. (2015), using a
    normal distribution. The resulting tensor will have values sampled from
    :math:`N(0, std)` where
    :math:`std = \sqrt{2 / ((1 + a^2) \\times fan\_in)}`.
    Also known as He initialisation.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable
        a: the negative slope of the rectifier used after this layer (0 for ReLU
            by default)
        mode: either 'fan_in' (default) or 'fan_out'. Choosing `fan_in`
            preserves the magnitude of the variance of the weights in the
            forward pass. Choosing `fan_out` preserves the magnitudes in the
            backwards pass.

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.kaiming_normal(w, mode='fan_out')
    """
    if isinstance(tensor, Variable):
        kaiming_normal(tensor.data, a=a, mode=mode)
        return tensor

    fan = _calculate_correct_fan(tensor, mode)
    gain = calculate_gain('leaky_relu', a)
    std = gain / math.sqrt(fan)
    return tensor.normal_(0, std)
def test_grad_nonleaf_many_outputs(self):
    # This checks an edge case for function callbacks
    # We want to capture two grads of a function, but can only
    # register a single callback.
    x = Variable(torch.randn(4, 2), requires_grad=True)
    a, b = x.chunk(2)

    def hook(*grads):
        hook_called[0] = True
    hook_called = [False]
    x.register_hook(hook)

    go = torch.randn(2, 2)
    grad_a, grad_b = torch.autograd.grad(
        (a + 2 * b), [a, b], grad_outputs=go, create_graph=True)

    self.assertEqual(grad_a, go)
    self.assertEqual(grad_b, go * 2)
    self.assertFalse(hook_called[0])
    self.assertIsNone(x.grad)
def test_multi_backward(self):
    x = Variable(torch.randn(5, 5), requires_grad=True)
    y = Variable(torch.randn(5, 5), requires_grad=True)

    q = Variable(torch.randn(5, 5), requires_grad=True)

    a = Variable(torch.randn(5, 5), requires_grad=True)
    b = Variable(torch.randn(5, 5), requires_grad=True)

    q2 = q * 2
    z = x + y + q2
    c = a * b + q2
    grad_z = torch.randn(5, 5)
    grad_c = torch.randn(5, 5)
    torch.autograd.backward([z, c], [grad_z, grad_c])

    self.assertEqual(x.grad.data, grad_z)
    self.assertEqual(y.grad.data, grad_z)
    self.assertEqual(a.grad.data, grad_c * b.data)
    self.assertEqual(b.grad.data, grad_c * a.data)
    self.assertEqual(q.grad.data, (grad_c + grad_z) * 2)
def shapes_all(data):
    """
    Recursively walks the data (can be tuples, lists, or dict) and replaces a tensor
    with its shape tuple whenever it meets a tensor
    """
    if isinstance(data, (tuple, list)):
        ans = map(shapes_all, data)
        return type(data)(ans)
    elif isinstance(data, dict):
        return {k: shapes_all(v) for k, v in data.items()}
    elif (isinstance(data, np.ndarray)
          or torch.is_tensor(data)
          or isinstance(data, torch.autograd.Variable)
          or isinstance(data, torch.nn.Parameter)):
        return shape(data)
    else:
        return data
def test_grad_nonleaf_many_outputs(self):
    # This checks an edge case for function callbacks
    # We want to capture two grads of a function, but can only
    # register a single callback.
    x = Variable(torch.randn(4, 2), requires_grad=True)
    a, b = x.chunk(2)

    def hook(*grads):
        hook_called[0] = True
    hook_called = [False]
    x.register_hook(hook)

    go = torch.randn(2, 2)
    grad_a, grad_b = torch.autograd.grad(
        (a + 2 * b), [a, b], grad_outputs=go, create_graph=True)

    self.assertEqual(grad_a.data, go)
    self.assertEqual(grad_b.data, go * 2)
    self.assertFalse(hook_called[0])
    self.assertIsNone(x.grad)
def batch_predictions(self, images):
    # lazy import
    import torch
    from torch.autograd import Variable

    images = self._process_input(images)
    n = len(images)
    images = torch.from_numpy(images)
    if self.cuda:  # pragma: no cover
        images = images.cuda()
    images = Variable(images, volatile=True)
    predictions = self._model(images)
    predictions = predictions.data
    if self.cuda:  # pragma: no cover
        predictions = predictions.cpu()
    predictions = predictions.numpy()
    assert predictions.ndim == 2
    assert predictions.shape == (n, self.num_classes())
    return predictions
def _loss_fn(self, image, label):
    # lazy import
    import torch
    import torch.nn as nn
    from torch.autograd import Variable

    image = self._process_input(image)
    target = np.array([label])
    target = torch.from_numpy(target)
    if self.cuda:  # pragma: no cover
        target = target.cuda()
    target = Variable(target)

    images = torch.from_numpy(image[None])
    if self.cuda:  # pragma: no cover
        images = images.cuda()
    images = Variable(images, volatile=True)
    predictions = self._model(images)
    ce = nn.CrossEntropyLoss()
    loss = ce(predictions, target)
    loss = loss.data
    if self.cuda:  # pragma: no cover
        loss = loss.cpu()
    loss = loss.numpy()
    return loss
def backward(self, sth):
    input, label = self.saved_tensors
    grad_fs = grad_label = None

    if self.needs_input_grad[0]:
        fs = torch.nn.Softmax()(
            torch.autograd.Variable(input, requires_grad=False)
        ).data
        # neg. one hot label
        y = input.new().resize_as_(input).zero_()
        for i, l in enumerate(label):
            y[i, l] = -1.
        fs.add_(y).mul_(1. / len(label))
        grad_fs = fs
    if self.needs_input_grad[1]:
        raise NotImplementedError()

    return grad_fs, grad_label
def eval_model(dataset_loader, encoding, model):
    model.eval()
    print "evaluating model..."
    top1 = imSituTensorEvaluation(1, 3, encoding)
    top5 = imSituTensorEvaluation(5, 3, encoding)

    mx = len(dataset_loader)
    for i, (index, input, target) in enumerate(dataset_loader):
        print "{}/{} batches\r".format(i + 1, mx),
        input_var = torch.autograd.Variable(input.cuda(), volatile=True)
        target_var = torch.autograd.Variable(target.cuda(), volatile=True)
        (scores, predictions) = model.forward_max(input_var)
        (s_sorted, idx) = torch.sort(scores, 1, True)

        top1.add_point(target, predictions.data, idx.data)
        top5.add_point(target, predictions.data, idx.data)

    print "\ndone."
    return (top1, top5)
def _viterbi_decode(self, feats):
    backpointers = []
    init_alphas = torch.Tensor(self.tagset_size, 1).fill_(0.).type(self.dtype)
    forward_var = autograd.Variable(init_alphas).type(self.dtype)
    for ix, feat in enumerate(feats):
        if ix == 0:
            forward_var += feat.view(self.tagset_size, 1) + self.initial_weights
        else:
            viterbi_vars, viterbi_idx = torch.max(
                self.transitions + torch.transpose(
                    forward_var.repeat(1, self.tagset_size), 0, 1), 1)
            forward_var = feat.view(self.tagset_size, 1) + viterbi_vars
            backpointers.append(viterbi_idx)
    terminal_var = forward_var + self.final_weights
    _, best_tag_id = torch.max(terminal_var, 0)
    best_tag_id = to_scalar(best_tag_id)
    path_score = terminal_var[best_tag_id]
    best_path = [best_tag_id]
    for bptrs_t in reversed(backpointers):
        best_tag_id = to_scalar(bptrs_t[best_tag_id])
        best_path.append(best_tag_id)
    best_path.reverse()
    return path_score, best_path
def _viterbi_decode(self, feats):
    backpointers = []
    init_vvars = torch.Tensor(self.tagset_size, 1).fill_(-10000.).type(self.dtype)
    init_vvars[self.tag_to_ix[self.START_TAG]][0] = 0
    forward_var = autograd.Variable(init_vvars).type(self.dtype)
    for feat in feats:
        viterbi_vars, viterbi_idx = torch.max(
            self.transitions + torch.transpose(
                forward_var.expand(forward_var.size(0), self.tagset_size), 0, 1), 1)
        forward_var = feat.view(self.tagset_size, 1) + viterbi_vars
        backpointers.append(viterbi_idx)
    terminal_var = forward_var + self.transitions[self.tag_to_ix[self.STOP_TAG]].view(self.tagset_size, 1)
    _, best_tag_id = torch.max(terminal_var, 0, keepdim=True)
    best_tag_id = to_scalar(best_tag_id)
    path_score = terminal_var[best_tag_id]
    best_path = [best_tag_id]
    for bptrs_t in reversed(backpointers):
        best_tag_id = to_scalar(bptrs_t[best_tag_id])
        best_path.append(best_tag_id)
    start = best_path.pop()
    assert start == self.tag_to_ix[self.START_TAG]  # Sanity check
    best_path.reverse()
    return path_score, best_path
def init_hidden(self, gpu, last_batch_size=None):
    if last_batch_size is None:
        lstm_hidden_batch_size = self.batch_size
    else:
        lstm_hidden_batch_size = last_batch_size
    dims = (self.lstm_layer, lstm_hidden_batch_size, self.lstm_hidden_dim)
    if self.bilstm_flag:
        dims = (2 * self.lstm_layer, lstm_hidden_batch_size, self.lstm_hidden_dim)
    init_value = torch.Tensor(np.random.uniform(-0.01, 0.01, dims))
    # init_value = torch.zeros(dims)
    h0 = autograd.Variable(init_value)
    c0 = autograd.Variable(init_value)
    if gpu:
        h0 = h0.cuda()
        c0 = c0.cuda()
    return (h0, c0)

# from: Variable of batch_size*sent_length*embedding_dim
# to: Variable of batch_size*embedding_dim*sent_length