The following code examples, extracted from open-source Python projects, show how torch.zeros_like() is used in practice.
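Before the extracted examples, here is a minimal sketch of the basic call (the tensor values are arbitrary and assume a PyTorch version that already ships torch.zeros_like, i.e. roughly 0.3 or later): torch.zeros_like(input) returns a zero-filled tensor with the same shape and type as input, and on newer releases also the same dtype and device.

import torch

# reference tensor; shape and type are copied by zeros_like
x = torch.ones(2, 3)

# zero-filled tensor with the same shape/type as x
z = torch.zeros_like(x)

print(z.size())      # torch.Size([2, 3])
print(float(z.sum()))  # 0.0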
def torch_zeros_like(x):
    """
    Polyfill for `torch.zeros_like()`.
    """
    # Work around https://github.com/pytorch/pytorch/issues/2906
    if isinstance(x, Variable):
        return Variable(torch_zeros_like(x.data))
    # Support Pytorch before https://github.com/pytorch/pytorch/pull/2489
    try:
        return torch.zeros_like(x)
    except AttributeError:
        return torch.zeros(x.size()).type_as(x)
def test_zeros_like(self):
    expected = torch.zeros(100, 100)

    res1 = torch.zeros_like(expected)
    self.assertEqual(res1, expected)

    res2 = torch.Tensor()
    torch.zeros_like(expected, out=res2)
    self.assertEqual(res2, expected)
def test_zeros_like_cuda(self):
    expected = torch.zeros(100, 100).cuda()

    res1 = torch.zeros_like(expected)
    self.assertEqual(res1, expected)

    res2 = torch.Tensor().cuda()
    torch.zeros_like(expected, out=res2)
    self.assertEqual(res2, expected)
def test_zeros_like_multiple_device(self):
    expected = torch.zeros(100, 100).cuda()
    x = torch.cuda.FloatTensor(100, 100, device=1)
    output = torch.zeros_like(x)
    self.assertEqual(output, expected)
def schedule_sampling(self, prev, dec_out):
    """
    Resample n inputs to the next iteration from the model itself.
    n is itself sampled from a Bernoulli distribution, independently
    for each example in the batch, with success probability equal to
    the model's attribute `self.exposure_rate`.

    Parameters:
    -----------
    - prev: torch.LongTensor(batch_size)
    - dec_out: torch.Tensor(batch_size x hid_dim)

    Returns: partially resampled input
    --------
    - prev: torch.LongTensor(batch_size)
    """
    prev, dec_out = prev.data, dec_out.data  # don't register computation
    keep_mask = torch.bernoulli(
        torch.zeros_like(prev).float() + self.exposure_rate) == 1
    # return if no sampling is necessary
    if len(keep_mask.nonzero()) == len(prev):
        return prev
    sampled = self.decoder.project(
        Variable(dec_out, volatile=True)).max(1)[1].data
    if keep_mask.nonzero().dim() == 0:
        # return all sampled
        return sampled
    keep_mask = keep_mask.nonzero().squeeze(1)
    sampled[keep_mask] = prev[keep_mask]
    return sampled
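A rough, self-contained illustration of the masking trick used above (the token tensors and the rate are invented for the sketch, and it uses torch.where on PyTorch >= 0.4 instead of the index-based mixing in the original): zeros_like builds a probability tensor shaped like the input, torch.bernoulli turns it into a keep mask, and the mask decides which positions keep the ground-truth token versus the model's own sample.

import torch

prev = torch.tensor([5, 7, 2, 9])     # hypothetical gold tokens for a batch of 4
sampled = torch.tensor([5, 1, 3, 9])  # hypothetical tokens sampled from the model
exposure_rate = 0.75                  # hypothetical keep probability

# same pattern as schedule_sampling: probabilities shaped like `prev`
keep_mask = torch.bernoulli(
    torch.zeros_like(prev).float() + exposure_rate) == 1

# keep the gold token where the mask fires, otherwise use the model's sample
mixed = torch.where(keep_mask, prev, sampled)
print(mixed)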
def word_dropout_mask(X, dropout_rate, reserved_codes=()):
    """
    Computes a binary mask across batch examples based on a Bernoulli
    distribution with mean equal to `dropout_rate`.
    """
    probs = torch.zeros_like(X).float() + dropout_rate
    # zero reserved_codes (avoid dropping reserved symbols)
    if len(reserved_codes) > 0:
        probs[sum((X == x) for x in reserved_codes)] = 0
    # return binary mask
    return torch.bernoulli(probs).byte()
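A hedged usage sketch of the same word-dropout pattern (the token ids and reserved codes are invented, and the sketch assumes a PyTorch version with bool tensors, roughly 1.2 or later, rather than the byte masks returned by the original): reserved positions get probability 0, every other position is dropped independently with probability dropout_rate.

import torch

X = torch.tensor([[0, 4, 11, 7],
                  [0, 9, 2, 1]])  # hypothetical token ids; 0/1 stand for <pad>/<eos>
reserved_codes = (0, 1)
dropout_rate = 0.2

# probabilities shaped like X, as in word_dropout_mask above
probs = torch.zeros_like(X).float() + dropout_rate

# never drop reserved symbols
reserved = torch.zeros_like(X).bool()
for code in reserved_codes:
    reserved |= (X == code)
probs[reserved] = 0

drop_mask = torch.bernoulli(probs).bool()
print(drop_mask)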
def step(self, closure=None):
    """Performs a single optimization step.

    Arguments:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.
    """
    loss = None
    if closure is not None:
        loss = closure()

    for group in self.param_groups:
        for p in group['params']:
            if p.grad is None:
                continue
            grad = p.grad.data
            if grad.is_sparse:
                raise RuntimeError('Adadelta does not support sparse gradients')
            state = self.state[p]

            # State initialization
            if len(state) == 0:
                state['step'] = 0
                state['square_avg'] = torch.zeros_like(p.data)
                state['acc_delta'] = torch.zeros_like(p.data)

            square_avg, acc_delta = state['square_avg'], state['acc_delta']
            rho, eps = group['rho'], group['eps']

            state['step'] += 1

            if group['weight_decay'] != 0:
                grad = grad.add(group['weight_decay'], p.data)

            square_avg.mul_(rho).addcmul_(1 - rho, grad, grad)
            std = square_avg.add(eps).sqrt_()
            delta = acc_delta.add(eps).sqrt_().div_(std).mul_(grad)
            p.data.add_(-group['lr'], delta)
            acc_delta.mul_(rho).addcmul_(1 - rho, delta, delta)

    return loss
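In this Adadelta example, and in the Adagrad, SGD, RMSprop, Adamax, Rprop and ASGD examples that follow, torch.zeros_like(p.data) is the idiom for lazily allocating per-parameter state buffers that automatically match each parameter's shape, type and device. A minimal sketch of the same idiom in a toy custom optimizer (the class name and hyper-parameters are invented; the old-style positional add_ form is kept to match the surrounding examples):

import torch
from torch.optim import Optimizer


class ToyMomentum(Optimizer):
    """Illustrative only: plain momentum SGD using zeros_like for state init."""

    def __init__(self, params, lr=0.01, momentum=0.9):
        super(ToyMomentum, self).__init__(params, dict(lr=lr, momentum=momentum))

    def step(self, closure=None):
        loss = closure() if closure is not None else None
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                state = self.state[p]
                if len(state) == 0:
                    # buffer matches p's shape, dtype and device
                    state['velocity'] = torch.zeros_like(p.data)
                buf = state['velocity']
                buf.mul_(group['momentum']).add_(p.grad.data)
                p.data.add_(-group['lr'], buf)
        return loss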
def __init__(self, params, lr=1e-2, lr_decay=0, weight_decay=0):
    defaults = dict(lr=lr, lr_decay=lr_decay, weight_decay=weight_decay)
    super(Adagrad, self).__init__(params, defaults)

    for group in self.param_groups:
        for p in group['params']:
            state = self.state[p]
            state['step'] = 0
            state['sum'] = torch.zeros_like(p.data)
def step(self, closure=None):
    """Performs a single optimization step.

    Arguments:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.
    """
    loss = None
    if closure is not None:
        loss = closure()

    for group in self.param_groups:
        weight_decay = group['weight_decay']
        momentum = group['momentum']
        dampening = group['dampening']
        nesterov = group['nesterov']

        for p in group['params']:
            if p.grad is None:
                continue
            d_p = p.grad.data
            if weight_decay != 0:
                d_p.add_(weight_decay, p.data)
            if momentum != 0:
                param_state = self.state[p]
                if 'momentum_buffer' not in param_state:
                    buf = param_state['momentum_buffer'] = torch.zeros_like(p.data)
                    buf.mul_(momentum).add_(d_p)
                else:
                    buf = param_state['momentum_buffer']
                    buf.mul_(momentum).add_(1 - dampening, d_p)
                if nesterov:
                    d_p = d_p.add(momentum, buf)
                else:
                    d_p = buf

            p.data.add_(-group['lr'], d_p)

    return loss
def _get_parameters(self, module):
    params = []
    d_params = []
    for p in module.parameters():
        if p.grad is None:
            p._grad = torch.zeros_like(p)
        params.append(p.data)
        d_params.append(p.grad.data)
    return params, d_params
def _analytical_jacobian(self, module, input, jacobian_input=True, jacobian_parameters=True):
    output = self._forward(module, input)
    output_size = output.nelement()
    output_t = output.data if isinstance(output, Variable) else output
    if jacobian_input:
        jacobian_inp = self._jacobian(input, output_size)
        flat_jacobian_input = list(iter_tensors(jacobian_inp))

    if jacobian_parameters:
        num_param = sum(p.numel() for p in self._get_parameters(module)[0])
        jacobian_param = torch.zeros(num_param, output_size)

    for i in range(output_size):
        _, d_param = self._get_parameters(module)
        d_out = torch.zeros_like(output_t)
        flat_d_out = d_out.view(-1)
        flat_d_out[i] = 1

        if jacobian_parameters:
            self._zero_grad_parameters(module)
        # Variables will accumulate gradient from multiple steps
        if jacobian_input:
            self._zero_grad_input(input)
        d_input = self._backward(module, input, output, d_out)

        if jacobian_input:
            for jacobian_x, d_x in zip(flat_jacobian_input, iter_tensors(d_input)):
                jacobian_x[:, i] = d_x
        if jacobian_parameters:
            jacobian_param[:, i] = torch.cat(self._flatten_tensors(d_param), 0)

    res = tuple()
    if jacobian_input:
        res += jacobian_inp,
    if jacobian_parameters:
        res += jacobian_param,

    return res
def _test_zeros_like(self, template_shape_i, template_shape_v=None):
    template_shape_v = template_shape_v or []
    template_shape = template_shape_i + template_shape_v
    for nnz in [9, 12]:
        t, _, _ = self._gen_sparse(len(template_shape_i), nnz, template_shape)
        res = torch.zeros_like(t)
        self.assertEqual(tuple(res.size()), tuple(template_shape))
        self.assertTrue(res._indices().numel() == res._values().numel() == 0)
        self.assertEqual(res._nnz(), 0)
        self.assertEqual(res._dimI(), len(template_shape_i))
        self.assertEqual(res._dimV(), len(template_shape_v))
def step(self, closure=None):
    """Performs a single optimization step.

    Arguments:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.
    """
    loss = None
    if closure is not None:
        loss = closure()

    for group in self.param_groups:
        for p in group['params']:
            if p.grad is None:
                continue
            grad = p.grad.data
            if grad.is_sparse:
                raise RuntimeError('RMSprop does not support sparse gradients')
            state = self.state[p]

            # State initialization
            if len(state) == 0:
                state['step'] = 0
                state['square_avg'] = torch.zeros_like(p.data)
                if group['momentum'] > 0:
                    state['momentum_buffer'] = torch.zeros_like(p.data)
                if group['centered']:
                    state['grad_avg'] = torch.zeros_like(p.data)

            square_avg = state['square_avg']
            alpha = group['alpha']

            state['step'] += 1

            if group['weight_decay'] != 0:
                grad = grad.add(group['weight_decay'], p.data)

            square_avg.mul_(alpha).addcmul_(1 - alpha, grad, grad)

            if group['centered']:
                grad_avg = state['grad_avg']
                grad_avg.mul_(alpha).add_(1 - alpha, grad)
                avg = square_avg.addcmul(-1, grad_avg, grad_avg).sqrt().add_(group['eps'])
            else:
                avg = square_avg.sqrt().add_(group['eps'])

            if group['momentum'] > 0:
                buf = state['momentum_buffer']
                buf.mul_(group['momentum']).addcdiv_(grad, avg)
                p.data.add_(-group['lr'], buf)
            else:
                p.data.addcdiv_(-group['lr'], grad, avg)

    return loss
def step(self, closure=None):
    """Performs a single optimization step.

    Arguments:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.
    """
    loss = None
    if closure is not None:
        loss = closure()

    for group in self.param_groups:
        for p in group['params']:
            if p.grad is None:
                continue
            grad = p.grad.data
            if grad.is_sparse:
                raise RuntimeError('Adamax does not support sparse gradients')
            state = self.state[p]

            # State initialization
            if len(state) == 0:
                state['step'] = 0
                state['exp_avg'] = torch.zeros_like(p.data)
                state['exp_inf'] = torch.zeros_like(p.data)

            exp_avg, exp_inf = state['exp_avg'], state['exp_inf']
            beta1, beta2 = group['betas']
            eps = group['eps']

            state['step'] += 1

            if group['weight_decay'] != 0:
                grad = grad.add(group['weight_decay'], p.data)

            # Update biased first moment estimate.
            exp_avg.mul_(beta1).add_(1 - beta1, grad)
            # Update the exponentially weighted infinity norm.
            norm_buf = torch.cat([
                exp_inf.mul_(beta2).unsqueeze(0),
                grad.abs().add_(eps).unsqueeze_(0)
            ], 0)
            torch.max(norm_buf, 0, keepdim=False, out=(exp_inf, exp_inf.new().long()))

            bias_correction = 1 - beta1 ** state['step']
            clr = group['lr'] / bias_correction

            p.data.addcdiv_(-clr, exp_avg, exp_inf)

    return loss
def step(self, closure=None):
    """Performs a single optimization step.

    Arguments:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.
    """
    loss = None
    if closure is not None:
        loss = closure()

    for group in self.param_groups:
        for p in group['params']:
            if p.grad is None:
                continue
            grad = p.grad.data
            if grad.is_sparse:
                raise RuntimeError('Rprop does not support sparse gradients')
            state = self.state[p]

            # State initialization
            if len(state) == 0:
                state['step'] = 0
                state['prev'] = torch.zeros_like(p.data)
                state['step_size'] = grad.new().resize_as_(grad).fill_(group['lr'])

            etaminus, etaplus = group['etas']
            step_size_min, step_size_max = group['step_sizes']
            step_size = state['step_size']

            state['step'] += 1

            sign = grad.mul(state['prev']).sign()
            sign[sign.gt(0)] = etaplus
            sign[sign.lt(0)] = etaminus
            sign[sign.eq(0)] = 1

            # update stepsizes with step size updates
            step_size.mul_(sign).clamp_(step_size_min, step_size_max)

            # for dir<0, dfdx=0
            # for dir>=0 dfdx=dfdx
            grad = grad.clone()
            grad[sign.eq(etaminus)] = 0

            # update parameters
            p.data.addcmul_(-1, grad.sign(), step_size)

            state['prev'].copy_(grad)

    return loss
def step(self, closure=None):
    """Performs a single optimization step.

    Arguments:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.
    """
    loss = None
    if closure is not None:
        loss = closure()

    for group in self.param_groups:
        for p in group['params']:
            if p.grad is None:
                continue
            grad = p.grad.data
            if grad.is_sparse:
                raise RuntimeError('ASGD does not support sparse gradients')
            state = self.state[p]

            # State initialization
            if len(state) == 0:
                state['step'] = 0
                state['eta'] = group['lr']
                state['mu'] = 1
                state['ax'] = torch.zeros_like(p.data)

            state['step'] += 1

            if group['weight_decay'] != 0:
                grad = grad.add(group['weight_decay'], p.data)

            # decay term
            p.data.mul_(1 - group['lambd'] * state['eta'])

            # update parameter
            p.data.add_(-state['eta'], grad)

            # averaging
            if state['mu'] != 1:
                state['ax'].add_(p.data.sub(state['ax']).mul(state['mu']))
            else:
                state['ax'].copy_(p.data)

            # update eta and mu
            state['eta'] = (group['lr'] /
                            math.pow((1 + group['lambd'] * group['lr'] * state['step']), group['alpha']))
            state['mu'] = 1 / max(1, state['step'] - group['t0'])

    return loss