The following code examples, extracted from open-source Python projects, illustrate how to use torch.autograd.Variable().
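Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the basic pre-0.4 Variable workflow: wrap a tensor, build a small graph, call backward(), and read the gradient.

import torch
from torch.autograd import Variable

# Wrap a tensor in a Variable so autograd tracks operations on it
# (legacy pre-0.4 API; in PyTorch >= 0.4 plain tensors track gradients directly).
x = Variable(torch.ones(2, 2), requires_grad=True)

# Build a tiny computation graph.
y = (x * 3).sum()

# Backpropagate; the gradient accumulates in x.grad.
y.backward()
print(x.grad)  # each entry is 3.0, same shape as x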
def pad_batch(mini_batch):
    mini_batch_size = len(mini_batch)
    # print mini_batch.shape
    # print mini_batch
    max_sent_len1 = int(np.max([len(x[0]) for x in mini_batch]))
    max_sent_len2 = int(np.max([len(x[1]) for x in mini_batch]))
    # print max_sent_len1, max_sent_len2
    # max_token_len = int(np.mean([len(val) for sublist in mini_batch for val in sublist]))
    main_matrix1 = np.zeros((mini_batch_size, max_sent_len1), dtype=np.int)
    main_matrix2 = np.zeros((mini_batch_size, max_sent_len2), dtype=np.int)
    for idx1, i in enumerate(mini_batch):
        for idx2, j in enumerate(i[0]):
            try:
                main_matrix1[idx1, idx2] = j
            except IndexError:
                pass
    for idx1, i in enumerate(mini_batch):
        for idx2, j in enumerate(i[1]):
            try:
                main_matrix2[idx1, idx2] = j
            except IndexError:
                pass
    main_matrix1_t = Variable(torch.from_numpy(main_matrix1))
    main_matrix2_t = Variable(torch.from_numpy(main_matrix2))
    # print main_matrix1_t.size()
    # print main_matrix2_t.size()
    return [main_matrix1_t, main_matrix2_t]
    # return [Variable(torch.cat((main_matrix1_t, main_matrix2_t), 0))

# def pad_batch(mini_batch):
#     # print mini_batch
#     # print type(mini_batch)
#     # print mini_batch.shape
#     # for i, _ in enumerate(mini_batch):
#     #     print i, _
#     return [Variable(torch.from_numpy(np.asarray(_))) for _ in mini_batch[0]]
def calc_gradient_penalty(self, netD, real_data, fake_data):
    alpha = torch.rand(1, 1)
    alpha = alpha.expand(real_data.size())
    alpha = alpha.cuda()

    interpolates = alpha * real_data + ((1 - alpha) * fake_data)
    interpolates = interpolates.cuda()
    interpolates = Variable(interpolates, requires_grad=True)

    disc_interpolates = netD.forward(interpolates)

    gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                              grad_outputs=torch.ones(disc_interpolates.size()).cuda(),
                              create_graph=True, retain_graph=True, only_inputs=True)[0]

    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * self.LAMBDA
    return gradient_penalty
def xavier_uniform(tensor, gain=1):
    """Fills the input Tensor or Variable with values according to the method
    described in "Understanding the difficulty of training deep feedforward
    neural networks" - Glorot, X. & Bengio, Y. (2010), using a uniform
    distribution. The resulting tensor will have values sampled from
    :math:`U(-a, a)` where
    :math:`a = gain \\times \sqrt{2 / (fan\_in + fan\_out)} \\times \sqrt{3}`.
    Also known as Glorot initialisation.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable
        gain: an optional scaling factor

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.xavier_uniform(w, gain=nn.init.calculate_gain('relu'))
    """
    if isinstance(tensor, Variable):
        xavier_uniform(tensor.data, gain=gain)
        return tensor

    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    std = gain * math.sqrt(2.0 / (fan_in + fan_out))
    a = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation
    return tensor.uniform_(-a, a)
def xavier_normal(tensor, gain=1):
    """Fills the input Tensor or Variable with values according to the method
    described in "Understanding the difficulty of training deep feedforward
    neural networks" - Glorot, X. & Bengio, Y. (2010), using a normal
    distribution. The resulting tensor will have values sampled from
    :math:`N(0, std)` where
    :math:`std = gain \\times \sqrt{2 / (fan\_in + fan\_out)}`.
    Also known as Glorot initialisation.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable
        gain: an optional scaling factor

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.xavier_normal(w)
    """
    if isinstance(tensor, Variable):
        xavier_normal(tensor.data, gain=gain)
        return tensor

    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    std = gain * math.sqrt(2.0 / (fan_in + fan_out))
    return tensor.normal_(0, std)
def forward(self, x, lengths, hidden):
    # Basket Encoding
    ub_seqs = []  # users' basket sequence
    for user in x:  # x shape (batch of user, time_step, indice of product) nested lists
        embed_baskets = []
        for basket in user:
            basket = torch.LongTensor(basket).resize_(1, len(basket))
            basket = basket.cuda() if self.config.cuda else basket  # use cuda for acceleration
            basket = self.encode(torch.autograd.Variable(basket))  # shape: 1, len(basket), embedding_dim
            embed_baskets.append(self.pool(basket, dim=1))
        # concat current user's all baskets and append it to users' basket sequence
        ub_seqs.append(torch.cat(embed_baskets, 1))  # shape: 1, num_basket, embedding_dim

    # Input for rnn
    ub_seqs = torch.cat(ub_seqs, 0).cuda() if self.config.cuda else torch.cat(ub_seqs, 0)  # shape: batch_size, max_len, embedding_dim
    packed_ub_seqs = torch.nn.utils.rnn.pack_padded_sequence(ub_seqs, lengths, batch_first=True)  # packed sequence as required by pytorch

    # RNN
    output, h_u = self.rnn(packed_ub_seqs, hidden)
    dynamic_user, _ = torch.nn.utils.rnn.pad_packed_sequence(output, batch_first=True)  # shape: batch_size, max_len, embedding_dim
    return dynamic_user, h_u
def package(data, volatile=False):
    """Package data for training / evaluation."""
    data = map(lambda x: json.loads(x), data)
    dat = map(lambda x: map(lambda y: dictionary.word2idx[y], x['text']), data)
    maxlen = 0
    for item in dat:
        maxlen = max(maxlen, len(item))
    targets = map(lambda x: x['label'], data)
    maxlen = min(maxlen, 500)
    for i in range(len(data)):
        if maxlen < len(dat[i]):
            dat[i] = dat[i][:maxlen]
        else:
            for j in range(maxlen - len(dat[i])):
                dat[i].append(dictionary.word2idx['<pad>'])
    dat = Variable(torch.LongTensor(dat), volatile=volatile)
    targets = Variable(torch.LongTensor(targets), volatile=volatile)
    return dat.t(), targets
def trainBatch(net, criterion, optimizer):
    data = train_iter.next()
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data[0]  # sum up batch loss
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
def printnorm_backward(self, input_, output):
    global backward_grad
    # input is a tuple of packed inputs
    # output is a Variable. output.data is the Tensor we are interested
    print('Inside ' + self.__class__.__name__ + ' backward')
    print('')
    print('input: ', type(input_))
    print('input[0]: ', type(input_[0]))
    print('output: ', type(output))
    print('output[0]: ', type(output[0]))
    print('')
    print('input size:', input_[0].size())
    print('output size:', len(output))
    print('output[0] size:', output[0].size())
    print('output norm:', output[0].data.norm())
    backward_grad = input_[0].data.numpy()

# This could be useful for using the features produced by a pretrained network
# If all you care about is this feature vector, then use a Variable with volatile=True to speed up inference
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data, requires_grad=True), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

        # Display the gradients
        plt.clf()
        plt.subplot(211); plt.hist(forward_grad.ravel()); plt.title("Features magnitude")
        plt.subplot(212); plt.hist(backward_grad.ravel()); plt.title("Gradients")
        plt.show(block=False)
        plt.pause(0.01)

        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx * len(data), len(train_loader.dataset),
            100. * batch_idx / len(train_loader), loss.data[0]))
def Saliency_map(image, model, preprocess, ground_truth, use_gpu=False, method=util.GradType.GUIDED):
    vis_param_dict['method'] = method
    img_tensor = preprocess(image)
    img_tensor.unsqueeze_(0)
    if use_gpu:
        img_tensor = img_tensor.cuda()
    input = Variable(img_tensor, requires_grad=True)

    if input.grad is not None:
        input.grad.data.zero_()

    model.zero_grad()
    output = model(input)
    ind = torch.LongTensor(1)
    if isinstance(ground_truth, np.int64):
        ground_truth = np.asscalar(ground_truth)
    ind[0] = ground_truth
    ind = Variable(ind)
    energy = output[0, ground_truth]
    energy.backward()
    grad = input.grad
    if use_gpu:
        return np.abs(grad.data.cpu().numpy()[0]).max(axis=0)
    return np.abs(grad.data.numpy()[0]).max(axis=0)
def classifyOneImage(model, img_pil, preprocess):
    model.eval()
    img_tensor = preprocess(img_pil)
    img_tensor.unsqueeze_(0)
    if use_gpu:
        img_tensor = img_tensor.cuda()

    img_variable = Variable(img_tensor)
    out = model(img_variable)
    m = nn.Softmax()
    if use_gpu:
        return m(out).cpu()
    return out

# method == util.GradType.NAIVE or util.GradType.GUIDED
def Occlusion_exp(image, occluding_size, occluding_stride, model, preprocess, classes, groundTruth):
    img = np.copy(image)
    height, width, _ = img.shape
    output_height = int(math.ceil((height - occluding_size) / occluding_stride + 1))
    output_width = int(math.ceil((width - occluding_size) / occluding_stride + 1))
    ocludedImages = []
    for h in range(output_height):
        for w in range(output_width):
            # occluder region
            h_start = h * occluding_stride
            w_start = w * occluding_stride
            h_end = min(height, h_start + occluding_size)
            w_end = min(width, w_start + occluding_size)

            input_image = copy.copy(img)
            input_image[h_start:h_end, w_start:w_end, :] = 0
            ocludedImages.append(preprocess(Image.fromarray(input_image)))

    L = np.empty(output_height * output_width)
    L.fill(groundTruth)
    L = torch.from_numpy(L)
    tensor_images = torch.stack([img for img in ocludedImages])
    dataset = torch.utils.data.TensorDataset(tensor_images, L)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=5, shuffle=False, num_workers=8)

    heatmap = np.empty(0)
    model.eval()
    for data in dataloader:
        images, labels = data

        if use_gpu:
            images, labels = (images.cuda()), (labels.cuda(async=True))

        outputs = model(Variable(images))
        m = nn.Softmax()
        outputs = m(outputs)
        if use_gpu:
            outs = outputs.cpu()
        heatmap = np.concatenate((heatmap, outs[0:outs.size()[0], groundTruth].data.numpy()))

    return heatmap.reshape((output_height, output_width))
def children(self):
    """
    Returns an iterator for the non-empty children of the Node

    The children are returned as (Node, pos) tuples where pos is 0 for the
    left subnode and 1 for the right.

    >>> len(list(create(dimensions=2).children))
    0

    >>> len(list(create([ Variable(torch.Tensor([[1, 2]])) ]).children))
    0

    >>> len(list(create([ Variable(torch.Tensor([[2, 2]])), Variable(torch.Tensor([[2, 1]])), Variable(torch.Tensor([[2, 3]])) ]).children))
    2
    """
    if self.left and self.left.data is not None:
        yield self.left, 0
    if self.right and self.right.data is not None:
        yield self.right, 1
def test(self, nb_episodes=1, maximum_episode_length=5000000):
    def evaluate_episode():
        reward = 0
        observation = self.env.reset()
        for _ in range(maximum_episode_length):
            action = self.choose_action(self.embedding_network(Variable(Tensor(observation)).unsqueeze(0)), 0)
            observation, immediate_reward, finished, info = self.env.step(action)
            reward += immediate_reward
            if finished:
                break
        return reward

    r = 0
    for _ in range(nb_episodes):
        r += evaluate_episode()
    return r / nb_episodes
def forward(self, inputs):
    # set up batch size
    batch_size = inputs.size(0)

    # compute hidden and cell
    hidden = Variable(torch.zeros(self.num_layers * 2, batch_size, self.hidden_size).cuda())
    cell = Variable(torch.zeros(self.num_layers * 2, batch_size, self.hidden_size).cuda())
    hidden_cell = (hidden, cell)

    # recurrent neural networks
    outputs, _ = self.rnn.forward(inputs, hidden_cell)
    outputs = outputs[:, -1, :].contiguous()

    # compute features by outputs
    features = self.feature.forward(outputs)
    return features
def forward(self, inputs):
    # set up batch size
    batch_size = inputs.size(0)

    # compute hidden and cell
    hidden = Variable(torch.zeros(self.num_layers * 2, batch_size, self.hidden_size).cuda())
    cell = Variable(torch.zeros(self.num_layers * 2, batch_size, self.hidden_size).cuda())
    hidden_cell = (hidden, cell)

    # recurrent neural networks
    outputs, _ = self.rnn.forward(inputs, hidden_cell)
    outputs = outputs.contiguous().view(-1, self.hidden_size * 2)

    # compute classifications by outputs
    outputs = self.classifier.forward(outputs)
    outputs = F.softmax(outputs)
    outputs = outputs.view(batch_size, -1, self.num_classes)
    return outputs
def calc_gradient_penalty(netD, real_data, fake_data, sketch):
    alpha = torch.rand(opt.batchSize, 1, 1, 1)
    alpha = alpha.cuda() if opt.cuda else alpha

    interpolates = alpha * real_data + ((1 - alpha) * fake_data)

    if opt.cuda:
        interpolates = interpolates.cuda()
    interpolates = Variable(interpolates, requires_grad=True)

    disc_interpolates = netD(interpolates, Variable(sketch))[0]

    gradients = grad(outputs=disc_interpolates, inputs=interpolates,
                     grad_outputs=torch.ones(disc_interpolates.size()).cuda() if opt.cuda else torch.ones(
                         disc_interpolates.size()),
                     create_graph=True, retain_graph=True, only_inputs=True)[0]

    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * opt.gpW
    return gradient_penalty
def calc_gradient_penalty(netD, real_data, fake_data):
    # print "real_data: ", real_data.size(), fake_data.size()
    alpha = torch.rand(opt.batchSize, 1, 1, 1)
    # alpha = alpha.expand(opt.batchSize, real_data.nelement() / opt.batchSize).contiguous().view(opt.batchSize, 3, 64, 64)
    alpha = alpha.cuda() if opt.cuda else alpha

    interpolates = alpha * real_data + ((1 - alpha) * fake_data)

    if opt.cuda:
        interpolates = interpolates.cuda()
    interpolates = Variable(interpolates, requires_grad=True)

    disc_interpolates = netD(interpolates)

    gradients = grad(outputs=disc_interpolates, inputs=interpolates,
                     grad_outputs=torch.ones(disc_interpolates.size()).cuda() if opt.cuda else torch.ones(
                         disc_interpolates.size()),
                     create_graph=True, retain_graph=True, only_inputs=True)[0]

    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * opt.gpW
    return gradient_penalty
def get_target_tensor(self, input, target_is_real):
    if target_is_real:
        create_label = ((self.real_label_var is None) or
                        (self.real_label_var.numel() != input.numel()))
        if create_label:
            real_tensor = self.Tensor(input.size()).fill_(self.real_label)
            self.real_label_var = Variable(real_tensor, requires_grad=False)
        target_tensor = self.real_label_var
    else:
        create_label = ((self.fake_label_var is None) or
                        (self.fake_label_var.numel() != input.numel()))
        if create_label:
            fake_tensor = self.Tensor(input.size()).fill_(self.fake_label)
            self.fake_label_var = Variable(fake_tensor, requires_grad=False)
        target_tensor = self.fake_label_var
    return target_tensor
def init_hidden(self, height, width):
    self.height = height
    self.width = width
    self.batch = height * width
    self.cell_state = Variable(
        torch.zeros(self.lstm_layer, self.batch, self.hidden_dim))
    self.hidden_state = Variable(
        torch.zeros(self.lstm_layer, self.batch, self.hidden_dim))
    if self.on_gpu:
        self.cell_state = self.cell_state.cuda()
        self.hidden_state = self.hidden_state.cuda()
def train(e, model, opt, dataset, arg, cuda=False):
    model.train()
    criterion = nn.MSELoss()
    losses = []
    batcher = dataset.get_batcher(shuffle=True, augment=True)
    for b, (x, y) in enumerate(batcher, 1):
        x = V(th.from_numpy(x).float()).cuda()
        y = V(th.from_numpy(y).float()).cuda()
        opt.zero_grad()
        logit = model(x)
        loss = criterion(logit, y)
        loss.backward()
        opt.step()
        losses.append(loss.data[0])
        if arg.verbose and b % 50 == 0:
            loss_t = np.mean(losses[:-49])
            print('[train] [e]:%s [b]:%s - [loss]:%s' % (e, b, loss_t))
    return losses
def validate(models, dataset, arg, cuda=False):
    criterion = nn.MSELoss()
    losses = []
    batcher = dataset.get_batcher(shuffle=True, augment=False)
    for b, (x, y) in enumerate(batcher, 1):
        x = V(th.from_numpy(x).float()).cuda()
        y = V(th.from_numpy(y).float()).cuda()
        # Ensemble average
        logit = None
        for model, _ in models:
            model.eval()
            logit = model(x) if logit is None else logit + model(x)
        logit = th.div(logit, len(models))
        loss = criterion(logit, y)
        losses.append(loss.data[0])
    return np.mean(losses)
def predict(models, dataset, arg, cuda=False):
    prediction_file = open('save/predictions.txt', 'w')
    batcher = dataset.get_batcher(shuffle=False, augment=False)
    for b, (x, _) in enumerate(batcher, 1):
        x = V(th.from_numpy(x).float()).cuda()
        # Ensemble average
        logit = None
        for model, _ in models:
            model.eval()
            logit = model(x) if logit is None else logit + model(x)
        logit = th.div(logit, len(models))
        prediction = logit.cpu().data[0][0]
        prediction_file.write('%s\n' % prediction)
        if arg.verbose and b % 100 == 0:
            print('[predict] [b]:%s - prediction: %s' % (b, prediction))
    # prediction_file.close()
def bn_hat_z_layers(self, hat_z_layers, z_pre_layers):
    # TODO: Calculate batchnorm using GPU Tensors.
    assert len(hat_z_layers) == len(z_pre_layers)
    hat_z_layers_normalized = []
    for i, (hat_z, z_pre) in enumerate(zip(hat_z_layers, z_pre_layers)):
        if self.use_cuda:
            ones = Variable(torch.ones(z_pre.size()[0], 1).cuda())
        else:
            ones = Variable(torch.ones(z_pre.size()[0], 1))
        mean = torch.mean(z_pre, 0)
        noise_var = np.random.normal(loc=0.0, scale=1 - 1e-10, size=z_pre.size())
        if self.use_cuda:
            var = np.var(z_pre.data.cpu().numpy() + noise_var, axis=0).reshape(1, z_pre.size()[1])
        else:
            var = np.var(z_pre.data.numpy() + noise_var, axis=0).reshape(1, z_pre.size()[1])
        var = Variable(torch.FloatTensor(var))
        if self.use_cuda:
            hat_z = hat_z.cpu()
            ones = ones.cpu()
            mean = mean.cpu()
        hat_z_normalized = torch.div(hat_z - ones.mm(mean), ones.mm(torch.sqrt(var + 1e-10)))
        if self.use_cuda:
            hat_z_normalized = hat_z_normalized.cuda()
        hat_z_layers_normalized.append(hat_z_normalized)
    return hat_z_layers_normalized
def evaluate_performance(ladder, valid_loader, e, agg_cost_scaled, agg_supervised_cost_scaled,
                         agg_unsupervised_cost_scaled, args):
    correct = 0.
    total = 0.
    for batch_idx, (data, target) in enumerate(valid_loader):
        if args.cuda:
            data = data.cuda()
        data, target = Variable(data), Variable(target)
        output = ladder.forward_encoders_clean(data)
        # TODO: Do away with the below hack for GPU tensors.
        if args.cuda:
            output = output.cpu()
            target = target.cpu()
        output = output.data.numpy()
        preds = np.argmax(output, axis=1)
        target = target.data.numpy()
        correct += np.sum(target == preds)
        total += target.shape[0]

    print("Epoch:", e + 1, "\t",
          "Total Cost:", "{:.4f}".format(agg_cost_scaled), "\t",
          "Supervised Cost:", "{:.4f}".format(agg_supervised_cost_scaled), "\t",
          "Unsupervised Cost:", "{:.4f}".format(agg_unsupervised_cost_scaled), "\t",
          "Validation Accuracy:", correct / total)
def forward_noise(self, tilde_h):
    # z_pre will be used in the decoder cost
    z_pre = self.linear(tilde_h)
    z_pre_norm = self.bn_normalize(z_pre)
    # Add noise
    noise = np.random.normal(loc=0.0, scale=self.noise_level, size=z_pre_norm.size())
    if self.use_cuda:
        noise = Variable(torch.cuda.FloatTensor(noise))
    else:
        noise = Variable(torch.FloatTensor(noise))
    # tilde_z will be used by decoder for reconstruction
    tilde_z = z_pre_norm + noise
    # store tilde_z in buffer
    self.buffer_tilde_z = tilde_z
    z = self.bn_gamma_beta(tilde_z)
    h = self.activation(z)
    return h
def evaluate(model, testloader, use_cuda=False):
    correct = 0
    total = 0
    for i, data in enumerate(testloader, 0):
        if i == 10:
            break
        inputs, targets = data
        inputs = inputs.unsqueeze(1)
        targets = target_onehot_to_classnum_tensor(targets)
        if use_cuda and cuda_ava:
            inputs = Variable(inputs.float().cuda())
            targets = targets.cuda()
        else:
            inputs = Variable(inputs.float())
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum()

    print("Accuracy of the network is: %.5f %%" % (correct / total * 100))
    return correct / total
def train(self, dataset):
    self.model.train()
    self.optimizer.zero_grad()
    total_loss = 0.0
    indices = torch.randperm(len(dataset))
    for idx in tqdm(range(len(dataset)), desc='Training epoch ' + str(self.epoch + 1) + ''):
        ltree, lsent, rtree, rsent, label = dataset[indices[idx]]
        linput, rinput = Var(lsent), Var(rsent)
        target = Var(map_label_to_target(label, dataset.num_classes))
        if self.args.cuda:
            linput, rinput = linput.cuda(), rinput.cuda()
            target = target.cuda()
        output = self.model(ltree, linput, rtree, rinput)
        loss = self.criterion(output, target)
        total_loss += loss.data[0]
        loss.backward()
        if idx % self.args.batchsize == 0 and idx > 0:
            self.optimizer.step()
            self.optimizer.zero_grad()
    self.epoch += 1
    return total_loss / len(dataset)

# helper function for testing
def test(self, dataset):
    self.model.eval()
    total_loss = 0
    predictions = torch.zeros(len(dataset))
    indices = torch.arange(1, dataset.num_classes + 1)
    for idx in tqdm(range(len(dataset)), desc='Testing epoch ' + str(self.epoch) + ''):
        ltree, lsent, rtree, rsent, label = dataset[idx]
        linput, rinput = Var(lsent, volatile=True), Var(rsent, volatile=True)
        target = Var(map_label_to_target(label, dataset.num_classes), volatile=True)
        if self.args.cuda:
            linput, rinput = linput.cuda(), rinput.cuda()
            target = target.cuda()
        output = self.model(ltree, linput, rtree, rinput)
        loss = self.criterion(output, target)
        total_loss += loss.data[0]
        output = output.data.squeeze().cpu()
        predictions[idx] = torch.dot(indices, torch.exp(output))
    return total_loss / len(dataset), predictions
def query(self, images):
    if self.pool_size == 0:
        return images
    return_images = []
    for image in images.data:
        image = torch.unsqueeze(image, 0)
        if self.num_imgs < self.pool_size:
            self.num_imgs = self.num_imgs + 1
            self.images.append(image)
            return_images.append(image)
        else:
            p = random.uniform(0, 1)
            if p > 0.5:
                random_id = random.randint(0, self.pool_size - 1)
                tmp = self.images[random_id].clone()
                self.images[random_id] = image
                return_images.append(tmp)
            else:
                return_images.append(image)
    return_images = Variable(torch.cat(return_images, 0))
    return return_images
def get_target_tensor(self, input, target_is_real):
    target_tensor = None
    if target_is_real:
        create_label = ((self.real_label_var is None) or
                        (self.real_label_var.numel() != input.numel()))
        if create_label:
            real_tensor = self.Tensor(input.size()).fill_(self.real_label)
            self.real_label_var = Variable(real_tensor, requires_grad=False)
        target_tensor = self.real_label_var
    else:
        create_label = ((self.fake_label_var is None) or
                        (self.fake_label_var.numel() != input.numel()))
        if create_label:
            fake_tensor = self.Tensor(input.size()).fill_(self.fake_label)
            self.fake_label_var = Variable(fake_tensor, requires_grad=False)
        target_tensor = self.fake_label_var
    return target_tensor
def train(self):
    for i, data in enumerate(self.dataset, self.iterations + 1):
        batch_input, batch_target = data
        self.call_plugins('batch', i, batch_input, batch_target)
        input_var = Variable(batch_input)
        target_var = Variable(batch_target)

        plugin_data = [None, None]

        def closure():
            batch_output = self.model(input_var)
            loss = self.criterion(batch_output, target_var)
            loss.backward()
            if plugin_data[0] is None:
                plugin_data[0] = batch_output.data
                plugin_data[1] = loss.data
            return loss

        self.optimizer.zero_grad()
        self.optimizer.step(closure)

        self.call_plugins('iteration', i, batch_input, batch_target, *plugin_data)
        self.call_plugins('update', i, self.model)

    self.iterations += i
def __setattr__(self, name, value):
    _parameters = self.__dict__.get('_parameters')
    if isinstance(value, Parameter):
        if _parameters is None:
            raise AttributeError(
                "cannot assign parameter before Module.__init__() call")
        if value.creator:
            raise ValueError(
                "Cannot assign non-leaf Variable to parameter '{0}'. Model "
                "parameters must be created explicitly. To express '{0}' "
                "as a function of another variable, compute the value in "
                "the forward() method.".format(name))
        _parameters[name] = value
    elif _parameters and name in _parameters:
        if value is not None:
            raise TypeError("cannot assign '{}' object to parameter '{}' "
                            "(torch.nn.Parameter or None required)"
                            .format(torch.typename(value), name))
        _parameters[name] = value
    else:
        object.__setattr__(self, name, value)
def _test_dropout(self, cls, input):
    p = 0.2
    input.fill_(1 - p)

    module = cls(p)
    input_var = Variable(input, requires_grad=True)
    output = module(input_var)
    self.assertLess(abs(output.data.mean() - (1 - p)), 0.05)
    output.backward(input)
    self.assertLess(abs(input_var.grad.mean() - (1 - p)), 0.05)

    module = cls(p, True)
    input_var = Variable(input.clone(), requires_grad=True)
    output = module(input_var + 0)
    self.assertLess(abs(output.data.mean() - (1 - p)), 0.05)
    output.backward(input)
    self.assertLess(abs(input_var.grad.mean() - (1 - p)), 0.05)

    # Check that these don't raise errors
    module.__repr__()
    str(module)
def test_parallel_apply(self):
    l1 = nn.Linear(10, 5).float().cuda(0)
    l2 = nn.Linear(10, 5).float().cuda(1)
    i1 = Variable(torch.randn(2, 10).float().cuda(0))
    i2 = Variable(torch.randn(2, 10).float().cuda(1))
    expected1 = l1(i1).data
    expected2 = l2(i2).data
    inputs = (i1, i2)
    modules = (l1, l2)
    expected_outputs = (expected1, expected2)
    outputs = dp.parallel_apply(modules, inputs)
    for out, expected in zip(outputs, expected_outputs):
        self.assertEqual(out.data, expected)

    inputs = (i1, Variable(i2.data.new()))
    expected_outputs = (expected1, expected2.new())
def test_load_parameter_dict(self):
    l = nn.Linear(5, 5)
    block = nn.Container(
        conv=nn.Conv2d(3, 3, 3, bias=False)
    )
    net = nn.Container(
        linear1=l,
        linear2=l,
        block=block,
        empty=None,
    )
    param_dict = {
        'linear1.weight': Variable(torch.ones(5, 5)),
        'block.conv.bias': Variable(torch.range(1, 3)),
    }
    net.load_parameter_dict(param_dict)
    self.assertIs(net.linear1.weight, param_dict['linear1.weight'])
    self.assertIs(net.block.conv.bias, param_dict['block.conv.bias'])
def test_MaxUnpool2d_output_size(self):
    m = nn.MaxPool2d(3, stride=2, return_indices=True)
    mu = nn.MaxUnpool2d(3, stride=2)
    big_t = torch.rand(1, 1, 6, 6)
    big_t[0][0][4][4] = 100
    output_big, indices_big = m(Variable(big_t))
    self.assertRaises(RuntimeError, lambda: mu(output_big, indices_big))

    small_t = torch.rand(1, 1, 5, 5)
    for i in range(0, 4, 2):
        for j in range(0, 4, 2):
            small_t[:, :, i, j] = 100
    output_small, indices_small = m(Variable(small_t))
    for h in range(3, 10):
        for w in range(3, 10):
            if 4 <= h <= 6 and 4 <= w <= 6:
                size = (h, w)
                if h == 5:
                    size = torch.LongStorage(size)
                elif h == 6:
                    size = torch.LongStorage((1, 1) + size)
                mu(output_small, indices_small, output_size=size)
            else:
                self.assertRaises(ValueError, lambda: mu(output_small, indices_small, (h, w)))
def _test_basic_cases_template(self, weight, bias, input, constructor):
    weight = Variable(weight, requires_grad=True)
    bias = Variable(bias, requires_grad=True)
    input = Variable(input, requires_grad=False)
    optimizer = constructor(weight, bias)

    def fn():
        y = weight.mv(input)
        if y.is_cuda and bias.is_cuda and y.get_device() != bias.get_device():
            y = y.cuda(bias.get_device())
        return (y + bias).abs().sum()

    initial_value = fn().data[0]
    for i in range(200):
        weight.grad.zero_()
        bias.grad.zero_()
        fn().backward()
        optimizer.step()

    self.assertLessEqual(fn().data[0], initial_value)
def _numerical_jacobian(self, module, input, jacobian_input=True, jacobian_parameters=True):
    output = self._forward(module, input)
    output_size = output.nelement()

    if jacobian_parameters:
        param, d_param = self._get_parameters(module)

    def fw(input):
        out = self._forward(module, input)
        if isinstance(out, Variable):
            return out.data
        return out

    res = tuple()
    # TODO: enable non-contig tests
    input = contiguous(input)
    if jacobian_input:
        res += get_numerical_jacobian(fw, input, input),
    if jacobian_parameters:
        res += torch.cat(list(get_numerical_jacobian(fw, input, p) for p in param), 0),
    return res
def __call__(self, test_case):
    module = self.constructor(*self.constructor_args)
    input = self._get_input()

    if self.reference_fn is not None:
        out = test_case._forward(module, input)
        if isinstance(out, Variable):
            out = out.data
        ref_input = self._unpack_input(deepcopy(input))
        expected_out = self.reference_fn(ref_input, test_case._get_parameters(module)[0])
        test_case.assertEqual(out, expected_out)

    # TODO: do this with in-memory files as soon as torch.save will support it
    with TemporaryFile() as f:
        test_case._forward(module, input)
        torch.save(module, f)
        f.seek(0)
        module_copy = torch.load(f)
        test_case.assertEqual(test_case._forward(module, input),
                              test_case._forward(module_copy, input))

    self._do_test(test_case, module, input)
def __call__(self, test_case):
    module = self.constructor(*self.constructor_args)
    input = self._get_input()

    # Check that these methods don't raise errors
    module.__repr__()
    str(module)

    if self.reference_fn is not None:
        out = test_case._forward_criterion(module, input, self.target)
        target = self.target
        if isinstance(target, Variable):
            target = target.data
        expected_out = self.reference_fn(deepcopy(self._unpack_input(input)),
                                         deepcopy(target), module)
        test_case.assertEqual(out, expected_out)

    test_case.check_criterion_jacobian(module, input, self.target)
def to_gpu(obj, type_map={}):
    if torch.is_tensor(obj):
        t = type_map.get(type(obj), get_gpu_type(type(obj)))
        return obj.clone().type(t)
    elif torch.is_storage(obj):
        return obj.new().resize_(obj.size()).copy_(obj)
    elif isinstance(obj, Variable):
        assert obj.creator is None
        t = type_map.get(type(obj.data), get_gpu_type(type(obj.data)))
        return Variable(obj.data.clone().type(t), requires_grad=obj.requires_grad)
    elif isinstance(obj, list):
        return [to_gpu(o, type_map) for o in obj]
    elif isinstance(obj, tuple):
        return tuple(to_gpu(o, type_map) for o in obj)
    else:
        return deepcopy(obj)
def forward(self, input):
    input_torch = torch.from_numpy(input)
    if self.use_gpu:
        input_torch = input_torch.cuda()
    else:
        input_torch = input_torch.float()
    input_var = Variable(input_torch)

    # forward
    out = self.model.forward(input_var)

    if type(out) is list:
        clean_out = []
        for v in out:
            clean_out.append(v.data.cpu().numpy())
        out = clean_out
    else:
        out = out.data.cpu().numpy()

    self.ready = True
    return out
def __init__(self, phase, base, extras, head, num_classes):
    super(SSD, self).__init__()
    self.phase = phase
    self.num_classes = num_classes
    # TODO: implement __call__ in PriorBox
    self.priorbox = PriorBox(v2)
    self.priors = Variable(self.priorbox.forward(), volatile=True)
    self.size = 300

    # SSD network
    self.vgg = nn.ModuleList(base)
    # Layer learns to scale the l2 normalized features from conv4_3
    self.L2Norm = L2Norm(512, 20)
    self.extras = nn.ModuleList(extras)

    self.loc = nn.ModuleList(head[0])
    self.conf = nn.ModuleList(head[1])

    if phase == 'test':
        self.softmax = nn.Softmax()
        self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
def fast_heat_similarity_matrix(X, sigma):
    """
    PyTorch based similarity calculation
    :param X: the matrix with the data
    :param sigma: scaling factor
    :return: the similarity matrix
    """
    use_gpu = False
    # Use GPU if available
    if torch.cuda.device_count() > 0:
        use_gpu = True

    X = Variable(torch.from_numpy(np.float32(X)))
    sigma = Variable(torch.from_numpy(np.float32([sigma])))
    if use_gpu:
        X, sigma = X.cuda(), sigma.cuda()
    D = sym_heat_similarity_matrix(X, sigma)
    if use_gpu:
        D = D.cpu()
    return D.data.numpy()
def test(netG, opt):
    assert opt.netG != ''
    test_dir = opt.testdata_dir
    for f in os.listdir(test_dir):
        fname, ext = os.path.splitext(f)
        if ext == '.cmp':
            print(fname)
            cmp_file = os.path.join(test_dir, f)
            ac_data = read_binary_file(cmp_file, dim=47)
            ac_data = torch.FloatTensor(ac_data)
            noise = torch.FloatTensor(ac_data.size(0), nz)
            if opt.cuda:
                ac_data, noise = ac_data.cuda(), noise.cuda()
            ac_data = Variable(ac_data)
            noise = Variable(noise)
            noise.data.normal_(0, 1)
            generated_pulses = netG(noise, ac_data)
            generated_pulses = generated_pulses.data.cpu().numpy()
            generated_pulses = generated_pulses.reshape(ac_data.size(0), -1)
            out_file = os.path.join(test_dir, fname + '.pls')
            with open(out_file, 'wb') as fid:
                generated_pulses.tofile(fid)