The following 50 code examples, extracted from open source Python projects, illustrate how to use torch.nn.functional.cross_entropy().
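Before the project examples, here is a minimal, self-contained sketch of the basic call, using randomly generated tensors that are not taken from any of the projects below. F.cross_entropy takes raw, unnormalized logits of shape (N, C) and a LongTensor of class indices of shape (N,), applies log_softmax internally, and by default returns the mean negative log-likelihood over the batch.

import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)              # 4 samples, 10 classes; raw scores, no softmax needed
targets = torch.tensor([1, 0, 4, 9])     # one ground-truth class index per sample
loss = F.cross_entropy(logits, targets)  # scalar mean loss over the batch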
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data, requires_grad=True), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

        # Display the gradients
        plt.clf()
        plt.subplot(211); plt.hist(forward_grad.ravel()); plt.title("Features magnitude")
        plt.subplot(212); plt.hist(backward_grad.ravel()); plt.title("Gradients")
        plt.show(block=False)
        plt.pause(0.01)

        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx * len(data), len(train_loader.dataset),
            100. * batch_idx / len(train_loader), loss.data[0]))
def forward(self, model, sample):
    """Compute the loss for the given sample.

    Returns a tuple with three elements:
    1) the loss, as a Variable
    2) the sample size, which is used as the denominator for the gradient
    3) logging outputs to display while training
    """
    net_output = model(**sample['net_input'])
    input = net_output.view(-1, net_output.size(-1))
    target = sample['target'].view(-1)
    loss = F.cross_entropy(input, target, size_average=False,
                           ignore_index=self.padding_idx)
    sample_size = sample['target'].size(0) if self.args.sentence_avg else sample['ntokens']
    logging_output = {
        'loss': loss.data[0],
        'sample_size': sample_size,
    }
    return loss, sample_size, logging_output
def train_epoch(self, epoch):
    self.model.train()
    total_loss = 0

    for batch_idx, batch in enumerate(self.train_loader):
        self.optimizer.zero_grad()
        output = self.model(batch.sentence_1, batch.sentence_2, batch.ext_feats)
        loss = F.cross_entropy(output, batch.label, size_average=False)
        total_loss += loss.data[0]
        loss.backward()
        self.optimizer.step()

        if batch_idx % self.log_interval == 0:
            self.logger.info('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, min(batch_idx * self.batch_size, len(batch.dataset.examples)),
                len(batch.dataset.examples),
                100. * batch_idx / (len(self.train_loader)), loss.data[0])
            )

    average_loss, mean_average_precision, mean_reciprocal_rank = self.evaluate(self.train_evaluator, 'train')

    if self.use_tensorboard:
        self.writer.add_scalar('{}/train/cross_entropy_loss'.format(self.train_loader.dataset.NAME), average_loss, epoch)
        self.writer.add_scalar('{}/train/map'.format(self.train_loader.dataset.NAME), mean_average_precision, epoch)
        self.writer.add_scalar('{}/train/mrr'.format(self.train_loader.dataset.NAME), mean_reciprocal_rank, epoch)

    return total_loss
def get_scores(self):
    self.model.eval()
    test_cross_entropy_loss = 0
    qids = []
    true_labels = []
    predictions = []

    for batch in self.data_loader:
        qids.extend(batch.id.data.cpu().numpy())
        output = self.model(batch.sentence_1, batch.sentence_2, batch.ext_feats)
        test_cross_entropy_loss += F.cross_entropy(output, batch.label, size_average=False).data[0]

        true_labels.extend(batch.label.data.cpu().numpy())
        predictions.extend(output.data.exp()[:, 1].cpu().numpy())

        del output

    qids = list(map(lambda n: int(round(n * 10, 0)) / 10, qids))

    mean_average_precision, mean_reciprocal_rank = get_map_mrr(qids, predictions, true_labels, self.data_loader.device)

    test_cross_entropy_loss /= len(batch.dataset.examples)

    return [test_cross_entropy_loss, mean_average_precision, mean_reciprocal_rank], ['cross entropy loss', 'map', 'mrr']
def eval(data_iter, model, args):
    model.eval()
    corrects, avg_loss = 0, 0
    for batch in data_iter:
        feature, target = batch.text, batch.label
        feature.data.t_(), target.data.sub_(1)  # batch first, index align
        if args.cuda:
            feature, target = feature.cuda(), target.cuda()

        logit = model(feature)
        loss = F.cross_entropy(logit, target, size_average=False)

        avg_loss += loss.data[0]
        corrects += (torch.max(logit, 1)
                     [1].view(target.size()).data == target.data).sum()

    size = len(data_iter.dataset)
    avg_loss = avg_loss/size
    accuracy = 100.0 * corrects/size
    model.train()
    print('\nEvaluation - loss: {:.6f} acc: {:.4f}%({}/{}) \n'.format(avg_loss, accuracy, corrects, size))
def eval(data_iter, model, args):
    model.eval()
    corrects, avg_loss = 0, 0
    for batch in data_iter:
        feature, target = batch.text, batch.label
        feature.data.t_(), target.data.sub_(1)  # batch first, index align
        if args.cuda:
            feature, target = feature.cuda(), target.cuda()

        logit = model(feature)
        loss = F.cross_entropy(logit, target, size_average=False)  # sum per batch so it can be averaged below

        avg_loss += loss.data[0]
        corrects += (torch.max(logit, 1)[1].view(target.size()).data == target.data).sum()

    size = len(data_iter.dataset)
    avg_loss = avg_loss/size  # average the accumulated loss over the whole dataset
    accuracy = 100.0 * corrects/size
    model.train()
    print('\nEvaluation - loss: {:.6f} acc: {:.4f}%({}/{}) \n'.format(avg_loss, accuracy, corrects, size))
def build_loss(self, rpn_cls_score_reshape, rpn_bbox_pred, rpn_data):
    # classification loss
    rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(-1, 2)
    rpn_label = rpn_data[0].view(-1)

    rpn_keep = Variable(rpn_label.data.ne(-1).nonzero().squeeze()).cuda()
    rpn_cls_score = torch.index_select(rpn_cls_score, 0, rpn_keep)
    rpn_label = torch.index_select(rpn_label, 0, rpn_keep)

    fg_cnt = torch.sum(rpn_label.data.ne(0))

    rpn_cross_entropy = F.cross_entropy(rpn_cls_score, rpn_label)

    # box loss
    rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
    rpn_bbox_targets = torch.mul(rpn_bbox_targets, rpn_bbox_inside_weights)
    rpn_bbox_pred = torch.mul(rpn_bbox_pred, rpn_bbox_inside_weights)

    rpn_loss_box = F.smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, size_average=False) / (fg_cnt + 1e-4)

    return rpn_cross_entropy, rpn_loss_box
def __init__(self, classes=None, debug=False):
    super(FasterRCNN, self).__init__()

    if classes is not None:
        self.classes = classes
        self.n_classes = len(classes)

    self.rpn = RPN()
    self.roi_pool = RoIPool(7, 7, 1.0/16)
    self.fc6 = FC(512 * 7 * 7, 4096)
    self.fc7 = FC(4096, 4096)
    self.score_fc = FC(4096, self.n_classes, relu=False)
    self.bbox_fc = FC(4096, self.n_classes * 4, relu=False)

    # loss
    self.cross_entropy = None
    self.loss_box = None

    # for log
    self.debug = debug
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
    features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)

    if self.training:
        roi_data = self.proposal_target_layer(rois, gt_boxes, gt_ishard, dontcare_areas, self.n_classes)
        rois = roi_data[0]

    # roi pool
    pooled_features = self.roi_pool(features, rois)
    x = pooled_features.view(pooled_features.size()[0], -1)
    x = self.fc6(x)
    x = F.dropout(x, training=self.training)
    x = self.fc7(x)
    x = F.dropout(x, training=self.training)

    cls_score = self.score_fc(x)
    cls_prob = F.softmax(cls_score)
    bbox_pred = self.bbox_fc(x)

    if self.training:
        self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)

    return cls_prob, bbox_pred, rois
def compute_loss(self, y, t):
    arc_logits, label_logits = y
    true_arcs, true_labels = t.T

    b, l1, l2 = arc_logits.size()
    true_arcs = _model_var(
        self.model,
        pad_sequence(true_arcs, padding=-1, dtype=np.int64))
    arc_loss = F.cross_entropy(
        arc_logits.view(b * l1, l2), true_arcs.view(b * l1),
        ignore_index=-1)

    b, l1, d = label_logits.size()
    true_labels = _model_var(
        self.model,
        pad_sequence(true_labels, padding=-1, dtype=np.int64))
    label_loss = F.cross_entropy(
        label_logits.view(b * l1, d), true_labels.view(b * l1),
        ignore_index=-1)

    loss = arc_loss + label_loss
    return loss
def build_loss_objectiveness(self, region_objectiveness, targets):
    loss_objectiveness = F.cross_entropy(region_objectiveness, targets)
    maxv, predict = region_objectiveness.data.max(1)
    labels = targets.squeeze()
    fg_cnt = torch.sum(labels.data.ne(0))
    bg_cnt = labels.data.numel() - fg_cnt
    if fg_cnt > 0:
        self.tp_reg = torch.sum(predict[:fg_cnt].eq(labels.data[:fg_cnt]))
    else:
        self.tp_reg = 0.
    if bg_cnt > 0:
        self.tf_reg = torch.sum(predict[fg_cnt:].eq(labels.data[fg_cnt:]))
    else:
        self.tf_reg = 0.
    self.fg_cnt_reg = fg_cnt
    self.bg_cnt_reg = bg_cnt
    return loss_objectiveness
def compute_loss(self, output_logprobs, y):
    """
    Compute loss. We assume that the first element of the output sequence y is
    a start token, and that each element of y is left-aligned and right-padded
    with self.NULL out to T_out. We want the output_logprobs to predict the
    sequence y, shifted by one timestep so that y[0] is fed to the network and
    then y[1] is predicted. We also don't want to compute loss for padded
    timesteps.

    Inputs:
    - output_logprobs: Variable of shape (N, T_out, V_out)
    - y: LongTensor Variable of shape (N, T_out)
    """
    self.multinomial_outputs = None
    V_in, V_out, D, H, L, N, T_in, T_out = self.get_dims(y=y)
    mask = y.data != self.NULL
    y_mask = Variable(torch.Tensor(N, T_out).fill_(0).type_as(mask))
    y_mask[:, 1:] = mask[:, 1:]
    y_masked = y[y_mask]
    out_mask = Variable(torch.Tensor(N, T_out).fill_(0).type_as(mask))
    out_mask[:, :-1] = mask[:, 1:]
    out_mask = out_mask.view(N, T_out, 1).expand(N, T_out, V_out)
    out_masked = output_logprobs[out_mask].view(-1, V_out)
    loss = F.cross_entropy(out_masked, y_masked)
    return loss
def forward(self, predict, target, weight=None):
    """
    Args:
        predict:(n, c, h, w)
        target:(n, h, w)
        weight (Tensor, optional): a manual rescaling weight given to each class.
                                   If given, has to be a Tensor of size "nclasses"
    """
    assert not target.requires_grad
    assert predict.dim() == 4
    assert target.dim() == 3
    assert predict.size(0) == target.size(0), "{0} vs {1} ".format(predict.size(0), target.size(0))
    assert predict.size(2) == target.size(1), "{0} vs {1} ".format(predict.size(2), target.size(1))
    assert predict.size(3) == target.size(2), "{0} vs {1} ".format(predict.size(3), target.size(2))

    n, c, h, w = predict.size()
    target_mask = (target >= 0) * (target != self.ignore_label)
    target = target[target_mask]
    predict = predict.transpose(1, 2).transpose(2, 3).contiguous()
    predict = predict[target_mask.view(n, h, w, 1).repeat(1, 1, 1, c)].view(-1, c)
    loss = F.cross_entropy(predict, target, weight=weight, size_average=self.size_average)
    return loss
def train(model, generator, batch_num, epoch):
    model.train()
    for batch_idx in range(batch_num):
        data, target = next(generator)
        data, target = torch.from_numpy(data), torch.from_numpy(target)
        # convert BHWC to BCHW
        data = data.permute(0, 3, 1, 2)
        data, target = data.float().cuda(), target.long().cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
    print('Train Epoch: {}\tLoss: {:.6f}'.format(epoch, loss.data[0]))
def test(model, generator, batch_num, epoch):
    model.eval()
    test_loss = 0
    correct = 0
    for batch_idx in range(batch_num):
        data, target = next(generator)
        data, target = torch.from_numpy(data), torch.from_numpy(target)
        # convert BHWC to BCHW
        data = data.permute(0, 3, 1, 2)
        data, target = data.float().cuda(), target.long().cuda()
        data, target = Variable(data), Variable(target)
        output = model(data)
        test_loss += F.cross_entropy(output, target).data[0]
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()

    test_loss /= batch_num  # loss function already averages over batch size
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, n_test, 100. * correct / n_test))


# ---
# Normal CNN
def forward(self, predict, target, weight=None):
    """
    Args:
        predict:(n, c, h, w)
        target:(n, h, w)
        weight (Tensor, optional): a manual rescaling weight given to each class.
                                   If given, has to be a Tensor of size "nclasses"
    """
    assert not target.requires_grad
    assert predict.dim() == 4
    assert target.dim() == 3
    assert predict.size(0) == target.size(0), "{0} vs {1} ".format(predict.size(0), target.size(0))
    assert predict.size(2) == target.size(1), "{0} vs {1} ".format(predict.size(2), target.size(1))
    assert predict.size(3) == target.size(2), "{0} vs {1} ".format(predict.size(3), target.size(2))

    n, c, h, w = predict.size()
    target_mask = (target >= 0) * (target != self.ignore_label)
    target = target[target_mask]
    if not target.data.dim():
        return Variable(torch.zeros(1))
    predict = predict.transpose(1, 2).transpose(2, 3).contiguous()
    predict = predict[target_mask.view(n, h, w, 1).repeat(1, 1, 1, c)].view(-1, c)
    loss = F.cross_entropy(predict, target, weight=weight, size_average=self.size_average)
    return loss
def __init__(self, classes=None, debug=False):
    super(FasterRCNN, self).__init__()

    if classes is not None:
        self.classes = classes
        self.n_classes = len(classes)

    self.rpn = RPN()
    self.roi_pool = RoIPool(7, 7, 1.0/16)
    self.fc6 = FC(1024 * 7 * 7, 4096)
    self.fc7 = FC(4096, 4096)
    self.score_fc = FC(4096, self.n_classes, relu=False)
    self.bbox_fc = FC(4096, self.n_classes * 4, relu=False)

    # loss
    self.cross_entropy = None
    self.loss_box = None

    # for log
    self.debug = debug
def __init__(self, classes=None, debug=False):
    super(RFCN, self).__init__()

    if classes is not None:
        self.classes = classes
        self.n_classes = len(classes)

    self.rpn = RPN()
    # self.psroi_pool = PSRoIPool(7, 7, 1.0/16, 7, 15)  # This is for test
    self.psroi_pool_cls = PSRoIPool(7, 7, 1.0/16, 7, self.n_classes)
    self.psroi_pool_loc = PSRoIPool(7, 7, 1.0/16, 7, 8)
    self.new_conv = Conv2d(512, 1024, 1, same_padding=False)
    self.rfcn_score = Conv2d(1024, 7*7*8, 1, 1, bn=False)
    self.rfcn_bbox = Conv2d(1024, 7*7*self.n_classes, 1, 1, bn=False)
    self.bbox_pred = nn.AvgPool2d((7, 7), stride=(7, 7))
    self.cls_score = nn.AvgPool2d((7, 7), stride=(7, 7))

    # loss
    self.cross_entropy = None
    self.loss_box = None

    # for log
    self.debug = debug
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
    features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)

    if self.training:
        roi_data = self.proposal_target_layer(rois, gt_boxes, gt_ishard, dontcare_areas, self.n_classes)
        rois = roi_data[0]

    # roi pool
    conv_new1 = self.new_conv(features)
    r_score_map = self.rfcn_score(conv_new1)
    r_bbox_map = self.rfcn_bbox(conv_new1)
    psroi_pooled_cls = self.psroi_pool_cls(r_score_map, rois)
    psroi_pooled_loc = self.psroi_pool_loc(r_bbox_map, rois)
    bbox_pred = self.bbox_pred(psroi_pooled_loc)
    bbox_pred = torch.squeeze(bbox_pred)
    cls_score = self.cls_score(psroi_pooled_cls)
    cls_score = torch.squeeze(cls_score)
    cls_prob = F.softmax(cls_score)

    if self.training:
        self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)

    return cls_prob, bbox_pred, rois
def loss(self, examples):
    # IMPORTANT: Sort the examples by their size. recurrent network stuff needs this
    examples.sort(key=lambda e: len(e.tokens), reverse=True)

    x = variable(np.array([e.sequence.draw() for e in examples], dtype=np.float32))
    x = x.unsqueeze(1)  # insert the channel

    imageFeatures = self.encoder(x)

    inputs, sizes, T = self.decoder.buildCaptions([e.tokens for e in examples])

    outputDistributions = self.decoder(imageFeatures, inputs, sizes)

    T = pack_padded_sequence(T, sizes, batch_first=True)[0]

    return F.cross_entropy(outputDistributions, T)
def validater(self, batch_loader):
    def validate(batch_size, use_cuda):
        input = batch_loader.next_batch(batch_size, 'valid')
        input = [Variable(t.from_numpy(var)) for var in input]
        input = [var.long() for var in input]
        input = [var.cuda() if use_cuda else var for var in input]

        [encoder_word_input, encoder_character_input,
         decoder_word_input, decoder_character_input, target] = input

        logits, _, kld = self(0.,
                              encoder_word_input, encoder_character_input,
                              decoder_word_input, decoder_character_input,
                              z=None)

        logits = logits.view(-1, self.params.word_vocab_size)
        target = target.view(-1)

        cross_entropy = F.cross_entropy(logits, target)

        return cross_entropy, kld

    return validate
def train():
    net.train()
    loss_avg = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = torch.autograd.Variable(data.cuda()), torch.autograd.Variable(target.cuda())

        # forward
        output = net(data)

        # backward
        optimizer.zero_grad()
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

        # exponential moving average
        loss_avg = loss_avg * 0.2 + loss.data[0] * 0.8

    state['train_loss'] = loss_avg


# test function (forward only)
def test():
    net.eval()
    loss_avg = 0.0
    correct = 0
    for batch_idx, (data, target) in enumerate(test_loader):
        data, target = torch.autograd.Variable(data.cuda()), torch.autograd.Variable(target.cuda())

        # forward
        output = net(data)
        loss = F.cross_entropy(output, target)

        # accuracy
        pred = output.data.max(1)[1]
        correct += pred.eq(target.data).sum()

        # test loss average
        loss_avg += loss.data[0]

    state['test_loss'] = loss_avg / len(test_loader)
    state['test_accuracy'] = correct / len(test_loader.dataset)


# Main loop
def get_attr_loss(output, attributes, flip, params):
    """
    Compute attributes loss.
    """
    assert type(flip) is bool
    k = 0
    loss = 0
    for (_, n_cat) in params.attr:
        # categorical
        x = output[:, k:k + n_cat].contiguous()
        y = attributes[:, k:k + n_cat].max(1)[1].view(-1)
        if flip:
            # generate different categories
            shift = torch.LongTensor(y.size()).random_(n_cat - 1) + 1
            y = (y + Variable(shift.cuda())) % n_cat
        loss += F.cross_entropy(x, y)
        k += n_cat
    return loss
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
    features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)

    if self.training:
        roi_data = self.proposal_target_layer(rois, gt_boxes, gt_ishard, dontcare_areas, self.n_classes)
        rois = roi_data[0]

    # roi pool
    pooled_features = self.roi_pool(features, rois)
    x = pooled_features.view(pooled_features.size()[0], -1)
    # x = self.fc6(x)
    # x = F.dropout(x, training=self.training)
    # x = self.fc7(x)
    # x = F.dropout(x, training=self.training)
    x = self.fcs(x)

    cls_score = self.score_fc(x)
    cls_prob = F.softmax(cls_score)
    bbox_pred = self.bbox_fc(x)

    if self.training:
        self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)

    return cls_prob, bbox_pred, rois
def train_model(self, train_loader, path, num_batch):
    self.train()
    fitness = 0
    train_len = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        if self.args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        self.optimizer.zero_grad()
        output = self(data, path, -1)
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        fitness += pred.eq(target.data).cpu().sum()
        train_len += len(target.data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        self.optimizer.step()
        if not batch_idx < num_batch - 1:
            break
    fitness = fitness / train_len
    return fitness
def train_step(self, blobs, train_op):
    self.forward(blobs['data'], blobs['im_info'], blobs['gt_boxes'])
    rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, loss = self._losses["rpn_cross_entropy"].data[0], \
                                                           self._losses['rpn_loss_box'].data[0], \
                                                           self._losses['cross_entropy'].data[0], \
                                                           self._losses['loss_box'].data[0], \
                                                           self._losses['total_loss'].data[0]
    # utils.timer.timer.tic('backward')
    train_op.zero_grad()
    self._losses['total_loss'].backward()
    # utils.timer.timer.toc('backward')
    train_op.step()

    self.delete_intermediate_states()

    return rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, loss
def ours_train(m, x, labels, data, att_crit=None, optimizers=None):
    """
    Train the direct attribute prediction model
    :param m: Model we're using
    :param x: [batch_size, 3, 224, 224] Image input
    :param labels: [batch_size] variable with indices of the right verbs
    :param embeds: [vocab_size, 300] Variables with embeddings of all of the verbs
    :param atts_matrix: [vocab_size, att_dim] matrix with GT attributes of the verbs
    :param att_crit: AttributeLoss module that computes the loss
    :param optimizers: the decorator will use these to update parameters
    :return:
    """
    logits = ours_logits(m, x, data, att_crit=att_crit)

    loss = m.l2_penalty
    if len(logits) == 1:
        loss += F.cross_entropy(logits[0], labels, size_average=True)
    else:
        sum_logits = sum(logits)
        for l in logits:
            loss += F.cross_entropy(l, labels, size_average=True) / (len(logits) + 1)
        loss += F.cross_entropy(sum_logits, labels, size_average=True) / (len(logits) + 1)
    return loss
def crossentropyloss(logits, label):
    mask = (label.view(-1) != VOID_LABEL)
    nonvoid = mask.long().sum()
    if nonvoid == 0:
        # only void pixels, the gradients should be 0
        return logits.sum() * 0.
    # if nonvoid == mask.numel():
    #     # no void pixel, use builtin
    #     return F.cross_entropy(logits, Variable(label))
    target = label.view(-1)[mask]
    C = logits.size(1)
    logits = logits.permute(0, 2, 3, 1)  # B, H, W, C
    logits = logits.contiguous().view(-1, C)
    mask2d = mask.unsqueeze(1).expand(mask.size(0), C).contiguous().view(-1)
    logits = logits[mask2d].view(-1, C)
    loss = F.cross_entropy(logits, Variable(target))
    return loss
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx * len(data), len(train_loader.dataset),
            100. * batch_idx / len(train_loader), loss.data[0]))
def test():
    model.eval()
    test_loss = 0
    correct = 0
    for batch_idx, (data, target) in enumerate(test_loader):
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.cross_entropy(output, target, size_average=False).data[0]  # sum up batch loss
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
def forward(self, output, target):
    cross_entropy = F.cross_entropy(output, target)
    cross_entropy_log = torch.log(cross_entropy)
    focal_loss = -((1 - cross_entropy) ** self.focusing_param) * cross_entropy_log

    balanced_focal_loss = self.balance_param * focal_loss

    return balanced_focal_loss
def train(epoch):
    color_model.train()

    try:
        for batch_idx, (data, classes) in enumerate(train_loader):
            messagefile = open('./message.txt', 'a')
            original_img = data[0].unsqueeze(1).float()
            img_ab = data[1].float()
            if have_cuda:
                original_img = original_img.cuda()
                img_ab = img_ab.cuda()
                classes = classes.cuda()
            original_img = Variable(original_img)
            img_ab = Variable(img_ab)
            classes = Variable(classes)
            optimizer.zero_grad()
            class_output, output = color_model(original_img, original_img)
            ems_loss = torch.pow((img_ab - output), 2).sum() / torch.from_numpy(np.array(list(output.size()))).prod()
            cross_entropy_loss = 1/300 * F.cross_entropy(class_output, classes)
            loss = ems_loss + cross_entropy_loss
            lossmsg = 'loss: %.9f\n' % (loss.data[0])
            messagefile.write(lossmsg)
            ems_loss.backward(retain_variables=True)
            cross_entropy_loss.backward()
            optimizer.step()
            if batch_idx % 500 == 0:
                message = 'Train Epoch:%d\tPercent:[%d/%d (%.0f%%)]\tLoss:%.9f\n' % (
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.data[0])
                messagefile.write(message)
                torch.save(color_model.state_dict(), 'colornet_params.pkl')
            messagefile.close()
            # print('Train Epoch: {}[{}/{}({:.0f}%)]\tLoss: {:.9f}\n'.format(
            #     epoch, batch_idx * len(data), len(train_loader.dataset),
            #     100. * batch_idx / len(train_loader), loss.data[0]))
    except Exception:
        logfile = open('log.txt', 'w')
        logfile.write(traceback.format_exc())
        logfile.close()
    finally:
        torch.save(color_model.state_dict(), 'colornet_params.pkl')
def train(train_iter, dev_iter, model, args):
    if args.cuda:
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    steps = 0
    model.train()
    for epoch in range(1, args.epochs+1):
        for batch in train_iter:
            feature, target = batch.text, batch.label
            feature.data.t_(), target.data.sub_(1)  # batch first, index align
            if args.cuda:
                feature, target = feature.cuda(), target.cuda()

            optimizer.zero_grad()
            logit = model(feature)

            # print('logit vector', logit.size())
            # print('target vector', target.size())
            loss = F.cross_entropy(logit, target)
            loss.backward()
            optimizer.step()

            steps += 1
            if steps % args.log_interval == 0:
                corrects = (torch.max(logit, 1)[1].view(target.size()).data == target.data).sum()
                accuracy = 100.0 * corrects/batch.batch_size
                sys.stdout.write(
                    '\rBatch[{}] - loss: {:.6f} acc: {:.4f}%({}/{})'.format(steps,
                                                                            loss.data[0],
                                                                            accuracy,
                                                                            corrects,
                                                                            batch.batch_size))
            if steps % args.test_interval == 0:
                eval(dev_iter, model, args)
            if steps % args.save_interval == 0:
                if not os.path.isdir(args.save_dir):
                    os.makedirs(args.save_dir)
                save_prefix = os.path.join(args.save_dir, 'snapshot')
                save_path = '{}_steps{}.pt'.format(save_prefix, steps)
                torch.save(model, save_path)
def forward(self, inputs, targets):
    inputs = oim(inputs, targets, self.lut, momentum=self.momentum)
    inputs *= self.scalar
    loss = F.cross_entropy(inputs, targets, weight=self.weight,
                           size_average=self.size_average)
    return loss, inputs
def eval(data_iter, model, args, scheduler):
    model.eval()
    corrects, avg_loss = 0, 0
    for batch in data_iter:
        feature, target = batch.text, batch.label
        target.data.sub_(1)  # index align
        # feature.data.t_(), target.data.sub_(1)  # batch first, index align
        # target = autograd.Variable(target)
        if args.cuda is True:
            feature, target = feature.cuda(), target.cuda()

        model.hidden = model.init_hidden(args.lstm_num_layers, args.batch_size)
        if feature.size(1) != args.batch_size:
            # continue
            model.hidden = model.init_hidden(args.lstm_num_layers, feature.size(1))
        logit = model(feature)

        loss = F.cross_entropy(logit, target, size_average=False)
        # scheduler.step(loss.data[0])

        avg_loss += loss.data[0]
        corrects += (torch.max(logit, 1)[1].view(target.size()).data == target.data).sum()

    size = len(data_iter.dataset)
    avg_loss = avg_loss/size  # average the accumulated loss over the whole dataset
    accuracy = float(corrects)/size * 100.0
    model.train()
    print('\nEvaluation - loss: {:.6f} acc: {:.4f}%({}/{}) \n'.format(avg_loss, accuracy, corrects, size))
def eval(data_iter, model, args, scheduler):
    model.eval()
    corrects, avg_loss = 0, 0
    for batch in data_iter:
        feature, target = batch.text, batch.label
        feature.data.t_(), target.data.sub_(1)  # batch first, index align
        if args.cuda:
            feature, target = feature.cuda(), target.cuda()

        logit = model(feature)
        loss = F.cross_entropy(logit, target, size_average=False)
        # scheduler.step(loss.data[0])
        # if args.init_clip_max_norm is not None:
        #     # print("aaaa {} ".format(args.init_clip_max_norm))
        #     utils.clip_grad_norm(model.parameters(), max_norm=args.init_clip_max_norm)

        avg_loss += loss.data[0]
        corrects += (torch.max(logit, 1)[1].view(target.size()).data == target.data).sum()

    size = len(data_iter.dataset)
    avg_loss = avg_loss/size  # average the accumulated loss over the whole dataset
    # accuracy = float(corrects)/size * 100.0
    accuracy = 100.0 * corrects/size
    model.train()
    print('\nEvaluation - loss: {:.6f} acc: {:.4f}%({}/{}) \n'.format(avg_loss, accuracy, corrects, size))
def __init__(self):
    super(RPN, self).__init__()

    self.features = VGG16(bn=False)
    self.conv1 = Conv2d(512, 512, 3, same_padding=True)
    self.score_conv = Conv2d(512, len(self.anchor_scales) * 3 * 2, 1, relu=False, same_padding=False)
    self.bbox_conv = Conv2d(512, len(self.anchor_scales) * 3 * 4, 1, relu=False, same_padding=False)

    # loss
    self.cross_entropy = None
    self.loss_box = None
def loss(self):
    return self.cross_entropy + self.loss_box * 10
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
    im_data = network.np_to_variable(im_data, is_cuda=True)
    im_data = im_data.permute(0, 3, 1, 2)
    features = self.features(im_data)

    rpn_conv1 = self.conv1(features)

    # rpn score
    rpn_cls_score = self.score_conv(rpn_conv1)
    rpn_cls_score_reshape = self.reshape_layer(rpn_cls_score, 2)
    rpn_cls_prob = F.softmax(rpn_cls_score_reshape)
    rpn_cls_prob_reshape = self.reshape_layer(rpn_cls_prob, len(self.anchor_scales)*3*2)

    # rpn boxes
    rpn_bbox_pred = self.bbox_conv(rpn_conv1)

    # proposal layer
    cfg_key = 'TRAIN' if self.training else 'TEST'
    rois = self.proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                               cfg_key, self._feat_stride, self.anchor_scales)

    # generating training labels and build the rpn loss
    if self.training:
        assert gt_boxes is not None
        rpn_data = self.anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas,
                                            im_info, self._feat_stride, self.anchor_scales)
        self.cross_entropy, self.loss_box = self.build_loss(rpn_cls_score_reshape, rpn_bbox_pred, rpn_data)

    return features, rois
def build_loss(self, cls_score, bbox_pred, roi_data):
    # classification loss
    label = roi_data[1].squeeze()
    fg_cnt = torch.sum(label.data.ne(0))
    bg_cnt = label.data.numel() - fg_cnt

    # for log
    if self.debug:
        maxv, predict = cls_score.data.max(1)
        self.tp = torch.sum(predict[:fg_cnt].eq(label.data[:fg_cnt])) if fg_cnt > 0 else 0
        self.tf = torch.sum(predict[fg_cnt:].eq(label.data[fg_cnt:]))
        self.fg_cnt = fg_cnt
        self.bg_cnt = bg_cnt

    ce_weights = torch.ones(cls_score.size()[1])
    ce_weights[0] = float(fg_cnt) / bg_cnt
    ce_weights = ce_weights.cuda()
    cross_entropy = F.cross_entropy(cls_score, label, weight=ce_weights)

    # bounding box regression L1 loss
    bbox_targets, bbox_inside_weights, bbox_outside_weights = roi_data[2:]
    bbox_targets = torch.mul(bbox_targets, bbox_inside_weights)
    bbox_pred = torch.mul(bbox_pred, bbox_inside_weights)

    loss_box = F.smooth_l1_loss(bbox_pred, bbox_targets, size_average=False) / (fg_cnt + 1e-4)

    return cross_entropy, loss_box
def build_loss_cls(self, cls_score, labels):
    labels = labels.squeeze()
    fg_cnt = torch.sum(labels.data.ne(0))
    bg_cnt = labels.data.numel() - fg_cnt

    ce_weights = np.sqrt(self.predicate_loss_weight)
    ce_weights[0] = float(fg_cnt) / (bg_cnt + 1e-5)
    ce_weights = ce_weights.cuda()
    # print '[relationship]:'
    # print 'ce_weights:'
    # print ce_weights
    # print 'cls_score:'
    # print cls_score
    # print 'labels'
    # print labels
    cross_entropy = F.cross_entropy(cls_score, labels, weight=ce_weights)

    maxv, predict = cls_score.data.max(1)
    # if DEBUG:
    #     print '[predicate]:'
    #     if predict.sum() > 0:
    #         print predict
    #     print 'labels'
    #     print labels
    if fg_cnt == 0:
        tp = 0
    else:
        tp = torch.sum(predict[bg_cnt:].eq(labels.data[bg_cnt:]))
    tf = torch.sum(predict[:bg_cnt].eq(labels.data[:bg_cnt]))

    return cross_entropy, tp, tf, fg_cnt, bg_cnt
def eval(data_iter, model, args):
    model.eval()
    corrects, avg_loss = 0, 0
    for batch in data_iter:
        feature, target = batch.text, batch.labels
        target.data.sub_(1)  # batch first, index align
        x = feature.data.numpy()
        x = x.T
        feature = autograd.Variable(torch.from_numpy(x))
        if args.cuda:
            feature, target = feature.cuda(), target.cuda()

        logit = model(feature)
        loss = F.cross_entropy(logit, target, size_average=False)

        avg_loss += loss.data[0]
        corrects += (torch.max(logit, 1)
                     [1].view(target.size()).data == target.data).sum()

    size = len(data_iter.dataset)
    avg_loss = avg_loss / size  # average the accumulated loss over the whole dataset
    accuracy = 100.0 * corrects / size
    model.train()
    print('\nEvaluation - loss: {:.6f} acc: {:.4f}%({}/{}) \n'.format(avg_loss, accuracy, corrects, size))
    return accuracy, avg_loss
def eval(data_iter, model, args):
    model.eval()
    corrects, avg_loss = 0, 0
    for batch in data_iter:
        feature, target = batch.text, batch.labels
        target.data.sub_(1)  # batch first, index align
        x = feature.data.numpy()
        x = x.T
        feature = autograd.Variable(torch.from_numpy(x))
        if args.cuda:
            feature, target = feature.cuda(), target.cuda()

        logit = model(feature)
        loss = F.cross_entropy(logit, target, size_average=False)

        avg_loss += loss.data[0]
        corrects += (torch.max(logit, 1)
                     [1].view(target.size()).data == target.data).sum()

    size = len(data_iter.dataset)
    avg_loss = avg_loss / size  # average the accumulated loss over the whole dataset
    accuracy = 100.0 * corrects / size
    model.train()
    print('\nEvaluation - loss: {:.6f} acc: {:.4f}%({}/{}) \n'.format(avg_loss, accuracy, corrects, size))