The following 50 code examples, extracted from open-source Python projects, illustrate how to use torch.nn.functional.softmax().
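Most of these snippets predate PyTorch 0.4, so many call F.softmax without an explicit dim argument; in later versions that form is deprecated and emits a warning. As a minimal sketch of the modern call (not taken from any of the projects below):

import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)           # a batch of 4 score vectors
probs = F.softmax(logits, dim=1)      # normalize each row into a probability distribution
assert torch.allclose(probs.sum(dim=1), torch.ones(4))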
def forward(self, inputs):
    # set up batch size
    batch_size = inputs.size(0)

    # compute hidden and cell
    hidden = Variable(torch.zeros(self.num_layers * 2, batch_size, self.hidden_size).cuda())
    cell = Variable(torch.zeros(self.num_layers * 2, batch_size, self.hidden_size).cuda())
    hidden_cell = (hidden, cell)

    # recurrent neural networks
    outputs, _ = self.rnn.forward(inputs, hidden_cell)
    outputs = outputs.contiguous().view(-1, self.hidden_size * 2)

    # compute classifications by outputs
    outputs = self.classifier.forward(outputs)
    outputs = F.softmax(outputs)
    outputs = outputs.view(batch_size, -1, self.num_classes)
    return outputs
def forward(self, hidden, encoder_outputs):
    # hidden.size() = (B, H), encoder_outputs.size() = (B, S, H)
    batch_size, encoder_outputs_len, _ = encoder_outputs.size()

    # Create variable to store attention energies
    # attn_energies.size() = (B, S)
    attn_energies = Variable(torch.zeros((batch_size, encoder_outputs_len)))  # B x S
    if Config.use_cuda:
        attn_energies = attn_energies.cuda()

    # Calculate energies for each encoder output
    # attn_energies.size() = (B, S)
    for i in range(encoder_outputs_len):
        attn_energies[:, i] = self.score(hidden, encoder_outputs[:, i])
        # print attn_energies[:, i]

    # Normalize energies to weights in range 0 to 1
    return F.softmax(attn_energies)
def forward(self, input, source_hids):
    # input: bsz x input_embed_dim
    # source_hids: srclen x bsz x output_embed_dim

    # x: bsz x output_embed_dim
    x = self.input_proj(input)

    # compute attention
    attn_scores = (source_hids * x.unsqueeze(0)).sum(dim=2)
    attn_scores = F.softmax(attn_scores.t()).t()  # srclen x bsz

    # sum weighted sources
    x = (attn_scores.unsqueeze(2) * source_hids).sum(dim=0)

    x = F.tanh(self.output_proj(torch.cat((x, input), dim=1)))
    return x, attn_scores
def forward(self, x, target_embedding, encoder_out):
    residual = x

    # attention
    x = (self.in_projection(x) + target_embedding) * math.sqrt(0.5)
    x = self.bmm(x, encoder_out[0])

    # softmax over last dim
    sz = x.size()
    x = F.softmax(x.view(sz[0] * sz[1], sz[2]))
    x = x.view(sz)
    attn_scores = x

    x = self.bmm(x, encoder_out[1])

    # scale attention output
    s = encoder_out[1].size(1)
    x = x * (s * math.sqrt(1.0 / s))

    # project back
    x = (self.out_projection(x) + residual) * math.sqrt(0.5)
    return x, attn_scores
def forward(self, x, y, x_mask):
    """
    x = batch * len * h1
    y = batch * h2
    x_mask = batch * len
    """
    Wy = self.linear(y) if self.linear is not None else y
    xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
    xWy.data.masked_fill_(x_mask.data, -float('inf'))
    if self.training:
        # In training we output log-softmax for NLL
        alpha = F.log_softmax(xWy)
    else:
        # ...Otherwise 0-1 probabilities
        alpha = F.softmax(xWy)
    return alpha
def softmax(x, dim=-1):
    """
    TODO: change to use the default pyTorch implementation when available
    Source: https://discuss.pytorch.org/t/why-softmax-function-cant-specify-the-dimension-to-operate/2637

    :param x: tensor
    :param dim: dimension to apply the softmax function to; after the operation,
        the elements of the tensor along this dimension sum to 1
    :return: tensor having the same dimension as `x` rescaled along dim
    """
    input_size = x.size()
    trans_input = x.transpose(dim, len(input_size) - 1)
    trans_size = trans_input.size()
    input_2d = trans_input.contiguous().view(-1, trans_size[-1])
    try:
        soft_max_2d = F.softmax(input_2d, 1)
    except TypeError:
        # Support older pytorch 0.2 release.
        soft_max_2d = F.softmax(input_2d)
    soft_max_nd = soft_max_2d.view(*trans_size)
    return soft_max_nd.transpose(dim, len(input_size) - 1)
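A quick sanity check of this helper (assuming it is in scope as softmax): on any PyTorch version that accepts a dim argument, it should agree with the built-in call.

import torch
import torch.nn.functional as F

x = torch.randn(2, 3, 5)
# the helper transposes dim to the last axis, flattens, and transposes back,
# so the result must match softmax taken directly along dim
assert torch.allclose(softmax(x, dim=1), F.softmax(x, dim=1))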
def getAttnOutput(input, attnScorer, winSize=0):
    # get attention output following [Liu and Lane, Interspeech 2016]. the input is
    # seqlen X batchsize X dim. if winSize is 0, all the time steps are used for the
    # weighted averaging
    attnSeq = []
    for i in range(input.size(0)):
        curSeq = []
        if i > 0:
            leftBegin = 0
            if winSize > 0:
                leftBegin = max(0, i - winSize)
            curSeq.append(input[leftBegin:i])
        if i < input.size(0):
            leftEnd = input.size(0)
            if winSize > 0:
                leftEnd = min(i + winSize + 1, input.size(0))
            curSeq.append(input[i:leftEnd])
        curSeq = torch.cat(curSeq, 0)
        cur = input[i:i + 1].expand_as(curSeq)
        attnScores = attnScorer(torch.cat([cur, curSeq], 2).view(-1, 2 * input.size(2)))  # get attention scores
        transAttnScores = attnScores.view(curSeq.size(0), input.size(1)).transpose(0, 1)  # batchSize X curSeqLen
        smOut = F.softmax(transAttnScores).transpose(0, 1)
        smOutSeq = smOut.unsqueeze(2).expand_as(curSeq)
        weightedAvgSeq = (curSeq * smOutSeq).sum(0)
        attnSeq.append(weightedAvgSeq)
    attnSeq = torch.cat(attnSeq, 0)
    return torch.cat([input, attnSeq], 2)
def sample(self, inputs, max_length):
    targets, init_states = self.initialize(inputs, eval=False)
    emb, output, hidden, context = init_states

    outputs = []
    samples = []
    batch_size = targets.size(1)
    num_eos = targets[0].data.byte().new(batch_size).zero_()

    for i in range(max_length):
        output, hidden = self.decoder.step(emb, output, hidden, context)
        outputs.append(output)
        dist = F.softmax(self.generator(output))
        sample = dist.multinomial(1, replacement=False).view(-1).data
        samples.append(sample)

        # Stop if all sentences reach EOS.
        num_eos |= (sample == lib.Constants.EOS)
        if num_eos.sum() == batch_size:
            break

        emb = self.decoder.word_lut(Variable(sample))

    outputs = torch.stack(outputs)
    samples = torch.stack(samples)
    return samples, outputs
def test_forward_backward(self):
    import torch
    import torch.nn.functional as F
    from torch.autograd import Variable
    from reid.loss import OIMLoss
    criterion = OIMLoss(3, 3, scalar=1.0, size_average=False)
    criterion.lut = torch.eye(3)
    x = Variable(torch.randn(3, 3), requires_grad=True)
    y = Variable(torch.range(0, 2).long())
    loss = criterion(x, y)
    loss.backward()
    probs = F.softmax(x)
    grads = probs.data - torch.eye(3)
    abs_diff = torch.abs(grads - x.grad.data)
    self.assertEquals(torch.log(probs).diag().sum(), -loss)
    self.assertTrue(torch.max(abs_diff) < 1e-6)
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
    features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)

    if self.training:
        roi_data = self.proposal_target_layer(rois, gt_boxes, gt_ishard, dontcare_areas, self.n_classes)
        rois = roi_data[0]

    # roi pool
    pooled_features = self.roi_pool(features, rois)
    x = pooled_features.view(pooled_features.size()[0], -1)
    x = self.fc6(x)
    x = F.dropout(x, training=self.training)
    x = self.fc7(x)
    x = F.dropout(x, training=self.training)

    cls_score = self.score_fc(x)
    cls_prob = F.softmax(cls_score)
    bbox_pred = self.bbox_fc(x)

    if self.training:
        self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)

    return cls_prob, bbox_pred, rois
def _decode(self, tokens, encoder_outs):
    # wrap in Variable
    tokens = Variable(tokens, volatile=True)

    avg_probs = None
    avg_attn = None
    for model, encoder_out in zip(self.models, encoder_outs):
        decoder_out, attn = model.decoder(tokens, encoder_out)
        probs = F.softmax(decoder_out[:, -1, :]).data
        attn = attn[:, -1, :].data
        if avg_probs is None or avg_attn is None:
            avg_probs = probs
            avg_attn = attn
        else:
            avg_probs.add_(probs)
            avg_attn.add_(attn)
    avg_probs.div_(len(self.models))
    avg_probs.log_()
    avg_attn.div_(len(self.models))
    return avg_probs, avg_attn
def forward(self, x, y, x_mask):
    """
    Args:
        x: batch * len * hdim1
        y: batch * hdim2
        x_mask: batch * len (1 for padding, 0 for true)
    Output:
        alpha = batch * len
    """
    Wy = self.linear(y) if self.linear is not None else y
    xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
    xWy.data.masked_fill_(x_mask.data, -float('inf'))
    if self.normalize:
        if self.training:
            # In training we output log-softmax for NLL
            alpha = F.log_softmax(xWy)
        else:
            # ...Otherwise 0-1 probabilities
            alpha = F.softmax(xWy)
    else:
        alpha = xWy.exp()
    return alpha
def forward(self, x, x_mask):
    """
    Args:
        x: batch * len * hdim
        x_mask: batch * len (1 for padding, 0 for true)
    Output:
        alpha: batch * len
    """
    x_flat = x.view(-1, x.size(-1))
    scores = self.linear(x_flat).view(x.size(0), x.size(1))
    scores.data.masked_fill_(x_mask.data, -float('inf'))
    alpha = F.softmax(scores)
    return alpha


# ------------------------------------------------------------------------------
# Functional
# ------------------------------------------------------------------------------
def object_detection_gt_boxes(self, image_path, gt_boxes):
    min_score = 1 / 150.
    image = cv2.imread(image_path)
    # print 'image.shape', image.shape
    im_data, im_scales = self.get_image_blob_noscale(image)
    gt_boxes[:, :4] = gt_boxes[:, :4] * im_scales[0]
    # print 'im_data.shape', im_data.shape
    # print 'im_scales', im_scales
    im_info = np.array(
        [[im_data.shape[1], im_data.shape[2], im_scales[0]]],
        dtype=np.float32)

    object_result = self(im_data, im_info, gt_boxes)[0]
    cls_prob_object, bbox_object, object_rois = object_result[:]

    prob_object = F.softmax(cls_prob_object)
    prob = prob_object.cpu().data
    top_5_cls = torch.topk(prob[:, 1:], 5, dim=1)
    # print 'im_scales[0]', im_scales[0]
    return top_5_cls[1].numpy()
def forward(self, v, u):
    """
    Input:
    - v: N x D x H x W
    - u: N x D

    Returns:
    - next_u: N x D
    """
    N, K = v.size(0), self.hidden_dim
    D, H, W = v.size(1), v.size(2), v.size(3)
    v_proj = self.Wv(v)  # N x K x H x W
    u_proj = self.Wu(u)  # N x K
    u_proj_expand = u_proj.view(N, K, 1, 1).expand(N, K, H, W)
    h = F.tanh(v_proj + u_proj_expand)
    p = F.softmax(self.Wp(h).view(N, H * W)).view(N, 1, H, W)
    self.attention_maps = p.data.clone()

    v_tilde = (p.expand_as(v) * v).sum(2).sum(3).view(N, D)
    next_u = u + v_tilde
    return next_u
def new_att_module(self):
    class NewAttModule(nn.Module):
        def __init__(self):
            super(NewAttModule, self).__init__()

        def forward(self, linput, rinput):
            self.lPad = linput.view(-1, linput.size(0), linput.size(1))
            self.lPad = linput  # self.lPad = Padding(0, 0)(linput) TODO: figure out why padding?
            self.M_r = torch.mm(self.lPad, rinput.t())
            self.alpha = F.softmax(self.M_r.transpose(0, 1))
            self.Yl = torch.mm(self.alpha, self.lPad)
            return self.Yl

    att_module = NewAttModule()
    if getattr(self, "att_module_master", None):
        for (tar_param, src_param) in zip(att_module.parameters(), self.att_module_master.parameters()):
            tar_param.grad.data = src_param.grad.data.clone()
    return att_module
def forward(self, x):
    x = F.relu(self.conv11(x))
    x = self.bn11(x)
    x = F.relu(self.conv12(x))
    x = self.bn12(x)
    x = F.relu(self.conv21(x))
    x = self.bn21(x)
    x = F.relu(self.conv22(x))
    x = self.bn22(x)
    x = F.avg_pool2d(x, kernel_size=[x.size(2), x.size(3)])
    x = self.fc(x.view(x.size()[:2]))
    # x = F.softmax(x)
    return x
def forward(self, x):
    x = F.relu(self.conv11(x))
    x = self.bn11(x)
    x = self.offset12(x)
    x = F.relu(self.conv12(x))
    x = self.bn12(x)
    x = self.offset21(x)
    x = F.relu(self.conv21(x))
    x = self.bn21(x)
    x = self.offset22(x)
    x = F.relu(self.conv22(x))
    x = self.bn22(x)
    x = F.avg_pool2d(x, kernel_size=[x.size(2), x.size(3)])
    x = self.fc(x.view(x.size()[:2]))
    x = F.softmax(x)
    return x
def forward(self, x, trainable, fast=True):
    q, img, ans, prog = x  # Need ans for reinforce
    if not trainable:
        ans = None  # Safety
    p = self.ProgramGenerator(q)

    # Finicky handling of PG-EE transition
    batch, sLen, v = p.size()
    p = p.view(-1, v)
    p = F.softmax(p)
    p = p.view(batch, sLen, v)
    p, pInds = t.max(p, 2)
    pInds = pInds[:, :, 0]
    p = p[:, :, 0]

    a = self.ExecutionEngine((pInds, p, img), fast=fast)
    return a
def forward(self, logits, labels):
    softmaxes = F.softmax(logits)
    confidences, predictions = torch.max(softmaxes, 1)
    accuracies = predictions.eq(labels)

    ece = Variable(torch.zeros(1)).type_as(logits)
    for bin_lower, bin_upper in zip(self.bin_lowers, self.bin_uppers):
        # Calculate |confidence - accuracy| in each bin
        in_bin = confidences.gt(bin_lower) * confidences.le(bin_upper)
        prop_in_bin = in_bin.float().mean()
        if prop_in_bin.data[0] > 0:
            accuracy_in_bin = accuracies[in_bin].float().mean()
            avg_confidence_in_bin = confidences[in_bin].mean()
            ece += torch.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin

    return ece
def forward(self, x, y, x_mask):
    """
    x = batch * len * h1
    y = batch * h2
    x_mask = batch * len
    """
    Wy = self.linear(y) if self.linear is not None else y
    xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
    xWy.data.masked_fill_(x_mask.data, -float('inf'))
    if self.training:
        # In training we output log-softmax for NLL
        alpha = F.log_softmax(xWy, dim=1)
    else:
        # ...Otherwise 0-1 probabilities
        alpha = F.softmax(xWy, dim=1)
    return alpha
def forward(self, X):
    # input X is a 4D tensor
    assert X.size(1) == self.D, "Encoding Layer wrong channels!"
    if X.dim() == 3:
        # BxDxN
        B, N, K, D = X.size(0), X.size(2), self.K, self.D
        X = X.transpose(1, 2).contiguous()
    elif X.dim() == 4:
        # BxDxHxW
        B, N, K, D = X.size(0), X.size(2) * X.size(3), self.K, self.D
        X = X.view(B, D, -1).transpose(1, 2).contiguous()
    else:
        raise RuntimeError('Encoding Layer unknown input dims!')
    # assignment weights
    A = F.softmax(scaledL2()(X, self.codewords, self.scale))
    # aggregate
    E = aggregate()(A, X, self.codewords)
    return E
def assign(R, S):
    r"""
    Calculate assignment weights for given residuals (:math:`R`) and scale (:math:`S`)

    .. math::
        a_{ik} = \frac{\exp(-s_k\|r_{ik}\|^2)}{\sum_{j=1}^K \exp(-s_j\|r_{ij}\|^2)}

    Shape:
        - Input: :math:`R\in\mathcal{R}^{B\times N\times K\times D}`,
          :math:`S\in\mathcal{R}^K` (where :math:`B` is batch,
          :math:`N` is total number of features, :math:`K` is number of codewords,
          :math:`D` is feature dimensions.)
        - Output: :math:`A\in\mathcal{R}^{B\times N\times K}`
    """
    L = square_squeeze()(R)
    K = S.size(0)
    SL = L * S.view(1, 1, K)
    return F.softmax(SL)
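The same computation can be written with plain tensor ops. A minimal sketch of the formula above, assuming square_squeeze computes the squared norm over the feature dimension and that the sign of the scale is folded into S (as the surrounding code appears to assume):

import torch
import torch.nn.functional as F

B, N, K, D = 2, 6, 4, 8
R = torch.randn(B, N, K, D)                 # residuals r_ik
S = -torch.rand(K)                          # negative scales, so larger residuals get smaller weight
L = R.pow(2).sum(dim=3)                     # ||r_ik||^2 -> B x N x K
A = F.softmax(L * S.view(1, 1, K), dim=2)   # a_ik; rows sum to 1 over the K codewords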
def forward(self, X):
    if isinstance(X, tuple) or isinstance(X, list):
        # for self-parallel mode, please see encoding.nn
        return my_data_parallel(self, X)
    elif not isinstance(X, Variable):
        raise RuntimeError('unknown input type')
    # input X is a 4D tensor
    assert X.size(1) == self.D
    if X.dim() == 3:
        # BxDxN
        B, N, K, D = X.size(0), X.size(2), self.K, self.D
        X = X.transpose(1, 2).contiguous()
    elif X.dim() == 4:
        # BxDxHxW
        B, N, K, D = X.size(0), X.size(2) * X.size(3), self.K, self.D
        X = X.view(B, D, -1).transpose(1, 2).contiguous()
    else:
        raise RuntimeError('Encoding Layer unknown input dims!')
    # assignment weights NxKxD
    A = F.softmax(scaledL2(X, self.codewords, self.scale), dim=1)
    # aggregate
    E = aggregate(A, X, self.codewords)
    return E
def forward(self, input, compute_loss=False, avg_loss=True):
    # compute posterior
    en1 = F.softplus(self.en1_fc(input))                    # en1_fc output
    en2 = F.softplus(self.en2_fc(en1))                      # encoder2 output
    en2 = self.en2_drop(en2)
    posterior_mean = self.mean_bn(self.mean_fc(en2))        # posterior mean
    posterior_logvar = self.logvar_bn(self.logvar_fc(en2))  # posterior log variance
    posterior_var = posterior_logvar.exp()

    # take sample
    eps = Variable(input.data.new().resize_as_(posterior_mean.data).normal_())  # noise
    z = posterior_mean + posterior_var.sqrt() * eps         # reparameterization
    p = F.softmax(z)                                        # mixture probability
    p = self.p_drop(p)

    # do reconstruction
    recon = F.softmax(self.decoder_bn(self.decoder(p)))     # reconstructed distribution over vocabulary

    if compute_loss:
        return recon, self.loss(input, recon, posterior_mean, posterior_logvar, posterior_var, avg_loss)
    else:
        return recon
def forward(self, qu, w, cand):
    qu = Variable(qu)
    w = Variable(w)
    cand = Variable(cand)
    embed_q = self.embed_B(qu)
    embed_w1 = self.embed_A(w)
    embed_w2 = self.embed_C(w)
    embed_c = self.embed_C(cand)
    # pdb.set_trace()
    q_state = torch.sum(embed_q, 1).squeeze(1)
    w1_state = torch.sum(embed_w1, 1).squeeze(1)
    w2_state = torch.sum(embed_w2, 1).squeeze(1)

    for _ in range(self.config.hop):
        sent_dot = torch.mm(q_state, torch.transpose(w1_state, 0, 1))
        sent_att = F.softmax(sent_dot)
        a_dot = torch.mm(sent_att, w2_state)
        a_dot = self.H(a_dot)
        q_state = torch.add(a_dot, q_state)

    f_feat = torch.mm(q_state, torch.transpose(embed_c, 0, 1))
    score = F.log_softmax(f_feat)
    return score
def forward(self, hidden, encoder_outputs):
    max_len = encoder_outputs.size(0)
    this_batch_size = encoder_outputs.size(1)

    # Create variable to store attention energies
    attn_energies = Variable(torch.zeros(this_batch_size, max_len))  # B x S
    if USE_CUDA:
        attn_energies = attn_energies.cuda()

    # For each batch of encoder outputs
    for b in range(this_batch_size):
        # Calculate energy for each encoder output
        for i in range(max_len):
            attn_energies[b, i] = self.score(hidden[:, b], encoder_outputs[i, b].unsqueeze(0))

    # Normalize energies to weights in range 0 to 1, resize to 1 x B x S
    return F.softmax(attn_energies).unsqueeze(1)
def forward(self, output, context):
    batch_size = output.size(0)
    hidden_size = output.size(2)
    input_size = context.size(1)
    # (batch, out_len, dim) * (batch, in_len, dim) -> (batch, out_len, in_len)
    attn = torch.bmm(output, context.transpose(1, 2))
    if self.mask is not None:
        attn.data.masked_fill_(self.mask, -float('inf'))
    attn = F.softmax(attn.view(-1, input_size)).view(batch_size, -1, input_size)

    # (batch, out_len, in_len) * (batch, in_len, dim) -> (batch, out_len, dim)
    mix = torch.bmm(attn, context)

    # concat -> (batch, out_len, 2*dim)
    combined = torch.cat((mix, output), dim=2)
    # output -> (batch, out_len, dim)
    output = F.tanh(self.linear_out(combined.view(-1, 2 * hidden_size))).view(batch_size, -1, hidden_size)

    return output, attn
def forward(self, h, att_feats, p_att_feats):
    # The p_att_feats here is already projected
    att_size = att_feats.numel() // att_feats.size(0) // self.rnn_size
    att = p_att_feats.view(-1, att_size, self.att_hid_size)

    att_h = self.h2att(h)                       # batch * att_hid_size
    att_h = att_h.unsqueeze(1).expand_as(att)   # batch * att_size * att_hid_size
    dot = att + att_h                           # batch * att_size * att_hid_size
    dot = F.tanh(dot)                           # batch * att_size * att_hid_size
    dot = dot.view(-1, self.att_hid_size)       # (batch * att_size) * att_hid_size
    dot = self.alpha_net(dot)                   # (batch * att_size) * 1
    dot = dot.view(-1, att_size)                # batch * att_size

    weight = F.softmax(dot)                     # batch * att_size
    att_feats_ = att_feats.view(-1, att_size, self.rnn_size)  # batch * att_size * att_feat_size
    att_res = torch.bmm(weight.unsqueeze(1), att_feats_).squeeze(1)  # batch * att_feat_size

    return att_res
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
    features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)

    if self.training:
        roi_data = self.proposal_target_layer(rois, gt_boxes, gt_ishard, dontcare_areas, self.n_classes)
        rois = roi_data[0]

    # roi pool
    conv_new1 = self.new_conv(features)
    r_score_map = self.rfcn_score(conv_new1)
    r_bbox_map = self.rfcn_bbox(conv_new1)
    psroi_pooled_cls = self.psroi_pool_cls(r_score_map, rois)
    psroi_pooled_loc = self.psroi_pool_loc(r_bbox_map, rois)
    bbox_pred = self.bbox_pred(psroi_pooled_loc)
    bbox_pred = torch.squeeze(bbox_pred)
    cls_score = self.cls_score(psroi_pooled_cls)
    cls_score = torch.squeeze(cls_score)
    cls_prob = F.softmax(cls_score)

    if self.training:
        self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)

    return cls_prob, bbox_pred, rois
def forward(self, input, hidden, encoder_output, encoder_outputs):
    embedded = self.embedding(input).view(1, 1, -1)
    embedded = self.dropout(embedded)

    attn_weights = F.softmax(
        self.attn(torch.cat((embedded[0], hidden[0]), 1)))
    attn_weights = attn_weights.cuda() if use_cuda else attn_weights
    attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                             encoder_outputs.unsqueeze(0))
    attn_applied = attn_applied.cuda() if use_cuda else attn_applied

    output = torch.cat((embedded[0], attn_applied[0]), 1)
    output = output.cuda() if use_cuda else output
    output = self.attn_combine(output).unsqueeze(0)

    for i in range(self.n_layers):
        output = F.relu(output)
        output = output.cuda() if use_cuda else output
        output, hidden = self.gru(output, hidden)

    output = F.log_softmax(self.out(output[0]))
    output = output.cuda() if use_cuda else output
    return output, hidden, attn_weights
def forward(self, x):
    x = self.conv(x)

    sizes = x.size()
    x = x.view(sizes[0], sizes[1] * sizes[2], sizes[3])  # Collapse feature dimension
    x = x.transpose(1, 2).transpose(0, 1).contiguous()  # TxNxH

    x = self.rnns(x)

    if not self._bidirectional:  # no need for lookahead layer in bidirectional
        x = self.lookahead(x)

    x = self.fc(x)
    x = x.transpose(0, 1)
    # identity in training mode, softmax in eval mode
    x = self.inference_softmax(x)
    return x
def forward(self, dec_out, enc_outs, enc_att=None, mask=None):
    """
    Parameters:
    -----------

    - dec_out: torch.Tensor(batch_size x hid_dim)
    - enc_outs: torch.Tensor(seq_len x batch_size x hid_dim)
    - enc_att: (optional), torch.Tensor(seq_len x batch_size x att_dim)
    - mask: (optional), torch.ByteTensor(batch_size x seq_len)
    """
    # (batch x seq_len)
    weights = self.scorer(dec_out, enc_outs, enc_att=enc_att)

    if mask is not None:
        # weights = weights * mask.float()
        weights.data.masked_fill_(1 - mask.data, -float('inf'))

    weights = F.softmax(weights, dim=1)

    # (eq 7)
    context = weights.unsqueeze(1).bmm(enc_outs.transpose(0, 1)).squeeze(1)
    # (eq 5) linear out combining context and hidden
    context = F.tanh(self.linear_out(torch.cat([context, dec_out], 1)))

    return context, weights
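The masking idiom used here (and in several of the snippets above) can be isolated: fill the padded positions with -inf before the softmax so they receive exactly zero weight. A minimal self-contained sketch in the modern (post-Variable) API:

import torch
import torch.nn.functional as F

scores = torch.randn(2, 5)                                # batch_size x seq_len attention scores
mask = torch.tensor([[1, 1, 1, 0, 0],
                     [1, 1, 1, 1, 0]], dtype=torch.bool)  # True at valid positions
scores = scores.masked_fill(~mask, -float('inf'))
weights = F.softmax(scores, dim=1)                        # padded positions get weight 0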
def _content_focus(self, memory_vb):
    """
    variables needed:
        key_vb:    [batch_size x num_heads x mem_wid]
                -> similarity key vector, to compare to each row in memory
                -> by cosine similarity
        beta_vb:   [batch_size x num_heads x 1]
                -> NOTE: refer here: https://github.com/deepmind/dnc/issues/9
                -> \in (1, +inf) after oneplus(); similarity key strength
                -> amplify or attenuate the precision of the focus
        memory_vb: [batch_size x mem_hei x mem_wid]
    returns:
        wc_vb:     [batch_size x num_heads x mem_hei]
                -> the attention weight by content focus
    """
    K_vb = batch_cosine_sim(self.key_vb, memory_vb)       # [batch_size x num_heads x mem_hei]
    self.wc_vb = K_vb * self.beta_vb.expand_as(K_vb)      # [batch_size x num_heads x mem_hei]
    self.wc_vb = F.softmax(self.wc_vb.transpose(0, 2)).transpose(0, 2)
def forward(self, x):
    """
    A model for non-linear data that works off of mixing multiple Gaussian
    distributions together. Uses linear projections of a given input to generate
    a set of N Gaussian models' mixture components, means and standard deviations.

    :param x: (num. samples, input dim.)
    :return: Mixture components, means, and standard deviations
        in the form (num. samples, num. mixtures)
    """
    x = F.tanh(self.projection(x))

    weights = F.softmax(self.weights_projection(x))
    means = self.mean_projection(x)
    stds = torch.exp(self.std_projection(x))

    return weights, means, stds
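For context, a sketch of how such mixture parameters are typically consumed: the standard mixture-density-network negative log-likelihood for a scalar target. This helper (mdn_nll) is illustrative and not part of the project the snippet above comes from.

import torch

def mdn_nll(weights, means, stds, y):
    # weights/means/stds: (num. samples, num. mixtures); y: (num. samples, 1)
    norm = torch.distributions.Normal(means, stds)
    log_prob = norm.log_prob(y)                    # target broadcast over mixtures
    weighted = torch.log(weights + 1e-8) + log_prob
    return -torch.logsumexp(weighted, dim=1).mean()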
def forward(self, *hidden_states):
    if len(hidden_states) == 1:
        hidden_state = hidden_states[0]
        return F.softmax(F.tanh(self.projection(hidden_state))) * hidden_state
    elif len(hidden_states) == 2:
        left_hidden_state, right_hidden_state = hidden_states
        if self.mode == 0 or self.mode == 1:
            if self.mode == 0:
                left_attention_weights = F.softmax(F.tanh(self.projection(left_hidden_state)))
                right_attention_weights = F.softmax(F.tanh(self.projection(right_hidden_state)))
            elif self.mode == 1:
                left_attention_weights = F.softmax(F.tanh(self.left_projection(left_hidden_state)))
                right_attention_weights = F.softmax(F.tanh(self.right_projection(right_hidden_state)))
            return left_attention_weights * left_hidden_state, right_attention_weights * right_hidden_state
        elif self.mode == 2:
            hidden_state = torch.cat([left_hidden_state, right_hidden_state], dim=1)
            attention_weights = F.softmax(F.tanh(self.projection(hidden_state)))
            return attention_weights * left_hidden_state, attention_weights * right_hidden_state
def forward(self, last_state, states, mask=None):
    sequence_length, batch_size, hidden_dim = states.size()
    last_state = last_state.unsqueeze(0).expand(sequence_length, batch_size, last_state.size(1))

    if self.mode == "dot":
        energies = last_state * states
        energies = energies.sum(dim=2).squeeze()
    elif self.mode == "general":
        expanded_projection = self.projection.expand(sequence_length, *self.projection.size())
        energies = last_state * states.bmm(expanded_projection)
        energies = energies.sum(dim=2).squeeze()
    elif self.mode == "concat":
        expanded_reduction = self.reduction.expand(sequence_length, *self.reduction.size())
        expanded_projection = self.projection.expand(sequence_length, *self.projection.size())
        energies = F.tanh(torch.cat([last_state, states], dim=2).bmm(expanded_reduction))
        energies = energies.bmm(expanded_projection).squeeze()

    if type(mask) == torch.autograd.Variable:
        energies = energies + ((mask == 0).float() * -10000)

    attention_weights = F.softmax(energies)
    return attention_weights
def forward(self, last_state, states):
    if len(states.size()) == 2:
        states = states.unsqueeze(0)
    sequence_length, batch_size, state_dim = states.size()

    transformed_last_state = last_state @ self.projection
    transformed_last_state = transformed_last_state.expand(sequence_length, batch_size, self.encoder_dim)
    transformed_last_state = transformed_last_state.transpose(0, 1).contiguous()
    transformed_last_state = transformed_last_state.view(batch_size, -1)

    states = states.transpose(0, 1).contiguous()
    states = states.view(batch_size, -1)

    energies = transformed_last_state * states
    energies = energies.sum(dim=1)

    if self.encoder_dim is not None:
        attention_weights = torch.cat([torch.exp(energies[0]), F.softmax(energies[1:])], dim=0)
    else:
        attention_weights = F.softmax(energies)
    return attention_weights
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
    features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)

    if self.training:
        roi_data = self.proposal_target_layer(rois, gt_boxes, gt_ishard, dontcare_areas, self.n_classes)
        rois = roi_data[0]

    # roi pool
    pooled_features = self.roi_pool(features, rois)
    x = pooled_features.view(pooled_features.size()[0], -1)
    # x = self.fc6(x)
    # x = F.dropout(x, training=self.training)
    # x = self.fc7(x)
    # x = F.dropout(x, training=self.training)
    x = self.fcs(x)

    cls_score = self.score_fc(x)
    cls_prob = F.softmax(cls_score)
    bbox_pred = self.bbox_fc(x)

    if self.training:
        self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)

    return cls_prob, bbox_pred, rois
def forward(self, input, hidden, encoder_outputs):
    embedded = self.embedding(input).view(1, 1, -1)
    embedded = self.dropout(embedded)

    attn_weights = F.softmax(
        self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
    attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                             encoder_outputs.unsqueeze(0))

    output = torch.cat((embedded[0], attn_applied[0]), 1)
    output = self.attn_combine(output).unsqueeze(0)

    for i in range(self.n_layers):
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

    output = F.log_softmax(self.out(output[0]), dim=1)
    return output, hidden, attn_weights
def module_cls(self):
    """Return a simple module that concatenates its 2 inputs in forward step."""
    class MyModule(nn.Module):
        def __init__(self):
            super(MyModule, self).__init__()
            self.dense = nn.Linear(20, 2)

        # pylint: disable=arguments-differ
        def forward(self, X0, X1):
            X = torch.cat((X0, X1), 1)
            X = F.softmax(self.dense(X), dim=-1)
            return X

    return MyModule