The following 50 code examples, extracted from open-source Python projects, illustrate how to use torch.nn.functional.log_softmax().
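Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the shapes and variable names are illustrative assumptions) showing what log_softmax computes and how it is typically paired with nll_loss:

import torch
import torch.nn.functional as F

# Hypothetical batch of 4 samples with 10 class scores each.
logits = torch.randn(4, 10)
targets = torch.tensor([1, 0, 3, 9])

# log_softmax returns log-probabilities along the given dimension;
# computing it in one step is more numerically stable than log(softmax(x)).
log_probs = F.log_softmax(logits, dim=1)

# Paired with nll_loss this matches cross_entropy applied to the raw logits.
loss = F.nll_loss(log_probs, targets)
print(log_probs.shape, loss.item())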
def forward(self, input, target):
    if input.dim()>2:
        input = input.view(input.size(0),input.size(1),-1)  # N,C,H,W => N,C,H*W
        input = input.transpose(1,2)                         # N,C,H*W => N,H*W,C
        input = input.contiguous().view(-1,input.size(2))    # N,H*W,C => N*H*W,C
    target = target.view(-1,1)

    logpt = F.log_softmax(input)
    logpt = logpt.gather(1,target)
    logpt = logpt.view(-1)
    pt = Variable(logpt.data.exp())

    if self.alpha is not None:
        if self.alpha.type()!=input.data.type():
            self.alpha = self.alpha.type_as(input.data)
        at = self.alpha.gather(0,target.data.view(-1))
        logpt = logpt * Variable(at)

    loss = -1 * (1-pt)**self.gamma * logpt
    if self.size_average:
        return loss.mean()
    else:
        return loss.sum()
def forward(self, x):
    n_idx = 0
    c_idx = 1
    h_idx = 2
    w_idx = 3

    x = self.lookup_table(x)
    x = x.unsqueeze(c_idx)

    enc_outs = []
    for encoder in self.encoders:
        enc_ = F.relu(encoder(x))
        k_h = enc_.size()[h_idx]
        enc_ = F.max_pool2d(enc_, kernel_size=(k_h, 1))
        enc_ = enc_.squeeze(w_idx)
        enc_ = enc_.squeeze(h_idx)
        enc_outs.append(enc_)
    encoding = self.dropout(torch.cat(enc_outs, 1))
    return F.log_softmax(self.logistic(encoding))
def forward(self, model, sample):
    """Compute the loss for the given sample.

    Returns a tuple with three elements:
    1) the loss, as a Variable
    2) the sample size, which is used as the denominator for the gradient
    3) logging outputs to display while training
    """
    net_output = model(**sample['net_input'])
    input = F.log_softmax(net_output.view(-1, net_output.size(-1)))
    target = sample['target'].view(-1)
    loss = LabelSmoothedNLLLoss.apply(input, target, self.eps, self.padding_idx, self.weights)
    sample_size = sample['target'].size(0) if self.args.sentence_avg else sample['ntokens']
    logging_output = {
        'loss': loss.data[0],
        'sample_size': sample_size,
    }
    return loss, sample_size, logging_output
def forward(self, x, y, x_mask):
    """
    x = batch * len * h1
    y = batch * h2
    x_mask = batch * len
    """
    Wy = self.linear(y) if self.linear is not None else y
    xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
    xWy.data.masked_fill_(x_mask.data, -float('inf'))
    if self.training:
        # In training we output log-softmax for NLL
        alpha = F.log_softmax(xWy)
    else:
        # ...Otherwise 0-1 probabilities
        alpha = F.softmax(xWy)
    return alpha
def forward(self, x, y, x_mask):
    """
    Args:
        x: batch * len * hdim1
        y: batch * hdim2
        x_mask: batch * len (1 for padding, 0 for true)
    Output:
        alpha = batch * len
    """
    Wy = self.linear(y) if self.linear is not None else y
    xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
    xWy.data.masked_fill_(x_mask.data, -float('inf'))
    if self.normalize:
        if self.training:
            # In training we output log-softmax for NLL
            alpha = F.log_softmax(xWy)
        else:
            # ...Otherwise 0-1 probabilities
            alpha = F.softmax(xWy)
    else:
        alpha = xWy.exp()
    return alpha
def baseline_search(self, input, beam_size=None):
    # This is the simple greedy search
    batch_size = input.size(0)
    hidden_feat = self.lstm_im(input.view(1, input.size()[0], input.size()[1]))[1]
    x = Variable(torch.ones(1, batch_size,).type(torch.LongTensor) * self.start,
                 requires_grad=False).cuda()  # <start>
    output = []
    flag = torch.ones(batch_size)
    for i in range(self.nseq):
        input_x = self.encoder(x.view(1, -1))
        output_feature, hidden_feat = self.lstm_word(input_x, hidden_feat)
        output_t = self.decoder(output_feature.view(-1, output_feature.size(2)))
        output_t = F.log_softmax(output_t)
        logprob, x = output_t.max(1)
        output.append(x)
        flag[x.cpu().eq(self.end).data] = 0
        if flag.sum() == 0:
            break
    output = torch.stack(output, 0).squeeze().transpose(0, 1).cpu().data
    return output
def forward(self, x):
    x = self.conv1(x)
    x = self.maxpool(x)
    x = self.stage2(x)
    x = self.stage3(x)
    x = self.stage4(x)

    # global average pooling layer
    x = F.avg_pool2d(x, x.data.size()[-2:])

    # flatten for input to fully-connected layer
    x = x.view(x.size(0), -1)
    x = self.fc(x)
    return F.log_softmax(x, dim=1)
def forward(self, prev_samples, upper_tier_conditioning):
    (batch_size, _, _) = upper_tier_conditioning.size()

    prev_samples = self.embedding(
        prev_samples.contiguous().view(-1)
    ).view(
        batch_size, -1, self.q_levels
    )

    prev_samples = prev_samples.permute(0, 2, 1)
    upper_tier_conditioning = upper_tier_conditioning.permute(0, 2, 1)

    x = F.relu(self.input(prev_samples) + upper_tier_conditioning)
    x = F.relu(self.hidden(x))
    x = self.output(x).permute(0, 2, 1).contiguous()

    return F.log_softmax(x.view(-1, self.q_levels)) \
            .view(batch_size, -1, self.q_levels)
def forward(self, x):
    nBatch = x.size(0)

    x = F.max_pool2d(self.conv1(x), 2)
    x = F.max_pool2d(self.conv2(x), 2)
    x = x.view(nBatch, -1)

    L = self.M*self.L
    Q = L.mm(L.t()) + self.eps*Variable(torch.eye(self.nHidden)).cuda()
    Q = Q.unsqueeze(0).expand(nBatch, self.nHidden, self.nHidden)
    G = self.G.unsqueeze(0).expand(nBatch, self.nineq, self.nHidden)
    z0 = self.qp_z0(x)
    s0 = self.qp_s0(x)
    h = z0.mm(self.G.t())+s0
    e = Variable(torch.Tensor())
    inputs = self.qp_o(x)
    x = QPFunction()(Q, inputs, G, h, e, e)
    x = x[:,:10]

    return F.log_softmax(x)
def forward(self, x):
    nBatch = x.size(0)

    # FC-ReLU-QP-FC-Softmax
    x = x.view(nBatch, -1)
    x = F.relu(self.fc1(x))

    Q = self.Q.unsqueeze(0).expand(nBatch, self.Q.size(0), self.Q.size(1))
    p = -x.view(nBatch,-1)
    G = self.G.unsqueeze(0).expand(nBatch, self.G.size(0), self.G.size(1))
    h = self.h.unsqueeze(0).expand(nBatch, self.h.size(0))
    A = self.A.unsqueeze(0).expand(nBatch, self.A.size(0), self.A.size(1))
    b = self.b.unsqueeze(0).expand(nBatch, self.b.size(0))

    x = QPFunction(verbose=False)(Q, p.double(), G, h, A, b).float()
    x = self.fc2(x)

    return F.log_softmax(x)
def feedforward_test():
    import torch.nn as nn
    import torch.nn.functional as F

    fc1 = nn.Linear(10,20)
    fc1.weight.data.normal_(0.0,1.0)
    fc1.bias.data.normal_(0.0,1.0)

    fc2 = nn.Linear(20,2)
    fc2.weight.data.normal_(0.0,1.0)
    fc2.bias.data.normal_(0.0,1.0)

    model = lambda x: F.log_softmax(fc2(F.relu(fc1(x))))

    data = Variable(torch.rand(10,10))

    out_path = 'out'
    if not os.path.isdir(out_path):
        os.mkdir(out_path)
    uid = str(uuid.uuid4())

    torch2c.compile(model(data),'feedforward',os.path.join(out_path,uid),compile_test=True)
def forward(self, x):
    if self.deep:
        x = x.view(-1, 28*28)
        for fc in self.fcs[:-1]:
            x = F.relu(fc(x))
        x = self.fcs[-1](x)
        return F.log_softmax(x)
    else:
        x = x.view(-1, 28*28)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        return F.log_softmax(x)
def forward(self, x, y, x_mask):
    """
    x = batch * len * h1
    y = batch * h2
    x_mask = batch * len
    """
    Wy = self.linear(y) if self.linear is not None else y
    xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
    xWy.data.masked_fill_(x_mask.data, -float('inf'))
    if self.training:
        # In training we output log-softmax for NLL
        alpha = F.log_softmax(xWy, dim=1)
    else:
        # ...Otherwise 0-1 probabilities
        alpha = F.softmax(xWy, dim=1)
    return alpha
def forward(self, qu, w, cand):
    qu = Variable(qu)
    w = Variable(w)
    cand = Variable(cand)

    embed_q = self.embed_B(qu)
    embed_w1 = self.embed_A(w)
    embed_w2 = self.embed_C(w)
    embed_c = self.embed_C(cand)

    #pdb.set_trace()
    q_state = torch.sum(embed_q, 1).squeeze(1)
    w1_state = torch.sum(embed_w1, 1).squeeze(1)
    w2_state = torch.sum(embed_w2, 1).squeeze(1)

    for _ in range(self.config.hop):
        sent_dot = torch.mm(q_state, torch.transpose(w1_state, 0, 1))
        sent_att = F.softmax(sent_dot)

        a_dot = torch.mm(sent_att, w2_state)
        a_dot = self.H(a_dot)
        q_state = torch.add(a_dot, q_state)

    f_feat = torch.mm(q_state, torch.transpose(embed_c, 0, 1))
    score = F.log_softmax(f_feat)
    return score
def forward(self, word_input, last_hidden, encoder_outputs):
    # Note: we run this one step at a time
    # TODO: FIX BATCHING

    # Get the embedding of the current input word (last output word)
    word_embedded = self.embedding(word_input).view(1, 1, -1)  # S=1 x B x N
    word_embedded = self.dropout(word_embedded)

    # Calculate attention weights and apply to encoder outputs
    attn_weights = self.attn(last_hidden[-1], encoder_outputs)
    context = attn_weights.bmm(encoder_outputs.transpose(0, 1))  # B x 1 x N
    context = context.transpose(0, 1)  # 1 x B x N

    # Combine embedded input word and attended context, run through RNN
    rnn_input = torch.cat((word_embedded, context), 2)
    output, hidden = self.gru(rnn_input, last_hidden)

    # Final output layer
    output = output.squeeze(0)  # B x N
    output = F.log_softmax(self.out(torch.cat((output, context), 1)))

    # Return final output, hidden state, and attention weights (for visualization)
    return output, hidden, attn_weights
def forward(self, x, lengths):
    batch_size, seq_length = x.size()[:2]

    emb = Variable(torch.from_numpy(
        self.initial_embeddings.take(x.numpy(), 0)),
        volatile=not self.training)

    h = Variable(torch.zeros(batch_size, self.model_dim), volatile=not self.training)

    for t in range(seq_length):
        inp = emb[:,t,:]
        h = self.rnn(inp, h)

    h = F.relu(self.l0(F.dropout(h.squeeze(), 0.5, self.training)))
    h = F.relu(self.l1(F.dropout(h, 0.5, self.training)))
    y = F.log_softmax(h)
    return y
def forward(self, x, lengths):
    batch_size = x.size(0)
    max_len = max(lengths)

    emb = Variable(torch.from_numpy(
        self.initial_embeddings.take(x.numpy(), 0)),
        volatile=not self.training)

    for t in range(max_len):
        indices = []
        for i, l in enumerate(lengths):
            if l >= max(lengths) - t:
                indices.append(i)

        # Build batch.
        dynamic_batch_size = len(indices)
        inp = Variable(torch.FloatTensor(dynamic_batch_size, self.word_embedding_dim),
                       volatile=not self.training)
        h = Variable(torch.FloatTensor(dynamic_batch_size, self.model_dim),
                     volatile=not self.training)
        output = self.rnn(inp, h)

    hn = output
    h = F.relu(self.l0(F.dropout(hn.squeeze(), 0.5, self.training)))
    h = F.relu(self.l1(F.dropout(h, 0.5, self.training)))
    y = F.log_softmax(h)
    return y
def forward(self, x, lengths):
    batch_size = x.size(0)
    max_len = max(lengths)

    emb = Variable(torch.from_numpy(
        self.initial_embeddings.take(x.numpy(), 0)),
        volatile=not self.training)

    inp = Variable(torch.FloatTensor(emb.size()), volatile=not self.training)
    h0 = Variable(torch.FloatTensor(1, batch_size, self.model_dim),
                  volatile=not self.training)
    _, hn = self.rnn(emb, h0)

    h = F.relu(self.l0(F.dropout(hn.squeeze(), 0.5, self.training)))
    h = F.relu(self.l1(F.dropout(h, 0.5, self.training)))
    y = F.log_softmax(h)
    return y
def forward(self, input, hidden, encoder_output, encoder_outputs):
    embedded = self.embedding(input).view(1, 1, -1)
    embedded = self.dropout(embedded)

    attn_weights = F.softmax(
        self.attn(torch.cat((embedded[0], hidden[0]), 1)))
    attn_weights = attn_weights.cuda() if use_cuda else attn_weights
    attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                             encoder_outputs.unsqueeze(0))
    attn_applied = attn_applied.cuda() if use_cuda else attn_applied

    output = torch.cat((embedded[0], attn_applied[0]), 1)
    output = output.cuda() if use_cuda else output
    output = self.attn_combine(output).unsqueeze(0)

    for i in range(self.n_layers):
        output = F.relu(output)
        output = output.cuda() if use_cuda else output
        output, hidden = self.gru(output, hidden)

    output = F.log_softmax(self.out(output[0]))
    output = output.cuda() if use_cuda else output
    return output, hidden, attn_weights
def sample(self, seed, maximumLength, T = 1):
    h = self.h0(seed).view(self.layers, 1, self.H)
    accumulator = ["START"]
    for _ in range(maximumLength):
        i = self.targetsOfSymbols([accumulator[-1]])[:,0]
        output, h = self(i,h)
        distribution = output.data.view(-1)/T
        distribution = F.log_softmax(distribution).data
        distribution = distribution.exp()

        c = torch.multinomial(distribution,1)[0]
        if self.lexicon[c] == "END":
            break

        accumulator.append(self.lexicon[c])

    return accumulator[1:]
def sample(self, features):
    result = ["START"]

    # (1,1,F)
    features = features.view(-1).unsqueeze(0).unsqueeze(0)
    #features: 1x1x2560
    states = None

    while True:
        e = self.embedding(variable([symbolToIndex[result[-1]]]).view((1,-1)))
        recurrentInput = torch.cat((features,e),2)
        output, states = self.rnn(recurrentInput,states)
        distribution = self.tokenPrediction(output).view(-1)
        distribution = F.log_softmax(distribution).data.exp()
        draw = torch.multinomial(distribution,1)[0]
        c = LEXICON[draw]
        if len(result) > 20 or c == "END":
            return result[1:]
        else:
            result.append(c)
def cross_entropy2d(input, target, weight=None, size_average=True):
    # input: (n, c, h, w), target: (n, h, w)
    n, c, h, w = input.size()
    # log_p: (n, c, h, w)
    log_p = F.log_softmax(input)
    # log_p: (n*h*w, c)
    log_p = log_p.transpose(1, 2).transpose(2, 3).contiguous().view(-1, c)
    log_p = log_p[target.view(n, h, w, 1).repeat(1, 1, 1, c) >= 0]
    log_p = log_p.view(-1, c)
    # target: (n*h*w,)
    mask = target >= 0
    target = target[mask]
    loss = F.nll_loss(log_p, target, weight=weight, size_average=False)
    if size_average:
        loss /= mask.data.sum()
    return loss
def forward(self, logits, target):
    """
    :param logits: tensor with shape of [batch_size, seq_len, input_size]
    :param target: tensor with shape of [batch_size, seq_len] of Long type
                   filled with indexes to gather from logits
    :return: tensor with shape of [batch_size] with perplexity evaluation
    """
    [batch_size, seq_len, input_size] = logits.size()

    logits = logits.view(-1, input_size)
    log_probs = F.log_softmax(logits)
    del logits

    log_probs = log_probs.view(batch_size, seq_len, input_size)
    target = target.unsqueeze(2)

    out = t.gather(log_probs, dim=2, index=target).squeeze(2).neg()

    ppl = out.mean(1).exp()

    return ppl
def forward(self, input, hidden, encoder_outputs):
    embedded = self.embedding(input).view(1, 1, -1)
    embedded = self.dropout(embedded)

    attn_weights = F.softmax(
        self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
    attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                             encoder_outputs.unsqueeze(0))

    output = torch.cat((embedded[0], attn_applied[0]), 1)
    output = self.attn_combine(output).unsqueeze(0)

    for i in range(self.n_layers):
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

    output = F.log_softmax(self.out(output[0]), dim=1)
    return output, hidden, attn_weights
def forward(self, batch):
    # shape of batch (sequence length, batch size)
    inputs = self.embed(batch.question)  # shape (sequence length, batch_size, dimension of embedding)
    batch_size = inputs.size()[1]
    state_shape = self.config.n_cells, batch_size, self.config.d_hidden
    if self.config.rnn_type.lower() == 'gru':
        h0 = autograd.Variable(inputs.data.new(*state_shape).zero_())
        outputs, ht = self.rnn(inputs, h0)
    else:
        h0 = c0 = autograd.Variable(inputs.data.new(*state_shape).zero_())
        outputs, (ht, ct) = self.rnn(inputs, (h0, c0))

    # shape of `outputs` - (sequence length, batch size, hidden size X num directions)
    tags = self.hidden2tag(outputs.view(-1, outputs.size(2)))
    # print(tags)
    scores = F.log_softmax(tags)
    return scores
def forward(self, x_in):
    out = F.relu(F.max_pool3d(self.conv(x_in), (1, self.max_document_length,1)))
    out = out.view(out.size(0), -1)
    out = F.relu(self.fc1(out))
    out = F.dropout(out, training=self.training)
    out = self.fc2(out)
    return F.log_softmax(out)
def forward(self, x, *args, **kwargs):
    action = super(DiscretePolicy, self).forward(x, *args, **kwargs)
    probs = F.softmax(action.raw)
    action.value = probs.multinomial().detach()
    action.prob = lambda: probs.t()[action.value[:, 0]].mean(1)
    action.compute_log_prob = lambda a: F.log_softmax(action.raw).t()[a[:, 0]].mean(1)
    action.log_prob = action.compute_log_prob(action.value)
    action.entropy = -(action.prob() * action.log_prob)
    return action
def forward(self, x):
    x = F.relu(F.max_pool2d(self.conv1(x), 2))
    x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
    x = x.view(-1, 320)
    x = F.relu(self.fc1(x))
    x = F.dropout(x, training=self.training)
    x = self.fc2(x)
    return F.log_softmax(x)
def forward(self, x):
    x = F.relu(F.max_pool2d(self.conv1(x), 2))
    x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
    x = x.view(-1, 320)

    # Register a backward hook
    x.register_hook(myGradientHook)

    x = F.relu(self.fc1(x))
    x = F.dropout(x, training=self.training)
    x = self.fc2(x)
    return F.log_softmax(x)
def forward(self, x):
    x = F.elu(F.max_pool2d(self.conv1(x), 2))
    x = F.elu(F.max_pool2d(self.bn2(self.conv2(x)), 2))
    x = F.elu(F.max_pool2d(self.bn3(self.conv3(x)), 2))
    x = F.elu(F.max_pool2d(self.bn4(self.conv4(x)), 2))
    x = x.view(-1, 750)
    x = F.relu(self.fc1(x))
    x = F.dropout(x, training=self.training)
    x = self.fc2(x)
    return F.log_softmax(x)
def forward(self, x):
    x = F.dropout(x, training=self.training)
    x = self.conv(x)
    x = self.avgpool(x)
    x = F.log_softmax(x)
    x = x.squeeze(dim=3).squeeze(dim=2)
    return x
def cross_entropy2d(input, target, weight=None, size_average=True):
    n, c, h, w = input.size()
    log_p = F.log_softmax(input, dim=1)
    log_p = log_p.transpose(1, 2).transpose(2, 3).contiguous().view(-1, c)
    log_p = log_p[target.view(n * h * w, 1).repeat(1, c) >= 0]
    log_p = log_p.view(-1, c)

    mask = target >= 0
    target = target[mask]
    loss = F.nll_loss(log_p, target, weight=weight, size_average=False)
    if size_average:
        loss /= mask.data.sum()
    return loss
def bootstrapped_cross_entropy2d(input, target, K, weight=None, size_average=True):

    batch_size = input.size()[0]

    def _bootstrap_xentropy_single(input, target, K, weight=None, size_average=True):
        n, c, h, w = input.size()
        log_p = F.log_softmax(input, dim=1)
        log_p = log_p.transpose(1, 2).transpose(2, 3).contiguous().view(-1, c)
        log_p = log_p[target.view(n * h * w, 1).repeat(1, c) >= 0]
        log_p = log_p.view(-1, c)

        mask = target >= 0
        target = target[mask]
        loss = F.nll_loss(log_p, target, weight=weight, reduce=False, size_average=False)
        topk_loss, _ = loss.topk(K)
        reduced_topk_loss = topk_loss.sum() / K

        return reduced_topk_loss

    loss = 0.0
    # Bootstrap from each image not entire batch
    for i in range(batch_size):
        loss += _bootstrap_xentropy_single(input=torch.unsqueeze(input[i], 0),
                                           target=torch.unsqueeze(target[i], 0),
                                           K=K,
                                           weight=weight,
                                           size_average=size_average)
    return loss / float(batch_size)
def cross_entropy2d(pred, target, weight=None, size_average=True):
    n, num_classes, h, w = pred.size()
    log_p = F.log_softmax(pred)
    log_p = channel_first_to_last(log_p).view(-1, num_classes)
    target = channel_first_to_last(target).view(-1)
    loss = F.nll_loss(log_p, target, weight=weight, size_average=False)
    if size_average:
        loss /= (h * w * n)
    return loss
def forward(self, input):
    # TODO perhaps add batch normalization or layer normalization
    x = F.elu(self.conv1(input))
    x = F.elu(self.conv2(x))
    x = F.elu(self.conv3(x))

    # Next flatten the output to be batched into LSTM layers
    # The shape of x is batch_size, channels, height, width
    x = self.pre_lstm_bn(x)
    x = torch.transpose(x, 1, 3)
    x = torch.transpose(x, 1, 2)
    x = x.contiguous()
    x = x.view(x.size(0), self.batch, self.hidden_dim)

    x, hidden = self.lstm(x, (self.hidden_state, self.cell_state))
    self.hidden_state, self.cell_state = hidden

    x = torch.transpose(x, 2, 1)
    x = x.contiguous()
    x = x.view(x.size(0), self.hidden_dim, self.height, self.width)
    x = self.lstm_batch_norm(x)

    x = F.elu(self.conv4(x))
    x = F.elu(self.conv5(x))

    o_begin = self.begin_conv(x)
    o_end = self.end_conv(x)

    o_begin = o_begin.view(o_begin.size(0), -1)
    o_end = o_end.view(o_end.size(0), -1)

    o_begin = F.log_softmax(o_begin)
    o_end = F.log_softmax(o_end)

    return o_begin, o_end
def forward(self, x):
    out = self.conv1(x)
    out = self.trans1(self.dense1(out))
    out = self.trans2(self.dense2(out))
    out = self.dense3(out)
    out = torch.squeeze(F.avg_pool2d(F.relu(self.bn1(out)), 8))
    out = F.log_softmax(self.fc(out))
    return out
def forward(self, outputs, targets):
    return self.loss(F.log_softmax(outputs), targets)
def forward(self, x):
    x = F.relu(self.bn1(self.fc1(x)))
    x = F.log_softmax(self.bn2(self.fc2(x)))
    return x
def forward(self, lvec, rvec):
    mult_dist = torch.mul(lvec, rvec)
    abs_dist = torch.abs(torch.add(lvec, -rvec))
    vec_dist = torch.cat((mult_dist, abs_dist), 1)
    out = F.sigmoid(self.wh(vec_dist))
    out = F.log_softmax(self.wp(out))
    return out

# putting the whole model together
def forward(self, input_data):
    embeds = self.embed_layer(input_data).view((1, -1))
    output = F.relu(self.linear_1(embeds))
    output = F.log_softmax(self.linear_2(output))
    return output

# Helper function
def forward(self, input, hidden):
    encode = self.lookup_table(input)
    lstm_out, hidden = self.lstm(encode, hidden)
    lstm_out = F.dropout(lstm_out, p=self.dropout)
    out = self.lr(lstm_out.contiguous().view(-1, lstm_out.size(2)))
    return F.log_softmax(out), hidden
def forward(self, src, src_pos, tgt, tgt_pos):
    tgt, tgt_pos = tgt[:, :-1], tgt_pos[:, :-1]

    enc_outputs = self.enc(src, src_pos)
    dec_output = self.dec(enc_outputs, src, tgt, tgt_pos)
    out = self.linear(dec_output)

    return F.log_softmax(out.view(-1, self.dec_vocab_size))
def decode(self, seq, pos):
    def length_penalty(step, len_penalty_w=1.):
        return (torch.log(self.torch.FloatTensor([5 + step])) -
                torch.log(self.torch.FloatTensor([6])))*len_penalty_w

    top_seqs = [([BOS], 0)] * self.beam_size
    enc_outputs = self.model.enc(seq, pos)
    seq_beam = Variable(seq.data.repeat(self.beam_size, 1))
    enc_outputs_beam = [Variable(enc_output.data.repeat(self.beam_size, 1, 1))
                        for enc_output in enc_outputs]
    input_data = self.init_input()
    input_pos = torch.arange(1, 2).unsqueeze(0)
    input_pos = input_pos.repeat(self.beam_size, 1)
    input_pos = Variable(input_pos.long(), volatile=True)

    for step in range(1, self.args.max_word_len+1):
        if self.cuda:
            input_pos = input_pos.cuda()
            input_data = input_data.cuda()
        dec_output = self.model.dec(enc_outputs_beam, seq_beam, input_data, input_pos)
        dec_output = dec_output[:, -1, :]  # word level feature
        out = F.log_softmax(self.model.linear(dec_output))
        lp = length_penalty(step)
        top_seqs, all_done, un_dones = self.beam_search(out.data+lp, top_seqs)
        if all_done:
            break

        input_data = self.update_input(top_seqs)
        input_pos, src_seq_beam, enc_outputs_beam = self.update_state(step+1, seq, enc_outputs, un_dones)

    tgts = []
    for seq in top_seqs:
        cor_idxs, score = seq
        cor_idxs = cor_idxs[1: -1]
        tgts += [(" ".join([self.src_idx2word[idx] for idx in cor_idxs]), score)]
    return tgts
def forward(self, input, hidden):
    encode = self.lookup_table(input)
    lstm_out, hidden = self.lstm(encode.transpose(0, 1), hidden)
    output = self.ln(lstm_out)[-1]
    return F.log_softmax(self.logistic(output)), hidden