We extracted the following 50 code examples from open-source Python projects to illustrate how to use torch.nn.functional.tanh().
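As a quick orientation before the project excerpts, here is a minimal standalone sketch of the call itself (not taken from the projects below). Note that recent PyTorch releases deprecate F.tanh in favor of torch.tanh, which produces the same result.

import torch
import torch.nn.functional as F

x = torch.randn(2, 3)        # any real-valued tensor
y = F.tanh(x)                # element-wise tanh; outputs lie in (-1, 1)
z = torch.tanh(x)            # preferred spelling in current PyTorch releases
assert torch.allclose(y, z)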
def forward(self, x, hint):
    v = self.toH(hint)

    x0 = self.to0(x)
    x1 = self.to1(x0)
    x2 = self.to2(x1)
    x3 = self.to3(torch.cat([x2, v], 1))
    x4 = self.to4(x3)

    x = self.tunnel4(x4)
    x = self.tunnel3(torch.cat([x, x3.detach()], 1))
    x = self.tunnel2(torch.cat([x, x2.detach()], 1))
    x = self.tunnel1(torch.cat([x, x1.detach()], 1))
    x = F.tanh(self.exit(torch.cat([x, x0.detach()], 1)))
    return x
def forward(self, input, VGG):
    x1 = F.leaky_relu(self.down1(input), 0.2, True)
    x2 = F.leaky_relu(self.down2(x1), 0.2, True)
    x3 = F.leaky_relu(self.down3(x2), 0.2, True)
    x4 = F.leaky_relu(self.down4(x3), 0.2, True)
    x5 = F.leaky_relu(self.down5(x4), 0.2, True)
    x6 = F.leaky_relu(self.down6(x5), 0.2, True)
    x7 = F.leaky_relu(self.down7(x6), 0.2, True)
    x8 = F.relu(self.down8(x7), True)

    VGG = F.relu(self.linear(VGG), True)

    x = F.relu(self.up8(torch.cat([x8, VGG.view(-1, 2048, 1, 1)], 1)), True)
    x = F.relu(self.up7(torch.cat([x, x7], 1)), True)
    x = F.relu(self.up6(torch.cat([x, x6], 1)), True)
    x = F.relu(self.up5(torch.cat([x, x5], 1)), True)
    x = F.relu(self.up4(torch.cat([x, x4], 1)), True)
    x = F.relu(self.up3(torch.cat([x, x3], 1)), True)
    x = F.relu(self.up2(torch.cat([x, x2], 1)), True)
    x = F.tanh(self.up1(torch.cat([x, x1], 1)))

    return x

############################
# D network
###########################
def node_forward(self, inputs, child_c, child_h):
    child_h_sum = torch.sum(child_h, dim=0, keepdim=True)

    iou = self.ioux(inputs) + self.iouh(child_h_sum)
    i, o, u = torch.split(iou, iou.size(1) // 3, dim=1)
    i, o, u = F.sigmoid(i), F.sigmoid(o), F.tanh(u)

    f = F.sigmoid(
        self.fh(child_h) +
        self.fx(inputs).repeat(len(child_h), 1)
    )
    fc = torch.mul(f, child_c)

    c = torch.mul(i, u) + torch.sum(fc, dim=0, keepdim=True)
    h = torch.mul(o, F.tanh(c))
    return c, h
def forward(self, input, source_hids):
    # input: bsz x input_embed_dim
    # source_hids: srclen x bsz x output_embed_dim

    # x: bsz x output_embed_dim
    x = self.input_proj(input)

    # compute attention
    attn_scores = (source_hids * x.unsqueeze(0)).sum(dim=2)
    attn_scores = F.softmax(attn_scores.t()).t()  # srclen x bsz

    # sum weighted sources
    x = (attn_scores.unsqueeze(2) * source_hids).sum(dim=0)

    x = F.tanh(self.output_proj(torch.cat((x, input), dim=1)))
    return x, attn_scores
def forward(self, x):
    en0 = self.c0(x)
    en1 = self.bnc1(self.c1(F.leaky_relu(en0, negative_slope=0.2)))
    en2 = self.bnc2(self.c2(F.leaky_relu(en1, negative_slope=0.2)))
    en3 = self.bnc3(self.c3(F.leaky_relu(en2, negative_slope=0.2)))
    en4 = self.bnc4(self.c4(F.leaky_relu(en3, negative_slope=0.2)))
    en5 = self.bnc5(self.c5(F.leaky_relu(en4, negative_slope=0.2)))
    en6 = self.bnc6(self.c6(F.leaky_relu(en5, negative_slope=0.2)))
    en7 = self.c7(F.leaky_relu(en6, negative_slope=0.2))

    de7 = self.bnd7(self.d7(F.relu(en7)))
    de6 = F.dropout(self.bnd6(self.d6(F.relu(torch.cat((en6, de7), 1)))))
    de5 = F.dropout(self.bnd5(self.d5(F.relu(torch.cat((en5, de6), 1)))))
    de4 = F.dropout(self.bnd4(self.d4(F.relu(torch.cat((en4, de5), 1)))))
    de3 = self.bnd3(self.d3(F.relu(torch.cat((en3, de4), 1))))
    de2 = self.bnd2(self.d2(F.relu(torch.cat((en2, de3), 1))))
    de1 = self.bnd1(self.d1(F.relu(torch.cat((en1, de2), 1))))
    de0 = F.tanh(self.d0(F.relu(torch.cat((en0, de1), 1))))

    return de0
def forward(self, inputs, states, masks):
    x = self.v_fc1(inputs)
    x = F.tanh(x)
    x = self.v_fc2(x)
    x = F.tanh(x)
    x = self.v_fc3(x)
    value = x

    x = self.a_fc1(inputs)
    x = F.tanh(x)
    x = self.a_fc2(x)
    x = F.tanh(x)

    return value, x, states
def forward(self, x):
    x = self.embed(x)  # (N,W,D)
    x = self.dropout_embed(x)
    x = x.unsqueeze(1)  # (N,Ci,W,D)
    if self.args.batch_normalizations is True:
        x = [self.convs1_bn(F.tanh(conv(x))).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]  # [(N,Co), ...]*len(Ks)
    else:
        # x = [self.dropout(F.relu(conv(x)).squeeze(3)) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
        # x = [self.dropout(F.tanh(conv(x)).squeeze(3)) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
        # x = [F.tanh(conv(x)).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]  # [(N,Co), ...]*len(Ks)
    x = torch.cat(x, 1)
    x = self.dropout(x)  # (N,len(Ks)*Co)
    if self.args.batch_normalizations is True:
        x = self.fc1_bn(self.fc1(x))
        logit = self.fc2_bn(self.fc2(F.tanh(x)))
    else:
        logit = self.fc(x)
    return logit
def forward(self, input):
    self.hidden = self.init_hidden(self.num_layers, input.size(1))
    embed = self.embed(input)
    input = embed.view(len(input), embed.size(1), -1)
    # gru
    # print(input)
    # print("a", self.hidden)
    lstm_out, hidden = self.gru(input, self.hidden)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    # pooling
    lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2)
    lstm_out = F.tanh(lstm_out)
    # linear
    y = self.hidden2label(lstm_out)
    logit = y
    return logit
def forward(self, x):
    x = self.embed(x)
    x = self.dropout_embed(x)
    # x = x.view(len(x), x.size(1), -1)
    # x = embed.view(len(x), embed.size(1), -1)
    bilstm_out, self.hidden = self.bilstm(x, self.hidden)
    # print(self.hidden)
    bilstm_out = torch.transpose(bilstm_out, 0, 1)
    bilstm_out = torch.transpose(bilstm_out, 1, 2)
    bilstm_out = F.tanh(bilstm_out)
    bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2)).squeeze(2)
    bilstm_out = F.tanh(bilstm_out)
    # bilstm_out = self.dropout(bilstm_out)
    # bilstm_out = self.hidden2label1(bilstm_out)
    # logit = self.hidden2label2(F.tanh(bilstm_out))
    logit = self.hidden2label(bilstm_out)
    return logit
def forward(self, x):
    embed = self.embed(x)
    # CNN
    cnn_x = embed
    cnn_x = self.dropout(cnn_x)
    cnn_x = cnn_x.unsqueeze(1)
    cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
    cnn_x = torch.cat(cnn_x, 0)
    cnn_x = torch.transpose(cnn_x, 1, 2)
    # LSTM
    lstm_out, self.hidden = self.lstm(cnn_x, self.hidden)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2)
    # linear
    cnn_lstm_out = self.hidden2label1(F.tanh(lstm_out))
    cnn_lstm_out = self.hidden2label2(F.tanh(cnn_lstm_out))
    # output
    logit = cnn_lstm_out
    return logit
def forward(self, x):
    embed = self.embed(x)
    # CNN
    embed = self.dropout(embed)
    cnn_x = embed
    cnn_x = cnn_x.unsqueeze(1)
    cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
    cnn_x = torch.cat(cnn_x, 0)
    cnn_x = torch.transpose(cnn_x, 1, 2)
    # BiLSTM
    bilstm_out, self.hidden = self.bilstm(cnn_x, self.hidden)
    bilstm_out = torch.transpose(bilstm_out, 0, 1)
    bilstm_out = torch.transpose(bilstm_out, 1, 2)
    bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2)).squeeze(2)
    # linear
    cnn_bilstm_out = self.hidden2label1(F.tanh(bilstm_out))
    cnn_bilstm_out = self.hidden2label2(F.tanh(cnn_bilstm_out))
    # dropout
    logit = self.dropout(cnn_bilstm_out)
    return logit
def forward(self, input):
    embed = self.embed(input)
    embed = self.dropout(embed)  # add this reduce the acc
    input = embed.view(len(input), embed.size(1), -1)
    # gru
    gru_out, hidden = self.bigru(input, self.hidden)
    gru_out = torch.transpose(gru_out, 0, 1)
    gru_out = torch.transpose(gru_out, 1, 2)
    # pooling
    # gru_out = F.tanh(gru_out)
    gru_out = F.max_pool1d(gru_out, gru_out.size(2)).squeeze(2)
    gru_out = F.tanh(gru_out)
    # linear
    y = self.hidden2label(gru_out)
    logit = y
    return logit
def forward(self, x):
    embed = self.embed(x)
    # CNN
    cnn_x = embed
    cnn_x = self.dropout(cnn_x)
    cnn_x = cnn_x.unsqueeze(1)
    cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
    cnn_x = torch.cat(cnn_x, 0)
    cnn_x = torch.transpose(cnn_x, 1, 2)
    # GRU
    lstm_out, self.hidden = self.gru(cnn_x, self.hidden)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2)
    # linear
    cnn_lstm_out = self.hidden2label1(F.tanh(lstm_out))
    cnn_lstm_out = self.hidden2label2(F.tanh(cnn_lstm_out))
    # output
    logit = cnn_lstm_out
    return logit
def forward(self, v, u):
    """
    Input:
    - v: N x D x H x W
    - u: N x D

    Returns:
    - next_u: N x D
    """
    N, K = v.size(0), self.hidden_dim
    D, H, W = v.size(1), v.size(2), v.size(3)
    v_proj = self.Wv(v)  # N x K x H x W
    u_proj = self.Wu(u)  # N x K
    u_proj_expand = u_proj.view(N, K, 1, 1).expand(N, K, H, W)
    h = F.tanh(v_proj + u_proj_expand)
    p = F.softmax(self.Wp(h).view(N, H * W)).view(N, 1, H, W)
    self.attention_maps = p.data.clone()

    v_tilde = (p.expand_as(v) * v).sum(2).sum(3).view(N, D)
    next_u = u + v_tilde
    return next_u
def _combine_last(self, r, h_t):
    '''
    inputs:
        r : batch x n_dim
        h_t : batch x n_dim (this is the output from the gru unit)
    params:
        W_x : n_dim x n_dim
        W_p : n_dim x n_dim
    out:
        h_star : batch x n_dim
    '''
    W_p_r = torch.mm(r, self.W_p)    # batch x n_dim
    W_x_h = torch.mm(h_t, self.W_x)  # batch x n_dim
    h_star = F.tanh(W_p_r + W_x_h)   # batch x n_dim
    return h_star
def KrauseLSTMCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
    # Terminology matchup:
    # - This implementation uses the trick of having all gates concatenated
    #   together into a single tensor, so you can do one matrix multiply to
    #   compute all the gates.
    # - Thus, w_ih holds W_hx, W_ix, W_ox, W_fx
    #   and w_hh holds W_hh, W_ih, W_oh, W_fh
    # - Notice that the indices are swapped, because F.linear has swapped
    #   arguments. "Cancelling" indices are always next to each other.
    hx, cx = hidden
    gates = F.linear(input, w_ih, b_ih) + F.linear(hx, w_hh, b_hh)
    ingate, forgetgate, hiddengate, outgate = gates.chunk(4, 1)

    ingate = F.sigmoid(ingate)
    outgate = F.sigmoid(outgate)
    forgetgate = F.sigmoid(forgetgate)

    cy = (forgetgate * cx) + (ingate * hiddengate)
    hy = F.tanh(cy * outgate)

    return hy, cy
def MultiplicativeLSTMCell(input, hidden, w_xm, w_hm, w_ih, w_mh,
                           b_xm=None, b_hm=None, b_ih=None, b_mh=None):
    # w_ih holds W_hx, W_ix, W_ox, W_fx
    # w_mh holds W_hm, W_im, W_om, W_fm
    hx, cx = hidden

    # Key difference:
    m = F.linear(input, w_xm, b_xm) * F.linear(hx, w_hm, b_hm)
    gates = F.linear(input, w_ih, b_ih) + F.linear(m, w_mh, b_mh)

    ingate, forgetgate, hiddengate, outgate = gates.chunk(4, 1)

    ingate = F.sigmoid(ingate)
    outgate = F.sigmoid(outgate)
    forgetgate = F.sigmoid(forgetgate)

    cy = (forgetgate * cx) + (ingate * hiddengate)
    hy = F.tanh(cy * outgate)

    return hy, cy
def forward(self, x):
    upblock = True

    # Downsizing layer - Large Kernel ensures large receptive field on the residual blocks
    h = F.relu(self.b2(self.c1(x)))

    # Residual Layers
    for r in self.rs:
        h = r(h)  # will go through all residual blocks in this loop

    if upblock:
        # Upsampling Layers - improvement suggested by [2] to remove "checkerboard pattern"
        for u in self.up:
            h = u(h)  # will go through all upsampling blocks in this loop
    else:
        # As recommended by [1]
        h = F.relu(self.bc2(self.dc2(h)))
        h = F.relu(self.bc3(self.dc3(h)))

    # Last layer and scaled tanh activation - Scaled from 0 to 1 instead of 0 - 255
    h = F.tanh(self.c3(h))
    h = torch.add(h, 1.)
    h = torch.mul(h, 0.5)
    return h
def forward(self, input, context):
    """Propagate input through the network.

    input: batch x dim
    context: batch x sourceL x dim
    """
    target = self.linear_in(input).unsqueeze(2)  # batch x dim x 1

    # Get attention
    attn = torch.bmm(context, target).squeeze(2)  # batch x sourceL
    attn = self.sm(attn)
    attn3 = attn.view(attn.size(0), 1, attn.size(1))  # batch x 1 x sourceL

    weighted_context = torch.bmm(attn3, context).squeeze(1)  # batch x dim
    h_tilde = torch.cat((weighted_context, input), 1)
    h_tilde = self.tanh(self.linear_out(h_tilde))

    return h_tilde, attn
def decoder(self, z, sc_feat32, sc_feat16, sc_feat8, sc_feat4):
    x = z.view(-1, self.hidden_size, 1, 1)
    x = self.dec_upsamp1(x)
    x = torch.cat([x, sc_feat4], 1)
    x = F.relu(self.dec_conv1(x))
    x = self.dec_bn1(x)
    x = self.dec_upsamp2(x)
    x = torch.cat([x, sc_feat8], 1)
    x = F.relu(self.dec_conv2(x))
    x = self.dec_bn2(x)
    x = self.dec_upsamp3(x)
    x = torch.cat([x, sc_feat16], 1)
    x = F.relu(self.dec_conv3(x))
    x = self.dec_bn3(x)
    x = self.dec_upsamp4(x)
    x = torch.cat([x, sc_feat32], 1)
    x = F.relu(self.dec_conv4(x))
    x = self.dec_bn4(x)
    x = self.dec_upsamp5(x)
    x = F.tanh(self.dec_conv5(x))
    return x

# define forward pass
def calc_score(self, att_query, att_keys):
    """
    att_query is: b x t_q x n
    att_keys is: b x t_k x n
    return b x t_q x t_k scores
    """
    b, t_k, n = list(att_keys.size())
    t_q = att_query.size(1)
    if self.mode == 'bahdanau':
        att_query = att_query.unsqueeze(2).expand(b, t_q, t_k, n)
        att_keys = att_keys.unsqueeze(1).expand(b, t_q, t_k, n)
        sum_qk = att_query + att_keys
        sum_qk = sum_qk.view(b * t_k * t_q, n)
        out = self.linear_att(F.tanh(sum_qk)).view(b, t_q, t_k)
    elif self.mode == 'dot_prod':
        out = torch.bmm(att_query, att_keys.transpose(1, 2))
        if self.normalize:
            out.div_(n ** 0.5)
    return out
def forward(self, prior):
    prior = prior.cuda()
    fc_layer = leaky_relu(self.linear1(prior).view(-1, 512, 4, 4), negative_slope=0.2)
    deconv_layer1 = self.bn1(leaky_relu(self.deconv1(fc_layer), negative_slope=0.2))
    deconv_layer2 = self.bn2(leaky_relu(self.deconv2(deconv_layer1), negative_slope=0.2))
    deconv_layer3 = tanh(self.deconv3(deconv_layer2))
    return deconv_layer3

# Infer without batch normalization cannot improve image quality
# def infer(self, prior):
#     prior = prior.cuda()
#     fc_layer = leaky_relu(self.linear1(prior).view(-1, 512, 4, 4), negative_slope=0.2)
#     deconv_layer1 = leaky_relu(self.deconv1(fc_layer), negative_slope=0.2)
#     deconv_layer2 = leaky_relu(self.deconv2(deconv_layer1), negative_slope=0.2)
#     deconv_layer3 = tanh(self.deconv3(deconv_layer2))
#     return deconv_layer3
def forward(self, inp):
    # if inp.dim() > 2:
    #     inp = inp.permute(0, 2, 1)
    # inp = inp.contiguous().view(-1, self.L)

    if not (type(inp) == Variable):
        inp = Variable(inp[0])

    if hasattr(self.arguments, 'pack_num'):
        N = inp.size(0)
        Ncut = int(N / self.arguments.pack_num)
        split = torch.split(inp, Ncut, dim=0)
        inp = torch.cat(split, dim=1)

    h1 = F.tanh((self.l1(inp)))
    # h2 = F.tanh(self.l2_bn(self.l2(h1)))

    if self.arguments.tr_method == 'adversarial_wasserstein':
        output = (self.l3(h1))
    else:
        output = F.sigmoid(self.l3(h1))

    return output, h1
def __init__(self, ngpu, **kwargs):
    super(netG_images, self).__init__()
    self.ngpu = ngpu
    pl = 0
    self.L1 = kwargs['L1']
    self.L2 = kwargs['L2']
    self.K = kwargs['K']
    self.arguments = kwargs['arguments']

    self.l1 = nn.Linear(self.L1, self.K + pl, bias=True)
    initializationhelper(self.l1, 'tanh')

    self.l2 = nn.Linear(self.K + pl, self.L2, bias=True)
    initializationhelper(self.l2, 'relu')

    self.smooth_output = self.arguments.smooth_output
    if self.smooth_output:
        self.sml = nn.Conv2d(1, 1, 5, padding=2)
        initializationhelper(self.sml, 'relu')
def __init__(self, ngpu, **kwargs):
    super(netD_images, self).__init__()
    self.ngpu = ngpu
    self.L = kwargs['L']
    self.K = kwargs['K']
    self.arguments = kwargs['arguments']

    self.l1 = nn.Linear(self.L, self.K, bias=True)
    initializationhelper(self.l1, 'tanh')
    self.l1_bn = nn.BatchNorm1d(self.K)

    self.l2 = nn.Linear(self.K, self.K, bias=True)
    initializationhelper(self.l2, 'relu')
    # self.l2_bn = nn.BatchNorm1d(self.K)

    self.l3 = nn.Linear(self.K, 1, bias=True)
    initializationhelper(self.l3, 'relu')
def _transform_decoder_init_state(self, hn):
    if isinstance(hn, tuple):
        hn, cn = hn
        # cn [2 * num_layers, batch, hidden_size]
        num_dir, batch, hidden_size = cn.size()
        # first convert cn to [batch, 2 * num_layers, hidden_size]
        cn = cn.transpose(0, 1).contiguous()
        # then view as [batch, num_layers, 2 * hidden_size] --> [num_layers, batch, 2 * hidden_size]
        cn = cn.view(batch, num_dir // 2, 2 * hidden_size).transpose(0, 1)
        # take hx_dense to [num_layers, batch, hidden_size]
        cn = self.hx_dense(cn)
        # hn is tanh(cn)
        hn = F.tanh(cn)
        hn = (hn, cn)
    else:
        # hn [2 * num_layers, batch, hidden_size]
        num_dir, batch, hidden_size = hn.size()
        # first convert hn to [batch, 2 * num_layers, hidden_size]
        hn = hn.transpose(0, 1).contiguous()
        # then view as [batch, num_layers, 2 * hidden_size] --> [num_layers, batch, 2 * hidden_size]
        hn = hn.view(batch, num_dir // 2, 2 * hidden_size).transpose(0, 1)
        # take hx_dense to [num_layers, batch, hidden_size]
        hn = F.tanh(self.hx_dense(hn))
    return hn
def SkipConnectGRUCell(input, hidden, hidden_skip, w_ih, w_hh, b_ih=None, b_hh=None,
                       noise_in=None, noise_hidden=None):
    input = input.expand(3, *input.size()) if noise_in is None else input.unsqueeze(0) * noise_in

    hx = torch.cat([hidden, hidden_skip], dim=1)
    hx = hx.expand(3, *hx.size()) if noise_hidden is None else hx.unsqueeze(0) * noise_hidden

    gi = torch.baddbmm(b_ih.unsqueeze(1), input, w_ih)
    gh = torch.baddbmm(b_hh.unsqueeze(1), hx, w_hh)
    i_r, i_i, i_n = gi
    h_r, h_i, h_n = gh

    resetgate = F.sigmoid(i_r + h_r)
    inputgate = F.sigmoid(i_i + h_i)
    newgate = F.tanh(i_n + resetgate * h_n)
    hy = newgate + inputgate * (hidden - newgate)

    return hy
def SkipConnectFastGRUCell(input, hidden, hidden_skip, w_ih, w_hh, b_ih=None, b_hh=None,
                           noise_in=None, noise_hidden=None):
    if noise_in is not None:
        input = input * noise_in

    hx = torch.cat([hidden, hidden_skip], dim=1)
    if noise_hidden is not None:
        hx = hx * noise_hidden

    if input.is_cuda:
        gi = F.linear(input, w_ih)
        gh = F.linear(hx, w_hh)
        state = fusedBackend.GRUFused()
        return state(gi, gh, hidden) if b_ih is None else state(gi, gh, hidden, b_ih, b_hh)

    gi = F.linear(input, w_ih, b_ih)
    gh = F.linear(hx, w_hh, b_hh)
    i_r, i_i, i_n = gi.chunk(3, 1)
    h_r, h_i, h_n = gh.chunk(3, 1)

    resetgate = F.sigmoid(i_r + h_r)
    inputgate = F.sigmoid(i_i + h_i)
    newgate = F.tanh(i_n + resetgate * h_n)
    hy = newgate + inputgate * (hidden - newgate)

    return hy
def VarLSTMCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None, noise_in=None, noise_hidden=None):
    input = input.expand(4, *input.size()) if noise_in is None else input.unsqueeze(0) * noise_in

    hx, cx = hidden
    hx = hx.expand(4, *hx.size()) if noise_hidden is None else hx.unsqueeze(0) * noise_hidden

    gates = torch.baddbmm(b_ih.unsqueeze(1), input, w_ih) + torch.baddbmm(b_hh.unsqueeze(1), hx, w_hh)

    ingate, forgetgate, cellgate, outgate = gates

    ingate = F.sigmoid(ingate)
    forgetgate = F.sigmoid(forgetgate)
    cellgate = F.tanh(cellgate)
    outgate = F.sigmoid(outgate)

    cy = (forgetgate * cx) + (ingate * cellgate)
    hy = outgate * F.tanh(cy)

    return hy, cy
def VarFastGRUCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None, noise_in=None, noise_hidden=None):
    if noise_in is not None:
        input = input * noise_in

    hx = hidden if noise_hidden is None else hidden * noise_hidden
    if input.is_cuda:
        gi = F.linear(input, w_ih)
        gh = F.linear(hx, w_hh)
        state = fusedBackend.GRUFused()
        return state(gi, gh, hidden) if b_ih is None else state(gi, gh, hidden, b_ih, b_hh)

    gi = F.linear(input, w_ih, b_ih)
    gh = F.linear(hx, w_hh, b_hh)
    i_r, i_i, i_n = gi.chunk(3, 1)
    h_r, h_i, h_n = gh.chunk(3, 1)

    resetgate = F.sigmoid(i_r + h_r)
    inputgate = F.sigmoid(i_i + h_i)
    newgate = F.tanh(i_n + resetgate * h_n)
    hy = newgate + inputgate * (hidden - newgate)

    return hy
def forward(self, output, context):
    batch_size = output.size(0)
    hidden_size = output.size(2)
    input_size = context.size(1)

    # (batch, out_len, dim) * (batch, in_len, dim) -> (batch, out_len, in_len)
    attn = torch.bmm(output, context.transpose(1, 2))
    if self.mask is not None:
        attn.data.masked_fill_(self.mask, -float('inf'))
    attn = F.softmax(attn.view(-1, input_size)).view(batch_size, -1, input_size)

    # (batch, out_len, in_len) * (batch, in_len, dim) -> (batch, out_len, dim)
    mix = torch.bmm(attn, context)

    # concat -> (batch, out_len, 2*dim)
    combined = torch.cat((mix, output), dim=2)
    # output -> (batch, out_len, dim)
    output = F.tanh(self.linear_out(combined.view(-1, 2 * hidden_size))).view(batch_size, -1, hidden_size)

    return output, attn
def forward(self, xt, state):
    all_input_sums = self.i2h(xt) + self.h2h(state[0][-1])
    sigmoid_chunk = all_input_sums.narrow(1, 0, 3 * self.rnn_size)
    sigmoid_chunk = F.sigmoid(sigmoid_chunk)
    in_gate = sigmoid_chunk.narrow(1, 0, self.rnn_size)
    forget_gate = sigmoid_chunk.narrow(1, self.rnn_size, self.rnn_size)
    out_gate = sigmoid_chunk.narrow(1, self.rnn_size * 2, self.rnn_size)

    in_transform = torch.max(
        all_input_sums.narrow(1, 3 * self.rnn_size, self.rnn_size),
        all_input_sums.narrow(1, 4 * self.rnn_size, self.rnn_size))
    next_c = forget_gate * state[1][-1] + in_gate * in_transform
    next_h = out_gate * F.tanh(next_c)

    next_h = self.dropout(next_h)

    output = next_h
    state = (next_h.unsqueeze(0), next_c.unsqueeze(0))
    return output, state
def forward(self, h, att_feats, p_att_feats):
    # The p_att_feats here is already projected
    att_size = att_feats.numel() // att_feats.size(0) // self.rnn_size
    att = p_att_feats.view(-1, att_size, self.att_hid_size)

    att_h = self.h2att(h)                      # batch * att_hid_size
    att_h = att_h.unsqueeze(1).expand_as(att)  # batch * att_size * att_hid_size
    dot = att + att_h                          # batch * att_size * att_hid_size
    dot = F.tanh(dot)                          # batch * att_size * att_hid_size
    dot = dot.view(-1, self.att_hid_size)      # (batch * att_size) * att_hid_size
    dot = self.alpha_net(dot)                  # (batch * att_size) * 1
    dot = dot.view(-1, att_size)               # batch * att_size

    weight = F.softmax(dot)                    # batch * att_size
    att_feats_ = att_feats.view(-1, att_size, self.rnn_size)  # batch * att_size * att_feat_size
    att_res = torch.bmm(weight.unsqueeze(1), att_feats_).squeeze(1)  # batch * att_feat_size

    return att_res
def node_forward(self, inputs, child_c, child_h):
    child_h_sum = F.torch.sum(torch.squeeze(child_h, 1), 0)

    i = F.sigmoid(self.ix(inputs) + self.ih(child_h_sum))
    o = F.sigmoid(self.ox(inputs) + self.oh(child_h_sum))
    u = F.tanh(self.ux(inputs) + self.uh(child_h_sum))

    # add extra singleton dimension
    fx = F.torch.unsqueeze(self.fx(inputs), 1)
    f = F.torch.cat([self.fh(child_hi) + fx for child_hi in child_h], 0)
    f = F.sigmoid(f)
    # removing extra singleton dimension
    f = F.torch.unsqueeze(f, 1)
    fc = F.torch.squeeze(F.torch.mul(f, child_c), 1)

    c = F.torch.mul(i, u) + F.torch.sum(fc, 0)
    h = F.torch.mul(o, F.tanh(c))
    return c, h
def __init__(self, n_in, n_out, batchnorm=False, preactivation=True,
             gate_style='add_split', kernel_size=7):
    super(SMASHLayer, self).__init__()
    self.n_out = n_out
    self.n_in = n_in
    self.batchnorm = batchnorm
    self.preactivation = preactivation
    self.gate_style = gate_style
    ''' may want to make n_in and n_out more dynamic here'''
    self.op = nn.ModuleList([SMASHseq(n_in=n_in if not i % 2 else n_out,
                                      n_out=n_out,
                                      dilation=1,
                                      batchnorm=self.batchnorm,
                                      preactivation=self.preactivation,
                                      kernel_size=kernel_size)
                             for i in range(4)])

# Op represents the op definition, gate whether to use tanh-sig mult gates,
# dilation the individual dilation factors, and NL the particular
# activation to use at each ungated conv.
# Groups is currently unactivated, we'd need to make sure we slice differently
# if using variable group.
def forward(self, query, ref):
    """
    Args:
        query: is the hidden state of the decoder at the current
            time step. batch x dim
        ref: the set of hidden states from the encoder.
            sourceL x batch x hidden_dim
    """
    # ref is now [batch_size x hidden_dim x sourceL]
    ref = ref.permute(1, 2, 0)
    q = self.project_query(query).unsqueeze(2)  # batch x dim x 1
    e = self.project_ref(ref)  # batch_size x hidden_dim x sourceL
    # expand the query by sourceL
    # batch x dim x sourceL
    expanded_q = q.repeat(1, 1, e.size(2))
    # batch x 1 x hidden_dim
    v_view = self.v.unsqueeze(0).expand(
        expanded_q.size(0), len(self.v)).unsqueeze(1)
    # [batch_size x 1 x hidden_dim] * [batch_size x hidden_dim x sourceL]
    u = torch.bmm(v_view, self.tanh(expanded_q + e)).squeeze(1)
    if self.use_tanh:
        logits = self.C * self.tanh(u)
    else:
        logits = u
    return e, logits
def _step(self, H_t, T_t, C_t, h0, h_mask, t_mask, c_mask):
    s_lm1, rnns = h0, [self.rnn_h, self.rnn_t, self.rnn_c]
    for l, (rnn_h, rnn_t, rnn_c) in enumerate(zip(*rnns)):
        s_lm1_H = h_mask.expand_as(s_lm1) * s_lm1
        s_lm1_T = t_mask.expand_as(s_lm1) * s_lm1
        s_lm1_C = c_mask.expand_as(s_lm1) * s_lm1
        if l == 0:
            H_t = F.tanh(H_t + rnn_h(s_lm1_H))
            T_t = F.sigmoid(T_t + rnn_t(s_lm1_T))
            C_t = F.sigmoid(C_t + rnn_t(s_lm1_C))
        else:
            H_t = F.tanh(rnn_h(s_lm1_H))
            T_t = F.sigmoid(rnn_t(s_lm1_T))
            C_t = F.sigmoid(rnn_t(s_lm1_C))
        s_l = H_t * T_t + s_lm1 * C_t
        s_lm1 = s_l
    return s_l
def forward(self, dec_out, enc_outs, enc_att=None, mask=None):
    """
    Parameters:
    -----------
    - dec_out: torch.Tensor(batch_size x hid_dim)
    - enc_outs: torch.Tensor(seq_len x batch_size x hid_dim)
    - enc_att: (optional), torch.Tensor(seq_len x batch_size x att_dim)
    - mask: (optional), torch.ByteTensor(batch_size x seq_len)
    """
    # (batch x seq_len)
    weights = self.scorer(dec_out, enc_outs, enc_att=enc_att)

    if mask is not None:
        # weights = weights * mask.float()
        weights.data.masked_fill_(1 - mask.data, -float('inf'))

    weights = F.softmax(weights, dim=1)

    # (eq 7)
    context = weights.unsqueeze(1).bmm(enc_outs.transpose(0, 1)).squeeze(1)
    # (eq 5) linear out combining context and hidden
    context = F.tanh(self.linear_out(torch.cat([context, dec_out], 1)))

    return context, weights
def forward(self, x):
    """
    A model for non-linear data that works off of mixing multiple Gaussian
    distributions together. Uses linear projections of a given input to generate
    a set of N Gaussian models' mixture components, means and standard deviations.

    :param x: (num. samples, input dim.)
    :return: Mixture components, means, and standard deviations
        in the form (num. samples, num. mixtures)
    """
    x = F.tanh(self.projection(x))

    weights = F.softmax(self.weights_projection(x))
    means = self.mean_projection(x)
    stds = torch.exp(self.std_projection(x))

    return weights, means, stds
def forward(self, x):
    """
    Conditional Image Generation with PixelCNN Decoders
    http://arxiv.org/abs/1606.05328

    1D gated activation unit that models the forget gates and real gates of an
    activation unit using convolutions.

    :param x: (batch size, # channels, height)
    :return: tanh(conv(Wr, x)) * sigmoid(conv(Wf, x))
    """
    real_gate_weights, forget_gate_weights = self.weights.split(self.kernel_size, dim=2)
    real_gate_weights = real_gate_weights.contiguous()
    forget_gate_weights = forget_gate_weights.contiguous()

    real_gate = F.tanh(F.conv1d(input=x, weight=real_gate_weights, stride=1))
    forget_gate = F.sigmoid(F.conv1d(input=x, weight=forget_gate_weights, stride=1))

    return real_gate * forget_gate
def forward(self, *hidden_states):
    if len(hidden_states) == 1:
        hidden_state = hidden_states[0]
        return F.softmax(F.tanh(self.projection(hidden_state))) * hidden_state
    elif len(hidden_states) == 2:
        left_hidden_state, right_hidden_state = hidden_states
        if self.mode == 0 or self.mode == 1:
            if self.mode == 0:
                left_attention_weights = F.softmax(F.tanh(self.projection(left_hidden_state)))
                right_attention_weights = F.softmax(F.tanh(self.projection(right_hidden_state)))
            elif self.mode == 1:
                left_attention_weights = F.softmax(F.tanh(self.left_projection(left_hidden_state)))
                right_attention_weights = F.softmax(F.tanh(self.right_projection(right_hidden_state)))
            return left_attention_weights * left_hidden_state, right_attention_weights * right_hidden_state
        elif self.mode == 2:
            hidden_state = torch.cat([left_hidden_state, right_hidden_state], dim=1)
            attention_weights = F.softmax(F.tanh(self.projection(hidden_state)))
            return attention_weights * left_hidden_state, attention_weights * right_hidden_state
def forward(self, last_state, states, mask=None):
    sequence_length, batch_size, hidden_dim = states.size()
    last_state = last_state.unsqueeze(0).expand(sequence_length, batch_size, last_state.size(1))

    if self.mode == "dot":
        energies = last_state * states
        energies = energies.sum(dim=2).squeeze()
    elif self.mode == "general":
        expanded_projection = self.projection.expand(sequence_length, *self.projection.size())
        energies = last_state * states.bmm(expanded_projection)
        energies = energies.sum(dim=2).squeeze()
    elif self.mode == "concat":
        expanded_reduction = self.reduction.expand(sequence_length, *self.reduction.size())
        expanded_projection = self.projection.expand(sequence_length, *self.projection.size())
        energies = F.tanh(torch.cat([last_state, states], dim=2).bmm(expanded_reduction))
        energies = energies.bmm(expanded_projection).squeeze()

    if type(mask) == torch.autograd.Variable:
        energies = energies + ((mask == 0).float() * -10000)

    attention_weights = F.softmax(energies)
    return attention_weights
def forward(self, input, hx):
    h, c = hx
    pre = F.linear(input, self.weight_ih, self.bias) \
          + F.linear(h, self.weight_hh)
    pre = sparsify_grad(pre, self.k, self.simplified)

    if self.grad_clip:
        pre = clip_grad(pre, -self.grad_clip, self.grad_clip)

    i = F.sigmoid(pre[:, :self.hidden_size])
    f = F.sigmoid(pre[:, self.hidden_size: self.hidden_size * 2])
    g = F.tanh(pre[:, self.hidden_size * 2: self.hidden_size * 3])
    o = F.sigmoid(pre[:, self.hidden_size * 3:])

    c = f * c + i * g
    h = o * F.tanh(c)
    return h, c