The following 50 code examples, extracted from open-source Python projects, illustrate how to use torch.transpose().
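Before the extracted examples, here is a minimal, self-contained sketch of the basic call. The tensor name and shapes are illustrative only and do not come from any of the projects below: torch.transpose(input, dim0, dim1) returns a view of input with the two given dimensions swapped, and the result is usually non-contiguous.

import torch

# A hypothetical (batch, seq_len, hidden) tensor.
x = torch.randn(4, 10, 32)

# Swap the batch and sequence dimensions: y has shape (10, 4, 32).
y = torch.transpose(x, 0, 1)

# transpose() returns a non-contiguous view, so call .contiguous()
# before reshaping with view().
z = y.contiguous().view(10, -1)

Most of the examples that follow use exactly this pattern: transpose to move a dimension into place for an RNN, a pooling op, or a matrix multiply, then call .contiguous() when a subsequent view() requires it.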
def forward(self, inp, hidden):
    outp = self.bilstm.forward(inp, hidden)[0]
    size = outp.size()  # [bsz, len, nhid]
    compressed_embeddings = outp.view(-1, size[2])  # [bsz*len, nhid*2]
    transformed_inp = torch.transpose(inp, 0, 1).contiguous()  # [bsz, len]
    transformed_inp = transformed_inp.view(size[0], 1, size[1])  # [bsz, 1, len]
    concatenated_inp = [transformed_inp for i in range(self.attention_hops)]
    concatenated_inp = torch.cat(concatenated_inp, 1)  # [bsz, hop, len]

    hbar = self.tanh(self.ws1(self.drop(compressed_embeddings)))  # [bsz*len, attention-unit]
    alphas = self.ws2(hbar).view(size[0], size[1], -1)  # [bsz, len, hop]
    alphas = torch.transpose(alphas, 1, 2).contiguous()  # [bsz, hop, len]
    penalized_alphas = alphas + (
        -10000 * (concatenated_inp == self.dictionary.word2idx['<pad>']).float())
    # [bsz, hop, len] + [bsz, hop, len]
    alphas = self.softmax(penalized_alphas.view(-1, size[1]))  # [bsz*hop, len]
    alphas = alphas.view(size[0], self.attention_hops, size[1])  # [bsz, hop, len]
    return torch.bmm(alphas, outp), alphas
def forward(self, input):
    self.hidden = self.init_hidden(self.num_layers, input.size(1))
    embed = self.embed(input)
    input = embed.view(len(input), embed.size(1), -1)
    # gru
    # print(input)
    # print("a", self.hidden)
    lstm_out, hidden = self.gru(input, self.hidden)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    # pooling
    lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2)
    lstm_out = F.tanh(lstm_out)
    # linear
    y = self.hidden2label(lstm_out)
    logit = y
    return logit
def forward(self, x):
    x = self.embed(x)
    x = self.dropout_embed(x)
    # x = x.view(len(x), x.size(1), -1)
    # x = embed.view(len(x), embed.size(1), -1)
    bilstm_out, self.hidden = self.bilstm(x, self.hidden)
    # print(self.hidden)
    bilstm_out = torch.transpose(bilstm_out, 0, 1)
    bilstm_out = torch.transpose(bilstm_out, 1, 2)
    bilstm_out = F.tanh(bilstm_out)
    bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2)).squeeze(2)
    bilstm_out = F.tanh(bilstm_out)
    # bilstm_out = self.dropout(bilstm_out)
    # bilstm_out = self.hidden2label1(bilstm_out)
    # logit = self.hidden2label2(F.tanh(bilstm_out))
    logit = self.hidden2label(bilstm_out)
    return logit
def forward(self, x):
    embed = self.embed(x)
    # CNN
    cnn_x = embed
    cnn_x = self.dropout(cnn_x)
    cnn_x = cnn_x.unsqueeze(1)
    cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
    cnn_x = torch.cat(cnn_x, 0)
    cnn_x = torch.transpose(cnn_x, 1, 2)
    # LSTM
    lstm_out, self.hidden = self.lstm(cnn_x, self.hidden)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2)
    # linear
    cnn_lstm_out = self.hidden2label1(F.tanh(lstm_out))
    cnn_lstm_out = self.hidden2label2(F.tanh(cnn_lstm_out))
    # output
    logit = cnn_lstm_out
    return logit
def forward(self, x):
    embed = self.embed(x)
    # CNN
    embed = self.dropout(embed)
    cnn_x = embed
    cnn_x = cnn_x.unsqueeze(1)
    cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
    cnn_x = torch.cat(cnn_x, 0)
    cnn_x = torch.transpose(cnn_x, 1, 2)
    # BiLSTM
    bilstm_out, self.hidden = self.bilstm(cnn_x, self.hidden)
    bilstm_out = torch.transpose(bilstm_out, 0, 1)
    bilstm_out = torch.transpose(bilstm_out, 1, 2)
    bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2)).squeeze(2)
    # linear
    cnn_bilstm_out = self.hidden2label1(F.tanh(bilstm_out))
    cnn_bilstm_out = self.hidden2label2(F.tanh(cnn_bilstm_out))
    # dropout
    logit = self.dropout(cnn_bilstm_out)
    return logit
def forward(self, input):
    embed = self.embed(input)
    embed = self.dropout(embed)  # add this reduce the acc
    input = embed.view(len(input), embed.size(1), -1)
    # gru
    gru_out, hidden = self.bigru(input, self.hidden)
    gru_out = torch.transpose(gru_out, 0, 1)
    gru_out = torch.transpose(gru_out, 1, 2)
    # pooling
    # gru_out = F.tanh(gru_out)
    gru_out = F.max_pool1d(gru_out, gru_out.size(2)).squeeze(2)
    gru_out = F.tanh(gru_out)
    # linear
    y = self.hidden2label(gru_out)
    logit = y
    return logit
def forward(self, x):
    one_layer = self.embed(x)  # (N,W,D)  # torch.Size([64, 43, 300])
    # one_layer = self.dropout(one_layer)
    one_layer = one_layer.unsqueeze(1)  # (N,Ci,W,D)  # torch.Size([64, 1, 43, 300])
    # one layer
    one_layer = [torch.transpose(F.relu(conv(one_layer)).squeeze(3), 1, 2) for conv in self.convs1]  # torch.Size([64, 100, 36])
    # two layer
    two_layer = [F.relu(conv(one_layer.unsqueeze(1))).squeeze(3) for (conv, one_layer) in zip(self.convs2, one_layer)]
    print("two_layer {}".format(two_layer[0].size()))
    # pooling
    output = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in two_layer]  # torch.Size([64, 100]) torch.Size([64, 100])
    output = torch.cat(output, 1)  # torch.Size([64, 300])
    # dropout
    output = self.dropout(output)
    # linear
    output = self.fc1(F.relu(output))
    logit = self.fc2(F.relu(output))
    return logit
def forward(self, x):
    embed = self.embed(x)
    # CNN
    cnn_x = embed
    cnn_x = self.dropout(cnn_x)
    cnn_x = cnn_x.unsqueeze(1)
    cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
    cnn_x = torch.cat(cnn_x, 0)
    cnn_x = torch.transpose(cnn_x, 1, 2)
    # GRU
    lstm_out, self.hidden = self.gru(cnn_x, self.hidden)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2)
    # linear
    cnn_lstm_out = self.hidden2label1(F.tanh(lstm_out))
    cnn_lstm_out = self.hidden2label2(F.tanh(cnn_lstm_out))
    # output
    logit = cnn_lstm_out
    return logit
def channel_shuffle(x, groups):
    batchsize, num_channels, height, width = x.data.size()
    channels_per_group = num_channels // groups
    # reshape
    x = x.view(batchsize, groups, channels_per_group, height, width)
    # transpose
    # - contiguous() required if transpose() is used before view().
    #   See https://github.com/pytorch/pytorch/issues/764
    x = torch.transpose(x, 1, 2).contiguous()
    # flatten
    x = x.view(batchsize, -1, height, width)
    return x
def forward(self, input1):
    self.input1 = input1
    output = torch.zeros(torch.Size([input1.size(0)]) + self.grid.size())
    self.batchgrid = torch.zeros(torch.Size([input1.size(0)]) + self.grid.size())
    for i in range(input1.size(0)):
        self.batchgrid[i] = self.grid

    if input1.is_cuda:
        self.batchgrid = self.batchgrid.cuda()
        output = output.cuda()

    batchgrid_temp = self.batchgrid.view(-1, self.height*self.width, 3)
    batchgrid_temp.contiguous()
    input_temp = torch.transpose(input1, 1, 2)
    input_temp.contiguous()
    output_temp = torch.bmm(batchgrid_temp, input_temp)
    output = output_temp.view(-1, self.height, self.width, 2)
    output.contiguous()
    return output
def backward(self, grad_output):
    grad_input1 = torch.zeros(self.input1.size())

    if grad_output.is_cuda:
        self.batchgrid = self.batchgrid.cuda()
        grad_input1 = grad_input1.cuda()
    # print('gradout:', grad_output.size())
    grad_output_temp = grad_output.contiguous()
    grad_output_view = grad_output_temp.view(-1, self.height*self.width, 2)
    grad_output_view.contiguous()
    grad_output_temp = torch.transpose(grad_output_view, 1, 2)
    grad_output_temp.contiguous()
    batchgrid_temp = self.batchgrid.view(-1, self.height*self.width, 3)
    batchgrid_temp.contiguous()
    grad_input1 = torch.baddbmm(grad_input1, grad_output_temp, batchgrid_temp)
    return grad_input1
def _viterbi_decode(self, feats):
    backpointers = []
    init_alphas = torch.Tensor(self.tagset_size, 1).fill_(0.).type(self.dtype)
    forward_var = autograd.Variable(init_alphas).type(self.dtype)
    for ix, feat in enumerate(feats):
        if ix == 0:
            forward_var += feat.view(self.tagset_size, 1) + self.initial_weights
        else:
            viterbi_vars, viterbi_idx = torch.max(self.transitions + torch.transpose(
                forward_var.repeat(1, self.tagset_size), 0, 1), 1)
            forward_var = feat.view(self.tagset_size, 1) + viterbi_vars
            backpointers.append(viterbi_idx)
    terminal_var = forward_var + self.final_weights
    _, best_tag_id = torch.max(terminal_var, 0)
    best_tag_id = to_scalar(best_tag_id)
    path_score = terminal_var[best_tag_id]
    best_path = [best_tag_id]
    for bptrs_t in reversed(backpointers):
        best_tag_id = to_scalar(bptrs_t[best_tag_id])
        best_path.append(best_tag_id)
    best_path.reverse()
    return path_score, best_path
def _viterbi_decode(self, feats):
    backpointers = []
    init_vvars = torch.Tensor(self.tagset_size, 1).fill_(-10000.).type(self.dtype)
    init_vvars[self.tag_to_ix[self.START_TAG]][0] = 0
    forward_var = autograd.Variable(init_vvars).type(self.dtype)
    for feat in feats:
        viterbi_vars, viterbi_idx = torch.max(self.transitions + torch.transpose(
            forward_var.expand(forward_var.size(0), self.tagset_size), 0, 1), 1)
        forward_var = feat.view(self.tagset_size, 1) + viterbi_vars
        backpointers.append(viterbi_idx)
    terminal_var = forward_var + self.transitions[self.tag_to_ix[self.STOP_TAG]].view(self.tagset_size, 1)
    _, best_tag_id = torch.max(terminal_var, 0, keepdim=True)
    best_tag_id = to_scalar(best_tag_id)
    path_score = terminal_var[best_tag_id]
    best_path = [best_tag_id]
    for bptrs_t in reversed(backpointers):
        best_tag_id = to_scalar(bptrs_t[best_tag_id])
        best_path.append(best_tag_id)
    start = best_path.pop()
    assert start == self.tag_to_ix[self.START_TAG]  # Sanity check
    best_path.reverse()
    return path_score, best_path
def forward(self, qu, w, cand):
    qu = Variable(qu)
    w = Variable(w)
    cand = Variable(cand)
    embed_q = self.embed_B(qu)
    embed_w1 = self.embed_A(w)
    embed_w2 = self.embed_C(w)
    embed_c = self.embed_C(cand)
    # pdb.set_trace()
    q_state = torch.sum(embed_q, 1).squeeze(1)
    w1_state = torch.sum(embed_w1, 1).squeeze(1)
    w2_state = torch.sum(embed_w2, 1).squeeze(1)
    for _ in range(self.config.hop):
        sent_dot = torch.mm(q_state, torch.transpose(w1_state, 0, 1))
        sent_att = F.softmax(sent_dot)
        a_dot = torch.mm(sent_att, w2_state)
        a_dot = self.H(a_dot)
        q_state = torch.add(a_dot, q_state)
    f_feat = torch.mm(q_state, torch.transpose(embed_c, 0, 1))
    score = F.log_softmax(f_feat)
    return score
def encode(self, x):
    x = x.unsqueeze(1)
    x = self.conv(x)

    # At this point x should have shape
    # (batch, channels, time, freq)
    x = torch.transpose(x, 1, 2).contiguous()

    # Reshape x to be (batch, time, freq * channels)
    # for the RNN
    b, t, f, c = x.size()
    x = x.view((b, t, f * c))
    x, h = self.rnn(x)

    if self.rnn.bidirectional:
        half = x.size()[-1] // 2
        x = x[:, :, :half] + x[:, :, half:]

    return x
def r_duvenaud(self, h):
    # layers
    aux = []
    for l in range(len(h)):
        param_sz = self.learn_args[l].size()
        parameter_mat = torch.t(self.learn_args[l])[None, ...].expand(h[l].size(0), param_sz[1], param_sz[0])

        aux.append(torch.transpose(torch.bmm(parameter_mat, torch.transpose(h[l], 1, 2)), 1, 2))

        for j in range(0, aux[l].size(1)):
            # Mask whole 0 vectors
            aux[l][:, j, :] = nn.Softmax()(aux[l][:, j, :].clone())*(torch.sum(aux[l][:, j, :] != 0, 1) > 0).expand_as(aux[l][:, j, :]).type_as(aux[l])

    aux = torch.sum(torch.sum(torch.stack(aux, 3), 3), 1)
    return self.learn_modules[0](torch.squeeze(aux))
def m_ggnn(self, h_v, h_w, e_vw, opt={}):
    m = Variable(torch.zeros(h_w.size(0), h_w.size(1), self.args['out']).type_as(h_w.data))

    for w in range(h_w.size(1)):
        if torch.nonzero(e_vw[:, w, :].data).size():
            for i, el in enumerate(self.args['e_label']):
                ind = (el == e_vw[:, w, :]).type_as(self.learn_args[0][i])

                parameter_mat = self.learn_args[0][i][None, ...].expand(h_w.size(0), self.learn_args[0][i].size(0),
                                                                        self.learn_args[0][i].size(1))

                m_w = torch.transpose(torch.bmm(torch.transpose(parameter_mat, 1, 2),
                                                torch.transpose(torch.unsqueeze(h_w[:, w, :], 1), 1, 2)), 1, 2)
                m_w = torch.squeeze(m_w)
                m[:, w, :] = ind.expand_as(m_w)*m_w
    return m
def forward(self, inp, hidden):
    emb = self.drop(self.encoder(inp))
    outp = self.bilstm(emb, hidden)[0]
    if self.pooling == 'mean':
        outp = torch.mean(outp, 0).squeeze()
    elif self.pooling == 'max':
        outp = torch.max(outp, 0)[0].squeeze()
    elif self.pooling == 'all' or self.pooling == 'all-word':
        outp = torch.transpose(outp, 0, 1).contiguous()
    return outp, emb
def forward(self, input):
    # TODO perhaps add batch normalization or layer normalization
    x = F.elu(self.conv1(input))
    x = F.elu(self.conv2(x))
    x = F.elu(self.conv3(x))

    # Next flatten the output to be batched into LSTM layers
    # The shape of x is batch_size, channels, height, width
    x = self.pre_lstm_bn(x)

    x = torch.transpose(x, 1, 3)
    x = torch.transpose(x, 1, 2)
    x = x.contiguous()
    x = x.view(x.size(0), self.batch, self.hidden_dim)

    x, hidden = self.lstm(x, (self.hidden_state, self.cell_state))
    self.hidden_state, self.cell_state = hidden

    x = torch.transpose(x, 2, 1)
    x = x.contiguous()
    x = x.view(x.size(0), self.hidden_dim, self.height, self.width)
    x = self.lstm_batch_norm(x)

    x = F.elu(self.conv4(x))
    x = F.elu(self.conv5(x))

    o_begin = self.begin_conv(x)
    o_end = self.end_conv(x)
    o_begin = o_begin.view(o_begin.size(0), -1)
    o_end = o_end.view(o_end.size(0), -1)

    o_begin = F.log_softmax(o_begin)
    o_end = F.log_softmax(o_end)

    return o_begin, o_end
def forward(self, input):
    x = F.elu(self.conv1(input))
    x = F.elu(self.conv2(x))
    x = F.elu(self.conv3(x))

    # Next flatten the output to be batched into LSTM layers
    # The shape of x is batch_size, channels, height, width
    x = self.pre_lstm_bn(x)

    x = torch.transpose(x, 1, 3)
    x = torch.transpose(x, 1, 2)
    x = x.contiguous()
    x = x.view(x.size(0), self.batch, self.hidden_dim)

    x, hidden = self.lstm(x, (self.hidden_state, self.cell_state))
    self.hidden_state, self.cell_state = hidden

    x = torch.transpose(x, 2, 1)
    x = x.contiguous()
    x = x.view(x.size(0), self.hidden_dim, self.height, self.width)
    x = self.lstm_batch_norm(x)

    x = F.elu(self.conv4(x))
    x = F.elu(self.conv5(x))

    logit = self.move_conv(x)
    logit = logit.view(logit.size(0), -1)

    x = self.value_conv(x)
    x = x.view(x.size(0), self.hidden_dim, self.batch)
    x = F.max_pool1d(x, self.batch)
    x = x.squeeze()
    val = self.value_linear(x)

    return val, logit
def forward(self, tokens: torch.Tensor, mask: torch.Tensor):  # pylint: disable=arguments-differ
    if mask is not None:
        tokens = tokens * mask.unsqueeze(-1).float()

    # Our input is expected to have shape `(batch_size, num_tokens, embedding_dim)`.  The
    # convolution layers expect input of shape `(batch_size, in_channels, sequence_length)`,
    # where the conv layer `in_channels` is our `embedding_dim`.  We thus need to transpose the
    # tensor first.
    tokens = torch.transpose(tokens, 1, 2)
    # Each convolution layer returns output of size `(batch_size, num_filters, pool_length)`,
    # where `pool_length = num_tokens - ngram_size + 1`.  We then do an activation function,
    # then do max pooling over each filter for the whole input sequence.  Because our max
    # pooling is simple, we just use `torch.max`.  The resultant tensor has shape
    # `(batch_size, num_conv_layers * num_filters)`, which then gets projected using the
    # projection layer, if requested.

    filter_outputs = [self._activation(convolution_layer(tokens)).max(dim=2)[0]
                      for convolution_layer in self._convolution_layers]

    # Now we have a list of `num_conv_layers` tensors of shape `(batch_size, num_filters)`.
    # Concatenating them gives us a tensor of shape `(batch_size, num_filters * num_conv_layers)`.
    maxpool_output = torch.cat(filter_outputs, dim=1) if len(filter_outputs) > 1 else filter_outputs[0]

    if self.projection_layer:
        result = self.projection_layer(maxpool_output)
    else:
        result = maxpool_output
    return result
def _load_cnn_weights(self):
    cnn_options = self._options['char_cnn']
    filters = cnn_options['filters']
    char_embed_dim = cnn_options['embedding']['dim']

    convolutions = []
    for i, (width, num) in enumerate(filters):
        conv = torch.nn.Conv1d(
            in_channels=char_embed_dim,
            out_channels=num,
            kernel_size=width,
            bias=True
        )
        # load the weights
        with h5py.File(cached_path(self._weight_file), 'r') as fin:
            weight = fin['CNN']['W_cnn_{}'.format(i)][...]
            bias = fin['CNN']['b_cnn_{}'.format(i)][...]

        w_reshaped = numpy.transpose(weight.squeeze(axis=0), axes=(2, 1, 0))
        if w_reshaped.shape != tuple(conv.weight.data.shape):
            raise ValueError("Invalid weight file")
        conv.weight.data.copy_(torch.FloatTensor(w_reshaped))
        conv.bias.data.copy_(torch.FloatTensor(bias))

        conv.weight.requires_grad = False
        conv.bias.requires_grad = False

        convolutions.append(conv)
        self.add_module('char_conv_{}'.format(i), conv)

    self._convolutions = convolutions
def _load_highway(self):
    # pylint: disable=protected-access
    # the highway layers have same dimensionality as the number of cnn filters
    cnn_options = self._options['char_cnn']
    filters = cnn_options['filters']
    n_filters = sum(f[1] for f in filters)
    n_highway = cnn_options['n_highway']

    # create the layers, and load the weights
    self._highways = Highway(n_filters, n_highway, activation=torch.nn.functional.relu)
    for k in range(n_highway):
        # The AllenNLP highway is one matrix multiplication with concatenation of
        # transform and carry weights.
        with h5py.File(cached_path(self._weight_file), 'r') as fin:
            # The weights are transposed due to multiplication order assumptions in tf
            # vs pytorch (tf.matmul(X, W) vs pytorch.matmul(W, X))
            w_transform = numpy.transpose(fin['CNN_high_{}'.format(k)]['W_transform'][...])
            # -1.0 since AllenNLP is g * x + (1 - g) * f(x) but tf is (1 - g) * x + g * f(x)
            w_carry = -1.0 * numpy.transpose(fin['CNN_high_{}'.format(k)]['W_carry'][...])
            weight = numpy.concatenate([w_transform, w_carry], axis=0)
            self._highways._layers[k].weight.data.copy_(torch.FloatTensor(weight))
            self._highways._layers[k].weight.requires_grad = False

            b_transform = fin['CNN_high_{}'.format(k)]['b_transform'][...]
            b_carry = -1.0 * fin['CNN_high_{}'.format(k)]['b_carry'][...]
            bias = numpy.concatenate([b_transform, b_carry], axis=0)
            self._highways._layers[k].bias.data.copy_(torch.FloatTensor(bias))
            self._highways._layers[k].bias.requires_grad = False
def _load_projection(self):
    cnn_options = self._options['char_cnn']
    filters = cnn_options['filters']
    n_filters = sum(f[1] for f in filters)

    self._projection = torch.nn.Linear(n_filters, self.output_dim, bias=True)
    with h5py.File(cached_path(self._weight_file), 'r') as fin:
        weight = fin['CNN_proj']['W_proj'][...]
        bias = fin['CNN_proj']['b_proj'][...]
        self._projection.weight.data.copy_(torch.FloatTensor(numpy.transpose(weight)))
        self._projection.bias.data.copy_(torch.FloatTensor(bias))

        self._projection.weight.requires_grad = False
        self._projection.bias.requires_grad = False
def enumerate_support(self):
    """
    Returns the categorical distribution's support, as a tensor along the first dimension.

    Note that this returns support values of all the batched RVs in lock-step, rather
    than the full cartesian product. To iterate over the cartesian product, you must
    construct univariate Categoricals and use itertools.product() over all univariate
    variables (but this is very expensive).

    :param ps: Tensor where the last dimension denotes the event probabilities, *p_k*,
        which must sum to 1. The remaining dimensions are considered batch dimensions.
    :type ps: torch.autograd.Variable
    :param vs: Optional parameter, enumerating the items in the support. This could either
        have a numeric or string type. This should have the same dimension as ``ps``.
    :type vs: list or numpy.ndarray or torch.autograd.Variable
    :param one_hot: Denotes whether one hot encoding is enabled. This is True by default.
        When set to false, and no explicit `vs` is provided, the last dimension gives
        the one-hot encoded value from the support.
    :type one_hot: boolean
    :return: Torch variable or numpy array enumerating the support of the categorical
        distribution. Each item in the return value, when enumerated along the first
        dimensions, yields a value from the distribution's support which has the same
        dimension as would be returned by sample. If ``one_hot=True``, the last dimension
        is used for the one-hot encoding.
    :rtype: torch.autograd.Variable or numpy.ndarray.
    """
    sample_shape = self.batch_shape() + (1,)
    support_samples_size = (self.event_shape()) + sample_shape
    vs = self.vs

    if vs is not None:
        if isinstance(vs, np.ndarray):
            return vs.transpose().reshape(*support_samples_size)
        else:
            return torch.transpose(vs, 0, -1).contiguous().view(support_samples_size)
    if self.one_hot:
        return Variable(torch.stack([t.expand_as(self.ps) for t in torch_eye(*self.event_shape())]))
    else:
        LongTensor = torch.cuda.LongTensor if self.ps.is_cuda else torch.LongTensor
        return Variable(
            torch.stack([LongTensor([t]).expand(sample_shape)
                         for t in torch.arange(0, *self.event_shape()).long()]))
def forward(self, x): # print("fffff",x) embed = self.embed(x) # CNN cnn_x = embed cnn_x = torch.transpose(cnn_x, 0, 1) cnn_x = cnn_x.unsqueeze(1) cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1] # [(N,Co,W), ...]*len(Ks) cnn_x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in cnn_x] # [(N,Co), ...]*len(Ks) cnn_x = torch.cat(cnn_x, 1) cnn_x = self.dropout(cnn_x) # LSTM lstm_x = embed.view(len(x), embed.size(1), -1) lstm_out, self.hidden = self.lstm(lstm_x, self.hidden) lstm_out = torch.transpose(lstm_out, 0, 1) lstm_out = torch.transpose(lstm_out, 1, 2) # lstm_out = F.tanh(lstm_out) lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2) # CNN and LSTM cat cnn_x = torch.transpose(cnn_x, 0, 1) lstm_out = torch.transpose(lstm_out, 0, 1) cnn_lstm_out = torch.cat((cnn_x, lstm_out), 0) cnn_lstm_out = torch.transpose(cnn_lstm_out, 0, 1) # linear cnn_lstm_out = self.hidden2label1(F.tanh(cnn_lstm_out)) cnn_lstm_out = self.hidden2label2(F.tanh(cnn_lstm_out)) # output logit = cnn_lstm_out return logit
def forward(self, x):
    x_no_static = self.embed_no_static(x)
    # x_no_static = self.dropout(x_no_static)
    x_static = self.embed_static(x)
    # fix the embedding
    x_static = Variable(x_static.data)
    # x_static = self.dropout(x_static)
    x = torch.stack([x_static, x_no_static], 1)
    one_layer = x  # (N,W,D)  # torch.Size([64, 43, 300])
    # print("one_layer {}".format(one_layer.size()))
    # one_layer = self.dropout(one_layer)
    # one_layer = one_layer.unsqueeze(1)  # (N,Ci,W,D)  # torch.Size([64, 1, 43, 300])
    # one layer
    one_layer = [torch.transpose(F.relu(conv(one_layer)).squeeze(3), 1, 2).unsqueeze(1) for conv in self.convs1]  # torch.Size([64, 100, 36])
    # one_layer = [F.relu(conv(one_layer)).squeeze(3).unsqueeze(1) for conv in self.convs1]  # torch.Size([64, 100, 36])
    # print(one_layer[0].size())
    # print(one_layer[1].size())
    # two layer
    two_layer = [F.relu(conv(one_layer)).squeeze(3) for (conv, one_layer) in zip(self.convs2, one_layer)]
    # print("two_layer {}".format(two_layer[0].size()))
    # print("two_layer {}".format(two_layer[1].size()))
    # pooling
    output = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in two_layer]  # torch.Size([64, 100]) torch.Size([64, 100])
    output = torch.cat(output, 1)  # torch.Size([64, 300])
    # dropout
    output = self.dropout(output)
    # linear
    output = self.fc1(output)
    logit = self.fc2(F.relu(output))
    return logit
def forward(self, x):
    embed = self.embed(x)
    # CNN
    cnn_x = embed
    cnn_x = torch.transpose(cnn_x, 0, 1)
    cnn_x = cnn_x.unsqueeze(1)
    # cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
    cnn_x = [conv(cnn_x).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
    # cnn_x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in cnn_x]  # [(N,Co), ...]*len(Ks)
    cnn_x = [F.tanh(F.max_pool1d(i, i.size(2)).squeeze(2)) for i in cnn_x]  # [(N,Co), ...]*len(Ks)
    cnn_x = torch.cat(cnn_x, 1)
    cnn_x = self.dropout(cnn_x)
    # BiLSTM
    bilstm_x = embed.view(len(x), embed.size(1), -1)
    bilstm_out, self.hidden = self.bilstm(bilstm_x, self.hidden)
    bilstm_out = torch.transpose(bilstm_out, 0, 1)
    bilstm_out = torch.transpose(bilstm_out, 1, 2)
    # bilstm_out = F.tanh(bilstm_out)
    bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2)).squeeze(2)
    bilstm_out = F.tanh(bilstm_out)
    # CNN and BiLSTM CAT
    cnn_x = torch.transpose(cnn_x, 0, 1)
    bilstm_out = torch.transpose(bilstm_out, 0, 1)
    cnn_bilstm_out = torch.cat((cnn_x, bilstm_out), 0)
    cnn_bilstm_out = torch.transpose(cnn_bilstm_out, 0, 1)
    # linear
    cnn_bilstm_out = self.hidden2label1(F.tanh(cnn_bilstm_out))
    # cnn_bilstm_out = F.tanh(self.hidden2label1(cnn_bilstm_out))
    cnn_bilstm_out = self.hidden2label2(F.tanh(cnn_bilstm_out))
    # cnn_bilstm_out = self.hidden2label2(cnn_bilstm_out)
    # output
    logit = cnn_bilstm_out
    return logit
def forward(self, x):
    embed = self.embed(x)
    x = embed.view(len(x), embed.size(1), -1)
    bilstm_out, self.hidden = self.bilstm(x, self.hidden)
    bilstm_out = torch.transpose(bilstm_out, 0, 1)
    bilstm_out = torch.transpose(bilstm_out, 1, 2)
    bilstm_out = F.tanh(bilstm_out)
    bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2)).squeeze(2)
    y = self.hidden2label1(bilstm_out)
    y = self.hidden2label2(y)
    logit = y
    return logit
def forward(self, x):
    embed = self.embed(x)
    embed = self.dropout(embed)
    # CNN
    cnn_x = embed
    cnn_x = torch.transpose(cnn_x, 0, 1)
    cnn_x = cnn_x.unsqueeze(1)
    # cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
    cnn_x = [conv(cnn_x).squeeze(3) for conv in self.convs1]  # [(N,Co,W), ...]*len(Ks)
    # cnn_x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in cnn_x]  # [(N,Co), ...]*len(Ks)
    cnn_x = [F.tanh(F.max_pool1d(i, i.size(2)).squeeze(2)) for i in cnn_x]  # [(N,Co), ...]*len(Ks)
    cnn_x = torch.cat(cnn_x, 1)
    cnn_x = self.dropout(cnn_x)
    # BiGRU
    bigru_x = embed.view(len(x), embed.size(1), -1)
    bigru_x, self.hidden = self.bigru(bigru_x, self.hidden)
    bigru_x = torch.transpose(bigru_x, 0, 1)
    bigru_x = torch.transpose(bigru_x, 1, 2)
    # bilstm_out = F.tanh(bilstm_out)
    bigru_x = F.max_pool1d(bigru_x, bigru_x.size(2)).squeeze(2)
    bigru_x = F.tanh(bigru_x)
    # CNN and BiGRU CAT
    cnn_x = torch.transpose(cnn_x, 0, 1)
    bigru_x = torch.transpose(bigru_x, 0, 1)
    cnn_bigru_out = torch.cat((cnn_x, bigru_x), 0)
    cnn_bigru_out = torch.transpose(cnn_bigru_out, 0, 1)
    # linear
    cnn_bigru_out = self.hidden2label1(F.tanh(cnn_bigru_out))
    logit = self.hidden2label2(F.tanh(cnn_bigru_out))
    return logit
def forward(self, x):
    x = self.embed(x)
    x = self.dropout(x)
    # x = x.view(len(x), x.size(1), -1)
    # x = embed.view(len(x), embed.size(1), -1)
    bilstm_out, self.hidden = self.bilstm(x, self.hidden)

    bilstm_out = torch.transpose(bilstm_out, 0, 1)
    bilstm_out = torch.transpose(bilstm_out, 1, 2)
    # bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2)).squeeze(2)
    bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2))
    bilstm_out = bilstm_out.squeeze(2)

    hidden2lable = self.hidden2label1(F.tanh(bilstm_out))

    gate_layer = F.sigmoid(self.gate_layer(bilstm_out))
    # calculate highway layer values
    gate_hidden_layer = torch.mul(hidden2lable, gate_layer)
    # if written as in the next (commented) line it still runs, but it does not match the Highway Networks formula
    # gate_input = torch.mul((1 - gate_layer), hidden2lable)
    gate_input = torch.mul((1 - gate_layer), bilstm_out)
    highway_output = torch.add(gate_hidden_layer, gate_input)

    logit = self.logit_layer(highway_output)
    return logit
def forward(self, inputs):
    batch_sz = inputs.size(0)  # should be batch_sz (~200 in old set-up)
    inputs = torch.transpose(inputs, 0, 1)
    h0 = self.init_hidden_state(batch_sz)
    rnn_output, h_n = self.rnn.forward(inputs, h0)
    # get proposals output (L x N x h_width) ==> (N x L x K)
    output = self.lin_out.forward(rnn_output.view(rnn_output.size(0)*rnn_output.size(1), rnn_output.size(2)))
    lin_out = output.view(rnn_output.size(0), rnn_output.size(1), output.size(1))
    final_out = self.nonlin_final(torch.transpose(lin_out, 0, 1))
    return final_out, rnn_output
def forward(self, inputs): batch_sz = inputs.size(0) # should be batch_sz (~200 in old set-up) inputs = torch.transpose(inputs,0,1) h0 = self.init_hidden_state(batch_sz) rnn_output, h_n = self.rnn.forward(inputs, h0) # get "output" after linear layer. output = self.lin_out.forward(rnn_output.view(rnn_output.size(0)*rnn_output.size(1), rnn_output.size(2))) L, N = rnn_output.size(0), rnn_output.size(1) C = output.size(1) assert L*N == output.size(0), "ERROR: mismatch in output tensor dimensions" fin_out = output.view(L, N, C) fin_out = torch.transpose(fin_out,0,1) fin_out = fin_out.contiguous().view(N*L, C) return fin_out, rnn_output
def convert_to_batch_order(self, output, N, L, K, C):
    output = output.view(L, N, K, C)
    output = torch.transpose(output, 0, 1)
    return output.contiguous().view(N*L*K, C)
def th_repeat(a, repeats, axis=0):
    """Torch version of np.repeat for 1D"""
    assert len(a.size()) == 1
    return th_flatten(torch.transpose(a.repeat(repeats, 1), 0, 1))
def forward(self, input1):
    self.batchgrid = torch.zeros(torch.Size([input1.size(0)]) + self.grid.size())

    for i in range(input1.size(0)):
        self.batchgrid[i] = self.grid

    self.batchgrid = Variable(self.batchgrid)
    if input1.is_cuda:
        self.batchgrid = self.batchgrid.cuda()

    output = torch.bmm(self.batchgrid.view(-1, self.height*self.width, 3),
                       torch.transpose(input1, 1, 2)).view(-1, self.height, self.width, 2)
    return output
def _forward_alg(self, feats):
    init_alphas = torch.Tensor(self.tagset_size, 1).fill_(0.).type(self.dtype)
    forward_var = autograd.Variable(init_alphas).type(self.dtype)
    for ix, feat in enumerate(feats):
        if ix == 0:
            forward_var += feat.view(self.tagset_size, 1) + self.initial_weights
        else:
            forward_var = feat.view(self.tagset_size, 1) + log_sum_exp_mat(
                self.transitions + torch.transpose(forward_var.repeat(1, self.tagset_size), 0, 1), 1)
    terminal_var = forward_var + self.final_weights
    alpha = log_sum_exp_mat(terminal_var, 0)
    return alpha
def _forward_alg(self, feats):
    init_alphas = torch.Tensor(self.tagset_size, 1).fill_(-10000.).type(self.dtype)
    init_alphas[self.tag_to_ix[self.START_TAG]][0] = 0.
    forward_var = autograd.Variable(init_alphas).type(self.dtype)
    for feat in feats:
        forward_var = feat.view(self.tagset_size, 1) + log_sum_exp_mat(
            self.transitions + torch.transpose(
                forward_var.expand(forward_var.size(0), self.tagset_size), 0, 1), 1)
    terminal_var = forward_var + self.transitions[self.tag_to_ix[self.STOP_TAG]].view(self.tagset_size, 1)
    alpha = log_sum_exp_mat(terminal_var, 0)
    return alpha
def forward(self, qu, w, cand):
    qu = Variable(qu)
    w = Variable(w)
    cand = Variable(cand)
    embed_q = self.embed_B(qu)
    embed_w1 = self.embed_A(w)
    embed_c = self.embed_C(cand)
    # pdb.set_trace()
    q_state = torch.sum(embed_q, 1).squeeze(1)
    w1_state = torch.sum(embed_w1, 1).squeeze(1)
    sent_dot = torch.mm(q_state, torch.transpose(w1_state, 0, 1))
    sent_att = F.softmax(sent_dot)
    q_rnn_state = self.rnn_qus(embed_q, self.h0_q)[-1].squeeze(0)
    # pdb.set_trace()
    action = sent_att.multinomial()
    sent = embed_w1[action.data[0]]
    sent_state = self.rnn_doc(sent, self.h0_doc)[-1].squeeze(0)
    q_state = torch.add(q_state, sent_state)
    f_feat = torch.mm(q_state, torch.transpose(embed_c, 0, 1))
    reward_prob = F.log_softmax(f_feat).squeeze(0)
    return action, reward_prob
def forward(self, qu, w, cand):
    qu = Variable(qu)
    cand = Variable(cand)
    embed_q = self.embed(qu)
    embed_cand = self.embed(cand)
    out, (self.h0, self.c0) = self.rnn(embed_q, (self.h0, self.c0))
    self.h0.detach_()
    self.c0.detach_()
    q_state = out[:, -1, :]
    f_fea_v = torch.mm(q_state, torch.transpose(embed_cand, 0, 1))
    score_n = F.log_softmax(f_fea_v)
    return score_n
def forward(self, qu, key, value, cand):
    qu = Variable(qu)
    key = Variable(key)
    value = Variable(value)
    cand = Variable(cand)
    embed_q = self.embed_B(qu)
    embed_w1 = self.embed_A(key)
    embed_w2 = self.embed_C(value)
    embed_c = self.embed_C(cand)
    # pdb.set_trace()
    q_state = torch.sum(embed_q, 1).squeeze(1)
    w1_state = torch.sum(embed_w1, 1).squeeze(1)
    w2_state = embed_w2
    for _ in range(self.config.hop):
        sent_dot = torch.mm(q_state, torch.transpose(w1_state, 0, 1))
        sent_att = F.softmax(sent_dot)
        a_dot = torch.mm(sent_att, w2_state)
        a_dot = self.H(a_dot)
        q_state = torch.add(a_dot, q_state)
    f_feat = torch.mm(q_state, torch.transpose(embed_c, 0, 1))
    score = F.log_softmax(f_feat)
    return score
def forward(self, input1, input2):
    is_cuda = next(self.parameters()).is_cuda
    device_id = next(self.parameters()).get_device() if is_cuda else None
    out_size = self.out_features
    batch_size, len1, dim1 = input1.size()
    if self._use_bias[0]:
        ones = torch.ones(batch_size, len1, 1)
        if is_cuda:
            ones = ones.cuda(device_id)
        input1 = torch.cat((input1, Variable(ones)), dim=2)
        dim1 += 1
    len2, dim2 = input2.size()[1:]
    if self._use_bias[1]:
        ones = torch.ones(batch_size, len2, 1)
        if is_cuda:
            ones = ones.cuda(device_id)
        input2 = torch.cat((input2, Variable(ones)), dim=2)
        dim2 += 1
    input1_reshaped = input1.contiguous().view(batch_size * len1, dim1)
    W_reshaped = torch.transpose(self.weight, 1, 2) \
        .contiguous().view(dim1, out_size * dim2)
    affine = torch.mm(input1_reshaped, W_reshaped) \
        .view(batch_size, len1 * out_size, dim2)
    biaffine = torch.transpose(
        torch.bmm(affine, torch.transpose(input2, 1, 2))
        .view(batch_size, len1, out_size, len2), 2, 3)
    if self._use_bias[2]:
        biaffine += self.bias.expand_as(biaffine)
    return biaffine
def __call__(self, *inputs):
    outputs = []
    for idx, _input in enumerate(inputs):
        _input = th.transpose(_input, self.dim1, self.dim2)
        outputs.append(_input)
    return outputs if idx > 1 else outputs[0]
def forward(self, input1):
    self.input1 = input1
    output = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_()
    self.batchgrid = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_()
    for i in range(input1.size(0)):
        self.batchgrid[i] = self.grid.astype(self.batchgrid[i])

    # if input1.is_cuda:
    #     self.batchgrid = self.batchgrid.cuda()
    #     output = output.cuda()

    for i in range(input1.size(0)):
        output = torch.bmm(self.batchgrid.view(-1, self.height*self.width, 3),
                           torch.transpose(input1, 1, 2)).view(-1, self.height, self.width, 2)

    return output
def backward(self, grad_output):
    grad_input1 = self.input1.new(self.input1.size()).zero_()

    # if grad_output.is_cuda:
    #     self.batchgrid = self.batchgrid.cuda()
    #     grad_input1 = grad_input1.cuda()

    grad_input1 = torch.baddbmm(grad_input1,
                                torch.transpose(grad_output.view(-1, self.height*self.width, 2), 1, 2),
                                self.batchgrid.view(-1, self.height*self.width, 3))
    return grad_input1
def u_duvenaud(self, h_v, m_v, opt):
    param_sz = self.learn_args[0][opt['deg']].size()
    parameter_mat = torch.t(self.learn_args[0][opt['deg']])[None, ...].expand(m_v.size(0), param_sz[1], param_sz[0])

    aux = torch.bmm(parameter_mat, torch.transpose(m_v, 1, 2))

    return torch.transpose(torch.nn.Sigmoid()(aux), 1, 2)
def u_ggnn(self, h_v, m_v, opt={}):
    h_v.contiguous()
    m_v.contiguous()
    h_new = self.learn_modules[0](torch.transpose(m_v, 0, 1), torch.unsqueeze(h_v, 0))[0]  # 0 or 1???
    return torch.transpose(h_new, 0, 1)