The following 49 code examples, extracted from open-source Python projects, illustrate how to use torch.nn.RNN.
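Before the project snippets, here is a minimal, self-contained sketch of the basic nn.RNN call signature. The tensor sizes and variable names are purely illustrative, and it uses the current tensor API rather than the legacy Variable wrapper that appears in some of the older examples below.

import torch
import torch.nn as nn

# single-layer Elman RNN: 10 input features, 20 hidden units, batch dimension first
rnn = nn.RNN(input_size=10, hidden_size=20, num_layers=1, batch_first=True)

x = torch.randn(4, 7, 10)    # (batch, seq_len, input_size) because batch_first=True
h0 = torch.zeros(1, 4, 20)   # (num_layers * num_directions, batch, hidden_size)

output, hn = rnn(x, h0)      # output: (4, 7, 20), the last layer's hidden state at every step
                             # hn:     (1, 4, 20), the final hidden state of each layer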
def __init__(self, embedding, batch_size, sequence_length, n_in_rnn, n_hidden_rnn,
             n_in_mlp, n_hidden_mlp, n_out):
    """
    RNN model init.
    :param embedding: word embedding
    :param batch_size: mini_batch size
    :param sequence_length: sequence_length
    :param n_in_rnn: input_size for rnn(emb_dim)
    :param n_hidden_rnn: hidden_size for rnn
    :param n_in_mlp: input_size for mlp
    :param n_hidden_mlp: hidden_size for mlp
    :param n_out: out_size for mlp
    """
    super(QARNNModel, self).__init__()
    self.embedding = embedding
    self.batch_size = batch_size
    self.sequence_length = sequence_length
    self.rnn_input_size = n_in_rnn
    self.rnn_hidden_size = n_hidden_rnn
    self.mlp_input_size = n_in_mlp
    self.mlp_hidden_size = n_hidden_mlp
    self.mlp_out_size = n_out
    self.rnn_layer = GRUModule(self.batch_size, self.rnn_input_size, self.rnn_hidden_size)
    self.interact_layer = InteractLayer(self.rnn_hidden_size, self.rnn_hidden_size, self.mlp_input_size)
    self.bn_layer = BatchNormLayer(self.mlp_input_size)
    self.mlp = MLPDropout(self.mlp_input_size, self.mlp_hidden_size, self.mlp_out_size)
def __init__(self, word_dim, num_words, char_dim, num_chars, num_filters, kernel_size,
             rnn_mode, hidden_size, num_layers, num_labels, tag_space=0,
             embedd_word=None, embedd_char=None, p_in=0.2, p_rnn=0.5):
    super(BiVarRecurrentConv, self).__init__(word_dim, num_words, char_dim, num_chars,
                                             num_filters, kernel_size, rnn_mode,
                                             hidden_size, num_layers, num_labels,
                                             tag_space=tag_space, embedd_word=embedd_word,
                                             embedd_char=embedd_char, p_in=p_in, p_rnn=p_rnn)
    self.dropout_in = None
    self.dropout_rnn = nn.Dropout2d(p_rnn)

    if rnn_mode == 'RNN':
        RNN = VarMaskedRNN
    elif rnn_mode == 'LSTM':
        RNN = VarMaskedLSTM
    elif rnn_mode == 'GRU':
        RNN = VarMaskedGRU
    else:
        raise ValueError('Unknown RNN mode: %s' % rnn_mode)

    self.rnn = RNN(word_dim + num_filters, hidden_size, num_layers=num_layers,
                   batch_first=True, bidirectional=True, dropout=(p_in, p_rnn))
def __init__(self, input_size, hidden_size, num_layers, output_size=0, rnntype='RNN'):
    super(CharModel, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.rnntype = rnntype
    if rnntype == 'RNN':
        self.rnn = nn.RNN(self.input_size, self.hidden_size, self.num_layers)
    elif rnntype == 'LSTM':
        self.rnn = nn.LSTM(self.input_size, self.hidden_size, self.num_layers)
    elif rnntype == 'GRU':
        self.rnn = nn.GRU(self.input_size, self.hidden_size, self.num_layers)
    else:
        raise ValueError('Wrong RNN type, {} is not supported'.format(rnntype))
    if output_size > 0:
        self.output = nn.Linear(hidden_size, output_size)
        num = hidden_size * output_size
        self.output.weight.data.normal_(0, math.sqrt(2. / num))
def __init__(self, input_shape, base_filters, num_hidden, num_actions):
    super(CNN, self).__init__()
    num_input = int(np.prod(input_shape))
    self.num_hidden = num_hidden
    self.convs = nn.Sequential(
        nn.Conv2d(input_shape[0], base_filters, 5, 2, 1, bias=False),
        nn.LeakyReLU(0.2, inplace=True),
        # state size. (ndf) x 32 x 32
        nn.Conv2d(base_filters, base_filters * 2, 5, 2, 1, bias=False),
        nn.LeakyReLU(0.2, inplace=True),
        # state size. (ndf*2) x 16 x 16
        nn.Conv2d(base_filters * 2, base_filters * 2, 5, 2, 1, bias=False),
        nn.LeakyReLU(0.2, inplace=True),
        # state size. (ndf*2) x 9 x 9
    )
    # for p in self.convs.parameters():
    #     p.requires_grad = False
    # use random conv features
    # self.convs.apply(weights_init)
    self.conv_out_size = base_filters * 2 * 11 * 11
    self.rnn = nn.RNN(self.conv_out_size, self.num_hidden, batch_first=True)
    self.classifier = nn.Sequential(
        nn.Linear(num_hidden, num_actions),
        nn.Softmax()
    )
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=False):
    super(RNNModel, self).__init__()
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    if rnn_type in ['LSTM', 'GRU']:
        self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
    else:
        try:
            nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
        except KeyError:
            raise ValueError("""An invalid option for `--model` was supplied,
                             options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""")
        self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout)
    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        if nhid != ninp:
            raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.nhid = nhid
    self.nlayers = nlayers
def forward(self, x1, x1_f, x1_mask, x2, x2_mask):
    """Inputs:
    x1 = document word indices             [batch * len_d]
    x1_f = document word features indices  [batch * len_d * nfeat]
    x1_mask = document padding mask        [batch * len_d]
    x2 = question word indices             [batch * len_q]
    x2_mask = question padding mask        [batch * len_q]
    """
    # Embed both document and question
    x1_emb = self.embedding(x1)
    x2_emb = self.embedding(x2)

    # Dropout on embeddings
    if self.opt['dropout_emb'] > 0:
        x1_emb = nn.functional.dropout(x1_emb, p=self.opt['dropout_emb'],
                                       training=self.training)
        x2_emb = nn.functional.dropout(x2_emb, p=self.opt['dropout_emb'],
                                       training=self.training)

    # Add attention-weighted question representation
    if self.opt['use_qemb']:
        x2_weighted_emb = self.qemb_match(x1_emb, x2_emb, x2_mask)
        drnn_input = torch.cat([x1_emb, x2_weighted_emb, x1_f], 2)
    else:
        drnn_input = torch.cat([x1_emb, x1_f], 2)

    # Encode document with RNN
    doc_hiddens = self.doc_rnn(drnn_input, x1_mask)

    # Encode question with RNN + merge hiddens
    question_hiddens = self.question_rnn(x2_emb, x2_mask)
    if self.opt['question_merge'] == 'avg':
        q_merge_weights = layers.uniform_weights(question_hiddens, x2_mask)
    elif self.opt['question_merge'] == 'self_attn':
        q_merge_weights = self.self_attn(question_hiddens, x2_mask)
    question_hidden = layers.weighted_avg(question_hiddens, q_merge_weights)

    # Predict start and end positions
    start_scores = self.start_attn(doc_hiddens, question_hidden, x1_mask)
    end_scores = self.end_attn(doc_hiddens, question_hidden, x1_mask)
    return start_scores, end_scores
def forward(self, z_t_1, h_rnn):
    """
    Given the latent z at a particular time step t-1 as well as the hidden state
    of the RNN `h(x_{t:T})`, we return the mean and sigma vectors that parameterize
    the (diagonal) gaussian distribution `q(z_t | z_{t-1}, x_{t:T})`
    """
    # combine the rnn hidden state with a transformed version of z_t_1
    h_combined = 0.5 * (self.tanh(self.lin_z_to_hidden(z_t_1)) + h_rnn)
    # use the combined hidden state to compute the mean used to sample z_t
    mu = self.lin_hidden_to_mu(h_combined)
    # use the combined hidden state to compute the sigma used to sample z_t
    sigma = self.softplus(self.lin_hidden_to_sigma(h_combined))
    # return mu, sigma which can be fed into Normal
    return mu, sigma
def __init__(self, input_dim=88, z_dim=100, emission_dim=100, transition_dim=200,
             rnn_dim=600, rnn_dropout_rate=0.0, num_iafs=0, iaf_dim=50, use_cuda=False):
    super(DMM, self).__init__()
    # instantiate PyTorch modules used in the model and guide below
    self.emitter = Emitter(input_dim, z_dim, emission_dim)
    self.trans = GatedTransition(z_dim, transition_dim)
    self.combiner = Combiner(z_dim, rnn_dim)
    self.rnn = nn.RNN(input_size=input_dim, hidden_size=rnn_dim, nonlinearity='relu',
                      batch_first=True, bidirectional=False, num_layers=1,
                      dropout=rnn_dropout_rate)

    # if we're using normalizing flows, instantiate those too
    iafs = [InverseAutoregressiveFlow(z_dim, iaf_dim) for _ in range(num_iafs)]
    self.iafs = nn.ModuleList(iafs)

    # define (trainable) parameters z_0 and z_q_0 that help define the probability
    # distributions p(z_1) and q(z_1)
    # (since for t = 1 there are no previous latents to condition on)
    self.z_0 = nn.Parameter(torch.zeros(z_dim))
    self.z_q_0 = nn.Parameter(torch.zeros(z_dim))
    # define a (trainable) parameter for the initial hidden state of the rnn
    self.h_0 = nn.Parameter(torch.zeros(1, 1, rnn_dim))

    self.use_cuda = use_cuda
    # if on gpu cuda-ize all PyTorch (sub)modules
    if use_cuda:
        self.cuda()

# the model p(x_{1:T} | z_{1:T}) p(z_{1:T})
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=False):
    super(RNNModel, self).__init__()
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    if rnn_type in ['LSTM', 'GRU']:
        self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
    else:
        try:
            nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
        except KeyError:
            raise ValueError("""An invalid option for `--model` was supplied,
                             options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""")
        self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout)
    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.nhid = nhid
    self.nlayers = nlayers
def test_rnn_initial_hidden_state(self):
    rnn_modes = ['RNN', 'GRU', 'LSTM']
    for mode in rnn_modes:
        rnn = getattr(nn, mode)(30, 20, 2)
        input = Variable(torch.randn(10, 32, 30))
        hidden = Variable(torch.Tensor(2, 32, 20).zero_())
        if mode == 'LSTM':
            hidden = (hidden, hidden)
        output1, hidden1 = rnn(input, hidden)
        output2, hidden2 = rnn(input)
        self.assertEqual(output1, output2)
        self.assertEqual(hidden1, hidden2)
def test_RNN_dropout(self):
    # checking the assumption that cuDNN sticks dropout in between
    # RNN layers
    for p in (0, 0.276, 0.731, 1):
        for train in (True, False):
            for cuda in (True, False):
                rnn = nn.RNN(10, 1000, 2, bias=False, dropout=p, nonlinearity='relu')
                if cuda:
                    rnn.cuda()

                if train:
                    rnn.train()
                else:
                    rnn.eval()
                rnn.weight_ih_l0.data.fill_(1)
                rnn.weight_hh_l0.data.fill_(1)
                rnn.weight_ih_l1.data.fill_(1)
                rnn.weight_hh_l1.data.fill_(1)
                input = Variable(torch.Tensor(1, 1, 10).fill_(1))
                hx = Variable(torch.Tensor(2, 1, 1000).fill_(0))
                if cuda:
                    input = input.cuda()
                    hx = hx.cuda()

                output, hy = rnn(input, hx)
                self.assertEqual(output.data.min(), output.data.max())
                output_val = output.data[0][0][0]
                if p == 0 or not train:
                    self.assertEqual(output_val, 10000)
                elif p == 1:
                    self.assertEqual(output_val, 0)
                else:
                    self.assertGreater(output_val, 8000)
                    self.assertLess(output_val, 12000)
                    denorm_mod = (output_val * (1 - p)) % 10
                    self.assertLess(min(denorm_mod, 10 - denorm_mod), 1e-2)

                self.assertEqual(hy[0].data.min(), hy[0].data.max())
                self.assertEqual(hy[1].data.min(), hy[1].data.max())
                self.assertEqual(hy.data[0][0][0], 10)
                self.assertEqual(hy.data[1][0][0], output_val)
def test_RNN_dropout_state(self):
    import sys
    if sys.version_info[0] == 2:
        import cPickle as pickle
    else:
        import pickle
    for p in (0, 0.1234):
        for train in (True, False):
            for cuda in (True, False):
                rnn = nn.RNN(100, 100, 2, bias=False, dropout=p, nonlinearity='relu')
                if cuda:
                    rnn.cuda()

                if train:
                    rnn.train()
                else:
                    rnn.eval()
                input = Variable(torch.Tensor(1, 1, 100).uniform_())
                hx = Variable(torch.Tensor(2, 1, 100).uniform_())
                if cuda:
                    input = input.cuda()
                    hx = hx.cuda()

                output1, hy1 = rnn(input, hx)
                output2, hy2 = rnn(input, hx)

                rnn_pickle = pickle.dumps(rnn)
                rnn2 = pickle.loads(rnn_pickle)
                output3, hy3 = rnn2(input, hx)

                if p == 0 or not train:
                    self.assertEqual(output1, output2)
                    self.assertEqual(output1, output3)
                    self.assertEqual(hy1, hy2)
                    self.assertEqual(hy1, hy3)
                else:
                    self.assertNotEqual(output1, output2)
                    self.assertNotEqual(output1, output3)
                    self.assertNotEqual(hy1, hy2)
                    self.assertNotEqual(hy1, hy3)
def test_RNN_change_dropout(self):
    for train, cuda in product((True, False), repeat=2):
        rnn = nn.RNN(100, 100, 2, dropout=0, nonlinearity='relu')
        input = Variable(torch.Tensor(3, 2, 100).uniform_())
        if cuda:
            input.data = input.data.cuda()
            rnn.cuda()

        if train:
            rnn.train()
        else:
            rnn.eval()

        prev_output = None
        for p in (0, 0.5, 0, 0.7, 0.2, 1, 0.2, 0):
            rnn.dropout = p
            output1, hy1 = rnn(input)
            output2, hy2 = rnn(input)

            if p == 0 or p == 1 or not train:
                self.assertEqual(output1, output2)
                self.assertEqual(hy1, hy2)
            else:
                self.assertNotEqual(output1, output2)
                self.assertNotEqual(hy1, hy2)

            if prev_output is not None:
                if not train:
                    self.assertEqual(output1.data, prev_output)
                    self.assertEqual(output2.data, prev_output)
                else:
                    self.assertNotEqual(output1.data, prev_output)
                    self.assertNotEqual(output2.data, prev_output)
            prev_output = output1.data
def prepare(self, p):
    def get_rnn():
        if p.rnn_type in ['LSTM', 'GRU']:
            return getattr(nn, p.rnn_type)(p.embedding_size, p.hidden_size,
                                           p.num_layers, dropout=p.dropout)
        else:
            nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[p.rnn_type]
            return nn.RNN(p.embedding_size, p.hidden_size, p.num_layers,
                          nonlinearity=nonlinearity, dropout=p.dropout)

    class Model(nn.Module):
        def __init__(self):
            super(Model, self).__init__()
            self.drop = nn.Dropout(p.dropout)
            self.rnn = get_rnn()
            self.encoder = nn.Embedding(p.num_tokens, p.embedding_size)
            self.decoder = nn.Linear(p.hidden_size, p.num_tokens)

        def forward(self, input):
            emb = self.drop(self.encoder(input))
            output, hidden = self.rnn(emb)
            output = self.drop(output)
            decoded = self.decoder(
                output.view(output.size(0) * output.size(1), output.size(2)))
            return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden

    def cast(tensor):
        return tensor.long().cuda() if p.cuda else tensor.long()

    self.model = Model()
    self.criterion = nn.CrossEntropyLoss()
    self.data_batches = [Variable(cast(torch.zeros(p.bptt, p.batch_size)))
                         for _ in range(p.num_batches)]
    self.target_batches = [Variable(cast(torch.zeros(p.bptt * p.batch_size)))
                           for _ in range(p.num_batches)]
    if p.cuda:
        self.model.cuda()
        self.criterion.cuda()
def _test_rnn_retain_variables(self, dtype):
    rnns = [nn.LSTM(10, 20, num_layers=2).type(dtype),
            nn.GRU(10, 20, num_layers=2).type(dtype),
            nn.RNN(10, 20, num_layers=2).type(dtype)]
    for rnn in rnns:
        input = Variable(torch.randn(5, 6, 10).type(dtype), requires_grad=True)
        output = rnn(input)
        output[0].sum().backward(retain_graph=True)
        grads = [input.grad.data.clone()] + [p.grad.data.clone() for p in rnn.parameters()]
        for i in range(4):
            rnn.zero_grad()
            input.grad.data.zero_()
            output[0].sum().backward(retain_graph=True)
            grads2 = [input.grad.data] + [p.grad.data for p in rnn.parameters()]
            self.assertEqual(grads, grads2)
def test_RNN_dropout_state(self):
    import sys
    if sys.version_info[0] == 2:
        import cPickle as pickle
    else:
        import pickle
    for p in (0, 0.1234):
        for train in (True, False):
            for cuda in (True, False):
                rnn = nn.RNN(100, 100, 2, bias=False, dropout=p, nonlinearity='relu')
                if cuda:
                    rnn.cuda()

                if train:
                    rnn.train()
                else:
                    rnn.eval()
                input = Variable(torch.Tensor(1, 1, 100).uniform_())
                hx = Variable(torch.Tensor(2, 1, 100).uniform_())
                if cuda:
                    input = input.cuda()
                    hx = hx.cuda()

                output1, hy1 = rnn(input, hx)
                output2, hy2 = rnn(input, hx)

                rnn_pickle = pickle.dumps(rnn)
                rnn2 = pickle.loads(rnn_pickle)
                rnn2.flatten_parameters()
                output3, hy3 = rnn2(input, hx)

                if p == 0 or not train:
                    self.assertEqual(output1, output2)
                    self.assertEqual(output1, output3)
                    self.assertEqual(hy1, hy2)
                    self.assertEqual(hy1, hy3)
                else:
                    self.assertNotEqual(output1, output2)
                    self.assertNotEqual(output1, output3)
                    self.assertNotEqual(hy1, hy2)
                    self.assertNotEqual(hy1, hy3)
def __init__(self, batch_size, n_in, n_hidden, num_layers=1):
    """
    rnn module init.
    :param batch_size: mini_batch size
    :param n_in: rnn input size
    :param n_hidden: rnn hidden size
    :param num_layers: num of layers
    """
    super(RNNModule, self).__init__()
    self.batch_size = batch_size
    self.num_layers = num_layers
    self.input_size = n_in
    self.hidden_size = n_hidden
    self.rnn = nn.RNN(input_size=n_in, hidden_size=n_hidden,
                      num_layers=num_layers, batch_first=True)
def __init__(self, word_dim, num_words, char_dim, num_chars, num_filters, kernel_size,
             rnn_mode, hidden_size, num_layers, num_labels, tag_space=0,
             embedd_word=None, embedd_char=None, p_in=0.2, p_rnn=0.5):
    super(BiRecurrentConv, self).__init__()
    self.word_embedd = Embedding(num_words, word_dim, init_embedding=embedd_word)
    self.char_embedd = Embedding(num_chars, char_dim, init_embedding=embedd_char)
    self.conv1d = nn.Conv1d(char_dim, num_filters, kernel_size, padding=kernel_size - 1)
    self.dropout_in = nn.Dropout(p=p_in)
    self.dropout_rnn = nn.Dropout(p_rnn)

    if rnn_mode == 'RNN':
        RNN = nn.RNN
    elif rnn_mode == 'LSTM':
        RNN = nn.LSTM
    elif rnn_mode == 'GRU':
        RNN = nn.GRU
    else:
        raise ValueError('Unknown RNN mode: %s' % rnn_mode)

    self.rnn = RNN(word_dim + num_filters, hidden_size, num_layers=num_layers,
                   batch_first=True, bidirectional=True, dropout=p_rnn)

    self.dense = None
    out_dim = hidden_size * 2
    if tag_space:
        self.dense = nn.Linear(out_dim, tag_space)
        out_dim = tag_space
    self.dense_softmax = nn.Linear(out_dim, num_labels)

    # TODO set dim for log_softmax and set reduce=False to NLLLoss
    self.logsoftmax = nn.LogSoftmax()
    self.nll_loss = nn.NLLLoss(size_average=False)
def __init__(self, model_dim=None, mlp_dim=None, num_classes=None,
             word_embedding_dim=None, initial_embeddings=None, **kwargs):
    super(Net, self).__init__()
    self.word_embedding_dim = word_embedding_dim
    self.model_dim = model_dim
    self.initial_embeddings = initial_embeddings
    self.rnn = nn.RNN(word_embedding_dim, model_dim, batch_first=True)
    self.l0 = nn.Linear(model_dim, mlp_dim)
    self.l1 = nn.Linear(mlp_dim, num_classes)
def __init__(self, model_dim=None, mlp_dim=None, num_classes=None,
             word_embedding_dim=None, initial_embeddings=None, **kwargs):
    super(Net, self).__init__()
    self.model_dim = model_dim
    self.initial_embeddings = initial_embeddings
    self.rnn = nn.RNN(word_embedding_dim, model_dim, batch_first=True)
    self.l0 = nn.Linear(model_dim, mlp_dim)
    self.l1 = nn.Linear(mlp_dim, num_classes)
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, capacity, dropout=0.5, tie_weights=False):
    super(RNNModel, self).__init__()
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    if rnn_type in ['LSTM', 'GRU']:
        self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
    elif rnn_type == 'URNN':
        self.rnn = EURNN(ninp, nhid, nlayers, dropout=dropout, capacity=2)
    elif rnn_type == 'GORU':
        self.rnn = GORU(ninp, nhid, nlayers, dropout=dropout, capacity=2)
    else:
        try:
            nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
        except KeyError:
            raise ValueError("""An invalid option for `--model` was supplied,
                             options are ['URNN', 'GORU', 'LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""")
        self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout)
    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        if nhid != ninp:
            raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.nhid = nhid
    self.nlayers = nlayers
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=False):
    super(RNNModel, self).__init__()
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    if rnn_type in ['LSTM', 'GRU']:
        self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
    elif rnn_type == 'URNN':
        self.rnn = EURNN(ninp, nhid, capacity=2)
    elif rnn_type == 'GORU':
        self.rnn = GORU(ninp, nhid, capacity=2)
    else:
        try:
            nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
        except KeyError:
            raise ValueError("""An invalid option for `--model` was supplied,
                             options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""")
        self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout)
    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        if nhid != ninp:
            raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.nhid = nhid
    self.nlayers = nlayers
def test_cudnn_weight_tying(self):
    rnns = [
        nn.LSTM(10, 20, batch_first=True, bidirectional=True),
        nn.GRU(10, 20, batch_first=True, bidirectional=True),
        nn.RNN(10, 20, batch_first=True, bidirectional=True)
    ]
    for rnn in rnns:
        rnn.bias_ih_l0_reverse = rnn.bias_ih_l0
        rnn.cuda()
        input = Variable(torch.randn(5, 4, 10).cuda(), requires_grad=True)
        hx = Variable(torch.randn(2, 5, 20).cuda(), requires_grad=True)
        all_vars = [input, hx] + list(rnn.parameters())
        opt = torch.optim.SGD(rnn.parameters(), lr=0.1)
        opt.zero_grad()
        if isinstance(rnn, nn.LSTM):
            cx = Variable(torch.randn(2, 5, 20).cuda(), requires_grad=True)
            all_vars[2:2] = [cx]
            hx = (hx, cx)

        with warnings.catch_warnings(record=True) as w:
            output = rnn(input, hx)
        output[0].sum().backward()

        opt.step()

        with warnings.catch_warnings(record=True) as w:
            output_cuda = rnn(input, hx)
        rnn.cpu()
        hx = (hx[0].cpu(), hx[1].cpu()) if isinstance(rnn, nn.LSTM) else hx.cpu()
        output_cpu = rnn(input.cpu(), hx)
        self.assertEqual(output_cuda, output_cpu)
def build(self, input_dim):
    self.input_dim = input_dim
    self.layer = TorchRecurrent(self.input_dim, self.units, self.length)
def __init__(self):
    super(VanillaRNN, self).__init__()
    self.rnn = nn.RNN(input_size=feature_size, hidden_size=state_size, nonlinearity='relu')
    self.fc1 = nn.Linear(state_size, n_classes)
def __init__(self, input_shape, base_filters, num_hidden, num_actions):
    super(CNN, self).__init__()
    num_input = int(np.prod(input_shape))
    self.num_hidden = num_hidden
    self.convs = nn.Sequential(
        nn.Conv2d(input_shape[0], base_filters, 4, 2, 1, bias=False),
        nn.LeakyReLU(0.2, inplace=True),
        # state size. (ndf) x 32 x 32
        nn.Conv2d(base_filters, base_filters * 2, 4, 2, 1, bias=False),
        nn.LeakyReLU(0.2, inplace=True),
        # state size. (ndf*2) x 16 x 16
        nn.Conv2d(base_filters * 2, base_filters * 2, 4, 2, 1, bias=False),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(base_filters * 2, base_filters * 2, 4, 2, 1, bias=False),
        nn.LeakyReLU(0.2, inplace=True),
        # state size. (ndf*2) x 9 x 9
    )
    # for p in self.convs.parameters():
    #     p.requires_grad = False
    # use random conv features
    # self.convs.apply(weights_init)
    self.conv_out_size = base_filters * 2 * 6 * 6
    self.rnn = nn.RNN(self.conv_out_size, self.num_hidden, batch_first=True)
    self.classifier = nn.Sequential(
        nn.Linear(num_hidden, num_actions),
        nn.Softmax()
    )
def __init__(self, input_shape, base_filters, num_hidden, num_actions):
    super(MLP, self).__init__()
    num_input = int(np.prod(input_shape))
    self.num_hidden = num_hidden
    self.rnn = nn.RNN(num_input, self.num_hidden, batch_first=True)
    self.classifier = nn.Sequential(
        nn.Linear(num_hidden, num_actions),
        nn.Softmax()
    )
def forward(self, xes, hidden, enc_out, attn_mask=None):
    if self.attention == 'none':
        return xes

    if type(hidden) == tuple:
        # for lstms use the "hidden" state not the cell state
        hidden = hidden[0]
    last_hidden = hidden[-1]  # select hidden state from last RNN layer

    if self.attention == 'local':
        if enc_out.size(1) > self.max_length:
            offset = enc_out.size(1) - self.max_length
            enc_out = enc_out.narrow(1, offset, self.max_length)
        h_merged = torch.cat((xes.squeeze(1), last_hidden), 1)
        attn_weights = F.softmax(self.attn(h_merged), dim=1)
        if attn_weights.size(1) > enc_out.size(1):
            attn_weights = attn_weights.narrow(1, 0, enc_out.size(1))
    else:
        hid = last_hidden.unsqueeze(1)
        if self.attention == 'concat':
            hid = hid.expand(last_hidden.size(0), enc_out.size(1), last_hidden.size(1))
            h_merged = torch.cat((enc_out, hid), 2)
            active = F.tanh(self.attn(h_merged))
            attn_w_premask = self.attn_v(active).squeeze(2)
        elif self.attention == 'dot':
            attn_w_premask = (
                torch.bmm(hid, enc_out.transpose(1, 2)).squeeze(1))
        elif self.attention == 'general':
            hid = self.attn(hid)
            attn_w_premask = (
                torch.bmm(hid, enc_out.transpose(1, 2)).squeeze(1))

        # calculate activation scores
        if attn_mask is not None:
            # remove activation from NULL symbols
            attn_w_premask -= (1 - attn_mask) * 1e20
        attn_weights = F.softmax(attn_w_premask, dim=1)

    attn_applied = torch.bmm(attn_weights.unsqueeze(1), enc_out)
    merged = torch.cat((xes.squeeze(1), attn_applied.squeeze(1)), 1)
    output = F.tanh(self.attn_combine(merged).unsqueeze(1))
    return output
def forward(self, x1, x1_f, x1_pos, x1_ner, x1_mask, x2, x2_mask):
    """Inputs:
    x1 = document word indices             [batch * len_d]
    x1_f = document word features indices  [batch * len_d * nfeat]
    x1_pos = document POS tags             [batch * len_d]
    x1_ner = document entity tags          [batch * len_d]
    x1_mask = document padding mask        [batch * len_d]
    x2 = question word indices             [batch * len_q]
    x2_mask = question padding mask        [batch * len_q]
    """
    # Embed both document and question
    x1_emb = self.embedding(x1)
    x2_emb = self.embedding(x2)
    if self.opt['dropout_emb'] > 0:
        x1_emb = nn.functional.dropout(x1_emb, p=self.opt['dropout_emb'],
                                       training=self.training)
        x2_emb = nn.functional.dropout(x2_emb, p=self.opt['dropout_emb'],
                                       training=self.training)

    drnn_input_list = [x1_emb, x1_f]
    # Add attention-weighted question representation
    if self.opt['use_qemb']:
        x2_weighted_emb = self.qemb_match(x1_emb, x2_emb, x2_mask)
        drnn_input_list.append(x2_weighted_emb)
    if self.opt['pos']:
        x1_pos_emb = self.pos_embedding(x1_pos)
        if self.opt['dropout_emb'] > 0:
            x1_pos_emb = nn.functional.dropout(x1_pos_emb, p=self.opt['dropout_emb'],
                                               training=self.training)
        drnn_input_list.append(x1_pos_emb)
    if self.opt['ner']:
        x1_ner_emb = self.ner_embedding(x1_ner)
        if self.opt['dropout_emb'] > 0:
            x1_ner_emb = nn.functional.dropout(x1_ner_emb, p=self.opt['dropout_emb'],
                                               training=self.training)
        drnn_input_list.append(x1_ner_emb)

    drnn_input = torch.cat(drnn_input_list, 2)

    # Encode document with RNN
    doc_hiddens = self.doc_rnn(drnn_input, x1_mask)

    # Encode question with RNN + merge hiddens
    question_hiddens = self.question_rnn(x2_emb, x2_mask)
    if self.opt['question_merge'] == 'avg':
        q_merge_weights = layers.uniform_weights(question_hiddens, x2_mask)
    elif self.opt['question_merge'] == 'self_attn':
        q_merge_weights = self.self_attn(question_hiddens, x2_mask)
    question_hidden = layers.weighted_avg(question_hiddens, q_merge_weights)

    # Predict start and end positions
    start_scores = self.start_attn(doc_hiddens, question_hidden, x1_mask)
    end_scores = self.end_attn(doc_hiddens, question_hidden, x1_mask)
    return start_scores, end_scores
def forward(self, x1, x1_f, x1_mask, x2, x2_mask):
    """Inputs:
    x1 = document word indices             [batch * len_d]
    x1_f = document word features indices  [batch * len_d * nfeat]
    x1_mask = document padding mask        [batch * len_d]
    x2 = question word indices             [batch * len_q]
    x2_mask = question padding mask        [batch * len_q]
    """
    # Embed both document and question
    x1_emb = self.embedding(x1)
    x2_emb = self.embedding(x2)

    # Dropout on embeddings
    if self.args.dropout_emb > 0:
        x1_emb = nn.functional.dropout(x1_emb, p=self.args.dropout_emb,
                                       training=self.training)
        x2_emb = nn.functional.dropout(x2_emb, p=self.args.dropout_emb,
                                       training=self.training)

    # Form document encoding inputs
    drnn_input = [x1_emb]

    # Add attention-weighted question representation
    if self.args.use_qemb:
        x2_weighted_emb = self.qemb_match(x1_emb, x2_emb, x2_mask)
        drnn_input.append(x2_weighted_emb)

    # Add manual features
    if self.args.num_features > 0:
        drnn_input.append(x1_f)

    # Encode document with RNN
    doc_hiddens = self.doc_rnn(torch.cat(drnn_input, 2), x1_mask)

    # Encode question with RNN + merge hiddens
    question_hiddens = self.question_rnn(x2_emb, x2_mask)
    if self.args.question_merge == 'avg':
        q_merge_weights = layers.uniform_weights(question_hiddens, x2_mask)
    elif self.args.question_merge == 'self_attn':
        q_merge_weights = self.self_attn(question_hiddens, x2_mask)
    question_hidden = layers.weighted_avg(question_hiddens, q_merge_weights)

    # Predict start and end positions
    start_scores = self.start_attn(doc_hiddens, question_hidden, x1_mask)
    end_scores = self.end_attn(doc_hiddens, question_hidden, x1_mask)
    return start_scores, end_scores
def test_cudnn_weight_format(self):
    rnns = [
        nn.LSTM(10, 20, batch_first=True),
        nn.GRU(10, 20, batch_first=True),
        nn.RNN(10, 20, batch_first=True)
    ]
    first_warn = True
    for rnn in rnns:
        rnn.cuda()
        input = Variable(torch.randn(5, 4, 10).cuda(), requires_grad=True)
        hx = Variable(torch.randn(1, 5, 20).cuda(), requires_grad=True)
        all_vars = [input, hx] + list(rnn.parameters())
        if isinstance(rnn, nn.LSTM):
            cx = Variable(torch.randn(1, 5, 20).cuda(), requires_grad=True)
            all_vars[2:2] = [cx]
            hx = (hx, cx)

        output = rnn(input, hx)
        output[0].sum().backward()
        grads = [v.grad.data.clone() for v in all_vars]
        for v in all_vars:
            v.grad.data.zero_()

        # Weights will no longer view onto the same chunk of memory
        weight = all_vars[4]
        weight_data = weight.data.clone()
        weight.data.set_(weight_data)

        for i in range(2):
            with warnings.catch_warnings(record=True) as w:
                output_noncontig = rnn(input, hx)
            if first_warn:
                self.assertEqual(len(w), 1)
                self.assertIn('weights are not part of single contiguous chunk of memory',
                              w[0].message.args[0])
                first_warn = False
            output_noncontig[0].sum().backward()
            grads_noncontig = [v.grad.data.clone() for v in all_vars]
            for v in all_vars:
                v.grad.data.zero_()
            self.assertEqual(output, output_noncontig)
            self.assertEqual(grads_noncontig, grads)

        # Make sure these still share storage
        weight_data[:] = 4
        self.assertEqual(weight_data, all_vars[4].data)
def forward(self, x1, x1_f, x1_pos, x1_ner, x1_mask, x2, x2_mask):
    """Inputs:
    x1 = document word indices             [batch * len_d]
    x1_f = document word features indices  [batch * len_d * nfeat]
    x1_pos = document POS tags             [batch * len_d]
    x1_ner = document entity tags          [batch * len_d]
    x1_mask = document padding mask        [batch * len_d]
    x2 = question word indices             [batch * len_q]
    x2_mask = question padding mask        [batch * len_q]
    """
    # Embed both document and question
    x1_emb = self.embedding(x1)
    x2_emb = self.embedding(x2)

    # Dropout on embeddings
    if self.opt['dropout_emb'] > 0:
        x1_emb = nn.functional.dropout(x1_emb, p=self.opt['dropout_emb'],
                                       training=self.training)
        x2_emb = nn.functional.dropout(x2_emb, p=self.opt['dropout_emb'],
                                       training=self.training)

    drnn_input_list = [x1_emb, x1_f]
    # Add attention-weighted question representation
    if self.opt['use_qemb']:
        x2_weighted_emb = self.qemb_match(x1_emb, x2_emb, x2_mask)
        drnn_input_list.append(x2_weighted_emb)
    if self.opt['pos']:
        drnn_input_list.append(x1_pos)
    if self.opt['ner']:
        drnn_input_list.append(x1_ner)
    drnn_input = torch.cat(drnn_input_list, 2)

    # Encode document with RNN
    doc_hiddens = self.doc_rnn(drnn_input, x1_mask)

    # Encode question with RNN + merge hiddens
    question_hiddens = self.question_rnn(x2_emb, x2_mask)
    if self.opt['question_merge'] == 'avg':
        q_merge_weights = layers.uniform_weights(question_hiddens, x2_mask)
    elif self.opt['question_merge'] == 'self_attn':
        q_merge_weights = self.self_attn(question_hiddens, x2_mask)
    question_hidden = layers.weighted_avg(question_hiddens, q_merge_weights)

    # Predict start and end positions
    start_scores = self.start_attn(doc_hiddens, question_hidden, x1_mask)
    end_scores = self.end_attn(doc_hiddens, question_hidden, x1_mask)
    return start_scores, end_scores