The following 50 code examples, extracted from open-source Python projects, show how to use torch.nn.functional.dropout().
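Before the project examples, here is a minimal self-contained sketch of the usual call pattern (the TinyClassifier module and its layer sizes are invented for illustration, not taken from any of the projects below): F.dropout(input, p, training, inplace) zeroes elements with probability p, and is almost always gated on the module's self.training flag so that it only fires during training.

import torch
import torch.nn as nn
import torch.nn.functional as F


class TinyClassifier(nn.Module):
    """Hypothetical module used only to illustrate the call pattern:
    pass training=self.training so dropout is active in train() mode
    and becomes a no-op in eval() mode."""

    def __init__(self, in_features=128, num_classes=10, p=0.5):
        super().__init__()
        self.fc1 = nn.Linear(in_features, 64)
        self.fc2 = nn.Linear(64, num_classes)
        self.p = p

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=self.p, training=self.training)
        return self.fc2(x)


model = TinyClassifier()
model.train()
out_train = model(torch.randn(4, 128))  # dropout active
model.eval()
out_eval = model(torch.randn(4, 128))   # dropout disabled (identity)

Note that F.dropout defaults to training=True, so examples below that omit the training argument keep dropout active even in eval() mode.
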
def forward(self, x): """ Compute the forward pass of the composite transformation H(x), where x is the concatenation of the current and all preceding feature maps. """ if self.bottleneck: out = self.conv1(F.relu(self.bn1(x))) if self.p > 0: out = F.dropout(out, p=self.p, training=self.training) out = self.conv2(F.relu(self.bn2(out))) if self.p > 0: out = F.dropout(out, p=self.p, training=self.training) else: out = self.conv2(F.relu(self.bn2(x))) if self.p > 0: out = F.dropout(out, p=self.p, training=self.training) return torch.cat((x, out), 1)
def _word_repre_layer(self, input):
    """
    args:
        - input: (q_sentence, q_words) | (a_sentence, a_words)
          q_sentence - [batch_size, sent_length]
          q_words    - [batch_size, sent_length, words_len]
    return:
        - output: [batch_size, sent_length, context_dim]
    """
    sentence, words = input
    # [batch_size, sent_length, corpus_emb_dim]
    s_encode = self.corpus_emb(sentence)
    # [batch_size, sent_length, word_lstm_dim]
    w_encode = self._word_repre_forward(words)
    w_encode = F.dropout(w_encode, p=self.dropout, training=True, inplace=False)
    out = torch.cat((s_encode, w_encode), 2)
    return out

def __init__(self):
    super(C3D_net, self).__init__()
    self.conv1 = nn.Conv3d(3, 64, kernel_size=(3, 3, 3), stride=1, padding=(1, 1, 1))
    self.relu = nn.ReLU()
    self.maxpool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))
    self.conv2 = nn.Conv3d(64, 128, kernel_size=(3, 3, 3), stride=1, padding=(1, 1, 1))
    self.maxpool2 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
    self.conv3 = nn.Conv3d(128, 256, kernel_size=(3, 3, 3), stride=1, padding=(1, 1, 1))
    self.maxpool3 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
    self.conv4 = nn.Conv3d(256, 256, kernel_size=(3, 3, 3), stride=1, padding=(1, 1, 1))
    self.maxpool4 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
    self.conv5 = nn.Conv3d(256, 256, kernel_size=(3, 3, 3), stride=1, padding=(1, 1, 1))
    self.maxpool5 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
    self.num_out_maxpool5 = 2304
    self.fc6 = nn.Linear(self.num_out_maxpool5, 2048)  # TBA
    self.fc7 = nn.Linear(2048, 2048)
    # self.dropout = nn.Dropout(p=0.5)
    self.fc8 = nn.Linear(2048, 101)
    self._initialize_weights()

def __init__(self, dictionary, encoder_embed_dim=512, embed_dim=512,
             out_embed_dim=512, num_layers=1, dropout_in=0.1,
             dropout_out=0.1, attention=True):
    super().__init__()
    self.dictionary = dictionary
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out

    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)

    self.layers = nn.ModuleList([
        LSTMCell(encoder_embed_dim + embed_dim if layer == 0 else embed_dim, embed_dim)
        for layer in range(num_layers)
    ])
    self.attention = AttentionLayer(encoder_embed_dim, embed_dim)
    if embed_dim != out_embed_dim:
        self.additional_fc = Linear(embed_dim, out_embed_dim)
    self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)

def build_model(args, src_dict, dst_dict):
    encoder = FConvEncoder(
        src_dict,
        embed_dim=args.encoder_embed_dim,
        convolutions=eval(args.encoder_layers),
        dropout=args.dropout,
        max_positions=args.max_source_positions,
    )
    decoder = FConvDecoder(
        dst_dict,
        embed_dim=args.decoder_embed_dim,
        convolutions=eval(args.decoder_layers),
        out_embed_dim=args.decoder_out_embed_dim,
        attention=eval(args.decoder_attention),
        dropout=args.dropout,
        max_positions=args.max_target_positions,
    )
    return FConvModel(encoder, decoder)

def __init__(self, input_size, hidden_size, num_layers,
             dropout_rate=0, dropout_output=False, rnn_type=nn.LSTM,
             concat_layers=False, padding=False):
    super(StackedBRNN, self).__init__()
    self.padding = padding
    self.dropout_output = dropout_output
    self.dropout_rate = dropout_rate
    self.num_layers = num_layers
    self.concat_layers = concat_layers
    self.rnns = nn.ModuleList()
    for i in range(num_layers):
        input_size = input_size if i == 0 else 2 * hidden_size
        # self.rnns.append(rnn_type(input_size, hidden_size,
        #                           num_layers=1,
        #                           bidirectional=True))
        self.rnns.append(MF.SRUCell(input_size, hidden_size,
                                    dropout=dropout_rate,
                                    rnn_dropout=dropout_rate,
                                    use_tanh=1,
                                    bidirectional=True))

def forward(self, x):
    en0 = self.c0(x)
    en1 = self.bnc1(self.c1(F.leaky_relu(en0, negative_slope=0.2)))
    en2 = self.bnc2(self.c2(F.leaky_relu(en1, negative_slope=0.2)))
    en3 = self.bnc3(self.c3(F.leaky_relu(en2, negative_slope=0.2)))
    en4 = self.bnc4(self.c4(F.leaky_relu(en3, negative_slope=0.2)))
    en5 = self.bnc5(self.c5(F.leaky_relu(en4, negative_slope=0.2)))
    en6 = self.bnc6(self.c6(F.leaky_relu(en5, negative_slope=0.2)))
    en7 = self.c7(F.leaky_relu(en6, negative_slope=0.2))
    de7 = self.bnd7(self.d7(F.relu(en7)))
    de6 = F.dropout(self.bnd6(self.d6(F.relu(torch.cat((en6, de7), 1)))))
    de5 = F.dropout(self.bnd5(self.d5(F.relu(torch.cat((en5, de6), 1)))))
    de4 = F.dropout(self.bnd4(self.d4(F.relu(torch.cat((en4, de5), 1)))))
    de3 = self.bnd3(self.d3(F.relu(torch.cat((en3, de4), 1))))
    de2 = self.bnd2(self.d2(F.relu(torch.cat((en2, de3), 1))))
    de1 = self.bnd1(self.d1(F.relu(torch.cat((en1, de2), 1))))
    de0 = F.tanh(self.d0(F.relu(torch.cat((en0, de1), 1))))
    return de0

def emit_RNNs(self, IR_node, func):
    raise NotImplementedError()
    # for Keras
    if "dropout" in IR_node.IR_layer.attr:
        dropout_str = ",dropout = {}, recurrent_dropout = {}".format(
            IR_node.IR_layer.attr['dropout'].f,
            IR_node.IR_layer.attr['recurrent_dropout'].f)
    else:
        dropout_str = ""

    code = "{:<15} = {}(units = {}, use_bias = {} {})({})".format(
        IR_node.name,
        func,
        IR_node.IR_layer.attr['units'].i,
        IR_node.IR_layer.attr['use_bias'].b,
        dropout_str,
        IR_node.in_edges[0])

    return code

def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
    features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)

    if self.training:
        roi_data = self.proposal_target_layer(rois, gt_boxes, gt_ishard,
                                              dontcare_areas, self.n_classes)
        rois = roi_data[0]

    # roi pool
    pooled_features = self.roi_pool(features, rois)
    x = pooled_features.view(pooled_features.size()[0], -1)
    x = self.fc6(x)
    x = F.dropout(x, training=self.training)
    x = self.fc7(x)
    x = F.dropout(x, training=self.training)

    cls_score = self.score_fc(x)
    cls_prob = F.softmax(cls_score)
    bbox_pred = self.bbox_fc(x)

    if self.training:
        self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)

    return cls_prob, bbox_pred, rois

def __init__(self, dictionary, embed_dim=512, max_positions=1024,
             convolutions=((512, 3),) * 20, dropout=0.1):
    super().__init__()
    self.dictionary = dictionary
    self.dropout = dropout
    self.num_attention_layers = None

    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    self.embed_positions = Embedding(max_positions, embed_dim, padding_idx)

    in_channels = convolutions[0][0]
    self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
    self.projections = nn.ModuleList()
    self.convolutions = nn.ModuleList()
    for (out_channels, kernel_size) in convolutions:
        pad = (kernel_size - 1) // 2  # integer division; plain / would give a float padding in Python 3
        self.projections.append(Linear(in_channels, out_channels)
                                if in_channels != out_channels else None)
        self.convolutions.append(
            ConvTBC(in_channels, out_channels * 2, kernel_size, padding=pad,
                    dropout=dropout))
        in_channels = out_channels
    self.fc2 = Linear(in_channels, embed_dim)

def __init__(self, fea_size, dropout=False, gate_width=128,
             use_region=True, use_kernel_function=False):
    super(Hierarchical_Message_Passing_Structure_base, self).__init__()
    # self.w_object = Parameter()
    if use_kernel_function:
        Message_Passing_Unit = Message_Passing_Unit_v2
    else:
        Message_Passing_Unit = Message_Passing_Unit_v1

    self.gate_sub2pred = Message_Passing_Unit(fea_size, gate_width)
    self.gate_obj2pred = Message_Passing_Unit(fea_size, gate_width)
    self.gate_pred2sub = Message_Passing_Unit(fea_size, gate_width)
    self.gate_pred2obj = Message_Passing_Unit(fea_size, gate_width)

    self.GRU_object = Gated_Recurrent_Unit(fea_size, dropout)  # nn.GRUCell(fea_size, fea_size) #
    self.GRU_phrase = Gated_Recurrent_Unit(fea_size, dropout)

    if use_region:
        self.gate_pred2reg = Message_Passing_Unit(fea_size, gate_width)
        self.gate_reg2pred = Message_Passing_Unit(fea_size, gate_width)
        self.GRU_region = Gated_Recurrent_Unit(fea_size, dropout)

def forward(self, x):
    if not self.active:
        self.eval()
    if not self.equalInOut:
        x = self.relu1(self.bn1(x))
    else:
        out = self.relu1(self.bn1(x))
    out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x)))
    if self.droprate > 0:
        out = F.dropout(out, p=self.droprate, training=self.training)
    out = self.conv2(out)
    out = torch.add(x if self.equalInOut else self.convShortcut(x), out)
    if self.active:
        return out
    else:
        return out.detach()

# note: we call it DenseNet for simple compatibility with the training code.
# similarly, we call it growthRate instead of widen_factor.

def forward(self, input_v, input_q):
    # visual (cnn features)
    if 'dim_v' in self.opt:
        x_v = F.dropout(input_v, p=self.opt['dropout_v'], training=self.training)
        x_v = self.linear_v(x_v)
        if 'activation_v' in self.opt:
            x_v = getattr(F, self.opt['activation_v'])(x_v)
    else:
        x_v = input_v
    # question (rnn features)
    if 'dim_q' in self.opt:
        x_q = F.dropout(input_q, p=self.opt['dropout_q'], training=self.training)
        x_q = self.linear_q(x_q)
        if 'activation_q' in self.opt:
            x_q = getattr(F, self.opt['activation_q'])(x_q)
    else:
        x_q = input_q
    # hadamard product
    x_mm = torch.mul(x_q, x_v)
    return x_mm

def forward(self, input):
    lengths = process_lengths(input)
    x = self.embedding(input)  # seq2seq
    x = getattr(F, 'tanh')(x)
    x_0, hn = self.rnn_0(x)
    vec_0 = select_last(x_0, lengths)
    # x_1 = F.dropout(x_0, p=0.3, training=self.training)
    # print(x_1.size())
    x_1, hn = self.rnn_1(x_0)
    vec_1 = select_last(x_1, lengths)

    vec_0 = F.dropout(vec_0, p=0.3, training=self.training)
    vec_1 = F.dropout(vec_1, p=0.3, training=self.training)
    output = torch.cat((vec_0, vec_1), 1)
    return output

def factory(vocab_words, opt):
    if opt['arch'] == 'skipthoughts':
        st_class = getattr(skipthoughts, opt['type'])
        seq2vec = st_class(opt['dir_st'],
                           vocab_words,
                           dropout=opt['dropout'],
                           fixed_emb=opt['fixed_emb'])
    elif opt['arch'] == '2-lstm':
        seq2vec = TwoLSTM(vocab_words,
                          opt['emb_size'],
                          opt['hidden_size'])
    elif opt['arch'] == 'lstm':
        seq2vec = TwoLSTM(vocab_words,
                          opt['emb_size'],
                          opt['hidden_size'],
                          opt['num_layers'])
    else:
        raise NotImplementedError
    return seq2vec

def forward(self, x, hidden):
    h, c = hidden
    h = h.view(h.size(1), -1)
    c = c.view(c.size(1), -1)
    x = x.view(x.size(1), -1)
    # Linear mappings
    i_t = th.mm(x, self.w_xi) + th.mm(h, self.w_hi) + self.b_i
    f_t = th.mm(x, self.w_xf) + th.mm(h, self.w_hf) + self.b_f
    o_t = th.mm(x, self.w_xo) + th.mm(h, self.w_ho) + self.b_o
    # activations
    i_t.sigmoid_()
    f_t.sigmoid_()
    o_t.sigmoid_()
    # cell computations
    c_t = th.mm(x, self.w_xc) + th.mm(h, self.w_hc) + self.b_c
    c_t.tanh_()
    c_t = th.mul(c, f_t) + th.mul(i_t, c_t)
    h_t = th.mul(o_t, th.tanh(c_t))
    # Reshape for compatibility
    h_t = h_t.view(1, h_t.size(0), -1)
    c_t = c_t.view(1, c_t.size(0), -1)
    if self.dropout > 0.0:
        F.dropout(h_t, p=self.dropout, training=self.training, inplace=True)
    return h_t, (h_t, c_t)

def forward(self, x):
    # layer1
    h = F.relu(self.conv1(x))
    h = F.max_pool2d(h, 3, stride=2)
    # layer2
    h = F.relu(self.conv2(h))
    h = F.max_pool2d(h, 3, stride=2)
    # layer3-5
    h = F.relu(self.conv3(h))
    h = F.relu(self.conv4(h))
    h = F.relu(self.conv5(h))
    h = F.max_pool2d(h, 3, stride=2)
    h = h.view(-1, 256 * 6 * 6)
    # layer6-8
    h = F.dropout(F.relu(self.fc6(h)), training=self.training)
    h = F.dropout(F.relu(self.fc7(h)), training=self.training)
    h = self.fc8(h)
    return h.view(-1, self.Nj, 2)

def forward(self, embeddings_supervised, speeds, is_reverse, steering_wheel,
            steering_wheel_raw, multiactions_vecs):
    def act(x):
        return F.leaky_relu(x, negative_slope=0.2, inplace=True)
    x_emb_sup = embeddings_supervised  # 512x3x5
    x_emb_sup = act(self.emb_sup_c1_sd(self.emb_sup_c1_bn(self.emb_sup_c1(x_emb_sup))))  # 1024x1x3
    x_emb_sup = x_emb_sup.view(-1, 1024 * 1 * 3)
    x_emb_sup = add_white_noise(x_emb_sup, 0.005, self.training)

    x_emb_add = torch.cat([speeds, is_reverse, steering_wheel,
                           steering_wheel_raw, multiactions_vecs], 1)
    x_emb_add = act(self.emb_add_fc1_bn(self.emb_add_fc1(x_emb_add)))
    x_emb_add = add_white_noise(x_emb_add, 0.005, self.training)

    x_emb = torch.cat([x_emb_sup, x_emb_add], 1)
    x_emb = F.dropout(x_emb, p=0.05, training=self.training)
    embs = F.relu(self.emb_fc1_bn(self.emb_fc1(x_emb)))
    # this is currently always on, to decrease the likelihood of systematic
    # errors that are repeated over many frames
    embs = add_white_noise(embs, 0.005, True)
    return embs

def forward(self, embeddings, return_v_adv=False):
    def act(x):
        return F.leaky_relu(x, negative_slope=0.2, inplace=True)
    B, _ = embeddings.size()
    x = act(self.fc1_bn(self.fc1(embeddings)))
    x = add_white_noise(x, 0.005, self.training)
    x = F.dropout(x, p=0.1, training=self.training)

    x_v = self.fc_v(x)
    x_v_expanded = x_v.expand(B, 9)
    x_adv = self.fc_advantage(x)
    x_adv_mean = x_adv.mean(dim=1)
    x_adv_mean = x_adv_mean.expand(B, 9)
    x_adv = x_adv - x_adv_mean
    x = x_v_expanded + x_adv

    if return_v_adv:
        return x, (x_v, x_adv)
    else:
        return x

def __init__(self):
    super(SuccessorPredictor, self).__init__()

    def identity(v):
        return lambda x: x

    bn2d = nn.InstanceNorm2d
    bn1d = identity

    self.input_size = 9
    self.hidden_size = 512
    self.nb_layers = 1
    self.hidden_fc1 = nn.Linear(512, self.nb_layers * 2 * self.hidden_size)
    self.hidden_fc1_bn = bn1d(self.nb_layers * 2 * self.hidden_size)
    self.rnn = nn.LSTM(self.input_size, self.hidden_size, self.nb_layers,
                       dropout=0.1, batch_first=False)
    self.fc1 = nn.Linear(self.hidden_size, 512)

    init_weights(self)

def forward(self, xt, fc_feats, att_feats, p_att_feats, state):
    prev_h = state[0][-1]
    att_lstm_input = torch.cat([prev_h, fc_feats, xt], 1)

    h_att, c_att = self.att_lstm(att_lstm_input, (state[0][0], state[1][0]))

    att = self.attention(h_att, att_feats, p_att_feats)

    lang_lstm_input = torch.cat([att, h_att], 1)
    # lang_lstm_input = torch.cat([att, F.dropout(h_att, self.drop_prob_lm, self.training)], 1) ?????

    h_lang, c_lang = self.lang_lstm(lang_lstm_input, (state[0][1], state[1][1]))

    output = F.dropout(h_lang, self.drop_prob_lm, self.training)
    state = (torch.stack([h_att, h_lang]), torch.stack([c_att, c_lang]))

    return output, state

def __init__(self, opt):
    super(Att2in2Core, self).__init__()
    self.input_encoding_size = opt.input_encoding_size
    # self.rnn_type = opt.rnn_type
    self.rnn_size = opt.rnn_size
    # self.num_layers = opt.num_layers
    self.drop_prob_lm = opt.drop_prob_lm
    self.fc_feat_size = opt.fc_feat_size
    self.att_feat_size = opt.att_feat_size
    self.att_hid_size = opt.att_hid_size

    # Build a LSTM
    self.a2c = nn.Linear(self.rnn_size, 2 * self.rnn_size)
    self.i2h = nn.Linear(self.input_encoding_size, 5 * self.rnn_size)
    self.h2h = nn.Linear(self.rnn_size, 5 * self.rnn_size)
    self.dropout = nn.Dropout(self.drop_prob_lm)

    self.attention = Attention(opt)

def forward(self, xt, fc_feats, att_feats, p_att_feats, state):
    att_res = self.attention(state[0][-1], att_feats, p_att_feats)

    all_input_sums = self.i2h(xt) + self.h2h(state[0][-1])
    sigmoid_chunk = all_input_sums.narrow(1, 0, 3 * self.rnn_size)
    sigmoid_chunk = F.sigmoid(sigmoid_chunk)
    in_gate = sigmoid_chunk.narrow(1, 0, self.rnn_size)
    forget_gate = sigmoid_chunk.narrow(1, self.rnn_size, self.rnn_size)
    out_gate = sigmoid_chunk.narrow(1, self.rnn_size * 2, self.rnn_size)

    in_transform = all_input_sums.narrow(1, 3 * self.rnn_size, 2 * self.rnn_size) + \
        self.a2c(att_res)
    in_transform = torch.max(
        in_transform.narrow(1, 0, self.rnn_size),
        in_transform.narrow(1, self.rnn_size, self.rnn_size))
    next_c = forget_gate * state[1][-1] + in_gate * in_transform
    next_h = out_gate * F.tanh(next_c)

    output = self.dropout(next_h)
    state = (next_h.unsqueeze(0), next_c.unsqueeze(0))
    return output, state

def forward(self, x, lengths):
    batch_size, seq_length = x.size()[:2]

    emb = Variable(torch.from_numpy(
        self.initial_embeddings.take(x.numpy(), 0)),
        volatile=not self.training)

    h = Variable(torch.zeros(batch_size, self.model_dim), volatile=not self.training)

    for t in range(seq_length):
        inp = emb[:, t, :]
        h = self.rnn(inp, h)

    h = F.relu(self.l0(F.dropout(h.squeeze(), 0.5, self.training)))
    h = F.relu(self.l1(F.dropout(h, 0.5, self.training)))
    y = F.log_softmax(h)
    return y

def forward(self, x, lengths):
    batch_size = x.size(0)
    max_len = max(lengths)

    emb = Variable(torch.from_numpy(
        self.initial_embeddings.take(x.numpy(), 0)),
        volatile=not self.training)

    for t in range(max_len):
        indices = []
        for i, l in enumerate(lengths):
            if l >= max(lengths) - t:
                indices.append(i)

        # Build batch.
        dynamic_batch_size = len(indices)
        inp = Variable(torch.FloatTensor(dynamic_batch_size, self.word_embedding_dim),
                       volatile=not self.training)
        h = Variable(torch.FloatTensor(dynamic_batch_size, self.model_dim),
                     volatile=not self.training)
        output = self.rnn(inp, h)

    hn = output
    h = F.relu(self.l0(F.dropout(hn.squeeze(), 0.5, self.training)))
    h = F.relu(self.l1(F.dropout(h, 0.5, self.training)))
    y = F.log_softmax(h)
    return y

def forward(self, x, lengths):
    batch_size = x.size(0)
    max_len = max(lengths)

    emb = Variable(torch.from_numpy(
        self.initial_embeddings.take(x.numpy(), 0)),
        volatile=not self.training)

    inp = Variable(torch.FloatTensor(emb.size()), volatile=not self.training)
    h0 = Variable(torch.FloatTensor(1, batch_size, self.model_dim),
                  volatile=not self.training)

    _, hn = self.rnn(emb, h0)

    h = F.relu(self.l0(F.dropout(hn.squeeze(), 0.5, self.training)))
    h = F.relu(self.l1(F.dropout(h, 0.5, self.training)))
    y = F.log_softmax(h)
    return y

def forward(self, x):
    batchSize = x.size()[0]
    x1 = torch.zeros(batchSize, 1, 1, 21)
    x2 = torch.zeros(batchSize, 1, 1, 21)
    x3 = torch.zeros(batchSize, 1, 1, 21)
    for b in range(batchSize):
        for t in range(21):
            x1[b, 0, 0, t] = x.data[b, 0, 0, t]
            x2[b, 0, 0, t] = x.data[b, 0, 1, t]
            x3[b, 0, 0, t] = x.data[b, 0, 2, t]
    x1, x2, x3 = Variable(x1), Variable(x2), Variable(x3)
    x1, x2, x3 = self.br1.forward(x1), self.br2.forward(x2), self.br2.forward(x3)
    x = torch.cat([x1, x2, x3], 1)
    x = self.bn1(x)
    x = F.dropout(x, p=self.dropout)
    x = self.fc1(x)
    x = self.bn2(x)
    x = F.dropout(x, p=self.dropout)
    x = self.fc2(x)
    return x

def forward(self, x):
    x = self.conv2d_1a(x)
    x = self.conv2d_2a(x)
    x = self.conv2d_2b(x)
    x = self.maxpool_3a(x)
    x = self.conv2d_3b(x)
    x = self.conv2d_4a(x)
    x = self.maxpool_5a(x)
    x = self.mixed_5b(x)
    x = self.repeat(x)
    x = self.mixed_6a(x)
    x = self.repeat_1(x)
    x = self.mixed_7a(x)
    x = self.repeat_2(x)
    x = self.block8(x)
    x = self.conv2d_7b(x)
    # x = F.avg_pool2d(x, 8, count_include_pad=False)
    x = adaptive_avgmax_pool2d(x, self.global_pool, count_include_pad=False)
    x = x.view(x.size(0), -1)
    if self.drop_rate > 0:
        x = F.dropout(x, p=self.drop_rate, training=self.training)
    x = self.classif(x)
    return x

def forward(self, x):
    residual = x

    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)
    if self.drop_rate > 0.:
        out = F.dropout(out, p=self.drop_rate, training=self.training)

    out = self.conv2(out)
    out = self.bn2(out)
    out = self.relu(out)

    out = self.conv3(out)
    out = self.bn3(out)

    if self.downsample is not None:
        residual = self.downsample(x)

    out += residual
    out = self.relu(out)

    return out

def forward(self, x):
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.maxpool(x)

    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)

    x = self.global_pool(x)
    x = x.view(x.size(0), -1)
    if self.drop_rate > 0.:
        x = F.dropout(x, p=self.drop_rate, training=self.training)
    x = self.fc(x)
    return x

def forward(self, inp):
    """
    :param inp: torch.FloatTensor (batch_size x inp_size)

    :return: torch.FloatTensor (batch_size x nb_classes)
    """
    # hidden layers
    for layer in self.layers:
        out = layer(inp)
        if self.act is not None:
            out = getattr(F, self.act)(out)
        if self.dropout > 0:
            out = F.dropout(out, p=self.dropout, training=self.training)
        inp = out
    # output projection
    out = self.output(out)
    return out

def __init__(self, cell, num_layers, in_dim, hid_dim, dropout=0.0, **kwargs):
    """
    cell: str or custom cell class
    """
    super(BaseStackedRNN, self).__init__()
    self.in_dim = in_dim
    self.hid_dim = hid_dim
    self.has_dropout = False
    if dropout:
        self.has_dropout = True
        self.dropout = nn.Dropout(dropout)
    self.num_layers = num_layers
    self.layers = nn.ModuleList()

    if isinstance(cell, str):
        cell = getattr(nn, cell)
    for i in range(num_layers):
        self.layers.append(cell(in_dim, hid_dim, **kwargs))
        in_dim = hid_dim

def forward(self, x_in):
    out = F.relu(F.max_pool3d(self.conv(x_in), (1, self.max_document_length, 1)))
    out = out.view(out.size(0), -1)
    out = F.relu(self.fc1(out))
    out = F.dropout(out, training=self.training)
    out = self.fc2(out)
    return F.log_softmax(out)

def forward(self, x):
    x = F.relu(F.max_pool2d(self.conv1(x), 2))
    x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
    x = x.view(-1, 320)
    x = F.relu(self.fc1(x))
    x = F.dropout(x, training=self.training)
    x = self.fc2(x)
    return F.log_softmax(x)

def forward(self, x):
    x = F.relu(F.max_pool2d(self.conv1(x), 2))
    x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
    x = x.view(-1, 320)

    # Register a backward hook
    x.register_hook(myGradientHook)

    x = F.relu(self.fc1(x))
    x = F.dropout(x, training=self.training)
    x = self.fc2(x)
    return F.log_softmax(x)

def forward(self, x):
    x = F.elu(F.max_pool2d(self.conv1(x), 2))
    x = F.elu(F.max_pool2d(self.bn2(self.conv2(x)), 2))
    x = F.elu(F.max_pool2d(self.bn3(self.conv3(x)), 2))
    x = F.elu(F.max_pool2d(self.bn4(self.conv4(x)), 2))
    x = x.view(-1, 750)
    x = F.relu(self.fc1(x))
    x = F.dropout(x, training=self.training)
    x = self.fc2(x)
    return F.log_softmax(x)

def forward(self, x):
    x = F.dropout(x, training=self.training)
    x = self.conv(x)
    x = self.avgpool(x)
    x = F.log_softmax(x)
    x = x.squeeze(dim=3).squeeze(dim=2)
    return x

def __init__(self, in_channels, out_channels, bottleneck, p):
    """
    Initialize the different parts of the SubBlock.

    Params
    ------
    - in_channels: number of input channels in the convolution.
    - out_channels: number of output channels in the convolution.
    - bottleneck: if true, applies the bottleneck variant of H(x).
    - p: if greater than 0, applies dropout after the convolution.
    """
    super(SubBlock, self).__init__()
    self.bottleneck = bottleneck
    self.p = p

    in_channels_2 = in_channels
    out_channels_2 = out_channels

    if bottleneck:
        in_channels_1 = in_channels
        out_channels_1 = out_channels * 4
        in_channels_2 = out_channels_1

        self.bn1 = nn.BatchNorm2d(in_channels_1)
        self.conv1 = nn.Conv2d(in_channels_1, out_channels_1, kernel_size=1)

    self.bn2 = nn.BatchNorm2d(in_channels_2)
    self.conv2 = nn.Conv2d(in_channels_2, out_channels_2, kernel_size=3, padding=1)

def forward(self, x):
    out = self.pool(self.conv(F.relu(self.bn(x))))
    if self.p > 0:
        out = F.dropout(out, p=self.p, training=self.training)
    return out

def __init__(self, args):
    super().__init__()
    for k, v in args.__dict__.items():
        self.__setattr__(k, v)

    self.num_directions = 2 if self.bidirectional else 1

    self.lookup_table = nn.Embedding(self.vocab_size, self.embed_dim)
    self.lstm = nn.LSTM(self.embed_dim,
                        self.hidden_size,
                        self.lstm_layers,
                        batch_first=True,
                        dropout=self.dropout,
                        bidirectional=self.bidirectional)

    self.lr = nn.Linear(self.hidden_size * self.num_directions, self.vocab_size)
    self._init_weights()

def forward(self, input, hidden):
    encode = self.lookup_table(input)
    lstm_out, hidden = self.lstm(encode, hidden)
    lstm_out = F.dropout(lstm_out, p=self.dropout)
    out = self.lr(lstm_out.contiguous().view(-1, lstm_out.size(2)))
    return F.log_softmax(out), hidden

def __init__(self, d_k, dropout):
    super().__init__()
    self.temper = np.power(d_k, 0.5)
    self.dropout = nn.Dropout(dropout)
    self.softmax = nn.Softmax()

def forward(self, q, k, v, attn_mask):
    attn = torch.bmm(q, k.transpose(1, 2)) / self.temper

    attn.data.masked_fill_(attn_mask, -float('inf'))

    attn = self.softmax(attn.view(-1, attn.size(2))).view(*attn.size())
    attn = self.dropout(attn)
    return torch.bmm(attn, v)

def __init__(self, n_head, d_model, dropout):
    super().__init__()
    self.n_head = n_head
    self.d_v = self.d_k = d_k = d_model // n_head

    for name in ["w_qs", "w_ks", "w_vs"]:
        self.__setattr__(name,
                         nn.Parameter(torch.FloatTensor(n_head, d_model, d_k)))

    self.attention = ScaledDotProductAttention(d_k, dropout)
    self.lm = LayerNorm(d_model)

    self.w_o = nn.Linear(d_model, d_model, bias=False)
    self.dropout = dropout

    self._init_weight()

def __init__(self, d_model, d_ff, dropout):
    super().__init__()
    self.seq = nn.Sequential(
        nn.Conv1d(d_model, d_ff, 1),
        nn.ReLU(),
        nn.Conv1d(d_ff, d_model, 1),
        nn.Dropout(dropout))

    self.lm = LayerNorm(d_model)

def __init__(self, d_model, d_ff, n_head, dropout):
    super().__init__()
    self.mh = MultiHeadAtt(n_head, d_model, dropout)
    self.pw = PositionWise(d_model, d_ff, dropout)

def __init__(self, d_model, d_ff, n_head, dropout=0.1):
    super().__init__()
    self.slf_mh = MultiHeadAtt(n_head, d_model, dropout)
    self.dec_mh = MultiHeadAtt(n_head, d_model, dropout)
    self.pw = PositionWise(d_model, d_ff, dropout)