The following 37 code examples, extracted from open-source Python projects, illustrate how to use torch.nn.init.xavier_normal().
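Before the project examples, here is a minimal, self-contained usage sketch (an illustration, not taken from any of the projects below; it assumes a PyTorch version that still exposes this spelling, since newer releases deprecate it in favour of the in-place-named torch.nn.init.xavier_normal_):

    import math

    import torch
    import torch.nn as nn
    import torch.nn.init as init

    # Create a linear layer and fill its weight in place with values drawn from
    # N(0, std^2), where std = gain * sqrt(2 / (fan_in + fan_out)).
    linear = nn.Linear(128, 64)
    init.xavier_normal(linear.weight, gain=1.0)

    expected_std = 1.0 * math.sqrt(2.0 / (128 + 64))
    print(linear.weight.std().item(), expected_std)  # the two values should be close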
def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1):
    super(MultiHeadAttention, self).__init__()
    self.n_head = n_head
    self.d_k = d_k
    self.d_v = d_v

    self.w_qs = nn.Parameter(torch.FloatTensor(n_head, d_model, d_k))
    self.w_ks = nn.Parameter(torch.FloatTensor(n_head, d_model, d_k))
    self.w_vs = nn.Parameter(torch.FloatTensor(n_head, d_model, d_v))

    self.attention = ScaledDotProductAttention(d_model)
    self.layer_norm = LayerNormalization(d_model)
    self.proj = Linear(n_head * d_v, d_model)
    self.dropout = nn.Dropout(dropout)

    init.xavier_normal(self.w_qs)
    init.xavier_normal(self.w_ks)
    init.xavier_normal(self.w_vs)
def test_xavier_normal(self):
    for as_variable in [True, False]:
        for use_gain in [True, False]:
            for dims in [2, 4]:
                input_tensor = self._create_random_nd_tensor(dims, size_min=20, size_max=25,
                                                             as_variable=as_variable)
                gain = 1

                if use_gain:
                    gain = self._random_float(0.1, 2)
                    init.xavier_normal(input_tensor, gain=gain)
                else:
                    init.xavier_normal(input_tensor)

                if as_variable:
                    input_tensor = input_tensor.data

                fan_in = input_tensor.size(1)
                fan_out = input_tensor.size(0)
                if input_tensor.dim() > 2:
                    fan_in *= input_tensor[0, 0].numel()
                    fan_out *= input_tensor[0, 0].numel()

                expected_std = gain * math.sqrt(2.0 / (fan_in + fan_out))
                assert self._is_normal(input_tensor, 0, expected_std)
def reset_parameters(self) -> None:
    # Because we are doing so many torch.bmm calls, which is fast but unstable,
    # it is critically important to initialise the parameters correctly such
    # that these matrix multiplications are well conditioned initially.
    # Without this initialisation, this (non-deterministically) produces
    # NaNs and overflows.
    init.xavier_normal(self._query_projections)
    init.xavier_normal(self._key_projections)
    init.xavier_normal(self._value_projections)
def _init_weight(self):
    init.xavier_normal(self.w_qs)
    init.xavier_normal(self.w_ks)
    init.xavier_normal(self.w_vs)
    init.xavier_normal(self.w_o.weight)
def _init_weight(self):
    if self.share_linear:
        self.linear.weight = self.dec.dec_ebd.weight
    else:
        init.xavier_normal(self.linear.weight)
def _init_weight(self):
    init.xavier_normal(self._enc_mu.weight)
    init.xavier_normal(self._enc_log_sigma.weight)
def _init_weight(self):
    stdv = 1. / math.sqrt(self.hsz)
    self.gate.weight.data.uniform_(-stdv, stdv)
    self.gate.bias.data.fill_(-1)
    if active.__name__ == "relu":
        init.xavier_normal(self.h.weight)
    else:
        self.h.weight.data.uniform_(-stdv, stdv)
def reset_parameters(self):
    I.normal(self.embeddings.weight.data, mean=0, std=0.01)
    I.xavier_normal(self.W_i.weight.data)
    I.xavier_normal(self.W_o.weight.data)
    init_rnn_cell(self.encoder)
    for i in range(self.n_decoders):
        decoder = getattr(self, "decoder{}".format(i))
        init_rnn_cell(decoder)
def __init__(self, d_in, d_out, bias=True):
    super(Linear, self).__init__()
    self.linear = nn.Linear(d_in, d_out, bias=bias)
    init.xavier_normal(self.linear.weight)
def _init_weights(self):
    for m in self.modules():
        if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
            nnInit.xavier_normal(m.weight)
            if m.bias is not None:
                m.bias.data.zero_()
def _init_weights(self):
    for m in self.modules():
        if isinstance(m, nn.Linear):
            nnInit.xavier_normal(m.weight)
            if m.bias is not None:
                m.bias.data.zero_()
def __init__(self, input_size, hidden_size):
    super(AttentionGRUCell, self).__init__()
    self.hidden_size = hidden_size
    self.Wr = nn.Linear(input_size, hidden_size)
    init.xavier_normal(self.Wr.state_dict()['weight'])
    self.Ur = nn.Linear(hidden_size, hidden_size)
    init.xavier_normal(self.Ur.state_dict()['weight'])
    self.W = nn.Linear(input_size, hidden_size)
    init.xavier_normal(self.W.state_dict()['weight'])
    self.U = nn.Linear(hidden_size, hidden_size)
    init.xavier_normal(self.U.state_dict()['weight'])
def __init__(self, hidden_size):
    super(EpisodicMemory, self).__init__()
    self.AGRU = AttentionGRU(hidden_size, hidden_size)
    self.z1 = nn.Linear(4 * hidden_size, hidden_size)
    self.z2 = nn.Linear(hidden_size, 1)
    self.next_mem = nn.Linear(3 * hidden_size, hidden_size)
    init.xavier_normal(self.z1.state_dict()['weight'])
    init.xavier_normal(self.z2.state_dict()['weight'])
    init.xavier_normal(self.next_mem.state_dict()['weight'])
def __init__(self, vocab_size, hidden_size):
    super(InputModule, self).__init__()
    self.hidden_size = hidden_size
    self.gru = nn.GRU(hidden_size, hidden_size, bidirectional=True, batch_first=True)
    for name, param in self.gru.state_dict().items():
        if 'weight' in name:
            init.xavier_normal(param)
    self.dropout = nn.Dropout(0.1)
def __init__(self, vocab_size, hidden_size):
    super(AnswerModule, self).__init__()
    self.z = nn.Linear(2 * hidden_size, vocab_size)
    init.xavier_normal(self.z.state_dict()['weight'])
    self.dropout = nn.Dropout(0.1)
def __init__(self, args):
    super(BiLSTM_1, self).__init__()
    self.args = args
    self.hidden_dim = args.lstm_hidden_dim
    self.num_layers = args.lstm_num_layers
    V = args.embed_num
    D = args.embed_dim
    C = args.class_num
    self.dropout = nn.Dropout(args.dropout)
    self.dropout_embed = nn.Dropout(args.dropout_embed)
    if args.max_norm is not None:
        print("max_norm = {} ".format(args.max_norm))
        self.embed = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
    else:
        print("max_norm = {} |||||".format(args.max_norm))
        self.embed = nn.Embedding(V, D, scale_grad_by_freq=True)
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
    self.bilstm = nn.LSTM(D, self.hidden_dim, num_layers=self.num_layers, bias=True,
                          bidirectional=True, dropout=self.args.dropout)
    print(self.bilstm)
    if args.init_weight:
        print("Initing W .......")
        init.xavier_normal(self.bilstm.all_weights[0][0], gain=np.sqrt(args.init_weight_value))
        init.xavier_normal(self.bilstm.all_weights[0][1], gain=np.sqrt(args.init_weight_value))
        init.xavier_normal(self.bilstm.all_weights[1][0], gain=np.sqrt(args.init_weight_value))
        init.xavier_normal(self.bilstm.all_weights[1][1], gain=np.sqrt(args.init_weight_value))
    self.hidden2label = nn.Linear(self.hidden_dim * 2, C)
    self.hidden = self.init_hidden(self.num_layers, args.batch_size)
    print("self.hidden", self.hidden)
def __init__(self, args):
    super(LSTM, self).__init__()
    self.args = args
    # print(args)
    self.hidden_dim = args.lstm_hidden_dim
    self.num_layers = args.lstm_num_layers
    V = args.embed_num
    D = args.embed_dim
    C = args.class_num
    if args.max_norm is not None:
        print("max_norm = {} ".format(args.max_norm))
        self.embed = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
    else:
        print("max_norm = {} |||||".format(args.max_norm))
        self.embed = nn.Embedding(V, D, scale_grad_by_freq=True)
    # word embedding
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
    # lstm
    self.lstm = nn.LSTM(D, self.hidden_dim, dropout=args.dropout, num_layers=self.num_layers)
    if args.init_weight:
        print("Initing W .......")
        # n = self.lstm.input_size * self.lstm
        init.xavier_normal(self.lstm.all_weights[0][0], gain=np.sqrt(args.init_weight_value))
        init.xavier_normal(self.lstm.all_weights[0][1], gain=np.sqrt(args.init_weight_value))
    # linear
    self.hidden2label = nn.Linear(self.hidden_dim, C)
    # hidden
    self.hidden = self.init_hidden(self.num_layers, args.batch_size)
    # dropout
    self.dropout = nn.Dropout(args.dropout)
    self.dropout_embed = nn.Dropout(args.dropout_embed)
def weight_init(m):
    if isinstance(m, nn.Conv2d):
        init.xavier_normal(m.weight)
        init.constant(m.bias, 0)
def test_xavier_normal_errors_on_inputs_smaller_than_2d(self):
    for as_variable in [True, False]:
        for dims in [0, 1]:
            tensor = self._create_random_nd_tensor(dims, size_min=1, size_max=1,
                                                   as_variable=as_variable)
            with self.assertRaises(ValueError):
                init.xavier_normal(tensor)
def xavier_normal(w, gain=1):
    # Thin wrapper around the library initializer.
    return nn.init.xavier_normal(w, gain=gain)
def xavier_init(net):
    '''Init layer parameters.'''
    for m in net.modules():
        if isinstance(m, nn.Conv2d):
            init.xavier_normal(m.weight)
            if m.bias is not None:
                init.constant(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            init.constant(m.weight, 1)
            init.constant(m.bias, 0)
        elif isinstance(m, nn.Linear):
            init.normal(m.weight, std=1e-3)
            if m.bias is not None:
                init.constant(m.bias, 0)
def initWeights(net, scheme='orthogonal'):
    print('Initializing weights. Warning: may overwrite sensitive bias parameters (e.g. batchnorm)')
    for e in net.parameters():
        if scheme == 'orthogonal':
            if len(e.size()) >= 2:
                init.orthogonal(e)
        elif scheme == 'normal':
            init.normal(e, std=1e-2)
        elif scheme == 'xavier':
            init.xavier_normal(e)
def weights_init_xavier(m):
    classname = m.__class__.__name__
    # print(classname)
    if classname.find('Conv') != -1:
        init.xavier_normal(m.weight.data, gain=0.02)
    elif classname.find('Linear') != -1:
        init.xavier_normal(m.weight.data, gain=0.02)
    elif classname.find('BatchNorm2d') != -1:
        init.normal(m.weight.data, 1.0, 0.02)
        init.constant(m.bias.data, 0.0)
def reset_parameters(self):
    for i in range(self.n_feats):
        embeddings = getattr(self, "embeddings_{}".format(i))
        I.xavier_normal(embeddings.weight.data)
    I.xavier_normal(self.input_layer.weight.data)
    I.xavier_normal(self.output_layer.weight.data)
    self.crf.reset_parameters()
    self.lstm.reset_parameters()
def _initialize_weights(self, layer):
    init.xavier_normal(layer.weight)
def __init__(self):
    super(C3D, self).__init__()
    self.group1 = nn.Sequential(
        nn.Conv3d(3, 64, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2)))
    # init.xavier_normal(self.group1.state_dict()['weight'])
    self.group2 = nn.Sequential(
        nn.Conv3d(64, 128, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2)))
    # init.xavier_normal(self.group2.state_dict()['weight'])
    self.group3 = nn.Sequential(
        nn.Conv3d(128, 256, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Conv3d(256, 256, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2)))
    # init.xavier_normal(self.group3.state_dict()['weight'])
    self.group4 = nn.Sequential(
        nn.Conv3d(256, 512, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Conv3d(512, 512, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2)))
    # init.xavier_normal(self.group4.state_dict()['weight'])
    self.group5 = nn.Sequential(
        nn.Conv3d(512, 512, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Conv3d(512, 512, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2)))
    # init.xavier_normal(self.group5.state_dict()['weight'])

    self.fc1 = nn.Sequential(
        nn.Linear(512 * 3 * 3, 2048),
        # nn.ReLU(),
        nn.Dropout(0.5))
    # init.xavier_normal(self.fc1.state_dict()['weight'])
    self.fc2 = nn.Sequential(
        nn.Linear(2048, 2048),
        nn.ReLU(),
        nn.Dropout(0.5))
    # init.xavier_normal(self.fc2.state_dict()['weight'])
    self.fc3 = nn.Sequential(
        nn.Linear(2048, 32))  # 101

    self._features = nn.Sequential(
        self.group1,
        self.group2,
        self.group3,
        self.group4,
        self.group5
    )

    self._classifier = nn.Sequential(
        self.fc1,
        self.fc2
    )
def __init__(self, args):
    super(DEEP_CNN_MUI, self).__init__()
    self.args = args
    V = args.embed_num
    V_mui = args.embed_num_mui
    D = args.embed_dim
    C = args.class_num
    Ci = 2
    Co = args.kernel_num
    Ks = args.kernel_sizes
    if args.max_norm is not None:
        print("max_norm = {} ".format(args.max_norm))
        self.embed_no_static = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
        self.embed_static = nn.Embedding(V_mui, D, max_norm=args.max_norm, scale_grad_by_freq=True)
    else:
        print("max_norm = {} ".format(args.max_norm))
        self.embed_no_static = nn.Embedding(V, D, scale_grad_by_freq=True)
        self.embed_static = nn.Embedding(V_mui, D, scale_grad_by_freq=True)
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed_no_static.weight.data.copy_(torch.from_numpy(pretrained_weight))
        pretrained_weight_static = np.array(args.pretrained_weight_static)
        self.embed_static.weight.data.copy_(torch.from_numpy(pretrained_weight_static))
    # whether to fixed the word embedding
    self.embed_no_static.weight.requires_grad = True
    # cons layer
    self.convs1 = [nn.Conv2d(Ci, D, (K, D), stride=1, padding=(K//2, 0), bias=True) for K in Ks]
    self.convs2 = [nn.Conv2d(1, Co, (K, D), stride=1, padding=(K//2, 0), bias=True) for K in Ks]
    print(self.convs1)
    print(self.convs2)
    if args.init_weight:
        print("Initing W .......")
        for (conv1, conv2) in zip(self.convs1, self.convs2):
            init.xavier_normal(conv1.weight.data, gain=np.sqrt(args.init_weight_value))
            init.uniform(conv1.bias, 0, 0)
            init.xavier_normal(conv2.weight.data, gain=np.sqrt(args.init_weight_value))
            init.uniform(conv2.bias, 0, 0)
    # dropout
    self.dropout = nn.Dropout(args.dropout)
    # linear
    in_fea = len(Ks) * Co
    self.fc1 = nn.Linear(in_features=in_fea, out_features=in_fea // 2, bias=True)
    self.fc2 = nn.Linear(in_features=in_fea // 2, out_features=C, bias=True)
def __init__(self, args):
    super(DEEP_CNN, self).__init__()
    self.args = args
    V = args.embed_num
    D = args.embed_dim
    C = args.class_num
    Ci = 1
    Co = args.kernel_num
    Ks = args.kernel_sizes
    if args.max_norm is not None:
        print("max_norm = {} ".format(args.max_norm))
        self.embed = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
        # self.embed.weight.data.uniform(-0.1, 0.1)
    else:
        print("max_norm = {} ".format(args.max_norm))
        self.embed = nn.Embedding(V, D, scale_grad_by_freq=True)
    # word embedding
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
    # fixed the word embedding
    self.embed.weight.requires_grad = True
    # cons layer
    self.convs1 = [nn.Conv2d(Ci, D, (K, D), stride=1, padding=(K//2, 0), bias=True) for K in Ks]
    self.convs2 = [nn.Conv2d(Ci, Co, (K, D), stride=1, padding=(K//2, 0), bias=True) for K in Ks]
    print(self.convs1)
    print(self.convs2)
    if args.init_weight:
        print("Initing W .......")
        for (conv1, conv2) in zip(self.convs1, self.convs2):
            init.xavier_normal(conv1.weight.data, gain=np.sqrt(args.init_weight_value))
            init.uniform(conv1.bias, 0, 0)
            init.xavier_normal(conv2.weight.data, gain=np.sqrt(args.init_weight_value))
            init.uniform(conv2.bias, 0, 0)
    # dropout
    self.dropout = nn.Dropout(args.dropout)
    # linear
    in_fea = len(Ks) * Co
    self.fc1 = nn.Linear(in_features=in_fea, out_features=in_fea // 2, bias=True)
    self.fc2 = nn.Linear(in_features=in_fea // 2, out_features=C, bias=True)
def make_initializer(
        linear={'type': 'uniform', 'args': {'a': -0.05, 'b': 0.05}},
        linear_bias={'type': 'constant', 'args': {'val': 0.}},
        rnn={'type': 'xavier_uniform', 'args': {'gain': 1.}},
        rnn_bias={'type': 'constant', 'args': {'val': 0.}},
        cnn_bias={'type': 'constant', 'args': {'val': 0.}},
        emb={'type': 'normal', 'args': {'mean': 0, 'std': 1}},
        default={'type': 'uniform', 'args': {'a': -0.05, 'b': 0.05}}):

    rnns = (torch.nn.LSTM, torch.nn.GRU, torch.nn.LSTMCell, torch.nn.GRUCell,
            StackedGRU, StackedLSTM, NormalizedGRU,
            NormalizedGRUCell, StackedNormalizedGRU)

    convs = (torch.nn.Conv1d, torch.nn.Conv2d)

    def initializer(m):
        if isinstance(m, (rnns)):  # RNNs
            for p_name, p in m.named_parameters():
                if hasattr(p, 'custom'):
                    continue
                if is_bias(p_name):
                    getattr(init, rnn_bias['type'])(p, **rnn_bias['args'])
                else:
                    getattr(init, rnn['type'])(p, **rnn['args'])
        elif isinstance(m, torch.nn.Linear):  # linear
            for p_name, p in m.named_parameters():
                if hasattr(p, 'custom'):
                    continue
                if is_bias(p_name):
                    getattr(init, linear_bias['type'])(p, **linear_bias['args'])
                else:
                    getattr(init, linear['type'])(p, **linear['args'])
        elif isinstance(m, torch.nn.Embedding):  # embedding
            for p in m.parameters():
                if hasattr(p, 'custom'):
                    continue
                getattr(init, emb['type'])(p, **emb['args'])
        elif isinstance(m, convs):
            for p_name, p in m.named_parameters():
                if hasattr(p, 'custom'):
                    continue
                if is_bias(p_name):
                    getattr(init, cnn_bias['type'])(p, **cnn_bias['args'])
                else:
                    # Karpathy: http://cs231n.github.io/neural-networks-2/#init
                    # -> scale weight vector by square root of its fan-in...
                    # fan_in, _ = init._calculate_fan_in_and_fan_out(p)
                    # init.normal(p, mean=0, std=math.sqrt(fan_in))
                    init.xavier_normal(p)

    return initializer