We extracted the following 24 code examples from open-source Python projects to illustrate how to use torch.nn.init.orthogonal().
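Before the project examples, here is a minimal sketch of the call itself (the layer sizes below are illustrative, not taken from any of the projects): orthogonal initialization of a 2-D weight matrix, optionally scaled by a gain.

import torch.nn as nn
import torch.nn.init as init

# Illustrative layer; any module with a 2-D (or flattenable) weight works.
linear = nn.Linear(128, 64)

# Fill the weight with a (scaled) orthogonal matrix.
init.orthogonal(linear.weight, gain=init.calculate_gain('relu'))

# Biases are typically zeroed separately.
init.constant(linear.bias, 0)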
def reset_parameters(self):
    """
    Initialize parameters following the way proposed in the paper.
    """
    # The input-to-hidden weight matrix is initialized orthogonally.
    init.orthogonal(self.weight_ih.data)
    # The hidden-to-hidden weight matrix is initialized as an identity
    # matrix.
    weight_hh_data = torch.eye(self.hidden_size)
    weight_hh_data = weight_hh_data.repeat(1, 4)
    self.weight_hh.data.set_(weight_hh_data)
    # The bias is just set to zero vectors.
    init.constant(self.bias.data, val=0)
    # Initialization of BN parameters.
    self.bn_ih.reset_parameters()
    self.bn_hh.reset_parameters()
    self.bn_c.reset_parameters()
    self.bn_ih.bias.data.fill_(0)
    self.bn_hh.bias.data.fill_(0)
    self.bn_ih.weight.data.fill_(0.1)
    self.bn_hh.weight.data.fill_(0.1)
    self.bn_c.weight.data.fill_(0.1)
def __init__(self, vocab_dict, dropout_rate, embed_dim, hidden_dim,
             bidirectional=True):
    super(AoAReader, self).__init__()
    self.vocab_dict = vocab_dict
    self.hidden_dim = hidden_dim
    self.embed_dim = embed_dim
    self.dropout_rate = dropout_rate

    self.embedding = nn.Embedding(vocab_dict.size(), self.embed_dim,
                                  padding_idx=Constants.PAD)
    self.embedding.weight.data.uniform_(-0.05, 0.05)

    input_size = self.embed_dim
    self.gru = nn.GRU(input_size, hidden_size=self.hidden_dim,
                      dropout=dropout_rate,
                      bidirectional=bidirectional, batch_first=True)

    # try independent gru
    #self.query_gru = nn.GRU(input_size, hidden_size=self.hidden_dim,
    #                        dropout=dropout_rate,
    #                        bidirectional=bidirectional, batch_first=True)

    for weight in self.gru.parameters():
        if len(weight.size()) > 1:
            weigth_init.orthogonal(weight.data)
def reset_parameters(self):
    if self.use_leaf_rnn:
        init.kaiming_normal(self.leaf_rnn_cell.weight_ih.data)
        init.orthogonal(self.leaf_rnn_cell.weight_hh.data)
        init.constant(self.leaf_rnn_cell.bias_ih.data, val=0)
        init.constant(self.leaf_rnn_cell.bias_hh.data, val=0)
        # Set forget bias to 1
        self.leaf_rnn_cell.bias_ih.data.chunk(4)[1].fill_(1)
        if self.bidirectional:
            init.kaiming_normal(self.leaf_rnn_cell_bw.weight_ih.data)
            init.orthogonal(self.leaf_rnn_cell_bw.weight_hh.data)
            init.constant(self.leaf_rnn_cell_bw.bias_ih.data, val=0)
            init.constant(self.leaf_rnn_cell_bw.bias_hh.data, val=0)
            # Set forget bias to 1
            self.leaf_rnn_cell_bw.bias_ih.data.chunk(4)[1].fill_(1)
    else:
        init.kaiming_normal(self.word_linear.weight.data)
        init.constant(self.word_linear.bias.data, val=0)
    self.treelstm_layer.reset_parameters()
    init.normal(self.comp_query.data, mean=0, std=0.01)
def _initialize_weights(self):
    init.orthogonal(self.conv1.weight, init.calculate_gain('relu'))
    init.orthogonal(self.conv2.weight, init.calculate_gain('relu'))
    init.orthogonal(self.conv3.weight, init.calculate_gain('relu'))
    init.orthogonal(self.conv4.weight)
def __init__(self, observation_space, non_rgb_rgb_state_size, action_space,
             hidden_size):
    super(ActorCritic, self).__init__()
    self.rgb_state_size = (6, 128, 128)
    self.action_size = 5
    self.relu = nn.ReLU(inplace=True)
    self.softmax = nn.Softmax()

    # the architecture is adapted from Sim2Real (Rusu et al., 2016)
    self.conv1 = nn.Conv2d(
        self.rgb_state_size[0], 16, 8, stride=4, padding=1)
    self.conv2 = nn.Conv2d(16, 32, 5, stride=2)
    self.fc1 = nn.Linear(1152 + non_rgb_rgb_state_size, hidden_size)
    self.lstm = nn.LSTMCell(hidden_size, hidden_size)
    self.fc_actor1 = nn.Linear(hidden_size, self.action_size)
    self.fc_actor2 = nn.Linear(hidden_size, self.action_size)
    self.fc_actor3 = nn.Linear(hidden_size, self.action_size)
    self.fc_actor4 = nn.Linear(hidden_size, self.action_size)
    self.fc_actor5 = nn.Linear(hidden_size, self.action_size)
    self.fc_actor6 = nn.Linear(hidden_size, self.action_size)
    self.fc_critic = nn.Linear(hidden_size, 1)

    # Orthogonal weight initialisation
    for name, p in self.named_parameters():
        if 'weight' in name:
            init.orthogonal(p)
        elif 'bias' in name:
            init.constant(p, 0)
def __init__(self, orthogonal_gain=1.):
    super(OrthogonalWeightsZeroBias, self)\
        .__init__(weight_initializer=partial(init.orthogonal,
                                             gain=orthogonal_gain),
                  bias_initializer=Constant(0.))
def init_gru(cell, gain=1):
    cell.reset_parameters()

    # orthogonal initialization of recurrent weights
    for _, hh, _, _ in cell.all_weights:
        for i in range(0, hh.size(0), cell.hidden_size):
            I.orthogonal(hh[i:i + cell.hidden_size], gain=gain)
def reset_parameters(self):
    """
    Initialize parameters following the way proposed in the paper.
    """
    init.orthogonal(self.weight_ih.data)
    weight_hh_data = torch.eye(self.hidden_size)
    weight_hh_data = weight_hh_data.repeat(1, 4)
    self.weight_hh.data.set_(weight_hh_data)
    # The bias is just set to zero vectors.
    if self.use_bias:
        init.constant(self.bias.data, val=0)
def __init__(self, hidden_size):
    super(ActorCritic, self).__init__()
    self.state_size = STATE_SIZE[0] * STATE_SIZE[1] * STATE_SIZE[2]
    self.elu = nn.ELU(inplace=True)
    self.softmax = nn.Softmax()
    self.sigmoid = nn.Sigmoid()

    # Pass state into model body
    self.conv1 = nn.Conv2d(STATE_SIZE[0], 32, 4, stride=2)
    self.conv2 = nn.Conv2d(32, 32, 3)
    self.fc1 = nn.Linear(1152, hidden_size)
    # Pass previous action, reward and timestep directly into LSTM
    self.lstm = nn.LSTMCell(hidden_size + ACTION_SIZE + 2, hidden_size)
    self.fc_actor1 = nn.Linear(hidden_size, ACTION_SIZE)
    self.fc_critic1 = nn.Linear(hidden_size, ACTION_SIZE)
    self.fc_actor2 = nn.Linear(hidden_size, ACTION_SIZE)
    self.fc_critic2 = nn.Linear(hidden_size, ACTION_SIZE)
    self.fc_class = nn.Linear(hidden_size, 1)

    # Orthogonal weight initialisation
    for name, p in self.named_parameters():
        if 'weight' in name:
            init.orthogonal(p)
        elif 'bias' in name:
            init.constant(p, 0)
    # Set LSTM forget gate bias to 1
    for name, p in self.lstm.named_parameters():
        if 'bias' in name:
            n = p.size(0)
            forget_start_idx, forget_end_idx = n // 4, n // 2
            init.constant(p[forget_start_idx:forget_end_idx], 1)
def test_orthogonal(self):
    for as_variable in [True, False]:
        for use_gain in [True, False]:
            for tensor_size in [[3, 4], [4, 3], [20, 2, 3, 4], [2, 3, 4, 5]]:
                input_tensor = torch.zeros(tensor_size)
                gain = 1.0

                if as_variable:
                    input_tensor = Variable(input_tensor)

                if use_gain:
                    gain = self._random_float(0.1, 2)
                    init.orthogonal(input_tensor, gain=gain)
                else:
                    init.orthogonal(input_tensor)

                if as_variable:
                    input_tensor = input_tensor.data

                rows, cols = tensor_size[0], reduce(mul, tensor_size[1:])
                flattened_tensor = input_tensor.view(rows, cols)
                if rows > cols:
                    self.assertEqual(
                        torch.mm(flattened_tensor.t(), flattened_tensor),
                        torch.eye(cols) * gain ** 2, prec=1e-6)
                else:
                    self.assertEqual(
                        torch.mm(flattened_tensor, flattened_tensor.t()),
                        torch.eye(rows) * gain ** 2, prec=1e-6)
def orthogonal(w, gain=1):
    return nn.orthogonal(w, gain=gain)
def initWeights(net, scheme='orthogonal'):
    print('Initializing weights. Warning: may overwrite sensitive bias parameters (e.g. batchnorm)')
    for e in net.parameters():
        if scheme == 'orthogonal':
            if len(e.size()) >= 2:
                init.orthogonal(e)
        elif scheme == 'normal':
            init.normal(e, std=1e-2)
        elif scheme == 'xavier':
            init.xavier_normal(e)
def weights_init_orthogonal(m):
    classname = m.__class__.__name__
    print(classname)
    if classname.find('Conv') != -1:
        init.orthogonal(m.weight.data, gain=1)
    elif classname.find('Linear') != -1:
        init.orthogonal(m.weight.data, gain=1)
    elif classname.find('BatchNorm2d') != -1:
        init.normal(m.weight.data, 1.0, 0.02)
        init.constant(m.bias.data, 0.0)
def init_weights(net, init_type='normal'):
    print('initialization method [%s]' % init_type)
    if init_type == 'normal':
        net.apply(weights_init_normal)
    elif init_type == 'xavier':
        net.apply(weights_init_xavier)
    elif init_type == 'kaiming':
        net.apply(weights_init_kaiming)
    elif init_type == 'orthogonal':
        net.apply(weights_init_orthogonal)
    else:
        raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
def reset_parameters(self):
    """
    Initialize parameters TO DO
    """
    init.uniform(self.thetaA, a=-0.1, b=0.1)
    init.uniform(self.thetaB, a=-0.1, b=0.1)
    init.uniform(self.U, a=-0.1, b=0.1)
    init.orthogonal(self.gate_U.data)
    gate_W_data = torch.eye(self.hidden_size)
    gate_W_data = gate_W_data.repeat(1, 2)
    self.gate_W.data.set_(gate_W_data)
    init.constant(self.bias.data, val=0)
    init.constant(self.gate_bias.data, val=0)
def test_orthogonal(self):
    for as_variable in [True, False]:
        for use_gain in [True, False]:
            for tensor_size in [[3, 4], [4, 3], [20, 2, 3, 4], [2, 3, 4, 5]]:
                input_tensor = torch.zeros(tensor_size)
                gain = 1.0

                if as_variable:
                    input_tensor = Variable(input_tensor)

                if use_gain:
                    gain = self._random_float(0.1, 2)
                    init.orthogonal(input_tensor, gain=gain)
                else:
                    init.orthogonal(input_tensor)

                if as_variable:
                    input_tensor = input_tensor.data

                rows, cols = tensor_size[0], reduce(mul, tensor_size[1:])
                flattened_tensor = input_tensor.view(rows, cols)
                if rows > cols:
                    self.assertEqual(
                        torch.mm(flattened_tensor.t(), flattened_tensor),
                        torch.eye(cols) * gain ** 2, prec=1e-6)
                else:
                    self.assertEqual(
                        torch.mm(flattened_tensor, flattened_tensor.t()),
                        torch.eye(rows) * gain ** 2, prec=1e-6)

# Generates rand tensor with non-equal values. This ensures that duplicate
# values won't be causing test failure for modules like MaxPooling.
# size should be small, otherwise randperm fails / long overflows.
def __init__(self, frame_size, n_frame_samples, n_rnn, dim,
             learn_h0, weight_norm):
    super().__init__()

    self.frame_size = frame_size
    self.n_frame_samples = n_frame_samples
    self.dim = dim

    h0 = torch.zeros(n_rnn, dim)
    if learn_h0:
        self.h0 = torch.nn.Parameter(h0)
    else:
        self.register_buffer('h0', torch.autograd.Variable(h0))

    self.input_expand = torch.nn.Conv1d(
        in_channels=n_frame_samples,
        out_channels=dim,
        kernel_size=1
    )
    init.kaiming_uniform(self.input_expand.weight)
    init.constant(self.input_expand.bias, 0)
    if weight_norm:
        self.input_expand = torch.nn.utils.weight_norm(self.input_expand)

    self.rnn = torch.nn.GRU(
        input_size=dim,
        hidden_size=dim,
        num_layers=n_rnn,
        batch_first=True
    )
    for i in range(n_rnn):
        nn.concat_init(
            getattr(self.rnn, 'weight_ih_l{}'.format(i)),
            [nn.lecun_uniform, nn.lecun_uniform, nn.lecun_uniform]
        )
        init.constant(getattr(self.rnn, 'bias_ih_l{}'.format(i)), 0)

        nn.concat_init(
            getattr(self.rnn, 'weight_hh_l{}'.format(i)),
            [nn.lecun_uniform, nn.lecun_uniform, init.orthogonal]
        )
        init.constant(getattr(self.rnn, 'bias_hh_l{}'.format(i)), 0)

    self.upsampling = nn.LearnedUpsampling1d(
        in_channels=dim,
        out_channels=dim,
        kernel_size=frame_size
    )
    init.uniform(
        self.upsampling.conv_t.weight, -np.sqrt(6 / dim), np.sqrt(6 / dim)
    )
    init.constant(self.upsampling.bias, 0)
    if weight_norm:
        self.upsampling.conv_t = torch.nn.utils.weight_norm(
            self.upsampling.conv_t
        )
def _initialize_weights(self):
    init.orthogonal(self.conv1.weight, init.calculate_gain('relu'))
    init.orthogonal(self.conv2.weight, init.calculate_gain('relu'))
    init.orthogonal(self.conv3.weight, init.calculate_gain('relu'))
    init.orthogonal(self.conv4.weight)

# Create the super-resolution model by using the above model definition.
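Note that recent PyTorch releases expose the same routine under the in-place name torch.nn.init.orthogonal_ (likewise constant_, normal_, and so on); the older names used in the examples above still run but emit deprecation warnings. A minimal sketch of the newer spelling, with an illustrative layer:

import torch.nn as nn
import torch.nn.init as init

conv = nn.Conv2d(3, 16, kernel_size=3)
# Same orthogonal initialization via the current in-place API; for conv
# weights the trailing dimensions are flattened before orthogonalization.
init.orthogonal_(conv.weight, gain=init.calculate_gain('relu'))
init.constant_(conv.bias, 0)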