The following 50 code examples, extracted from open-source Python projects, illustrate how to use torch.nn.Parameter().
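As a baseline for the examples below, here is a minimal sketch written for this page (stock PyTorch only, not taken from any of the projects): assigning an nn.Parameter as a module attribute registers it, so it shows up in parameters() and receives gradients, while a plain tensor attribute does not.

import torch
import torch.nn as nn

class Scale(nn.Module):
    def __init__(self):
        super(Scale, self).__init__()
        # Registered: appears in parameters() and gets gradients.
        self.scale = nn.Parameter(torch.ones(1))
        # Not registered: plain tensor attributes are invisible to optimizers.
        self.shift = torch.zeros(1)

    def forward(self, x):
        return x * self.scale + self.shift

m = Scale()
print([name for name, _ in m.named_parameters()])  # ['scale']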
def forward(self, x):
    x_shape = x.size()  # (b, c, h, w)
    offset = self.offset_filter(x)  # (b, 2*c, h, w)
    offset_w, offset_h = torch.split(offset, self.regular_filter.in_channels, 1)  # (b, c, h, w)
    offset_w = offset_w.contiguous().view(-1, int(x_shape[2]), int(x_shape[3]))  # (b*c, h, w)
    offset_h = offset_h.contiguous().view(-1, int(x_shape[2]), int(x_shape[3]))  # (b*c, h, w)
    if not self.input_shape or self.input_shape != x_shape:
        self.input_shape = x_shape
        grid_w, grid_h = np.meshgrid(np.linspace(-1, 1, x_shape[3]),
                                     np.linspace(-1, 1, x_shape[2]))  # (h, w)
        grid_w = torch.Tensor(grid_w)
        grid_h = torch.Tensor(grid_h)
        if self.cuda:
            grid_w = grid_w.cuda()
            grid_h = grid_h.cuda()
        self.grid_w = nn.Parameter(grid_w)
        self.grid_h = nn.Parameter(grid_h)
    offset_w = offset_w + self.grid_w  # (b*c, h, w)
    offset_h = offset_h + self.grid_h  # (b*c, h, w)
    x = x.contiguous().view(-1, int(x_shape[2]), int(x_shape[3])).unsqueeze(1)  # (b*c, 1, h, w)
    x = F.grid_sample(x, torch.stack((offset_h, offset_w), 3))  # (b*c, 1, h, w)
    x = x.contiguous().view(-1, int(x_shape[1]), int(x_shape[2]), int(x_shape[3]))  # (b, c, h, w)
    x = self.regular_filter(x)
    return x
def test_parameters(self):
    def num_params(module):
        return len(list(module.parameters()))

    class Net(nn.Container):
        def __init__(self):
            super(Net, self).__init__(
                l1=l,
                l2=l
            )
            self.param = Parameter(torch.Tensor(3, 5))

    l = nn.Linear(10, 20)
    n = Net()
    s = nn.Sequential(n, n, n, n)
    self.assertEqual(num_params(l), 2)
    self.assertEqual(num_params(n), 3)
    self.assertEqual(num_params(s), 3)
def __init__(self,
             num_heads: int,
             input_dim: int,
             attention_dim: int,
             values_dim: int,
             output_projection_dim: int = None,
             attention_dropout_prob: float = 0.1) -> None:
    super(MultiHeadSelfAttention, self).__init__()
    self._num_heads = num_heads
    self._input_dim = input_dim
    self._output_dim = output_projection_dim or input_dim
    self._attention_dim = attention_dim
    self._values_dim = values_dim
    self._query_projections = Parameter(torch.FloatTensor(num_heads, input_dim, attention_dim))
    self._key_projections = Parameter(torch.FloatTensor(num_heads, input_dim, attention_dim))
    self._value_projections = Parameter(torch.FloatTensor(num_heads, input_dim, values_dim))
    self._scale = input_dim ** 0.5
    self._output_projection = Linear(num_heads * values_dim, self._output_dim)
    self._attention_dropout = Dropout(attention_dropout_prob)
    self.reset_parameters()
def __init__(self, shared_resources: SharedResources):
    super(FastQAPyTorchModule, self).__init__()
    self._shared_resources = shared_resources
    input_size = shared_resources.config["repr_dim_input"]
    size = shared_resources.config["repr_dim"]
    self._size = size
    self._with_char_embeddings = self._shared_resources.config.get("with_char_embeddings", False)

    # modules & parameters
    if self._with_char_embeddings:
        self._conv_char_embedding = embedding.ConvCharEmbeddingModule(
            len(shared_resources.char_vocab), size)
        self._embedding_projection = nn.Linear(size + input_size, size)
        self._embedding_highway = Highway(size, 1)
        self._v_wiq_w = nn.Parameter(torch.ones(1, 1, input_size + size))
        input_size = size
    else:
        self._v_wiq_w = nn.Parameter(torch.ones(1, 1, input_size))

    self._bilstm = BiLSTM(input_size + 2, size)
    self._answer_layer = FastQAAnswerModule(shared_resources)

    # [size, 2 * size]
    self._question_projection = nn.Parameter(torch.cat([torch.eye(size), torch.eye(size)], dim=1))
    self._support_projection = nn.Parameter(torch.cat([torch.eye(size), torch.eye(size)], dim=1))
def __init__(self, n_in, n_out, dropout=0, rnn_dropout=0,
             bidirectional=False, use_tanh=1, use_relu=0):
    super(SRUCell, self).__init__()
    self.n_in = n_in
    self.n_out = n_out
    self.rnn_dropout = rnn_dropout
    self.dropout = dropout
    self.bidirectional = bidirectional
    self.activation_type = 2 if use_relu else (1 if use_tanh else 0)

    out_size = n_out * 2 if bidirectional else n_out
    k = 4 if n_in != out_size else 3
    self.size_per_dir = n_out * k
    self.weight = nn.Parameter(torch.Tensor(
        n_in,
        self.size_per_dir * 2 if bidirectional else self.size_per_dir
    ))
    self.bias = nn.Parameter(torch.Tensor(
        n_out * 4 if bidirectional else n_out * 2
    ))
    self.init_weight()
def load_embeddings(self, state_dict):
    self_state_dict = self.state_dict()
    self_states = set(self_state_dict.keys())
    states = set(state_dict)
    assert self_states & states, "Given state dict does not contain " \
                                 "word embedding params"
    for name, param in state_dict.items():
        if name not in self_state_dict:
            continue
        if isinstance(param, nn.Parameter):
            param = param.data
        self_state_dict[name].copy_(param)
def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1):
    super(MultiHeadAttention, self).__init__()
    self.n_head = n_head
    self.d_k = d_k
    self.d_v = d_v

    self.w_qs = nn.Parameter(torch.FloatTensor(n_head, d_model, d_k))
    self.w_ks = nn.Parameter(torch.FloatTensor(n_head, d_model, d_k))
    self.w_vs = nn.Parameter(torch.FloatTensor(n_head, d_model, d_v))

    self.attention = ScaledDotProductAttention(d_model)
    self.layer_norm = LayerNormalization(d_model)
    self.proj = Linear(n_head * d_v, d_model)
    self.dropout = nn.Dropout(dropout)

    init.xavier_normal(self.w_qs)
    init.xavier_normal(self.w_ks)
    init.xavier_normal(self.w_vs)
def copy_state_dict(state_dict, model, strip=None):
    tgt_state = model.state_dict()
    copied_names = set()
    for name, param in state_dict.items():
        if strip is not None and name.startswith(strip):
            name = name[len(strip):]
        if name not in tgt_state:
            continue
        if isinstance(param, Parameter):
            param = param.data
        if param.size() != tgt_state[name].size():
            print('mismatch:', name, param.size(), tgt_state[name].size())
            continue
        tgt_state[name].copy_(param)
        copied_names.add(name)

    missing = set(tgt_state.keys()) - copied_names
    if len(missing) > 0:
        print("missing keys in state_dict:", missing)

    return model
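A hypothetical usage sketch for copy_state_dict above (the checkpoint path and model are placeholders, not from the source project): the strip argument removes the 'module.' prefix that nn.DataParallel prepends to every key, a common reason names fail to match.

# Hypothetical usage; 'checkpoint.pth.tar' and `model` are placeholders.
checkpoint = torch.load('checkpoint.pth.tar')
model = copy_state_dict(checkpoint['state_dict'], model, strip='module.')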
def __init__(self, num_features, max_length, eps=1e-5, momentum=0.1,
             affine=True):
    """
    Most parts are copied from torch.nn.modules.batchnorm._BatchNorm.
    """
    super(SeparatedBatchNorm1d, self).__init__()
    self.num_features = num_features
    self.max_length = max_length
    self.affine = affine
    self.eps = eps
    self.momentum = momentum
    if self.affine:
        self.weight = nn.Parameter(torch.FloatTensor(num_features))
        self.bias = nn.Parameter(torch.FloatTensor(num_features))
    else:
        self.register_parameter('weight', None)
        self.register_parameter('bias', None)
    for i in range(max_length):
        self.register_buffer(
            'running_mean_{}'.format(i), torch.zeros(num_features))
        self.register_buffer(
            'running_var_{}'.format(i), torch.ones(num_features))
    self.reset_parameters()
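The register_parameter('weight', None) branch above is a stock PyTorch idiom: it defines the attribute (self.weight is None) without adding anything to parameters(), so later code can test `if self.weight is not None` instead of using hasattr. A minimal standalone demonstration:

import torch.nn as nn

m = nn.Module()
m.register_parameter('bias', None)
print(m.bias)                # None, but the attribute exists
print(list(m.parameters()))  # []: nothing was registered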
def __init__(self, input_size, hidden_size, use_bias=True):
    """
    Most parts are copied from torch.nn.LSTMCell.
    """
    super(LSTMCell, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.use_bias = use_bias
    self.weight_ih = nn.Parameter(
        torch.FloatTensor(input_size, 4 * hidden_size))
    self.weight_hh = nn.Parameter(
        torch.FloatTensor(hidden_size, 4 * hidden_size))
    if use_bias:
        self.bias = nn.Parameter(torch.FloatTensor(4 * hidden_size))
    else:
        self.register_parameter('bias', None)
    self.reset_parameters()
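The cell above ends by calling self.reset_parameters(), which the snippet does not include. A plausible sketch, assuming the convention of torch.nn.LSTMCell (uniform init scaled by 1/sqrt(hidden_size)); the original project may initialize differently:

import math

def reset_parameters(self):
    # Hypothetical: mirrors torch.nn.LSTMCell's default initialization.
    stdv = 1.0 / math.sqrt(self.hidden_size)
    for param in self.parameters():
        param.data.uniform_(-stdv, stdv)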
def __init__(self, input_size, hidden_size, max_length, use_bias=True):
    super(BNLSTMCell, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.max_length = max_length
    self.use_bias = use_bias
    self.weight_ih = nn.Parameter(
        torch.FloatTensor(input_size, 4 * hidden_size))
    self.weight_hh = nn.Parameter(
        torch.FloatTensor(hidden_size, 4 * hidden_size))
    if use_bias:
        self.bias = nn.Parameter(torch.FloatTensor(4 * hidden_size))
    else:
        self.register_parameter('bias', None)
    # BN parameters
    self.bn_ih = SeparatedBatchNorm1d(
        num_features=4 * hidden_size, max_length=max_length)
    self.bn_hh = SeparatedBatchNorm1d(
        num_features=4 * hidden_size, max_length=max_length)
    self.bn_c = SeparatedBatchNorm1d(
        num_features=hidden_size, max_length=max_length)
    self.reset_parameters()
def __init__(self, vocab_size, tag_to_ix, embedding_dim, hidden_dim):
    super(BiLSTM_CRF, self).__init__()
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    self.vocab_size = vocab_size
    self.tag_to_ix = tag_to_ix
    self.tagset_size = len(tag_to_ix)

    self.word_embeds = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
    self.lstm = nn.LSTM(embedding_dim, hidden_dim // 2,
                        num_layers=1, bidirectional=True)

    # Maps the output of the LSTM into tag space.
    self.hidden2tag = nn.Linear(hidden_dim, self.tagset_size)

    # Matrix of transition parameters. Entry i,j is the score of
    # transitioning *to* i *from* j.
    self.transitions = nn.Parameter(
        torch.randn(self.tagset_size, self.tagset_size))

    self.hidden = self.init_hidden()
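init_hidden() is referenced but not shown. In the PyTorch BiLSTM-CRF tutorial this example follows, it returns a fresh (h_0, c_0) pair shaped (num_layers * num_directions, batch, hidden_size) for the single-layer bidirectional LSTM; a sketch to that effect (an assumption about the omitted method):

def init_hidden(self):
    # Hypothetical: (num_layers * num_directions, batch, hidden_size)
    # for a single-layer bidirectional LSTM with batch size 1.
    return (torch.randn(2, 1, self.hidden_dim // 2),
            torch.randn(2, 1, self.hidden_dim // 2))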
def __init__(self, question_size, passage_size, hidden_size, attn_size=None,
             cell_type=nn.GRUCell, num_layers=1, dropout=0, residual=False,
             **kwargs):
    super().__init__()
    self.num_layers = num_layers
    if attn_size is None:
        attn_size = question_size

    # TODO: what is V_q? (section 3.4)
    v_q_size = question_size
    self.question_pooling = AttentionPooling(question_size,
                                             v_q_size, attn_size=attn_size)
    self.passage_pooling = AttentionPooling(passage_size,
                                            question_size, attn_size=attn_size)

    self.V_q = nn.Parameter(torch.randn(1, 1, v_q_size), requires_grad=True)
    self.cell = StackedCell(question_size, question_size, num_layers=num_layers,
                            dropout=dropout, rnn_cell=cell_type,
                            residual=residual, **kwargs)
def __init__(self, fea_size, dropout=False, gate_width=128, use_region=True,
             use_kernel_function=False):
    super(Hierarchical_Message_Passing_Structure_base, self).__init__()
    # self.w_object = Parameter()
    if use_kernel_function:
        Message_Passing_Unit = Message_Passing_Unit_v2
    else:
        Message_Passing_Unit = Message_Passing_Unit_v1

    self.gate_sub2pred = Message_Passing_Unit(fea_size, gate_width)
    self.gate_obj2pred = Message_Passing_Unit(fea_size, gate_width)
    self.gate_pred2sub = Message_Passing_Unit(fea_size, gate_width)
    self.gate_pred2obj = Message_Passing_Unit(fea_size, gate_width)

    self.GRU_object = Gated_Recurrent_Unit(fea_size, dropout)  # nn.GRUCell(fea_size, fea_size)
    self.GRU_phrase = Gated_Recurrent_Unit(fea_size, dropout)

    if use_region:
        self.gate_pred2reg = Message_Passing_Unit(fea_size, gate_width)
        self.gate_reg2pred = Message_Passing_Unit(fea_size, gate_width)
        self.GRU_region = Gated_Recurrent_Unit(fea_size, dropout)
def __init__(self, num_features, max_len, eps=1e-5, momentum=0.1, affine=True):
    super(recurrent_BatchNorm, self).__init__()
    self.num_features = num_features
    self.affine = affine
    self.max_len = max_len
    self.eps = eps
    self.momentum = momentum
    if self.affine:
        # Note: attribute assignment already registers the Parameter, so the
        # explicit register_parameter calls below are redundant but harmless.
        self.weight = nn.Parameter(torch.Tensor(num_features))
        self.register_parameter('weight', self.weight)
        self.bias = nn.Parameter(torch.Tensor(num_features))
        self.register_parameter('bias', self.bias)
    else:
        self.register_parameter('weight', None)
        self.register_parameter('bias', None)
    for i in range(max_len):  # was xrange in the Python 2 original
        self.register_buffer('running_mean_{}'.format(i), torch.zeros(num_features))
        self.register_buffer('running_var_{}'.format(i), torch.ones(num_features))
    self.reset_parameters()
def __init__(self, in_channels, out_channels, kernel_size, bias=True):
    super().__init__()
    self.conv_t = nn.ConvTranspose1d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        stride=kernel_size,
        bias=False
    )
    if bias:
        self.bias = nn.Parameter(
            torch.FloatTensor(out_channels, kernel_size)
        )
    else:
        self.register_parameter('bias', None)
    self.reset_parameters()
def test_parameters(self):
    def num_params(module):
        return len(list(module.parameters()))

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.l1 = l
            self.l2 = l
            self.param = Parameter(torch.Tensor(3, 5))

    l = nn.Linear(10, 20)
    n = Net()
    s = nn.Sequential(n, n, n, n)
    self.assertEqual(num_params(l), 2)
    self.assertEqual(num_params(n), 3)
    self.assertEqual(num_params(s), 3)
def __init__(self, n_in, n_out, dropout=0, rnn_dropout=0,
             bidirectional=False, use_tanh=1, use_relu=0, use_kernel=True):
    super(SRUCell, self).__init__()
    self.n_in = n_in
    self.n_out = n_out
    self.rnn_dropout = rnn_dropout
    self.dropout = dropout
    self.bidirectional = bidirectional
    self.activation_type = 2 if use_relu else (1 if use_tanh else 0)
    self.use_kernel = use_kernel

    out_size = n_out * 2 if bidirectional else n_out
    k = 4 if n_in != out_size else 3
    self.size_per_dir = n_out * k
    self.weight = nn.Parameter(torch.Tensor(
        n_in,
        self.size_per_dir * 2 if bidirectional else self.size_per_dir
    ))
    self.bias = nn.Parameter(torch.Tensor(
        n_out * 4 if bidirectional else n_out * 2
    ))
    self.init_weight()
def __init__(self, opt):
    super(StackLayer2, self).__init__()
    self.model_name = 'StackLayer2'
    self.opt = opt
    # self.fc = nn.Sequential(
    #     nn.Linear(opt.model_num * opt.num_classes, opt.linear_hidden_size),
    #     nn.BatchNorm1d(opt.linear_hidden_size),
    #     nn.ReLU(inplace=True),
    #     nn.Linear(opt.linear_hidden_size, opt.num_classes)
    # )
    # self.weights = nn.Parameter(t.zeros(opt.num_classes, opt.model_num))
    self.weights = nn.Parameter(t.ones(opt.model_num) / opt.model_num)
    # self.fc = nn.Linear(opt.model_num * opt.num_classes, opt.num_classes)
    # weights = np.zeros((opt.num_classes, opt.model_num * opt.num_classes), dtype=np.float32)
    # for i in range(opt.model_num):
    #     weights[range(1999), range(i * 1999, i * 1999 + 1999)] = 0.125
    # self.fc.weight.data = t.from_numpy(weights)
def __init__(self, opt):
    super(MultiModelAll2, self).__init__()
    self.model_name = 'MultiModelAll2'
    self.opt = opt

    self.models = []
    for _name, _path in zip(opt.model_names, opt.model_paths):
        tmp_config = Config().parse(opt.state_dict(), print_=False)
        # tmp_config.static = True
        tmp_config.embedding_path = None
        _model = getattr(models, _name)(tmp_config)
        if _path is not None:
            _model.load(_path)
        self.models.append(_model)
    self.models = nn.ModuleList(self.models)

    self.model_num = len(self.models)
    self.weights = nn.Parameter(t.ones(opt.num_classes, self.model_num))
    assert self.opt.loss == 'bceloss'
    self.eval()
def __init__(self, opt):
    super(MultiModelAll4zhihu, self).__init__()
    self.model_name = 'MultiModelAll4zhihu'
    self.opt = opt
    self.models = []
    self.word_embedding = nn.Embedding(411720, 256)
    self.char_embedding = nn.Embedding(11973, 256)
    model_opts = t.load(opt.model_path + '.json')

    for _name, _path, model_opt_ in zip(opt.model_names, opt.model_paths, model_opts):
        tmp_config = Config().parse(model_opt_, print_=False)
        tmp_config.embedding_path = None
        _model = getattr(models, _name)(tmp_config)
        _model.encoder = (self.char_embedding if _model.opt.type_ == 'char'
                          else self.word_embedding)
        self.models.append(_model)

    self.models = nn.ModuleList(self.models)
    self.model_num = len(self.models)
    self.weights = nn.Parameter(t.ones(opt.num_classes, self.model_num))
    self.load(opt.model_path)
def __init__(self):
    super(POSTag, self).__init__()

    self.w = nn.Parameter(torch.randn(postag_nb_layers * 2,
                                      max_sentence_size,
                                      postag_hn_size))
    self.h = nn.Parameter(torch.randn(postag_nb_layers * 2,
                                      max_sentence_size,
                                      postag_hn_size))

    # Bidirectional LSTM
    self.bi_lstm = nn.LSTM(embedding_size,
                           postag_hn_size,
                           postag_nb_layers,
                           bidirectional=True)

    self.fc = nn.Linear(postag_hn_size * 2, nb_postags)
def __init__(self):
    super(Chunking, self).__init__()

    self.input_size = embedding_size \
                      + nb_postags \
                      + postag_hn_size * 2

    self.w = nn.Parameter(torch.randn(chunking_nb_layers * 2,
                                      max_sentence_size,
                                      chunking_hn_size))
    self.h = nn.Parameter(torch.randn(chunking_nb_layers * 2,
                                      max_sentence_size,
                                      chunking_hn_size))

    self.embedding = nn.Embedding(nb_postags, chunking_postag_emb_size)
    self.aux_emb = torch.arange(0, nb_postags)
    self.aux_emb = Variable(self.aux_emb).long()

    self.bi_lstm = nn.LSTM(self.input_size,
                           chunking_hn_size,
                           chunking_nb_layers,
                           bidirectional=True)

    self.fc = nn.Linear(chunking_hn_size * 2, nb_chunktags)
def __init__(self):
    super(Dependency, self).__init__()

    self.input_size = embedding_size \
                      + nb_postags \
                      + nb_chunktags \
                      + postag_hn_size * 2 \
                      + chunking_hn_size * 2

    self.w = nn.Parameter(torch.randn(dependency_nb_layers * 2,
                                      max_sentence_size,
                                      dependency_hn_size))
    self.h = nn.Parameter(torch.randn(dependency_nb_layers * 2,
                                      max_sentence_size,
                                      dependency_hn_size))

    self.bi_lstm = nn.LSTM(self.input_size,
                           dependency_hn_size,
                           dependency_nb_layers,
                           bidirectional=True)

    self.wd = nn.Parameter(torch.randn(dependency_hn_size * 2))
    self.fc = nn.Linear(dependency_hn_size * 2, 1)
def __init__(self):
    super(SentimentClassification, self).__init__()

    self.input_size = embedding_size \
                      + nb_postags \
                      + nb_chunktags \
                      + max_sentence_size \
                      + postag_hn_size * 2 \
                      + chunking_hn_size * 2 \
                      + dependency_hn_size * 2

    self.w = nn.Parameter(torch.randn(sentiment_nb_layers * 2,
                                      max_sentence_size,
                                      sentiment_hn_size))
    self.h = nn.Parameter(torch.randn(sentiment_nb_layers * 2,
                                      max_sentence_size,
                                      sentiment_hn_size))

    self.bi_lstm = nn.LSTM(self.input_size,
                           sentiment_hn_size,
                           sentiment_nb_layers,
                           bidirectional=True)

    self.fc = nn.Linear(sentiment_hn_size * 2, 1)
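The four task heads above all register trainable initial LSTM states (self.w and self.h) shaped (num_layers * 2, max_sentence_size, hidden_size). Their forward methods are not included here; a hedged sketch of how such parameters are typically consumed, an assumption rather than the projects' code:

def forward(self, x):
    # Hypothetical: treat the registered tensors as the LSTM's trainable
    # initial (h_0, c_0); both must match
    # (num_layers * num_directions, batch, hidden_size).
    out, (h_n, c_n) = self.bi_lstm(x, (self.h, self.w))
    return self.fc(out)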
def __init__(self, options, GPU=False):
    super(CRF, self).__init__()
    self.GPU = GPU
    if self.GPU:
        self.dtype = torch.cuda.FloatTensor
    else:
        self.dtype = torch.FloatTensor
    self.options = options
    self.tag_to_ix = options['CLASSES_2_IX']
    self.ix_to_tag = {self.tag_to_ix[w]: w for w in self.tag_to_ix}
    self.tagset_size = len(self.tag_to_ix)

    # Matrix of transition parameters. Entry i,j is the score of
    # transitioning *to* i *from* j.
    # Note: .type() must be applied to the tensor *before* wrapping it in
    # nn.Parameter; calling it on the Parameter returns a plain tensor and
    # the attribute silently stops being a registered parameter.
    self.transitions = nn.Parameter(
        torch.randn(self.tagset_size, self.tagset_size).type(self.dtype))
    self.initial_weights = nn.Parameter(
        torch.randn(self.tagset_size, 1).type(self.dtype))
    self.final_weights = nn.Parameter(
        torch.randn(self.tagset_size, 1).type(self.dtype))
def __init__(self, options, GPU=False):
    super(CRF, self).__init__()
    self.GPU = GPU
    if self.GPU:
        self.dtype = torch.cuda.FloatTensor
    else:
        self.dtype = torch.FloatTensor
    self.options = options
    self.tag_to_ix = options['CLASSES_2_IX']
    self.ix_to_tag = {self.tag_to_ix[w]: w for w in self.tag_to_ix}
    self.START_TAG = 'START'
    self.STOP_TAG = 'STOP'
    if self.START_TAG not in self.tag_to_ix:
        self.tag_to_ix[self.START_TAG] = len(self.tag_to_ix)
    if self.STOP_TAG not in self.tag_to_ix:
        self.tag_to_ix[self.STOP_TAG] = len(self.tag_to_ix)
    self.tagset_size = len(self.tag_to_ix)

    # Matrix of transition parameters. Entry i,j is the score of
    # transitioning *to* i *from* j.
    self.transitions = nn.Parameter(
        torch.randn(self.tagset_size, self.tagset_size).type(self.dtype))
def load_state_dict(module, state_dict):
    """Copies parameters and buffers from :attr:`state_dict` into this
    module and its descendants. The keys of :attr:`state_dict` must
    exactly match the keys returned by this module's :func:`state_dict()`
    function.

    Arguments:
        state_dict (dict): A dict containing parameters and persistent buffers.
    """
    own_state = module.state_dict()
    for name, param in state_dict.items():
        if name not in own_state:
            raise KeyError('unexpected key "{}" in state_dict'
                           .format(name))
        if isinstance(param, Parameter):
            # backwards compatibility for serialized parameters
            param = param.data
        own_state[name].copy_(param)
def __init__(self, grid_size, grid_bounds, n_components, mixing_params=False):
    super(AdditiveGridInducingPointModule, self).__init__(grid_size, grid_bounds)
    self.n_components = n_components

    # Resize variational parameters to have one size per component
    self.alpha.resize_(*([n_components] + list(self.alpha.size())))
    variational_mean = self.variational_mean
    chol_variational_covar = self.chol_variational_covar
    variational_mean.data.resize_(*([n_components] + list(variational_mean.size())))
    chol_variational_covar.data.resize_(*([n_components] +
                                          list(chol_variational_covar.size())))

    # Mixing parameters
    if mixing_params:
        self.register_parameter('mixing_params',
                                nn.Parameter(torch.Tensor(n_components)
                                             .fill_(1. / n_components)),
                                bounds=(-2, 2))
def __init__(self, in_size, out_size, batch_num=10, epoch_num=10):
    """
    in_size: Data Input Dimension
    out_size: Data Output Dimension
    batch_num: Batch size of Input
    epoch_num: Number of training epochs
    """
    super(AutoEncoder, self).__init__()
    self.in_size = in_size
    self.out_size = out_size
    self.batch_num = batch_num
    self.epoch_num = epoch_num
    self.weight1 = nn.Parameter(torch.randn(in_size, out_size), requires_grad=True)
    self.bias1 = nn.Parameter(torch.randn(out_size), requires_grad=True)
    self.bias2 = nn.Parameter(torch.randn(in_size), requires_grad=True)
    # self.linear1 = nn.Linear(in_size, out_size)
    # self.linear2 = nn.Linear(out_size, in_size)
def create(cls, embeddings, labels, **kwargs):
    finetune = kwargs.get('finetune', True)
    dsz = embeddings.dsz
    model = cls()
    model.pdrop = kwargs.get('dropout', 0.5)
    model.labels = labels
    nc = len(labels)
    model.vocab = embeddings.vocab
    model.lut = nn.Embedding(embeddings.vsz + 1, dsz)
    del model.lut.weight
    model.lut.weight = nn.Parameter(torch.FloatTensor(embeddings.weights),
                                    requires_grad=finetune)
    pool_dim = model._init_pool(dsz, **kwargs)
    stacked_dim = model._init_stacked(pool_dim, **kwargs)
    model._init_output(stacked_dim, nc)
    print(model)
    return model
def __init__(self, num_embeddings, embedding_dim, padding_idx=None,
             max_norm=None, norm_type=2, scale_grad_by_freq=False,
             sparse=False, fixed_weight=False):
    super(Embedding, self).__init__()
    self.num_embeddings = num_embeddings
    self.embedding_dim = embedding_dim
    self.padding_idx = padding_idx
    self.max_norm = max_norm
    self.norm_type = norm_type
    self.scale_grad_by_freq = scale_grad_by_freq
    if fixed_weight:
        self.weight = Variable(
            torch.Tensor(num_embeddings, embedding_dim), requires_grad=False)
    else:
        self.weight = nn.Parameter(
            torch.Tensor(num_embeddings, embedding_dim))
    self.fixed_weight = fixed_weight
    self.sparse = sparse

    self.reset_parameters()
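The fixed_weight branch above keeps the frozen table out of parameters() entirely, since a plain Variable/tensor attribute is never registered. An alternative sketch, not from the source project: keep the table registered but frozen, so it still travels with state_dict() while receiving no gradient updates.

# Hypothetical alternative to the fixed_weight branch: the table stays in
# state_dict() but is excluded from gradient updates.
self.weight = nn.Parameter(
    torch.Tensor(num_embeddings, embedding_dim), requires_grad=False)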
def __init__(self, in1_features, in2_features, out_features,
             bias=(True, True, True)):
    super(Biaffine, self).__init__()
    self.in1_features = in1_features
    self.in2_features = in2_features
    self.out_features = out_features
    self._use_bias = bias
    shape = (in1_features + int(bias[0]),
             in2_features + int(bias[1]),
             out_features)
    self.weight = nn.Parameter(torch.Tensor(*shape))
    if bias[2]:
        self.bias = nn.Parameter(torch.Tensor(out_features))
    else:
        self.register_parameter('bias', None)
    self.reset_parameters()
def split_input_channel(self, channel_i):
    if channel_i >= self.in_channels:
        print("cannot split channel {} of {}".format(channel_i, self.in_channels))
        return

    self.in_channels += 1
    orig_weight = self.weight.data
    dup_slice = orig_weight[:, channel_i, :] * .5

    new_weight = torch.zeros(self.out_channels, self.in_channels, self.kernel_size[0])
    if channel_i > 0:
        new_weight[:, :channel_i, :] = orig_weight[:, :channel_i, :]
    # Duplicate the split channel, halving its weight so the layer's output
    # is initially unchanged.
    new_weight[:, channel_i, :] = dup_slice
    new_weight[:, channel_i + 1, :] = dup_slice
    # Copy any remaining channels, shifted one position to the right.
    new_weight[:, channel_i + 2:, :] = orig_weight[:, channel_i + 1:, :]

    self.weight = Parameter(new_weight)
    self.init_ncc()
def test_forward_computes_forward_pass():
    weight = torch.randn(4, 8, 3, 3).cuda()
    input = torch.randn(4, 8, 4, 4).cuda()

    out = F.conv2d(
        input=Variable(input),
        weight=Parameter(weight),
        bias=None,
        stride=1,
        padding=1,
        dilation=1,
        groups=1,
    ).data

    func = _EfficientConv2d(
        stride=1,
        padding=1,
        dilation=1,
        groups=1,
    )
    out_efficient = func.forward(weight, None, input)

    assert almost_equal(out, out_efficient)
def __init__(self, input_size, hidden_size, capacity):
    super(GORUCell, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.capacity = capacity
    self.U = nn.Parameter(
        torch.FloatTensor(input_size, hidden_size))
    self.thetaA = nn.Parameter(
        torch.FloatTensor(hidden_size // 2, capacity // 2))
    self.thetaB = nn.Parameter(
        torch.FloatTensor(hidden_size // 2 - 1, capacity // 2))
    self.bias = nn.Parameter(
        torch.FloatTensor(hidden_size))
    self.gate_U = nn.Parameter(
        torch.FloatTensor(input_size, 2 * hidden_size))
    self.gate_W = nn.Parameter(
        torch.FloatTensor(hidden_size, 2 * hidden_size))
    self.gate_bias = nn.Parameter(torch.FloatTensor(2 * hidden_size))
    self.reset_parameters()
def init_hidden(self, hidden_dim):
    """Trainable initial hidden state"""
    enc_init_hx = Variable(torch.zeros(hidden_dim), requires_grad=False)
    if self.use_cuda:
        enc_init_hx = enc_init_hx.cuda()
    # enc_init_hx.data.uniform_(-(1. / math.sqrt(hidden_dim)),
    #                           1. / math.sqrt(hidden_dim))

    enc_init_cx = Variable(torch.zeros(hidden_dim), requires_grad=False)
    if self.use_cuda:
        enc_init_cx = enc_init_cx.cuda()
    # enc_init_cx = nn.Parameter(enc_init_cx)
    # enc_init_cx.data.uniform_(-(1. / math.sqrt(hidden_dim)),
    #                           1. / math.sqrt(hidden_dim))
    return (enc_init_hx, enc_init_cx)
def __init__(self, num_classes, embed_size):
    """
    :param num_classes: An int. The number of possible classes.
    :param embed_size: An int. Embedding size
    """
    super(NEG_loss, self).__init__()
    self.num_classes = num_classes
    self.embed_size = embed_size

    self.out_embed = nn.Embedding(self.num_classes, self.embed_size)
    self.out_embed.weight = Parameter(
        t.FloatTensor(self.num_classes, self.embed_size).uniform_(-1, 1))

    self.in_embed = nn.Embedding(self.num_classes, self.embed_size)
    self.in_embed.weight = Parameter(
        t.FloatTensor(self.num_classes, self.embed_size).uniform_(-1, 1))
def init_embeddings(self, weight):
    emb_elements = self.embeddings.weight.data.nelement()
    mismatch_msg = "Expected " + str(emb_elements) + " elements but got {}"
    if isinstance(weight, np.ndarray):
        assert emb_elements == weight.size, \
            mismatch_msg.format(weight.size)
        self.embeddings.weight.data = torch.Tensor(weight)
    elif isinstance(weight, nn.Parameter):
        # Checked before torch.Tensor: nn.Parameter is a Tensor subclass,
        # so the Tensor branch would otherwise shadow this one.
        assert emb_elements == weight.nelement(), \
            mismatch_msg.format(weight.nelement())
        self.embeddings.weight = weight
    elif isinstance(weight, torch.Tensor):
        assert emb_elements == weight.nelement(), \
            mismatch_msg.format(weight.nelement())
        self.embeddings.weight.data = weight
    else:
        raise ValueError("Unknown weight type [{}]".format(type(weight)))
def __init__(self, params):
    super(Decoder, self).__init__()
    self.params = params

    self.kernels = [Parameter(t.Tensor(out_chan, in_chan, width).normal_(0, 0.05))
                    for out_chan, in_chan, width in params.decoder_kernels]
    self._add_to_parameters(self.kernels, 'decoder_kernel')

    self.biases = [Parameter(t.Tensor(out_chan).normal_(0, 0.05))
                   for out_chan, in_chan, width in params.decoder_kernels]
    self._add_to_parameters(self.biases, 'decoder_bias')

    self.out_size = self.params.decoder_kernels[-1][0]
    self.fc = nn.Linear(self.out_size, self.params.word_vocab_size)
def __init__(self, start_tag_index, stop_tag_index, tag_size,
             embedding_dim, hidden_dim):
    super(EncoderCRF, self).__init__()
    self.hidden_dim = hidden_dim
    self.start_tag_index = start_tag_index
    self.stop_tag_index = stop_tag_index
    self.tag_size = tag_size

    self.encoder = nn.GRU(embedding_dim, hidden_dim // 2,
                          num_layers=1, bidirectional=True)
    self.tag_projection = nn.Linear(hidden_dim, self.tag_size)

    self.transitions = nn.Parameter(
        torch.randn(self.tag_size, self.tag_size))

    self.hidden = self.init_hidden()
def init_duvenaud(self, params):
    learn_args = []
    learn_modules = []
    args = {}

    args['out'] = params['out']

    # Define a parameter matrix W for each layer.
    for l in range(params['layers']):
        learn_args.append(nn.Parameter(torch.randn(params['in'][l], params['out'])))

    # learn_modules.append(nn.Linear(params['out'], params['target']))
    learn_modules.append(NNet(n_in=params['out'], n_out=params['target']))
    return nn.ParameterList(learn_args), nn.ModuleList(learn_modules), args

# GG-NN, Li et al.
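The nn.ParameterList returned above is what keeps the per-layer matrices visible to whichever module eventually stores them; a plain Python list of Parameters would not be registered. A minimal standalone demonstration with hypothetical sizes:

import torch
import torch.nn as nn

class Stack(nn.Module):
    def __init__(self):
        super(Stack, self).__init__()
        # Each entry is registered as 'weights.0', 'weights.1', ...;
        # a plain Python list of Parameters would be invisible here.
        self.weights = nn.ParameterList(
            [nn.Parameter(torch.randn(4, 4)) for _ in range(3)])

print([name for name, _ in Stack().named_parameters()])
# ['weights.0', 'weights.1', 'weights.2']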
def __init__(self, model, action_size=1, init_value=0.0, *args, **kwargs):
    super(DiagonalGaussianPolicy, self).__init__(model, *args, **kwargs)
    self.init_value = init_value
    self.logstd = th.zeros((1, action_size)) + self.init_value
    self.logstd = P(self.logstd)
    self.halflog2pie = V(T([2 * pi * exp(1)])) * 0.5
    self.halflog2pi = V(T([2.0 * pi])) * 0.5
    self.pi = V(T([pi]))