我们从Python开源项目中,提取了以下49个代码示例,用于说明如何使用torch.nn.functional.conv2d()。
def SSIM(img1, img2):
    """Return the mean structural-similarity (SSIM) index of two image batches.

    Args:
        img1, img2: tensors of shape (N, C, H, W) with matching channel count.

    Uses an 11x11 window built by the module-level ``create_window`` helper,
    applied per-channel via a grouped convolution.
    """
    (_, channel, _, _) = img1.size()
    window_size = 11
    window = create_window(window_size, channel)
    # BUG FIX: the original passed window_size / 2, which is a float under
    # Python 3; F.conv2d requires integer padding. // keeps "same" output size.
    pad = window_size // 2
    mu1 = F.conv2d(img1, window, padding=pad, groups=channel)
    mu2 = F.conv2d(img2, window, padding=pad, groups=channel)
    mu1_sq = mu1.pow(2)
    mu2_sq = mu2.pow(2)
    mu1_mu2 = mu1 * mu2
    # Local (co)variances via E[x^2] - E[x]^2.
    sigma1_sq = F.conv2d(img1 * img1, window, padding=pad, groups=channel) - mu1_sq
    sigma2_sq = F.conv2d(img2 * img2, window, padding=pad, groups=channel) - mu2_sq
    sigma12 = F.conv2d(img1 * img2, window, padding=pad, groups=channel) - mu1_mu2
    # Stability constants from Wang et al. (assumes pixel range [0, 1]).
    C1 = 0.01 ** 2
    C2 = 0.03 ** 2
    ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / \
        ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))
    return ssim_map.mean()
def _ssim(img1, img2, window, window_size, channel, size_average = True): mu1 = F.conv2d(img1, window, padding = window_size//2, groups = channel) mu2 = F.conv2d(img2, window, padding = window_size//2, groups = channel) mu1_sq = mu1.pow(2) mu2_sq = mu2.pow(2) mu1_mu2 = mu1*mu2 sigma1_sq = F.conv2d(img1*img1, window, padding = window_size//2, groups = channel) - mu1_sq sigma2_sq = F.conv2d(img2*img2, window, padding = window_size//2, groups = channel) - mu2_sq sigma12 = F.conv2d(img1*img2, window, padding = window_size//2, groups = channel) - mu1_mu2 C1 = 0.01**2 C2 = 0.03**2 ssim_map = ((2*mu1_mu2 + C1)*(2*sigma12 + C2))/((mu1_sq + mu2_sq + C1)*(sigma1_sq + sigma2_sq + C2)) if size_average: return ssim_map.mean() else: return ssim_map.mean(1).mean(1).mean(1)
def forward(self, x):
    """Average pooling implemented as a depthwise conv with constant weights.

    A (C, 1, kh, kw) all-ones/n kernel is (re)built lazily whenever the input's
    channel count, device, or dtype no longer matches the cached weights.
    """
    # Create weights for the convolution on demand:
    # size or type of x changed...
    in_channels = x.size()[1]
    weight_shape = (in_channels, 1, self.kernel_size[0], self.kernel_size[1])
    if self.weights is None or (
            (tuple(self.weights.size()) != tuple(weight_shape)) or
            (self.weights.is_cuda != x.is_cuda) or
            (self.weights.data.type() != x.data.type())):
        # Uniform kernel: each output is the mean of a kh*kw patch.
        n_pool = np.prod(self.kernel_size)
        weights = np_to_var(
            np.ones(weight_shape, dtype=np.float32) / float(n_pool))
        weights = weights.type_as(x)
        if x.is_cuda:
            weights = weights.cuda()
        self.weights = weights
    # groups=in_channels makes this a per-channel (depthwise) pooling conv.
    pooled = F.conv2d(x, self.weights, bias=None,
                      stride=self.stride,
                      dilation=self.dilation,
                      groups=in_channels,)
    return pooled
def forward(self, input, n_out, dilation, ks = (3,3), groups=1):
    """Conv2d using a centered sub-slice of this layer's full weight bank.

    The stored weight is sliced to (n_out, in_ch // groups, ks[0], ks[1]) by
    taking the spatial center of the full kernel, then convolved with
    "same"-style padding computed from the effective (dilated) kernel size.
    NOTE(review): assumes ks is no larger than self.kernel_size and that
    (kernel_size - ks) is even — confirm against callers.
    """
    # Spatial slice bounds: center a ks-sized window inside the full kernel.
    return F.conv2d(
        input,
        weight=self.weight[:n_out,
                           :input.size(1) // groups,
                           (self.kernel_size[0] - ks[0]) // 2: ks[0] + (self.kernel_size[0] - ks[0]) // 2,
                           (self.kernel_size[1] - ks[1]) // 2: ks[1] + (self.kernel_size[1] - ks[1]) // 2].contiguous(),
        dilation=tuple(int(d) for d in dilation),
        # Padding = half the effective (dilation-expanded) kernel extent.
        padding=tuple(int(item) for item in (
            (ks[0] + ((ks[0] - 1) * (dilation[0] - 1))) // 2,
            (ks[1] + ((ks[1] - 1) * (dilation[1] - 1))) // 2)),
        groups=int(groups),
        bias=None)

# A convenience wrapper to prevent the forward() method of SMASH from
# being annoyingly verbose. This version of BatchNorm2D simply
# slices its weights according to the size of the incoming tensor.
def test_forward_computes_forward_pass():
    """_EfficientConv2d.forward must match F.conv2d on random GPU data."""
    weight = torch.randn(4, 8, 3, 3).cuda()
    input = torch.randn(4, 8, 4, 4).cuda()
    # Reference result via the standard functional conv.
    reference = F.conv2d(
        input=Variable(input),
        weight=Parameter(weight),
        bias=None,
        stride=1,
        padding=1,
        dilation=1,
        groups=1,
    ).data
    # Candidate result via the memory-efficient implementation.
    efficient_op = _EfficientConv2d(stride=1, padding=1, dilation=1, groups=1)
    candidate = efficient_op.forward(weight, None, input)
    assert(almost_equal(reference, candidate))
def forward(self, x):
    """Return the deformed featured map.

    Predicts a 2-D offset per spatial location with a plain conv, then
    bilinearly samples the input at the offset positions
    (deformable-convolution style).
    """
    x_shape = x.size()
    # Offset-prediction conv over the raw input.
    offsets = F.conv2d(x, self.weight, self.bias, self.stride,
                       self.padding, self.dilation, self.groups)
    # offsets: (b*c, h, w, 2)
    offsets = self._to_bc_h_w_2(offsets, x_shape)
    # x: (b*c, h, w)
    x = self._to_bc_h_w(x, x_shape)
    # X_offset: (b*c, h, w) — sample x at base-grid + offsets.
    x_offset = th_batch_map_offsets(x, offsets, grid=self._get_grid(self, x))
    # x_offset: (b, h, w, c)
    x_offset = self._to_b_c_h_w(x_offset, x_shape)
    return x_offset
def test_conv2d_depthwise(self):
    """Custom depthwise conv must match F.conv2d in forward AND backward.

    BUG FIX: the original computed the reference gradients (gx_ref, gw_ref)
    but never compared them against the fast implementation's gradients.
    """
    n = 6
    x = Variable(torch.randn(1, n, 5, 5).double().cuda(), requires_grad=True)
    w = Variable(torch.randn(n, 1, 3, 3).double().cuda(), requires_grad=True)
    y_fast = P.conv2d_depthwise(x, w, padding=1)
    y_ref = F.conv2d(x, w, padding=1, groups=n)
    go = torch.randn(y_fast.size()).double().cuda()

    # Forward outputs agree.
    self.assertLess((y_fast - y_ref).data.abs().max(), 1e-9)

    x.requires_grad = True
    w.requires_grad = True
    y_fast.backward(go)
    gx_fast = x.grad.data.clone()
    gw_fast = w.grad.data.clone()
    x.grad.data.zero_()
    w.grad.data.zero_()
    y_ref.backward(go)
    gx_ref = x.grad.data.clone()
    gw_ref = w.grad.data.clone()

    # Backward gradients agree (previously computed but never asserted).
    self.assertLess((gx_fast - gx_ref).abs().max(), 1e-9)
    self.assertLess((gw_fast - gw_ref).abs().max(), 1e-9)

    self.assertTrue(gradcheck(partial(P.conv2d_depthwise, padding=1), (x, w,)))
def conv2d_depthwise(input, weight, bias=None, stride=1, padding=0, dilation=1):
    """Depthwise 2D convolution (one filter per input channel).

    Implements depthwise convolution as in MobileNets
    (https://arxiv.org/pdf/1704.04861v1.pdf). On CUDA tensors a dedicated
    kernel (Conv2dDepthwise) is used; on CPU it falls back to
    ``F.conv2d(input, weight, groups=input.size(1))``.
    """
    # Depthwise requires exactly one filter per input channel.
    assert input.size(1) == weight.size(0)
    if not input.is_cuda:
        channels = input.size(1)
        return F.conv2d(input, weight, bias, stride, padding, dilation, channels)
    out = Conv2dDepthwise(stride, padding, dilation)(input, weight)
    if bias is not None:
        # The custom kernel has no bias term; add it per output channel.
        out += bias.view(1, -1, 1, 1)
    return out
def pad_if_needed(input, padding, kind, k_h, k_w, s_h=1, s_w=1, dilation=1):
    """Pad a (N, C, H, W) tensor to emulate TensorFlow padding semantics.

    'VALID' returns the input unchanged. 'SAME' (for conv2d/pool2d) pads so
    the output spatial size is ceil(in / stride). Atrous convs are reduced to
    the conv2d case using the dilation-expanded effective kernel size.
    Raises NotImplementedError for unsupported combinations.
    """
    if padding == 'VALID':
        return input
    if padding == 'SAME' and kind in ('conv2d', 'pool2d'):
        in_h, in_w = input.size(2), input.size(3)
        # Target output size under TF 'SAME' rules.
        out_h = int(np.ceil(float(in_h) / float(s_h)))
        out_w = int(np.ceil(float(in_w) / float(s_w)))
        total_h = max((out_h - 1) * s_h + k_h - in_h, 0)
        total_w = max((out_w - 1) * s_w + k_w - in_w, 0)
        # TF puts the extra pixel (odd totals) on the bottom/right.
        top = total_h // 2
        left = total_w // 2
        return F.pad(input, (left, total_w - left, top, total_h - top))
    if kind in ('atrous_conv2d',):
        effective_h = k_h + (k_h - 1) * (dilation - 1)
        effective_w = k_w + (k_w - 1) * (dilation - 1)
        return pad_if_needed(input, padding, 'conv2d',
                             effective_h, effective_w, s_h, s_w, dilation=1)
    raise NotImplementedError
def conv(self, input, k_h, k_w, c_o, s_h, s_w, name, relu=True, padding=DEFAULT_PADDING, group=1, biased=True):
    """Caffe-style conv layer driven by the pretrained weight dictionary.

    Padding is applied up-front (TF semantics via pad_if_needed), so the
    conv itself always runs with padding=0. Weights/biases are looked up
    under '<name>/weights' and '<name>/biases'.
    """
    padded = pad_if_needed(input, padding, 'conv2d', k_h, k_w, s_h, s_w)
    bias_term = self.weights[name + '/biases'] if biased else None
    out = F.conv2d(padded,
                   self.weights[name + '/weights'],
                   bias=bias_term,
                   padding=0,
                   groups=group,
                   stride=(s_h, s_w))
    return F.relu(out) if relu else out
def atrous_conv(self, input, k_h, k_w, c_o, dilation, name, relu=True, padding=DEFAULT_PADDING, group=1, biased=True):
    """Dilated (atrous) conv layer backed by the pretrained weight dict.

    Grouped atrous convolution is not supported. Padding is pre-applied
    using the dilation-expanded kernel size, so the conv runs with padding=0.
    """
    if group != 1:
        raise NotImplementedError
    padded = pad_if_needed(input, padding, 'atrous_conv2d', k_h, k_w,
                           dilation=dilation)
    bias_term = self.weights[name + '/biases'] if biased else None
    out = F.conv2d(padded,
                   self.weights[name + '/weights'],
                   bias=bias_term,
                   padding=0,
                   dilation=dilation,
                   groups=group,
                   stride=1)
    return F.relu(out) if relu else out
def forward(self, input):
    """Conv2d whose effective weight is alpha*delta + beta*normalize(weight).

    The stored weight is re-parameterized on every call; delta is a fixed
    (non-learned) tensor wrapped in a Variable.
    """
    effective_weight = (self.alpha * Variable(self.delta)
                        + self.beta * normalize(self.weight))
    return F.conv2d(input, effective_weight, self.bias,
                    self.stride, self.padding, self.dilation)
def block(o, params, stats, base, mode, j):
    """One NCReLU -> reparameterized 3x3 conv -> batch-norm block.

    The conv weight is rebuilt each call as
    beta * row-normalized(weight) + alpha * delta, where delta is a fixed
    template looked up from `stats` by weight shape.
    NOTE(review): parameter `j` is unused in this body — confirm callers.
    """
    w = params[base + '.conv']
    alpha = params[base + '.alpha']
    beta = params[base + '.beta']
    # Fixed (non-learned) template keyed by the weight's shape.
    delta = Variable(stats[size2name(w.size())])
    # Normalize each output filter (flattened row) to unit norm, then mix.
    w = beta * F.normalize(w.view(w.size(0), -1)).view_as(w) + alpha * delta
    o = F.conv2d(ncrelu(o), w, stride=1, padding=1)
    o = batch_norm(o, params, stats, base + '.bn', mode)
    return o
def f(params, inputs, mode):
    """Small convnet for 28x28 inputs: two stride-2 convs then two FC layers.

    `params` maps 'conv0/conv1/linear2/linear3' weight/bias names to tensors.
    `mode` is accepted for interface compatibility but not used here.
    """
    out = inputs.view(inputs.size(0), 1, 28, 28)
    out = F.relu(F.conv2d(out, params['conv0.weight'], params['conv0.bias'], stride=2))
    out = F.relu(F.conv2d(out, params['conv1.weight'], params['conv1.bias'], stride=2))
    # Flatten to (N, features) for the fully-connected head.
    out = out.view(out.size(0), -1)
    out = F.relu(F.linear(out, params['linear2.weight'], params['linear2.bias']))
    out = F.linear(out, params['linear3.weight'], params['linear3.bias'])
    return out
def test_Conv2d_inconsistent_types(self): inputs = Variable(torch.randn(4, 1, 7, 7).float()) weights = Variable(torch.randn(1, 1, 3, 3).double()) # inconsistent types should raise an exception self.assertRaises(RuntimeError, lambda: nn.functional.conv2d(inputs, weights)) # but it should work with the same type nn.functional.conv2d(inputs.float(), weights.float())
def test_dirac_identity(self): batch, in_c, out_c, size, kernel_size = 8, 3, 4, 5, 3 # Test 1D input_var = Variable(torch.randn(batch, in_c, size)) filter_var = Variable(torch.zeros(out_c, in_c, kernel_size)) init.dirac(filter_var) output_var = F.conv1d(input_var, filter_var) input_tensor, output_tensor = input_var.data, output_var.data # Variables do not support nonzero self.assertEqual(input_tensor[:, :, 1:-1], output_tensor[:, :in_c, :]) # Assert in_c outputs are preserved assert torch.nonzero(output_tensor[:, in_c:, :]).numel() == 0 # Assert extra outputs are 0 # Test 2D input_var = Variable(torch.randn(batch, in_c, size, size)) filter_var = Variable(torch.zeros(out_c, in_c, kernel_size, kernel_size)) init.dirac(filter_var) output_var = F.conv2d(input_var, filter_var) input_tensor, output_tensor = input_var.data, output_var.data self.assertEqual(input_tensor[:, :, 1:-1, 1:-1], output_tensor[:, :in_c, :, :]) assert torch.nonzero(output_tensor[:, in_c:, :, :]).numel() == 0 # Test 3D input_var = Variable(torch.randn(batch, in_c, size, size, size)) filter_var = Variable(torch.zeros(out_c, in_c, kernel_size, kernel_size, kernel_size)) init.dirac(filter_var) output_var = F.conv3d(input_var, filter_var) input_tensor, output_tensor = input_var.data, output_var.data self.assertEqual(input_tensor[:, :, 1:-1, 1:-1, 1:-1], output_tensor[:, :in_c, :, :]) assert torch.nonzero(output_tensor[:, in_c:, :, :, :]).numel() == 0
def test_Conv2d_inconsistent_types_on_GPU_without_cudnn(self):
    """With cuDNN disabled, mixed-precision conv args must still raise."""
    float_input = Variable(torch.randn(4, 1, 7, 7).float().cuda())
    double_weight = Variable(torch.randn(1, 1, 3, 3).double().cuda())
    double_bias = Variable(torch.randn(1).double().cuda())
    # NOTE: flips the process-wide cuDNN switch; no restore afterwards.
    torch.backends.cudnn.enabled = False
    # inconsistent types should raise an exception
    self.assertRaises(RuntimeError,
                      lambda: nn.functional.conv2d(float_input, double_weight))
    self.assertRaises(RuntimeError,
                      lambda: nn.functional.conv2d(float_input, double_weight.float(), double_bias))
    # but it should work with the same type
    nn.functional.conv2d(float_input.float(), double_weight.float(), double_bias.float())
def test_Conv2d_inconsistent_types_on_GPU_with_cudnn(self):
    """With cuDNN enabled, mixed-precision conv args must still raise."""
    float_input = Variable(torch.randn(4, 1, 7, 7).float().cuda())
    double_weight = Variable(torch.randn(1, 1, 3, 3).double().cuda())
    double_bias = Variable(torch.randn(1).double().cuda())
    # NOTE: flips the process-wide cuDNN switch; no restore afterwards.
    torch.backends.cudnn.enabled = True
    # inconsistent types should raise an exception
    self.assertRaises(RuntimeError,
                      lambda: nn.functional.conv2d(float_input, double_weight))
    self.assertRaises(RuntimeError,
                      lambda: nn.functional.conv2d(float_input, double_weight.float(), double_bias))
    # but it should work with the same type
    nn.functional.conv2d(float_input.float(), double_weight.float(), double_bias.float())
def test_calculate_gain_linear(self): for fn in ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose2d', 'conv_transpose2d', 'conv_transpose3d']: gain = init.calculate_gain(fn) self.assertEqual(gain, 1)
def forward(self, input):
    """Conv2d followed by pixel-shuffle upsampling.

    A single Variable goes through conv + pixel_shuffle; a tuple/list of
    inputs is dispatched across devices via my_data_parallel.
    """
    if isinstance(input, Variable):
        conv_out = F.conv2d(input, self.weight, self.bias, self.stride,
                            self.padding, self.dilation, self.groups)
        return F.pixel_shuffle(conv_out, self.scale_factor)
    if isinstance(input, (tuple, list)):
        return my_data_parallel(self, input)
    raise RuntimeError('unknown input type')
def forward(self, input):
    """Plain conv2d with data-parallel dispatch for tuple/list inputs."""
    if isinstance(input, Variable):
        return F.conv2d(input, self.weight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)
    if isinstance(input, (tuple, list)):
        # Fan the sub-inputs out across devices.
        return my_data_parallel(self, input)
    raise RuntimeError('unknown input type')
def forward(self, input):
    """Unbiased, ungrouped conv2d followed by the layer's
    normalization/scale/bias transform."""
    conv_out = F.conv2d(input, self.weight, None, self.stride,
                        self.padding, self.dilation, 1)
    return self.norm_scale_bias(conv_out)
def forward(self, input):
    """Conv2d using weight-normalized filters (wn2d applied per call)."""
    normalized_weight = wn2d(self.weight)
    return F.conv2d(input, normalized_weight, self.bias, self.stride,
                    self.padding, self.dilation, self.groups)

# A convenience wrapper to prevent the forward() method of SMASH from
# being annoyingly verbose. This version of Conv2D simply takes a user-input
# dilation factor, and slices its input weight as requested.
def forward(self, input, n_out, dilation, ks = (3,3), groups=1):
    """Conv2d using a centered sub-slice of this layer's full weight bank.

    The stored weight is sliced to (n_out, in_ch // groups, ks[0], ks[1]) by
    taking the spatial center of the full kernel, then convolved with
    "same"-style padding computed from the effective (dilated) kernel size.
    NOTE(review): assumes ks is no larger than self.kernel_size and that
    (kernel_size - ks) is even — confirm against callers.
    """
    # Spatial slice bounds: center a ks-sized window inside the full kernel.
    return F.conv2d(
        input,
        weight=self.weight[:n_out,
                           :input.size(1) // groups,
                           (self.kernel_size[0] - ks[0]) // 2: ks[0] + (self.kernel_size[0] - ks[0]) // 2,
                           (self.kernel_size[1] - ks[1]) // 2: ks[1] + (self.kernel_size[1] - ks[1]) // 2].contiguous(),
        dilation=tuple(int(d) for d in dilation),
        # Padding = half the effective (dilation-expanded) kernel extent.
        padding=tuple(int(item) for item in (
            (ks[0] + ((ks[0] - 1) * (dilation[0] - 1))) // 2,
            (ks[1] + ((ks[1] - 1) * (dilation[1] - 1))) // 2)),
        groups=int(groups),
        bias=None)

# Simple class that dynamically inserts a nonlinearity between a batchnorm and a conv
def forward(self, x):
    """Dilated path: conv with a mask applied to the weights; otherwise run
    the wrapped conv unchanged.

    NOTE(review): `V` appears to wrap self.m as an autograd Variable and
    self.m to be a fixed mask — confirm against the class definition.
    """
    if self.dilation > 1:
        masked_weight = self.conv.weight * V(self.m)
        return F.conv2d(input=x, weight=masked_weight,
                        padding=self.dilation, bias=None)
    return self.conv(x)
def f(o, params, stats, mode):
    """Batch-norm -> conv -> relu -> two fully-connected layers.

    `params` holds learnable tensors, `stats` the BN running statistics;
    `mode` selects BN training vs. eval behavior.
    """
    out = F.batch_norm(o,
                       running_mean=stats['bn.running_mean'],
                       running_var=stats['bn.running_var'],
                       weight=params['bn.weight'],
                       bias=params['bn.bias'],
                       training=mode)
    out = F.relu(F.conv2d(out, params['conv1.weight'], params['conv1.bias']))
    # Flatten for the fully-connected head.
    out = out.view(out.size(0), -1)
    out = F.relu(F.linear(out, params['linear2.weight'], params['linear2.bias']))
    out = F.linear(out, params['linear3.weight'], params['linear3.bias'])
    return out
def test_backward_computes_backward_pass():
    """_EfficientConv2d must reproduce F.conv2d's output AND its input/weight
    gradients for an all-ones upstream gradient (GPU only)."""
    weight = torch.randn(4, 8, 3, 3).cuda()
    input = torch.randn(4, 8, 4, 4).cuda()
    input_var = Variable(input, requires_grad=True)
    weight_var = Parameter(weight)
    # Reference forward + backward through autograd.
    out_var = F.conv2d(
        input=input_var,
        weight=weight_var,
        bias=None,
        stride=1,
        padding=1,
        dilation=1,
        groups=1,
    )
    out_var.backward(gradient=input_var.data.clone().fill_(1))
    out = out_var.data
    input_grad = input_var.grad.data
    weight_grad = weight_var.grad.data
    # Candidate: explicit forward/backward of the efficient op.
    func = _EfficientConv2d(
        stride=1,
        padding=1,
        dilation=1,
        groups=1,
    )
    out_efficient = func.forward(weight, None, input)
    weight_grad_efficient, _, input_grad_efficient = func.backward(
        weight, None, input, input.clone().fill_(1))
    assert(almost_equal(out, out_efficient))
    assert(almost_equal(input_grad, input_grad_efficient))
    assert(almost_equal(weight_grad, weight_grad_efficient))
def test_contig_wrong_stride_cudnn(self):
    """Convs must accept a tensor whose dim-0 stride is 'wrong' but which is
    still contiguous because its batch size is 1."""
    # x has to have batch_size 1 to test contiguous checks
    x = torch.randn(1, 16, 5, 5).cuda()
    strides = list(x.stride())
    strides[0] = 20
    # change the stride in dimension 0. the tensor is still contiguous because size[0] is 1
    x.set_(x.storage(), 0, x.size(), strides)
    self.assertTrue(x.is_contiguous())
    # Both plain and transposed conv should run without error.
    F.conv_transpose2d(Variable(x), Variable(torch.randn(16, 1, 1, 1)).cuda())
    F.conv2d(Variable(x), Variable(torch.randn(1, 16, 1, 1)).cuda())
def mobilenet(depth, width, depthwise_function):
    """Build a functional MobileNet: returns (forward_fn, params dict).

    Each cfg entry is an output-channel count; a tuple (channels, 2) marks a
    stride-2 stage. Every block is a 3x3 depthwise conv (via the injected
    `depthwise_function`) followed by a 1x1 pointwise conv.
    NOTE(review): `depth` and `width` are unused in this body — confirm intent.
    """
    cfg = [64, (128, 2), 128, (256, 2), 256, (512, 2), 512, 512, 512, 512, 512, (1024, 2), 1024]
    cast = lambda x: x.cuda()
    ni = 32
    params = {'conv0': cast(kaiming_normal(torch.Tensor(ni, 3, 3, 3)))}
    for i, x in enumerate(cfg):
        no = x if isinstance(x, int) else x[0]
        # conv0: depthwise 3x3 (one filter per channel); conv1: pointwise 1x1.
        params['block%d.conv0' % i] = cast(kaiming_normal(torch.Tensor(ni, 1, 3, 3)))
        params['block%d.conv1' % i] = cast(kaiming_normal(torch.Tensor(no, ni, 1, 1)))
        ni = no
    params = {k: Variable(v, requires_grad=True) for k, v in params.items()}

    def f(input, params):
        # Stem: standard 3x3 conv, stride 2.
        o = F.conv2d(input, params['conv0'], padding=1, stride=2)
        o = F.relu(o, inplace=True)
        for i, x in enumerate(cfg):
            stride = 1 if isinstance(x, int) else x[1]
            o = depthwise_function(o, params['block%d.conv0' % i], stride=stride, padding=1)
            o = F.conv2d(o, params['block%d.conv1' % i])
            o = F.relu(o, inplace=True)
        return o

    return f, params
def fconv2d(x, w, stride, padding):
    """Depthwise conv2d: groups equals the input's channel count, so each
    filter sees exactly one input channel."""
    num_groups = x.size(1)
    return F.conv2d(x, w, stride=stride, padding=padding, groups=num_groups)
def define_model(params):
    """Build a functional bottleneck ResNet from a flat parameter dict.

    Network depth is inferred from the parameter names ('group<j>.block<i>...'),
    so the same code serves e.g. ResNet-50/101/152-style layouts.
    Returns the forward function f(input, params, pooling_classif=True).
    """
    def conv2d(input, params, base, stride=1, pad=0):
        # Thin wrapper: look up '<base>.weight'/'<base>.bias' and convolve.
        return F.conv2d(input, params[base + '.weight'], params[base + '.bias'], stride, pad)

    def group(input, params, base, stride, n):
        # One residual stage of n bottleneck blocks (1x1 -> 3x3 -> 1x1).
        o = input
        for i in range(0, n):
            b_base = ('%s.block%d.conv') % (base, i)
            x = o
            o = conv2d(x, params, b_base + '0')
            o = F.relu(o)
            # Only the first block of a stage downsamples.
            o = conv2d(o, params, b_base + '1', stride=i == 0 and stride or 1, pad=1)
            o = F.relu(o)
            o = conv2d(o, params, b_base + '2')
            if i == 0:
                # Projection shortcut to match channels/stride.
                o += conv2d(x, params, b_base + '_dim', stride=stride)
            else:
                o += x
            o = F.relu(o)
        return o

    # determine network size by parameters
    blocks = [sum([re.match('group%d.block\d+.conv0.weight' % j, k) is not None
                   for k in params.keys()]) for j in range(4)]

    def f(input, params, pooling_classif=True):
        # Stem: 7x7 stride-2 conv + 3x3 stride-2 max-pool.
        o = F.conv2d(input, params['conv0.weight'], params['conv0.bias'], 2, 3)
        o = F.relu(o)
        o = F.max_pool2d(o, 3, 2, 1)
        o_g0 = group(o, params, 'group0', 1, blocks[0])
        o_g1 = group(o_g0, params, 'group1', 2, blocks[1])
        o_g2 = group(o_g1, params, 'group2', 2, blocks[2])
        o_g3 = group(o_g2, params, 'group3', 2, blocks[3])
        if pooling_classif:
            # Global 7x7 average pool then linear classifier.
            o = F.avg_pool2d(o_g3, 7, 1, 0)
            o = o.view(o.size(0), -1)
            o = F.linear(o, params['fc.weight'], params['fc.bias'])
        return o

    return f
def forward(self, X, S1, S2, config):
    """Value Iteration Network forward pass.

    Runs config.k steps of value iteration (conv over [reward, value] maps,
    then channel-wise max), then gathers the Q-values at the state
    coordinates given by S1/S2 and classifies them into action logits.
    """
    h = self.h(X)
    r = self.r(h)
    q = self.q(r)
    # Value map: max over the action-channel dimension.
    v, _ = torch.max(q, dim=1, keepdim=True)
    for i in range(0, config.k - 1):
        # One value-iteration step: conv [r, v] with [q-weights, w] stacked.
        q = F.conv2d(torch.cat([r, v], 1),
                     torch.cat([self.q.weight, self.w], 1),
                     stride=1, padding=1)
        v, _ = torch.max(q, dim=1, keepdim=True)
    # Final iteration outside the loop (k steps total).
    q = F.conv2d(torch.cat([r, v], 1),
                 torch.cat([self.q.weight, self.w], 1),
                 stride=1, padding=1)
    # Gather Q-values at the (S1, S2) state coordinates.
    slice_s1 = S1.long().expand(config.imsize, 1, config.l_q, q.size(0))
    slice_s1 = slice_s1.permute(3, 2, 1, 0)
    q_out = q.gather(2, slice_s1).squeeze(2)
    slice_s2 = S2.long().expand(1, config.l_q, q.size(0))
    slice_s2 = slice_s2.permute(2, 1, 0)
    q_out = q_out.gather(2, slice_s2).squeeze(2)
    logits = self.fc(q_out)
    return logits, self.sm(logits)
def forward(self, input):
    """Standard conv2d whose backward gradient is sparsified (top-k) by
    the sparsify_grad wrapper; the forward value is unchanged."""
    dense_out = F.conv2d(input, self.weight, self.bias, self.stride,
                         self.padding, self.dilation, self.groups)
    return sparsify_grad(dense_out, self.k, self.simplified)
def forward(self, input):
    """Conv2d with binarized weights; the full-precision weights stay in
    self.weight and only the binarized copy is convolved."""
    binary_weight = binarize(self.weight)
    return F.conv2d(input, binary_weight, self.bias, self.stride,
                    self.padding, self.dilation, self.groups)
def forward(self, input, kernel): self.weight = Parameter(kernel.data) # print 'weight: ', self.weight.size() # print 'bias: ', self.bias.size() # print 'forward:', type(input.data), type(self.weight.data) # print 'forward: ', input.size(), self.weight.size() return F.conv2d(input, kernel, self.bias, self.stride, self.padding, self.dilation, self.groups)
def run_conv_double_back_test(self, kern, stride, padding, chan_in, chan_out, batch_size,
                              inp_size, dilation, no_weight, groups=1, use_cuda=False, use_bias=True):
    """Gradient-of-gradient (double backward) check for F.conv2d.

    Builds random input/weight/bias, wraps conv2d in a closure whose argument
    list depends on no_weight/use_bias, and runs gradgradcheck on it.
    """
    tensor = torch.Tensor(1)
    if use_cuda:
        tensor = tensor.cuda()
    x = Variable(tensor.new(batch_size, chan_in, inp_size, inp_size), requires_grad=True)
    x.data.normal_()
    weight = Variable(tensor.new(chan_out, chan_in // groups, kern, kern), requires_grad=True)
    weight.data.normal_()
    if use_bias:
        bias = Variable(tensor.new(chan_out), requires_grad=True)
        bias.data.normal_()
    else:
        bias = None

    def func(*inputs):
        # Unpack according to which tensors are differentiated arguments:
        # with no_weight the weight is captured from the enclosing scope.
        if no_weight:
            lweight = weight
            if use_bias:
                lx, lbias = inputs
            else:
                lx, = inputs
                lbias = None
        else:
            if use_bias:
                lx, lweight, lbias = inputs
            else:
                lx, lweight = inputs
                lbias = None
        # We disable cudnn during forward to avoid finite difference imprecision issues
        with use_cudnn(False):
            out = F.conv2d(lx, lweight, lbias, stride, padding, dilation, groups)
        return out

    if no_weight:
        inputs = (x, bias)
    else:
        inputs = (x, weight, bias)
    if not use_bias:
        inputs = inputs[:-1]
    # Probe output size, then check d(grad)/d(inputs) numerically.
    dummy_out = func(*inputs)
    grad_y = Variable(tensor.new(dummy_out.size()), requires_grad=True)
    grad_y.data.normal_()
    return gradgradcheck(func, inputs, (grad_y,))
def define_teacher(params_file):
    """ Defines student resnet
    Network size is determined from parameters, assuming pre-activation
    basic-block resnet (ResNet-18 or ResNet-34)

    Loads weights from a hickle file and returns (forward_fn, params); the
    forward function returns the logits plus the four intermediate
    group outputs (all detached), for use as distillation targets.
    """
    params_hkl = hkl.load(params_file)
    params = OrderedDict({k: Variable(torch.from_numpy(v).cuda()) for k, v in params_hkl.items()})
    # Count blocks per group from parameter names to infer depth.
    blocks = [sum([re.match('group%d.block\d+.conv0.weight' % j, k) is not None
                   for k in list(params.keys())]) for j in range(4)]

    def conv2d(input, params, base, stride=1, pad=0):
        return F.conv2d(input, params[base + '.weight'], params[base + '.bias'], stride, pad)

    def group(input, params, base, stride, n):
        # One stage of n basic blocks (two 3x3 convs + shortcut).
        o = input
        for i in range(0, n):
            b_base = ('%s.block%d.conv') % (base, i)
            x = o
            # Only the first block of a stage downsamples.
            o = conv2d(x, params, b_base + '0', pad=1, stride=i == 0 and stride or 1)
            o = F.relu(o, inplace=True)
            o = conv2d(o, params, b_base + '1', pad=1)
            if i == 0 and stride != 1:
                # Projection shortcut (bias-free 1x1 conv).
                o += F.conv2d(x, params[b_base + '_dim.weight'], stride=stride)
            else:
                o += x
            o = F.relu(o, inplace=True)
        return o

    def f(inputs, params, pr=''):
        # volatile: teacher runs inference-only, no autograd graph kept.
        inputs = Variable(inputs.data, volatile=True)
        o = conv2d(inputs, params, pr + 'conv0', 2, 3)
        o = F.relu(o, inplace=True)
        o = F.max_pool2d(o, 3, 2, 1)
        o_g0 = group(o, params, pr + 'group0', 1, blocks[0])
        o_g1 = group(o_g0, params, pr + 'group1', 2, blocks[1])
        o_g2 = group(o_g1, params, pr + 'group2', 2, blocks[2])
        o_g3 = group(o_g2, params, pr + 'group3', 2, blocks[3])
        o = F.avg_pool2d(o_g3, 7, 1, 0)
        o = o.view(o.size(0), -1)
        o = F.linear(o, params[pr + 'fc.weight'], params[pr + 'fc.bias'])
        # Re-wrap in fresh Variables so downstream code can't backprop into them.
        return Variable(o.data), [Variable(v.data) for v in [o_g0, o_g1, o_g2, o_g3]]

    return f, params
def run_conv_double_back_test(self, kern, stride, padding, chan_in, chan_out, batch_size,
                              inp_size, dilation, no_weight, groups=1, use_cuda=False, use_bias=True):
    """Gradient-of-gradient (double backward) check for F.conv2d
    (variant using the cudnn.flags context manager to disable cuDNN)."""
    tensor = torch.Tensor(1)
    if use_cuda:
        tensor = tensor.cuda()
    x = Variable(tensor.new(batch_size, chan_in, inp_size, inp_size), requires_grad=True)
    x.data.normal_()
    weight = Variable(tensor.new(chan_out, chan_in // groups, kern, kern), requires_grad=True)
    weight.data.normal_()
    if use_bias:
        bias = Variable(tensor.new(chan_out), requires_grad=True)
        bias.data.normal_()
    else:
        bias = None

    def func(*inputs):
        # Unpack according to which tensors are differentiated arguments:
        # with no_weight the weight is captured from the enclosing scope.
        if no_weight:
            lweight = weight
            if use_bias:
                lx, lbias = inputs
            else:
                lx, = inputs
                lbias = None
        else:
            if use_bias:
                lx, lweight, lbias = inputs
            else:
                lx, lweight = inputs
                lbias = None
        # We disable cudnn during forward to avoid finite difference imprecision issues
        with cudnn.flags(enabled=False):
            out = F.conv2d(lx, lweight, lbias, stride, padding, dilation, groups)
        return out

    if no_weight:
        inputs = (x, bias)
    else:
        inputs = (x, weight, bias)
    if not use_bias:
        inputs = inputs[:-1]
    # Probe output size, then check d(grad)/d(inputs) numerically.
    dummy_out = func(*inputs)
    grad_y = Variable(tensor.new(dummy_out.size()), requires_grad=True)
    grad_y.data.normal_()
    return gradgradcheck(func, inputs, (grad_y,))
def forward(self, x, init=False):
    """Weight-normalized conv2d (Salimans & Kingma style) with optional
    data-dependent initialization.

    With init=True: V is randomly (re)initialized, the conv output of the
    direction-normalized V is used to set g and b so the output has roughly
    zero mean and init_scale std per channel; the polyak averages are reset.
    With init=False: a normal forward pass using (possibly averaged) V, g, b.
    """
    if init is True:
        # out_channels, in_channels // groups, * kernel_size
        self.V.data.copy_(torch.randn(self.V.data.size()
                                      ).type_as(self.V.data) * 0.05)
        # Normalize each output filter of V to unit L2 norm.
        v_norm = self.V.data / self.V.data.view(self.out_channels, -1)\
            .norm(2, 1).view(self.out_channels, *(
                [1] * (len(self.kernel_size) + 1))).expand_as(self.V.data)
        x_init = F.conv2d(x, Variable(v_norm), None, self.stride,
                          self.padding, self.dilation, self.groups).data
        # Per-output-channel statistics of the pre-activation.
        t_x_init = x_init.transpose(0, 1).contiguous().view(
            self.out_channels, -1)
        m_init, v_init = t_x_init.mean(1).squeeze(
            1), t_x_init.var(1).squeeze(1)
        # out_features — scale so the output std is init_scale.
        scale_init = self.init_scale / \
            torch.sqrt(v_init + 1e-10)
        self.g.data.copy_(scale_init)
        self.b.data.copy_(-m_init * scale_init)
        # Broadcastable (1, C, 1, ...) views for normalizing the output.
        scale_init_shape = scale_init.view(
            1, self.out_channels, *([1] * (len(x_init.size()) - 2)))
        m_init_shape = m_init.view(
            1, self.out_channels, *([1] * (len(x_init.size()) - 2)))
        x_init = scale_init_shape.expand_as(
            x_init) * (x_init - m_init_shape.expand_as(x_init))
        # Reset the polyak running averages to the fresh values.
        self.V_avg.copy_(self.V.data)
        self.g_avg.copy_(self.g.data)
        self.b_avg.copy_(self.b.data)
        return Variable(x_init)
    else:
        # Use raw or polyak-averaged parameters depending on training mode.
        v, g, b = get_vars_maybe_avg(
            self, ['V', 'g', 'b'], self.training,
            polyak_decay=self.polyak_decay)
        # w = g * V / ||V|| per output channel.
        scalar = torch.norm(v.view(self.out_channels, -1), 2, 1)
        if len(scalar.size()) == 2:
            scalar = g / scalar.squeeze(1)
        else:
            scalar = g / scalar
        w = scalar.view(self.out_channels, *
                        ([1] * (len(v.size()) - 1))).expand_as(v) * v
        x = F.conv2d(x, w, b, self.stride,
                     self.padding, self.dilation, self.groups)
        return x