The following code examples, extracted from open-source Python projects, illustrate how to use chainer.cuda.cudnn_enabled.
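Every example below follows the same guard pattern: cuda.cudnn_enabled is a module-level boolean that reports whether cuDNN support is available, and it is combined with a per-function use_cudnn flag (plus, where needed, cuDNN version and dtype checks) to choose between a cuDNN-backed GPU path and a plain CuPy/NumPy fallback. The sketch below only illustrates that pattern; the helper name relu_like and its body are hypothetical and not part of Chainer.

import numpy

from chainer import cuda


def relu_like(x, use_cudnn=True):
    # Hypothetical helper, shown only to demonstrate the guard pattern
    # used in the examples below.
    xp = cuda.get_array_module(x)  # numpy for CPU arrays, cupy for GPU arrays
    if xp is not numpy and cuda.cudnn_enabled and use_cudnn:
        # GPU path: taken only when the array lives on the device, cuDNN is
        # available and the caller allows it. The real examples call into
        # cudnn/libcudnn here.
        return cuda.cupy.maximum(x, 0)
    # CPU or fallback path: ordinary array arithmetic.
    return xp.maximum(x, 0)

In the real functions the cuDNN branch often also checks _cudnn_version and the input dtype, because float16 inputs are only accepted from cuDNN v3 (_cudnn_version >= 3000) onward.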
def forward(self, xs):
    x = xs[0]
    xp = cuda.get_array_module(x)
    if (xp != numpy and cuda.cudnn_enabled and self.use_cudnn and
            _cudnn_version >= 3000):
        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        handle = cudnn.get_handle()
        x_cube = x.reshape(x.shape[:2] + (-1, 1))
        desc = cudnn.create_tensor_descriptor(x_cube)
        self.y = xp.empty_like(x)
        libcudnn.softmaxForward(
            handle, _algorithm, _mode, one.data, desc.value,
            x_cube.data.ptr, zero.data, desc.value, self.y.data.ptr)
        return self.y,
    else:
        log_z = logsumexp(x)
        self.y = x - log_z
        return self.y,

def forward(self, x):
    xp = cuda.get_array_module(*x)
    if (xp != numpy and cuda.cudnn_enabled and self.use_cudnn and
            (_cudnn_version >= 3000 or x[0].dtype != numpy.float16)):
        oz_dtype = 'd' if x[0].dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        handle = cudnn.get_handle()
        x_cube = x[0].reshape(x[0].shape[:2] + (-1, 1))
        desc = cudnn.create_tensor_descriptor(x_cube)
        self.y = xp.empty_like(x[0])
        libcudnn.softmaxForward(
            handle, _algorithm, _mode, one.data, desc.value,
            x_cube.data.ptr, zero.data, desc.value, self.y.data.ptr)
    else:
        self.y = x[0] - x[0].max(axis=1, keepdims=True)
        xp.exp(self.y, out=self.y)
        self.y /= self.y.sum(axis=1, keepdims=True)
    return self.y,

def backward(self, x, gy):
    xp = cuda.get_array_module(*x)
    if (xp != numpy and cuda.cudnn_enabled and self.use_cudnn and
            (_cudnn_version >= 3000 or x[0].dtype != numpy.float16)):
        oz_dtype = 'd' if x[0].dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        handle = cudnn.get_handle()
        gx = xp.empty_like(x[0])
        gx_cube = gx.reshape(gx.shape[:2] + (-1, 1))
        desc = cudnn.create_tensor_descriptor(gx_cube)
        libcudnn.softmaxBackward(
            handle, _algorithm, _mode, one.data, desc.value,
            self.y.data.ptr, desc.value, gy[0].data.ptr, zero.data,
            desc.value, gx.data.ptr)
    else:
        gx = self.y * gy[0]
        sumdx = gx.sum(axis=1, keepdims=True)
        gx -= self.y * sumdx
    return gx,

def softmax_log(x, use_cudnn):
    xp = cuda.get_array_module(x)
    if (xp != numpy and cuda.cudnn_enabled and use_cudnn and
            _cudnn_version >= 3000):
        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        handle = cudnn.get_handle()
        x_cube = x.reshape(x.shape[:2] + (-1, 1))
        desc = cudnn.create_tensor_descriptor(x_cube)
        y = xp.empty_like(x)
        libcudnn.softmaxForward(
            handle, _algorithm, _mode, one.data, desc.value,
            x_cube.data.ptr, zero.data, desc.value, y.data.ptr)
        return y
    else:
        log_z = logsumexp(xp, x)
        return x - log_z

def forward_gpu(self, x):
    if (cuda.cudnn_enabled and self.use_cudnn and
            pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
        return super(AveragePooling2D, self).forward_gpu(x)

    n, c, h, w = x[0].shape
    y_h = conv.get_conv_outsize(h, self.kh, self.sy, self.ph)
    y_w = conv.get_conv_outsize(w, self.kw, self.sx, self.pw)
    y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype)
    coeff = 1. / (self.kh * self.kw)
    kern = cuda.elementwise(
        'raw T in, int32 h, int32 w,'
        'int32 out_h, int32 out_w, int32 kh, int32 kw,'
        'int32 sy, int32 sx, int32 ph, int32 pw, T coeff',
        'T out', '''
        int c0 = i / (out_h * out_w);
        int out_y = i / out_w % out_h;
        int out_x = i % out_w;
        int in_y_0 = max(0, out_y * sy - ph);
        int in_y_1 = min(h, out_y * sy + kh - ph);
        int in_x_0 = max(0, out_x * sx - pw);
        int in_x_1 = min(w, out_x * sx + kw - pw);

        T val = 0;
        for (int y = in_y_0; y < in_y_1; ++y) {
          int offset_y = w * (y + h * c0);
          for (int x = in_x_0; x < in_x_1; ++x) {
            val = val + in[x + offset_y];
          }
        }
        out = val * coeff;
        ''', 'avg_pool_fwd')
    kern(x[0].reduced_view(), h, w, y_h, y_w, self.kh, self.kw,
         self.sy, self.sx, self.ph, self.pw, coeff, y)
    return y,

def backward_gpu(self, x, gy):
    if (cuda.cudnn_enabled and self.use_cudnn and
            pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
        return super(AveragePooling2D, self).backward_gpu(x, gy)

    n, c, h, w = x[0].shape
    y_h, y_w = gy[0].shape[2:]
    gx = cuda.cupy.empty_like(x[0])
    coeff = 1. / (self.kh * self.kw)
    cuda.elementwise(
        'raw T gy, int32 h, int32 w,'
        'int32 out_h, int32 out_w, int32 kh, int32 kw,'
        'int32 sy, int32 sx, int32 ph, int32 pw, T coeff',
        'T gx', '''
        int c0 = i / (h * w);
        int y = i / w % h + ph;
        int x = i % w + pw;
        int out_y_0 = max(0, (y - kh + sy) / sy);
        int out_y_1 = min(out_h, (y + sy) / sy);
        int out_x_0 = max(0, (x - kw + sx) / sx);
        int out_x_1 = min(out_w, (x + sx) / sx);
        int hc0 = out_h * c0;

        T val = 0;
        for (int out_y = out_y_0; out_y < out_y_1; ++out_y) {
          for (int out_x = out_x_0; out_x < out_x_1; ++out_x) {
            val = val + gy[out_x + out_w * (out_y + hc0)];
          }
        }
        gx = val * coeff;
        ''', 'avg_pool_bwd')(gy[0].reduced_view(), h, w, y_h, y_w,
                             self.kh, self.kw, self.sy, self.sx,
                             self.ph, self.pw, coeff, gx)
    return gx,

def backward_gpu(self, x, gy):
    if (cuda.cudnn_enabled and self.use_cudnn and
            pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
        return super(MaxPooling2D, self).backward_gpu(x, gy)

    n, c, h, w = x[0].shape
    y_h, y_w = gy[0].shape[2:]
    gx = cuda.cupy.empty_like(x[0])
    cuda.elementwise(
        'raw T gy, raw S indexes, int32 h, int32 w,'
        'int32 out_h, int32 out_w, int32 kh, int32 kw,'
        'int32 sy, int32 sx, int32 ph, int32 pw',
        'T gx', '''
        int c0 = i / (h * w);
        int y = i / w % h + ph;
        int x = i % w + pw;
        int out_y_0 = max(0, (y - kh + sy) / sy);
        int out_y_1 = min(out_h, (y + sy) / sy);
        int out_x_0 = max(0, (x - kw + sx) / sx);
        int out_x_1 = min(out_w, (x + sx) / sx);

        T val = 0;
        for (int out_y = out_y_0; out_y < out_y_1; ++out_y) {
          int ky = y - out_y * sy;
          for (int out_x = out_x_0; out_x < out_x_1; ++out_x) {
            int kx = x - out_x * sx;
            int offset = out_x + out_w * (out_y + out_h * c0);
            if (indexes[offset] == kx + kw * ky) {
              val = val + gy[offset];
            }
          }
        }
        gx = val;
        ''', 'max_pool_bwd')(gy[0].reduced_view(),
                             self.indexes.reduced_view(),
                             h, w, y_h, y_w, self.kh, self.kw,
                             self.sy, self.sx, self.ph, self.pw, gx)
    return gx,

def forward_gpu(self, x):
    if (cuda.cudnn_enabled and self.use_cudnn and
            (_cudnn_version >= 3000 or x[0].dtype != numpy.float16)):
        y = cudnn.activation_forward(x[0], _mode)
        self.y = y
    else:
        y = cuda.cupy.maximum(x[0], 0)
    return y,

def forward_gpu(self, inputs):
    x = inputs[0]
    if (cuda.cudnn_enabled and self.use_cudnn and
            (_cudnn_version >= 3000 or x.dtype != numpy.float16)):
        self.y = cuda.cupy.cudnn.activation_forward(x, _mode)
    else:
        self.y = cuda.elementwise(
            'T x', 'T y', 'y = 1 / (1 + exp(-x))',
            'sigmoid_fwd')(x)
    return self.y,

def backward_gpu(self, inputs, grads):
    x = inputs[0]
    gy = grads[0]
    if (cuda.cudnn_enabled and self.use_cudnn and
            (_cudnn_version >= 3000 or x.dtype != numpy.float16)):
        gx = cuda.cupy.cudnn.activation_backward(x, self.y, gy, _mode)
    else:
        gx = cuda.elementwise(
            'T y, T gy', 'T gx', 'gx = gy * y * (1 - y)',
            'sigmoid_bwd')(self.y, gy)
    return gx,

def forward_gpu(self, x):
    if (cuda.cudnn_enabled and self.use_cudnn and
            (_cudnn_version >= 3000 or x[0].dtype != numpy.float16)):
        self.y = cudnn.activation_forward(x[0], _mode)
    else:
        self.y = cuda.cupy.empty_like(x[0])
        cuda.cupy.tanh(x[0], out=self.y)
    return self.y,

def backward_gpu(self, x, gy):
    if (cuda.cudnn_enabled and self.use_cudnn and
            (_cudnn_version >= 3000 or x[0].dtype != numpy.float16)):
        gx = cudnn.activation_backward(x[0], self.y, gy[0], _mode)
    else:
        gx = cuda.elementwise(
            'T y, T gy', 'T gx', 'gx = gy * (1 - y * y)',
            'tanh_bwd')(self.y, gy[0])
    return gx,

def __init__(self, eps=2e-5, mean=None, var=None, train=False,
             decay=0.9, use_cudnn=True):
    self.running_mean = mean
    self.running_var = var
    self.train = train
    self.eps = eps
    if cuda.cudnn_enabled and use_cudnn:
        # The condition must match the message: only values strictly below
        # 1e-5 are rejected.
        if eps < 1e-5:
            msg = 'cuDNN does not allow an eps value less than 1e-5.'
            raise RuntimeError(msg)
    self.use_cudnn = use_cudnn
    self.mean_cache = None
    self.decay = decay

def forward_gpu(self, x):
    if (cuda.cudnn_enabled and self.use_cudnn and
            pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
        return super(MaxPooling2D, self).forward_gpu(x)

    n, c, h, w = x[0].shape
    y_h = conv.get_conv_outsize(
        h, self.kh, self.sy, self.ph, self.cover_all)
    y_w = conv.get_conv_outsize(
        w, self.kw, self.sx, self.pw, self.cover_all)
    y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype)
    self.indexes = cuda.cupy.empty((n, c, y_h, y_w), dtype=numpy.int32)

    cuda.elementwise(
        'raw T in, int32 h, int32 w, int32 out_h, int32 out_w,'
        'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw',
        'T out, S indexes', '''
        int c0 = i / (out_h * out_w);
        int out_y = i / out_w % out_h;
        int out_x = i % out_w;
        int in_y_0 = max(0, out_y * sy - ph);
        int in_y_1 = min(h, out_y * sy + kh - ph);
        int in_x_0 = max(0, out_x * sx - pw);
        int in_x_1 = min(w, out_x * sx + kw - pw);

        T maxval = in[in_x_0 + w * (in_y_0 + h * c0)];
        int argmax_y = in_y_0;
        int argmax_x = in_x_0;
        for (int y = in_y_0; y < in_y_1; ++y) {
          int offset_y = w * (y + h * c0);
          for (int x = in_x_0; x < in_x_1; ++x) {
            float v = in[x + offset_y];
            if (maxval < v) {
              maxval = v;
              argmax_y = y;
              argmax_x = x;
            }
          }
        }
        out = maxval;

        int argmax_ky = argmax_y + ph - out_y * sy;
        int argmax_kx = argmax_x + pw - out_x * sx;
        indexes = argmax_kx + kw * argmax_ky;
        ''', 'max_pool_fwd')(x[0].reduced_view(),
                             h, w, y_h, y_w, self.kh, self.kw,
                             self.sy, self.sx, self.ph, self.pw,
                             y, self.indexes)
    return y,