The following 50 code examples, extracted from open-source Python projects, illustrate how to use chainer.cuda.elementwise().
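Before the examples, here is a minimal, self-contained sketch of the call pattern they all share (assuming a CUDA-capable GPU with CuPy installed and the pre-v2 chainer.cuda namespace used throughout; the kernel name and toy data below are illustrative, not taken from any of the projects). cuda.elementwise(in_params, out_params, operation, name) returns a cached cupy.ElementwiseKernel; inside the operation string, i is the flat index of the current element, and parameters declared raw are passed as whole arrays that must be indexed manually, which is the pattern many of the examples below rely on.

import numpy
from chainer import cuda

# Illustrative only: double each element on the GPU.
# 'T' is a type placeholder resolved from the argument dtypes;
# the compiled kernel is cached under the name 'double_it'.
x = cuda.cupy.arange(8, dtype=numpy.float32)
y = cuda.elementwise(
    'T x',            # input parameter list
    'T y',            # output parameter list
    'y = 2 * x;',     # per-element CUDA C body
    'double_it')(x)   # kernel name, then the actual call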
def forward_gpu(self, inputs):
    n = len(inputs)
    ptrs = cuda.cupy.asarray([x.data.ptr for x in inputs],
                             dtype=cuda.cupy.int64)
    ws = cuda.cupy.asarray(self.weights, dtype=cuda.cupy.float32)
    y = cuda.elementwise(
        'T x0, int64 xs, raw W ws, int32 n_xs',
        'T y',
        'float** xs_ = (float**) xs;'
        'y = 0;'
        'for (size_t j = 0; j < n_xs; ++j) {'
        '  y += xs_[j][i] * ws[j];'
        '}',
        'weighted_sum_arrays'.format(n))(
            inputs[0], ptrs.data.ptr, ws, len(ptrs))
    return y,
def forward(self, inputs):
    x, t = inputs
    if chainer.is_debug():
        if not ((0 <= t).all() and (t < x.shape[1]).all()):
            msg = 'Each label `t` needs to satisfy `0 <= t < x.shape[1]`'
            raise ValueError(msg)

    xp = cuda.get_array_module(x)
    if xp is numpy:
        # This code is equivalent to `t.choose(x.T)`, but `numpy.choose`
        # does not work when `x.shape[1] > 32`.
        return x[six.moves.range(t.size), t],
    else:
        y = cuda.elementwise(
            'S t, raw T x',
            'T y',
            'int ind[] = {i, t}; y = x[ind];',
            'getitem_fwd'
        )(t, x)
        return y,
def forward(self, inputs):
    c_prev, x = inputs
    a, i, f, o = _extract_gates(x)

    if isinstance(x, numpy.ndarray):
        self.a = numpy.tanh(a)
        self.i = _sigmoid(i)
        self.f = _sigmoid(f)
        self.o = _sigmoid(o)

        self.c = self.a * self.i + self.f * c_prev
        h = self.o * numpy.tanh(self.c)
    else:
        self.c, h = cuda.elementwise(
            'T c_prev, T a, T i_, T f, T o', 'T c, T h',
            '''
                COMMON_ROUTINE;
                c = aa * ai + af * c_prev;
                h = ao * tanh(c);
            ''',
            'lstm_fwd', preamble=_preamble)(c_prev, a, i, f, o)

    return self.c, h
def _cu_conv_sum(y, x, n):
    # Convolutional sum
    # TODO(beam2d): Use scan computation
    rdim = x.size // (x.shape[0] * x.shape[1])
    cuda.elementwise(
        'raw T x, int32 rdim, int32 N, int32 n_', 'raw T y',
        '''
            int half_n = n_ / 2;
            int offset = i / rdim * N * rdim + i % rdim;

            float sum_part = 0;
            for (int j = 0; j < N + half_n; ++j) {
                if (j < N) {
                    sum_part += x[offset + j * rdim];
                }
                if (j >= n_) {
                    sum_part -= x[offset + (j - n_) * rdim];
                }
                if (j >= half_n) {
                    y[offset + (j - half_n) * rdim] = sum_part;
                }
            }
        ''', 'lrn_conv_sum')(x, rdim, x.shape[1], n, y,
                             size=x.shape[0] * rdim)
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    coeff = gloss * self._coeff
    gx = cuda.elementwise(
        'T y, S t, raw T coeff, S n_channel, S n_unit',
        'T gx',
        '''
            const int c = (i / n_unit % n_channel);
            gx = ((t == -1) || (c != t)) ? 0 : (coeff[0] / max(y, 1e-5));
        ''',
        'softmax_crossent_bwd')(
            self.y, cupy.expand_dims(t, 1), -coeff, x.shape[1], n_unit)
    return gx, None
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = softmax_log(x, self.use_cudnn)
        cupy.exp(y, out=y)
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    coeff = gloss * self._coeff
    gx = cuda.elementwise(
        'T y, S t, raw T coeff, S n_channel, S n_unit',
        'T gx',
        '''
            const int c = (i / n_unit % n_channel);
            gx = (t == -1) ? 0 : (coeff[0] * (y - (c == t)));
        ''',
        'softmax_crossent_bwd')(
            y, cupy.expand_dims(t, 1), coeff, x.shape[1], n_unit)
    return gx, None
def sample_gpu(self, shape):
    ps = cuda.cupy.random.uniform(size=shape, dtype=numpy.float32)
    vs = cuda.elementwise(
        'T ps, raw T threshold, raw S values, int32 b',
        'int32 vs',
        '''
            T pb = ps * b;
            int index = __float2int_rd(pb);
            // fill_uniform sometimes returns 1.0, so we need to check index
            if (index >= b) {
                index = 0;
            }
            int lr = threshold[index] < pb - index;
            vs = values[index * 2 + lr];
        ''',
        'walker_alias_sample'
    )(ps, self.threshold, self.values, len(self.threshold))
    return vs
def forward_gpu(self, inputs):
    x, gamma, beta = inputs
    mean = x.mean(axis=(0, 1), keepdims=True)
    var = x.var(axis=(0, 1), keepdims=True) + self.eps
    normalize = cuda.elementwise(
        'T x, T var, T mean, T gamma, T beta',
        'T std, T x_hat, T y',
        'std = sqrtf(var);'
        'x_hat = (x - mean) / std;'
        'y = gamma * x_hat + beta;',
        'normalize')
    self.std, self.x_hat, y = normalize(x, var, mean, gamma, beta)
    return y,
def __call__(self, opt):
    if cuda.available:
        kernel = cuda.elementwise(
            'T low, T high',
            'T p',
            'p = (p < low) ? low : (p > high) ? high : p',
            'weight_clip')

    for link in opt.target.links():
        # only apply to binary layers
        if getattr(link, 'cname', False):
            for param in link.params():
                p = param.data
                with cuda.get_device(p) as dev:
                    if int(dev) == -1:
                        # clip in place on CPU (out= keeps the update visible)
                        numpy.clip(p, self.low, self.high, out=p)
                    else:
                        kernel(self.low, self.high, p)
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = x
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    coeff = gloss * self._coeff
    gx = cuda.elementwise(
        'T y, S t, raw T coeff, S n_channel, S n_unit',
        'T gx',
        '''
            const int c = (i / n_unit % n_channel);
            gx = (t == -1 || c != t) ? 0 : (coeff[0] * -1.0 / y);
        ''',
        'crossent_bwd')(
            y, cupy.expand_dims(t, 1), coeff, x.shape[1], n_unit)
    return gx, None
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t, w = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = softmax_log(x, self.use_cudnn)
        cupy.exp(y, out=y)
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    coeff = gloss * self._coeff * w
    gx = cuda.elementwise(
        'T y, S t, raw T coeff, S n_channel, S n_unit',
        'T gx',
        '''
            const int c = (i / n_unit % n_channel);
            gx = (t == -1) ? 0 : (coeff[0] * (y - (c == t)));
        ''',
        'softmax_crossent_bwd')(
            y, cupy.expand_dims(t, 1), coeff, x.shape[1], n_unit)
    return gx, None, None
def __call__(self, opt):
    if cuda.available:
        kernel = cuda.elementwise(
            'T low, T high',
            'T p',
            'p = (p < low) ? low : (p > high) ? high : p',
            'weight_clip')

    for param in opt.target.params():
        p = param.data
        with cuda.get_device(p) as dev:
            if int(dev) == -1:
                # clip in place on CPU (out= keeps the update visible)
                numpy.clip(p, self.low, self.high, out=p)
            else:
                kernel(self.low, self.high, p)
def forward_gpu(self, x):
    y = cuda.elementwise(
        'T x', 'T y',
        'y = x >= 0 ? 1 : -1',
        'bst_fwd')(
            x[0])
    return y,
def backward_gpu(self, x, gy):
    gx = cuda.elementwise(
        'T x, T gy', 'T gx',
        'gx = abs(x) > 1 ? 0 : gy',
        'bst_bwd')(
            x[0], gy[0])
    return gx,
def _kern():
    return cuda.elementwise(
        'T x', 'T y',
        'y = x >= 0 ? 1 : -1',
        'binarize')
def update_core_gpu(self, param):
    grad = param.grad
    if grad is None:
        return
    cuda.elementwise(
        'T grad, T lr, T alpha, T eps',
        'T param, T ms',
        '''ms = alpha * ms + (1 - alpha) * grad * grad;
           param -= lr * grad / sqrt(ms + eps);''',
        'rmsprop')(grad, self.hyperparam.lr, self.hyperparam.alpha,
                   self.hyperparam.eps, param.data, self.state['ms'])
def __call__(self, opt):
    if cuda.available:
        kernel = cuda.elementwise(
            'T p, T decay', 'T g', 'g += decay * p', 'weight_decay')

    rate = self.rate
    for name, param in opt.target.namedparams():
        if name == 'b' or name.endswith('/b'):
            continue
        p, g = param.data, param.grad
        with cuda.get_device(p) as dev:
            if int(dev) == -1:
                g += rate * p
            else:
                kernel(p, rate, g)
def forward_gpu(self, inputs):
    n = len(inputs)
    ptrs = cuda.cupy.asarray([x.data.ptr for x in inputs],
                             dtype=cuda.cupy.int64)
    y = cuda.elementwise(
        'T x0, int64 xs, int32 n_xs',
        'T y',
        'float** xs_ = (float**) xs;'
        'y = 0;'
        'for (size_t j = 0; j < n_xs; ++j) {'
        '  y += xs_[j][i];'
        '}',
        'sum_arrays'.format(n))(inputs[0], ptrs.data.ptr, len(ptrs))
    return y,
def backward(self, inputs, grad_outputs):
    x, gamma, beta = inputs[:3]
    gy = grad_outputs[0]
    head_ndim = gamma.ndim + 1
    expander = (None, Ellipsis) + (None,) * (x.ndim - head_ndim)
    m = gamma.dtype.type(x.size // gamma.size)
    axis = (2, 3)
    gamma_beta_axis = (0, 2, 3)
    mean_var_expander = (Ellipsis, None, None)

    xp = cuda.get_array_module(x)

    gbeta = gy.sum(axis=gamma_beta_axis)
    ggamma = (gy * self.x_hat).sum(axis=gamma_beta_axis)

    if xp is numpy:
        gx = (gamma / self.std)[mean_var_expander] * (
            gy - (self.x_hat * ggamma[mean_var_expander] +
                  gbeta[mean_var_expander]) / m)
    else:
        inv_m = numpy.float32(1) / m
        gx = cuda.elementwise(
            'T gy, T x_hat, T gamma, T std, T ggamma, T gbeta, T inv_m',
            'T gx',
            'gx = (gamma / std) * (gy - (x_hat * ggamma + gbeta) * inv_m)',
            'bn_bwd')(gy, self.x_hat, gamma[expander],
                      self.std[mean_var_expander],
                      ggamma[mean_var_expander],
                      gbeta[mean_var_expander], inv_m)
    return gx, ggamma, gbeta
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = log_softmax._log_softmax(x, self.use_cudnn)
        cupy.exp(y, out=y)
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    coeff = gloss * self._coeff
    if self.class_weight is None:
        gx = cuda.elementwise(
            'T y, S t, raw T coeff, S n_channel, S n_unit',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = (t == -1) ? 0 : (coeff[0] * (y - (c == t)));
            ''',
            'softmax_crossent_bwd')(
                y, cupy.expand_dims(t, 1), coeff, x.shape[1], n_unit)
    else:
        gx = cuda.elementwise(
            'T y, raw T w, S t, raw T coeff, S n_channel, S n_unit',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = t == -1 ? 0 : coeff[0] * (y - (c == t)) * w[t];
            ''',
            'softmax_crossent_bwd')(
                y, self.class_weight, cupy.expand_dims(t, 1), coeff,
                x.shape[1], n_unit)
    return gx, None
def update_one_gpu(self, param, state):
    cuda.elementwise(
        'T grad, T lr, T alpha, T eps',
        'T param, T ms',
        '''ms = alpha * ms + (1 - alpha) * grad * grad;
           param -= lr * grad / sqrt(ms + eps);''',
        'rmsprop')(param.grad, self.lr, self.alpha, self.eps,
                   param.data, state['ms'])
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = log_softmax._log_softmax(x)
        cupy.exp(y, out=y)
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    if self.reduce == 'mean':
        coeff = gloss * self._coeff
    else:
        coeff = gloss[:, None, ...]

    if self.class_weight is None:
        gx = cuda.elementwise(
            'T y, S t, T coeff, S n_channel, S n_unit, S ignore_label',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = t == ignore_label ? 0 : coeff * (y - (c == t));
            ''',
            'softmax_crossent_bwd')(
                y, cupy.expand_dims(t, 1), coeff, x.shape[1],
                n_unit, self.ignore_label)
    else:
        gx = cuda.elementwise(
            'T y, raw T w, S t, T coeff, S n_channel, S n_unit, '
            'S ignore_label',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = t == ignore_label ? 0 : coeff * (y - (c == t)) * w[t];
            ''',
            'softmax_crossent_weight_bwd')(
                y, self.class_weight, cupy.expand_dims(t, 1), coeff,
                x.shape[1], n_unit, self.ignore_label)

    return gx, None
def backward_gpu(self, inputs, grad_outputs):
    x, t, W = inputs
    gloss, = grad_outputs

    n_in = x.shape[1]
    gx = cuda.cupy.zeros_like(x)
    gW = cuda.cupy.zeros_like(W)
    cuda.elementwise(
        '''T wxy, raw T x, raw T w, raw int32 ts, raw int32 paths,
           raw T codes, raw int32 begins, raw T gloss, int32 c,
           int32 max_length''',
        'raw T gx, raw T gw',
        '''
            int ind = i / max_length;
            int offset = i - ind * max_length;
            int t = ts[ind];

            int begin = begins[t];
            int length = begins[t + 1] - begins[t];

            if (offset < length) {
                int p = begin + offset;
                int node = paths[p];
                T code = codes[p];

                T g = -gloss[0] * code / (1.0 + exp(wxy));
                for (int j = 0; j < c; ++j) {
                    int w_ind[] = {node, j};
                    int x_ind[] = {ind, j};
                    atomicAdd(&gx[x_ind], g * w[w_ind]);
                    atomicAdd(&gw[w_ind], g * x[x_ind]);
                }
            }
        ''', 'binary_hierarchical_softmax_bwd'
    )(self.wxy, x, W, t, self.paths, self.codes, self.begins, gloss,
      n_in, self.max_length, gx, gW)
    return gx, None, gW
def update_one_gpu(self, param, state):
    cuda.elementwise(
        'T grad, T lr, T alpha, T eps',
        'T param, T ms',
        '''ms = alpha * ms + (1 - alpha) * grad * grad;
           param -= lr * grad / (sqrt(ms) + eps);''',
        'rmsprop')(param.grad, self.lr, self.alpha, self.eps,
                   param.data, state['ms'])
def update_one_gpu(self, param, state):
    cuda.elementwise(
        'T grad, T lr, T alpha, T momentum, T eps',
        'T param, T avg_n, T avg_g, T delta',
        '''avg_n = alpha * avg_n + (1 - alpha) * grad * grad;
           avg_g = alpha * avg_g + (1 - alpha) * grad;
           delta = delta * momentum -
               lr * grad * rsqrt(avg_n - avg_g * avg_g + eps);
           param += delta;''',
        'rmsprop_graves')(
            param.grad, self.lr, self.alpha, self.momentum, self.eps,
            param.data, state['n'], state['g'], state['delta'])
def update_one_gpu(self, param, state):
    cuda.elementwise(
        'T grad, T lr, T momentum',
        'T param, T v',
        '''v = v * momentum - lr * grad;
           param += momentum * momentum * v - (1 + momentum) * lr * grad;''',
        'nesterov_ag')(param.grad, self.lr, self.momentum,
                       param.data, state['v'])
def update_one_gpu(self, param, state):
    cuda.elementwise(
        'T grad, T lr, T momentum',
        'T param, T v',
        '''v = momentum * v - lr * grad;
           param += v;''',
        'momentum_sgd')(param.grad, self.lr, self.momentum,
                        param.data, state['v'])
def update_one_gpu(self, param, state):
    cuda.elementwise(
        'T grad, T lr, T eps',
        'T param, T h',
        '''h += grad * grad;
           param -= lr * grad / (sqrt(h) + eps);''',
        'adagrad')(param.grad, self.lr, self.eps,
                   param.data, state['h'])
def update_one_gpu(self, param, state):
    cuda.elementwise(
        'T grad, T one_minus_rho, T eps',
        'T param, T msg, T msdx',
        '''msg = msg + one_minus_rho * (grad * grad - msg);
           T dx = sqrt((msdx + eps) / (msg + eps)) * grad;
           msdx += one_minus_rho * (dx * dx - msdx);
           param -= dx;''',
        'adadelta')(param.grad, 1 - self.rho, self.eps,
                    param.data, state['msg'], state['msdx'])
def update_one_gpu(self, param, state):
    cuda.elementwise('T grad, T lr', 'T param',
                     'param -= lr * grad',
                     'sgd')(param.grad, self.lr, param.data)
def __call__(self, opt):
    if cuda.available:
        kernel = cuda.elementwise(
            'T p, T decay', 'T g', 'g += decay * p', 'weight_decay')

    rate = self.rate
    for param in opt.target.params():
        p, g = param.data, param.grad
        with cuda.get_device(p) as dev:
            if int(dev) == -1:
                g += rate * p
            else:
                kernel(p, rate, g)
def forward_gpu(self, x):
    if (cuda.cudnn_enabled and self.use_cudnn and
            pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
        return super(AveragePooling2D, self).forward_gpu(x)

    n, c, h, w = x[0].shape
    y_h = conv.get_conv_outsize(h, self.kh, self.sy, self.ph)
    y_w = conv.get_conv_outsize(w, self.kw, self.sx, self.pw)
    y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype)
    coeff = 1. / (self.kh * self.kw)
    kern = cuda.elementwise(
        'raw T in, int32 h, int32 w,'
        'int32 out_h, int32 out_w, int32 kh, int32 kw,'
        'int32 sy, int32 sx, int32 ph, int32 pw, T coeff',
        'T out',
        '''
            int c0 = i / (out_h * out_w);
            int out_y = i / out_w % out_h;
            int out_x = i % out_w;
            int in_y_0 = max(0, out_y * sy - ph);
            int in_y_1 = min(h, out_y * sy + kh - ph);
            int in_x_0 = max(0, out_x * sx - pw);
            int in_x_1 = min(w, out_x * sx + kw - pw);

            T val = 0;
            for (int y = in_y_0; y < in_y_1; ++y) {
                int offset_y = w * (y + h * c0);
                for (int x = in_x_0; x < in_x_1; ++x) {
                    val = val + in[x + offset_y];
                }
            }
            out = val * coeff;
        ''', 'avg_pool_fwd')
    kern(x[0].reduced_view(), h, w, y_h, y_w, self.kh, self.kw,
         self.sy, self.sx, self.ph, self.pw, coeff, y)
    return y,
def backward_gpu(self, x, gy):
    if (cuda.cudnn_enabled and self.use_cudnn and
            pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
        return super(AveragePooling2D, self).backward_gpu(x, gy)

    n, c, h, w = x[0].shape
    y_h, y_w = gy[0].shape[2:]
    gx = cuda.cupy.empty_like(x[0])
    coeff = 1. / (self.kh * self.kw)
    cuda.elementwise(
        'raw T gy, int32 h, int32 w,'
        'int32 out_h, int32 out_w, int32 kh, int32 kw,'
        'int32 sy, int32 sx, int32 ph, int32 pw, T coeff',
        'T gx',
        '''
            int c0 = i / (h * w);
            int y = i / w % h + ph;
            int x = i % w + pw;
            int out_y_0 = max(0, (y - kh + sy) / sy);
            int out_y_1 = min(out_h, (y + sy) / sy);
            int out_x_0 = max(0, (x - kw + sx) / sx);
            int out_x_1 = min(out_w, (x + sx) / sx);
            int hc0 = out_h * c0;

            T val = 0;
            for (int out_y = out_y_0; out_y < out_y_1; ++out_y) {
                for (int out_x = out_x_0; out_x < out_x_1; ++out_x) {
                    val = val + gy[out_x + out_w * (out_y + hc0)];
                }
            }
            gx = val * coeff;
        ''', 'avg_pool_bwd')(gy[0].reduced_view(), h, w, y_h, y_w,
                             self.kh, self.kw, self.sy, self.sx,
                             self.ph, self.pw, coeff, gx)
    return gx,
def forward_gpu(self, x):
    shape = self.shape.tolist()
    y = cuda.cupy.zeros((shape[0], shape[1], shape[2], shape[3]),
                        dtype=x[0].dtype)
    cuda.elementwise(
        'T in, S indices', 'raw T out',
        '''
            out[indices] = in;
        ''',
        'unpool')(x[0], self.indices, y.reduced_view())
    return y,
def backward_gpu(self, x, gy):
    gx = cuda.cupy.empty_like(x[0])
    cuda.elementwise(
        'raw T in, S indices', 'T out',
        '''
            out = in[indices];
        ''',
        'unpool')(gy[0].reduced_view(), self.indices, gx)
    return gx,
def backward_gpu(self, x, gy):
    if (cuda.cudnn_enabled and self.use_cudnn and
            pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
        return super(MaxPooling2D, self).backward_gpu(x, gy)

    n, c, h, w = x[0].shape
    y_h, y_w = gy[0].shape[2:]
    gx = cuda.cupy.empty_like(x[0])

    cuda.elementwise(
        'raw T gy, raw S indexes, int32 h, int32 w,'
        'int32 out_h, int32 out_w, int32 kh, int32 kw,'
        'int32 sy, int32 sx, int32 ph, int32 pw',
        'T gx',
        '''
            int c0 = i / (h * w);
            int y = i / w % h + ph;
            int x = i % w + pw;
            int out_y_0 = max(0, (y - kh + sy) / sy);
            int out_y_1 = min(out_h, (y + sy) / sy);
            int out_x_0 = max(0, (x - kw + sx) / sx);
            int out_x_1 = min(out_w, (x + sx) / sx);

            T val = 0;
            for (int out_y = out_y_0; out_y < out_y_1; ++out_y) {
                int ky = y - out_y * sy;
                for (int out_x = out_x_0; out_x < out_x_1; ++out_x) {
                    int kx = x - out_x * sx;
                    int offset = out_x + out_w * (out_y + out_h * c0);
                    if (indexes[offset] == kx + kw * ky) {
                        val = val + gy[offset];
                    }
                }
            }
            gx = val;
        ''',
        'max_pool_bwd')(gy[0].reduced_view(), self.indexes.reduced_view(),
                        h, w, y_h, y_w, self.kh, self.kw,
                        self.sy, self.sx, self.ph, self.pw, gx)
    return gx,
def backward_gpu(self, inputs, grad_outputs):
    x, t = inputs
    gloss = grad_outputs[0]
    gx = cuda.cupy.zeros_like(x)
    gx = cuda.elementwise(
        'S t, T gloss', 'raw T gx',
        'int ind[] = {i, t}; gx[ind] = gloss;',
        'getitem_bwd'
    )(t, gloss, gx)
    return gx, None
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    mean, ln_var = inputs
    if self.eps is None:
        self.eps = cupy.random.standard_normal(
            ln_var.shape, dtype=mean.dtype)

    self.noise = cuda.cupy.empty_like(mean)
    self.noise = cuda.elementwise(
        'T v, T e', 'T noise',
        'noise = exp(v / 2) * e',
        'gaussian_forward'
    )(ln_var, self.eps)
    return mean + self.noise,
def forward_gpu(self, inputs):
    x = inputs[0]
    return cuda.elementwise(
        'T x', 'T y',
        'y = min(1.0, max(0.0, x * 0.2 + 0.5))',
        'hard_sigmoid_fwd'
    )(x),
def backward_gpu(self, inputs, grads):
    x = inputs[0]
    g = grads[0]
    return cuda.elementwise(
        'T x, T g', 'T gx',
        'gx = fabs(x) < 2.5 ? 0.2 * g : 0',
        'hard_sigmoid_bwd'
    )(x, g),
def forward_gpu(self, inputs):
    x = inputs[0]
    if (cuda.cudnn_enabled and self.use_cudnn and
            (_cudnn_version >= 3000 or x.dtype != numpy.float16)):
        self.y = cuda.cupy.cudnn.activation_forward(x, _mode)
    else:
        self.y = cuda.elementwise(
            'T x', 'T y', 'y = 1 / (1 + exp(-x))',
            'sigmoid_fwd')(x)
    return self.y,
def backward_gpu(self, inputs, grads):
    x = inputs[0]
    gy = grads[0]
    if (cuda.cudnn_enabled and self.use_cudnn and
            (_cudnn_version >= 3000 or x.dtype != numpy.float16)):
        gx = cuda.cupy.cudnn.activation_backward(x, self.y, gy, _mode)
    else:
        gx = cuda.elementwise(
            'T y, T gy', 'T gx',
            'gx = gy * y * (1 - y)',
            'sigmoid_bwd')(self.y, gy)
    return gx,
def forward(self, inputs):
    c_prev1, c_prev2, x1, x2 = inputs
    a1, i1, f1, o1 = _extract_gates(x1)
    a2, i2, f2, o2 = _extract_gates(x2)

    if isinstance(x1, numpy.ndarray):
        self.a1 = numpy.tanh(a1)
        self.i1 = _sigmoid(i1)
        self.f1 = _sigmoid(f1)

        self.a2 = numpy.tanh(a2)
        self.i2 = _sigmoid(i2)
        self.f2 = _sigmoid(f2)

        self.o = _sigmoid(o1 + o2)
        self.c = self.a1 * self.i1 + self.a2 * self.i2 + \
            self.f1 * c_prev1 + self.f2 * c_prev2

        h = self.o * numpy.tanh(self.c)
    else:
        self.c, h = cuda.elementwise(
            '''T c_prev1, T a1, T i1, T f1, T o1,
               T c_prev2, T a2, T i2, T f2, T o2''',
            'T c, T h',
            '''
                COMMON_ROUTINE;
                c = aa1 * ai1 + af1 * c_prev1 + aa2 * ai2 + af2 * c_prev2;
                h = ao * tanh(c);
            ''',
            'slstm_fwd', preamble=_preamble)(
                c_prev1, a1, i1, f1, o1, c_prev2, a2, i2, f2, o2)

    return self.c, h
def forward_gpu(self, x):
    return cuda.elementwise(
        'T x, T cap', 'T y',
        'y = min(max(x, (T)0), cap)',
        'clipped_relu_fwd')(x[0], self.cap),
def backward_gpu(self, x, gy):
    gx = cuda.elementwise(
        'T x, T gy, T z', 'T gx',
        'gx = ((x > 0) & (x < z)) ? gy : (T)0',
        'clipped_relu_bwd')(x[0], gy[0], self.cap)
    return gx,
def _kern():
    return cuda.elementwise(
        'T cond, T x, T slope', 'T y',
        'y = cond >= 0 ? x : (T)(slope * x)',
        'lrelu')
def backward_gpu(self, inputs, grad_outputs):
    x, W = inputs
    gy = grad_outputs[0]
    masked = cuda.elementwise(
        'T x, T gy', 'T masked',
        'masked = x >= 0 ? (T)0 : (T)(x * gy)',
        'prelu_masked')(x, gy)
    axes = (0,) + tuple(six.moves.range(1 + W.ndim, gy.ndim))
    gW = masked.sum(axis=axes)

    gx = masked  # reuse buffer
    shape = _get_extended_shape(W, gx)
    _fwd_kern()(gy, x, W.reshape(shape), gx)
    return gx, gW
def prelu(x, W):
    """Parametric ReLU function.

    It accepts two arguments: an input ``x`` and a weight array ``W``, and
    computes the output as :math:`PReLU(x) = \\max(x, W*x)`, where
    :math:`*` is an elementwise multiplication for each sample in the batch.

    When the PReLU function is combined with two-dimensional convolution,
    the elements of the parameter :math:`W` are typically shared across the
    same filter at different pixel positions. To support such usage, this
    function allows the shape of the parameter array to match the leading
    dimensions of the input array, excluding the batch dimension. For
    example, if :math:`W` has the shape :math:`(2, 3, 4)`, then :math:`x`
    must have the shape :math:`(B, 2, 3, 4, S1, ..., SN)`, where :math:`B`
    is the batch size and the number of trailing dimensions
    :math:`S1, ..., SN` is an arbitrary non-negative integer.

    Args:
        x (~chainer.Variable): Input variable. Its first dimension is
            assumed to be the minibatch dimension.
        W (~chainer.Variable): Weight variable.

    Returns:
        ~chainer.Variable: Output variable.

    .. seealso:: :class:`~chainer.links.PReLU`

    """
    return PReLUFunction()(x, W)
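For context, a minimal usage sketch of the prelu function above (the shapes and values here are illustrative assumptions, not taken from the source project): W must match the leading non-batch dimensions of x, so a per-channel slope is a weight array of shape (channels,).

import numpy
import chainer

# Hypothetical example data: batch of 5 samples, 3 channels, 7x7 spatial extent.
x = chainer.Variable(
    numpy.random.randn(5, 3, 7, 7).astype(numpy.float32))
# One slope per channel; W.shape matches x.shape[1:2], trailing dims broadcast.
W = chainer.Variable(
    numpy.full((3,), 0.25, dtype=numpy.float32))

y = prelu(x, W)   # elementwise, so y.shape == (5, 3, 7, 7)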