The following 50 code examples, extracted from open-source Python projects, illustrate how to use chainer.cuda.elementwise().
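Before the examples, here is a minimal, self-contained sketch of the call pattern they all share (assuming a CUDA-capable GPU with CuPy installed and the pre-v2 chainer.cuda namespace used throughout; the kernel name and toy data below are illustrative, not taken from any of the projects). cuda.elementwise(in_params, out_params, operation, name) returns a cached cupy.ElementwiseKernel; inside the operation string, i is the flat index of the current element, and parameters declared raw are passed as whole arrays that must be indexed manually, which is the pattern many of the examples below rely on.

import numpy
from chainer import cuda

# Illustrative only: double each element on the GPU.
# 'T' is a type placeholder resolved from the argument dtypes;
# the compiled kernel is cached under the name 'double_it'.
x = cuda.cupy.arange(8, dtype=numpy.float32)
y = cuda.elementwise(
    'T x',            # input parameter list
    'T y',            # output parameter list
    'y = 2 * x;',     # per-element CUDA C body
    'double_it')(x)   # kernel name, then the actual call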
def forward_gpu(self, inputs):
    n = len(inputs)
    ptrs = cuda.cupy.asarray([x.data.ptr for x in inputs],
                             dtype=cuda.cupy.int64)
    ws = cuda.cupy.asarray(self.weights, dtype=cuda.cupy.float32)
    y = cuda.elementwise(
        'T x0, int64 xs, raw W ws, int32 n_xs',
        'T y',
        'float** xs_ = (float**) xs;'
        'y = 0;'
        'for (size_t j = 0; j < n_xs; ++j) {'
        '  y += xs_[j][i] * ws[j];'
        '}',
        'weighted_sum_arrays'.format(n))(
            inputs[0], ptrs.data.ptr, ws, len(ptrs))
    return y,
def forward(self, inputs):
    x, t = inputs
    if chainer.is_debug():
        if not ((0 <= t).all() and (t < x.shape[1]).all()):
            msg = 'Each label `t` needs to satisfy `0 <= t < x.shape[1]`'
            raise ValueError(msg)

    xp = cuda.get_array_module(x)
    if xp is numpy:
        # This code is equivalent to `t.choose(x.T)`, but `numpy.choose`
        # does not work when `x.shape[1] > 32`.
        return x[six.moves.range(t.size), t],
    else:
        y = cuda.elementwise(
            'S t, raw T x',
            'T y',
            'int ind[] = {i, t}; y = x[ind];',
            'getitem_fwd'
        )(t, x)
        return y,
def forward(self, inputs):
    c_prev, x = inputs
    a, i, f, o = _extract_gates(x)

    if isinstance(x, numpy.ndarray):
        self.a = numpy.tanh(a)
        self.i = _sigmoid(i)
        self.f = _sigmoid(f)
        self.o = _sigmoid(o)

        self.c = self.a * self.i + self.f * c_prev
        h = self.o * numpy.tanh(self.c)
    else:
        self.c, h = cuda.elementwise(
            'T c_prev, T a, T i_, T f, T o', 'T c, T h',
            '''
                COMMON_ROUTINE;
                c = aa * ai + af * c_prev;
                h = ao * tanh(c);
            ''',
            'lstm_fwd', preamble=_preamble)(c_prev, a, i, f, o)

    return self.c, h
def _cu_conv_sum(y, x, n):
    # Convolutional sum
    # TODO(beam2d): Use scan computation
    rdim = x.size // (x.shape[0] * x.shape[1])
    cuda.elementwise(
        'raw T x, int32 rdim, int32 N, int32 n_', 'raw T y',
        '''
            int half_n = n_ / 2;
            int offset = i / rdim * N * rdim + i % rdim;

            float sum_part = 0;
            for (int j = 0; j < N + half_n; ++j) {
                if (j < N) {
                    sum_part += x[offset + j * rdim];
                }
                if (j >= n_) {
                    sum_part -= x[offset + (j - n_) * rdim];
                }
                if (j >= half_n) {
                    y[offset + (j - half_n) * rdim] = sum_part;
                }
            }
        ''', 'lrn_conv_sum')(x, rdim, x.shape[1], n, y,
                             size=x.shape[0] * rdim)
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    coeff = gloss * self._coeff
    gx = cuda.elementwise(
        'T y, S t, raw T coeff, S n_channel, S n_unit',
        'T gx',
        '''
            const int c = (i / n_unit % n_channel);
            gx = ((t == -1) || (c != t)) ? 0 : (coeff[0] / max(y, 1e-5));
        ''',
        'softmax_crossent_bwd')(
            self.y, cupy.expand_dims(t, 1), -coeff, x.shape[1], n_unit)
    return gx, None
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = softmax_log(x, self.use_cudnn)
        cupy.exp(y, out=y)
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    coeff = gloss * self._coeff
    gx = cuda.elementwise(
        'T y, S t, raw T coeff, S n_channel, S n_unit',
        'T gx',
        '''
            const int c = (i / n_unit % n_channel);
            gx = (t == -1) ? 0 : (coeff[0] * (y - (c == t)));
        ''',
        'softmax_crossent_bwd')(
            y, cupy.expand_dims(t, 1), coeff, x.shape[1], n_unit)
    return gx, None
def sample_gpu(self, shape):
    ps = cuda.cupy.random.uniform(size=shape, dtype=numpy.float32)
    vs = cuda.elementwise(
        'T ps, raw T threshold, raw S values, int32 b',
        'int32 vs',
        '''
            T pb = ps * b;
            int index = __float2int_rd(pb);
            // fill_uniform sometimes returns 1.0, so we need to check index
            if (index >= b) {
                index = 0;
            }
            int lr = threshold[index] < pb - index;
            vs = values[index * 2 + lr];
        ''',
        'walker_alias_sample'
    )(ps, self.threshold, self.values, len(self.threshold))
    return vs
def forward_gpu(self, inputs):
    x, gamma, beta = inputs
    mean = x.mean(axis=(0, 1), keepdims=True)
    var = x.var(axis=(0, 1), keepdims=True) + self.eps
    normalize = cuda.elementwise(
        'T x, T var, T mean, T gamma, T beta',
        'T std, T x_hat, T y',
        'std = sqrtf(var);'
        'x_hat = (x - mean) / std;'
        'y = gamma * x_hat + beta;',
        'normalize')
    self.std, self.x_hat, y = normalize(x, var, mean, gamma, beta)
    return y,
def __call__(self, opt):
    if cuda.available:
        kernel = cuda.elementwise(
            'T low, T high',
            'T p',
            'p = (p < low) ? low : (p > high) ? high : p',
            'weight_clip')

    for link in opt.target.links():
        # only apply to binary layers
        if getattr(link, 'cname', False):
            for param in link.params():
                p = param.data
                with cuda.get_device(p) as dev:
                    if int(dev) == -1:
                        # clip in place on CPU (out= keeps the update visible)
                        numpy.clip(p, self.low, self.high, out=p)
                    else:
                        kernel(self.low, self.high, p)
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = x
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    coeff = gloss * self._coeff
    gx = cuda.elementwise(
        'T y, S t, raw T coeff, S n_channel, S n_unit',
        'T gx',
        '''
            const int c = (i / n_unit % n_channel);
            gx = (t == -1 || c != t) ? 0 : (coeff[0] * -1.0 / y);
        ''',
        'crossent_bwd')(
            y, cupy.expand_dims(t, 1), coeff, x.shape[1], n_unit)
    return gx, None
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t, w = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = softmax_log(x, self.use_cudnn)
        cupy.exp(y, out=y)
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    coeff = gloss * self._coeff * w
    gx = cuda.elementwise(
        'T y, S t, raw T coeff, S n_channel, S n_unit',
        'T gx',
        '''
            const int c = (i / n_unit % n_channel);
            gx = (t == -1) ? 0 : (coeff[0] * (y - (c == t)));
        ''',
        'softmax_crossent_bwd')(
            y, cupy.expand_dims(t, 1), coeff, x.shape[1], n_unit)
    return gx, None, None
def __call__(self, opt):
    if cuda.available:
        kernel = cuda.elementwise(
            'T low, T high',
            'T p',
            'p = (p < low) ? low : (p > high) ? high : p',
            'weight_clip')

    for param in opt.target.params():
        p = param.data
        with cuda.get_device(p) as dev:
            if int(dev) == -1:
                # clip in place on CPU (out= keeps the update visible)
                numpy.clip(p, self.low, self.high, out=p)
            else:
                kernel(self.low, self.high, p)
def forward_gpu(self, x):
    y = cuda.elementwise(
        'T x', 'T y',
        'y = x >= 0 ? 1 : -1',
        'bst_fwd')(
            x[0])
    return y,
def backward_gpu(self, x, gy):
    gx = cuda.elementwise(
        'T x, T gy', 'T gx',
        'gx = abs(x) > 1 ? 0 : gy',
        'bst_bwd')(
            x[0], gy[0])
    return gx,
def _kern():
    return cuda.elementwise(
        'T x', 'T y',
        'y = x >= 0 ? 1 : -1',
        'binarize')
def update_core_gpu(self, param):
    grad = param.grad
    if grad is None:
        return
    cuda.elementwise(
        'T grad, T lr, T alpha, T eps',
        'T param, T ms',
        '''ms = alpha * ms + (1 - alpha) * grad * grad;
           param -= lr * grad / sqrt(ms + eps);''',
        'rmsprop')(grad, self.hyperparam.lr, self.hyperparam.alpha,
                   self.hyperparam.eps, param.data, self.state['ms'])
def __call__(self, opt):
    if cuda.available:
        kernel = cuda.elementwise(
            'T p, T decay', 'T g', 'g += decay * p', 'weight_decay')

    rate = self.rate
    for name, param in opt.target.namedparams():
        if name == 'b' or name.endswith('/b'):
            continue
        p, g = param.data, param.grad
        with cuda.get_device(p) as dev:
            if int(dev) == -1:
                g += rate * p
            else:
                kernel(p, rate, g)
def forward_gpu(self, inputs):
    n = len(inputs)
    ptrs = cuda.cupy.asarray([x.data.ptr for x in inputs],
                             dtype=cuda.cupy.int64)
    y = cuda.elementwise(
        'T x0, int64 xs, int32 n_xs',
        'T y',
        'float** xs_ = (float**) xs;'
        'y = 0;'
        'for (size_t j = 0; j < n_xs; ++j) {'
        '  y += xs_[j][i];'
        '}',
        'sum_arrays'.format(n))(inputs[0], ptrs.data.ptr, len(ptrs))
    return y,
def backward(self, inputs, grad_outputs):
    x, gamma, beta = inputs[:3]
    gy = grad_outputs[0]
    head_ndim = gamma.ndim + 1
    expander = (None, Ellipsis) + (None,) * (x.ndim - head_ndim)
    m = gamma.dtype.type(x.size // gamma.size)
    axis = (2, 3)
    gamma_beta_axis = (0, 2, 3)
    mean_var_expander = (Ellipsis, None, None)

    xp = cuda.get_array_module(x)

    gbeta = gy.sum(axis=gamma_beta_axis)
    ggamma = (gy * self.x_hat).sum(axis=gamma_beta_axis)

    if xp is numpy:
        gx = (gamma / self.std)[mean_var_expander] * (
            gy - (self.x_hat * ggamma[mean_var_expander] +
                  gbeta[mean_var_expander]) / m)
    else:
        inv_m = numpy.float32(1) / m
        gx = cuda.elementwise(
            'T gy, T x_hat, T gamma, T std, T ggamma, T gbeta, T inv_m',
            'T gx',
            'gx = (gamma / std) * (gy - (x_hat * ggamma + gbeta) * inv_m)',
            'bn_bwd')(gy, self.x_hat, gamma[expander],
                      self.std[mean_var_expander],
                      ggamma[mean_var_expander],
                      gbeta[mean_var_expander], inv_m)
    return gx, ggamma, gbeta
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = log_softmax._log_softmax(x, self.use_cudnn)
        cupy.exp(y, out=y)
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    coeff = gloss * self._coeff
    if self.class_weight is None:
        gx = cuda.elementwise(
            'T y, S t, raw T coeff, S n_channel, S n_unit',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = (t == -1) ? 0 : (coeff[0] * (y - (c == t)));
            ''',
            'softmax_crossent_bwd')(
                y, cupy.expand_dims(t, 1), coeff, x.shape[1], n_unit)
    else:
        gx = cuda.elementwise(
            'T y, raw T w, S t, raw T coeff, S n_channel, S n_unit',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = t == -1 ? 0 : coeff[0] * (y - (c == t)) * w[t];
            ''',
            'softmax_crossent_bwd')(
                y, self.class_weight, cupy.expand_dims(t, 1), coeff,
                x.shape[1], n_unit)
    return gx, None
def update_one_gpu(self, param, state):
    cuda.elementwise(
        'T grad, T lr, T alpha, T eps',
        'T param, T ms',
        '''ms = alpha * ms + (1 - alpha) * grad * grad;
           param -= lr * grad / sqrt(ms + eps);''',
        'rmsprop')(param.grad, self.lr, self.alpha, self.eps,
                   param.data, state['ms'])
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = log_softmax._log_softmax(x)
        cupy.exp(y, out=y)
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    if self.reduce == 'mean':
        coeff = gloss * self._coeff
    else:
        coeff = gloss[:, None, ...]

    if self.class_weight is None:
        gx = cuda.elementwise(
            'T y, S t, T coeff, S n_channel, S n_unit, S ignore_label',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = t == ignore_label ? 0 : coeff * (y - (c == t));
            ''',
            'softmax_crossent_bwd')(
                y, cupy.expand_dims(t, 1), coeff, x.shape[1],
                n_unit, self.ignore_label)
    else:
        gx = cuda.elementwise(
            'T y, raw T w, S t, T coeff, S n_channel, S n_unit, '
            'S ignore_label',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = t == ignore_label ? 0 : coeff * (y - (c == t)) * w[t];
            ''',
            'softmax_crossent_weight_bwd')(
                y, self.class_weight, cupy.expand_dims(t, 1), coeff,
                x.shape[1], n_unit, self.ignore_label)

    return gx, None
def backward_gpu(self, inputs, grad_outputs):
    x, t, W = inputs
    gloss, = grad_outputs

    n_in = x.shape[1]
    gx = cuda.cupy.zeros_like(x)
    gW = cuda.cupy.zeros_like(W)
    cuda.elementwise(
        '''T wxy, raw T x, raw T w, raw int32 ts, raw int32 paths,
           raw T codes, raw int32 begins, raw T gloss, int32 c,
           int32 max_length''',
        'raw T gx, raw T gw',
        '''
            int ind = i / max_length;
            int offset = i - ind * max_length;
            int t = ts[ind];

            int begin = begins[t];
            int length = begins[t + 1] - begins[t];

            if (offset < length) {
                int p = begin + offset;
                int node = paths[p];
                T code = codes[p];

                T g = -gloss[0] * code / (1.0 + exp(wxy));
                for (int j = 0; j < c; ++j) {
                    int w_ind[] = {node, j};
                    int x_ind[] = {ind, j};
                    atomicAdd(&gx[x_ind], g * w[w_ind]);
                    atomicAdd(&gw[w_ind], g * x[x_ind]);
                }
            }
        ''', 'binary_hierarchical_softmax_bwd'
    )(self.wxy, x, W, t, self.paths, self.codes, self.begins, gloss,
      n_in, self.max_length, gx, gW)
    return gx, None, gW
def update_one_gpu(self, param, state):
    cuda.elementwise(
        'T grad, T lr, T alpha, T eps',
        'T param, T ms',
        '''ms = alpha * ms + (1 - alpha) * grad * grad;
           param -= lr * grad / (sqrt(ms) + eps);''',
        'rmsprop')(param.grad, self.lr, self.alpha, self.eps,
                   param.data, state['ms'])
def update_one_gpu(self, param, state):
    cuda.elementwise(
        'T grad, T lr, T alpha, T momentum, T eps',
        'T param, T avg_n, T avg_g, T delta',
        '''avg_n = alpha * avg_n + (1 - alpha) * grad * grad;
           avg_g = alpha * avg_g + (1 - alpha) * grad;
           delta = delta * momentum -
               lr * grad * rsqrt(avg_n - avg_g * avg_g + eps);
           param += delta;''',
        'rmsprop_graves')(
            param.grad, self.lr, self.alpha, self.momentum, self.eps,
            param.data, state['n'], state['g'], state['delta'])
def update_one_gpu(self, param, state):
    cuda.elementwise(
        'T grad, T lr, T momentum',
        'T param, T v',
        '''v = v * momentum - lr * grad;
           param += momentum * momentum * v - (1 + momentum) * lr * grad;''',
        'nesterov_ag')(param.grad, self.lr, self.momentum,
                       param.data, state['v'])
def update_one_gpu(self, param, state):
    cuda.elementwise(
        'T grad, T lr, T momentum',
        'T param, T v',
        '''v = momentum * v - lr * grad;
           param += v;''',
        'momentum_sgd')(param.grad, self.lr, self.momentum,
                        param.data, state['v'])
def update_one_gpu(self, param, state):
    cuda.elementwise(
        'T grad, T lr, T eps',
        'T param, T h',
        '''h += grad * grad;
           param -= lr * grad / (sqrt(h) + eps);''',
        'adagrad')(param.grad, self.lr, self.eps,
                   param.data, state['h'])
def update_one_gpu(self, param, state):
    cuda.elementwise(
        'T grad, T one_minus_rho, T eps',
        'T param, T msg, T msdx',
        '''msg = msg + one_minus_rho * (grad * grad - msg);
           T dx = sqrt((msdx + eps) / (msg + eps)) * grad;
           msdx += one_minus_rho * (dx * dx - msdx);
           param -= dx;''',
        'adadelta')(param.grad, 1 - self.rho, self.eps,
                    param.data, state['msg'], state['msdx'])
def update_one_gpu(self, param, state):
    cuda.elementwise('T grad, T lr', 'T param',
                     'param -= lr * grad',
                     'sgd')(param.grad, self.lr, param.data)
def __call__(self, opt):
    if cuda.available:
        kernel = cuda.elementwise(
            'T p, T decay', 'T g', 'g += decay * p', 'weight_decay')

    rate = self.rate
    for param in opt.target.params():
        p, g = param.data, param.grad
        with cuda.get_device(p) as dev:
            if int(dev) == -1:
                g += rate * p
            else:
                kernel(p, rate, g)
def forward_gpu(self, x):
    if (cuda.cudnn_enabled and self.use_cudnn and
            pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
        return super(AveragePooling2D, self).forward_gpu(x)

    n, c, h, w = x[0].shape
    y_h = conv.get_conv_outsize(h, self.kh, self.sy, self.ph)
    y_w = conv.get_conv_outsize(w, self.kw, self.sx, self.pw)
    y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype)
    coeff = 1. / (self.kh * self.kw)
    kern = cuda.elementwise(
        'raw T in, int32 h, int32 w,'
        'int32 out_h, int32 out_w, int32 kh, int32 kw,'
        'int32 sy, int32 sx, int32 ph, int32 pw, T coeff',
        'T out',
        '''
            int c0 = i / (out_h * out_w);
            int out_y = i / out_w % out_h;
            int out_x = i % out_w;
            int in_y_0 = max(0, out_y * sy - ph);
            int in_y_1 = min(h, out_y * sy + kh - ph);
            int in_x_0 = max(0, out_x * sx - pw);
            int in_x_1 = min(w, out_x * sx + kw - pw);

            T val = 0;
            for (int y = in_y_0; y < in_y_1; ++y) {
                int offset_y = w * (y + h * c0);
                for (int x = in_x_0; x < in_x_1; ++x) {
                    val = val + in[x + offset_y];
                }
            }
            out = val * coeff;
        ''', 'avg_pool_fwd')
    kern(x[0].reduced_view(), h, w, y_h, y_w, self.kh, self.kw,
         self.sy, self.sx, self.ph, self.pw, coeff, y)
    return y,
def backward_gpu(self, x, gy):
    if (cuda.cudnn_enabled and self.use_cudnn and
            pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
        return super(AveragePooling2D, self).backward_gpu(x, gy)

    n, c, h, w = x[0].shape
    y_h, y_w = gy[0].shape[2:]
    gx = cuda.cupy.empty_like(x[0])
    coeff = 1. / (self.kh * self.kw)
    cuda.elementwise(
        'raw T gy, int32 h, int32 w,'
        'int32 out_h, int32 out_w, int32 kh, int32 kw,'
        'int32 sy, int32 sx, int32 ph, int32 pw, T coeff',
        'T gx',
        '''
            int c0 = i / (h * w);
            int y = i / w % h + ph;
            int x = i % w + pw;
            int out_y_0 = max(0, (y - kh + sy) / sy);
            int out_y_1 = min(out_h, (y + sy) / sy);
            int out_x_0 = max(0, (x - kw + sx) / sx);
            int out_x_1 = min(out_w, (x + sx) / sx);
            int hc0 = out_h * c0;

            T val = 0;
            for (int out_y = out_y_0; out_y < out_y_1; ++out_y) {
                for (int out_x = out_x_0; out_x < out_x_1; ++out_x) {
                    val = val + gy[out_x + out_w * (out_y + hc0)];
                }
            }
            gx = val * coeff;
        ''', 'avg_pool_bwd')(gy[0].reduced_view(), h, w, y_h, y_w,
                             self.kh, self.kw, self.sy, self.sx,
                             self.ph, self.pw, coeff, gx)
    return gx,
def forward_gpu(self, x):
    shape = self.shape.tolist()
    y = cuda.cupy.zeros((shape[0], shape[1], shape[2], shape[3]),
                        dtype=x[0].dtype)
    cuda.elementwise(
        'T in, S indices', 'raw T out',
        '''
            out[indices] = in;
        ''',
        'unpool')(x[0], self.indices, y.reduced_view())
    return y,
def backward_gpu(self, x, gy):
    gx = cuda.cupy.empty_like(x[0])
    cuda.elementwise(
        'raw T in, S indices', 'T out',
        '''
            out = in[indices];
        ''',
        'unpool')(gy[0].reduced_view(), self.indices, gx)
    return gx,
def backward_gpu(self, x, gy):
    if (cuda.cudnn_enabled and self.use_cudnn and
            pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
        return super(MaxPooling2D, self).backward_gpu(x, gy)

    n, c, h, w = x[0].shape
    y_h, y_w = gy[0].shape[2:]
    gx = cuda.cupy.empty_like(x[0])

    cuda.elementwise(
        'raw T gy, raw S indexes, int32 h, int32 w,'
        'int32 out_h, int32 out_w, int32 kh, int32 kw,'
        'int32 sy, int32 sx, int32 ph, int32 pw',
        'T gx',
        '''
            int c0 = i / (h * w);
            int y = i / w % h + ph;
            int x = i % w + pw;
            int out_y_0 = max(0, (y - kh + sy) / sy);
            int out_y_1 = min(out_h, (y + sy) / sy);
            int out_x_0 = max(0, (x - kw + sx) / sx);
            int out_x_1 = min(out_w, (x + sx) / sx);

            T val = 0;
            for (int out_y = out_y_0; out_y < out_y_1; ++out_y) {
                int ky = y - out_y * sy;
                for (int out_x = out_x_0; out_x < out_x_1; ++out_x) {
                    int kx = x - out_x * sx;
                    int offset = out_x + out_w * (out_y + out_h * c0);
                    if (indexes[offset] == kx + kw * ky) {
                        val = val + gy[offset];
                    }
                }
            }
            gx = val;
        ''',
        'max_pool_bwd')(gy[0].reduced_view(), self.indexes.reduced_view(),
                        h, w, y_h, y_w, self.kh, self.kw,
                        self.sy, self.sx, self.ph, self.pw, gx)
    return gx,
def backward_gpu(self, inputs, grad_outputs):
    x, t = inputs
    gloss = grad_outputs[0]
    gx = cuda.cupy.zeros_like(x)
    gx = cuda.elementwise(
        'S t, T gloss', 'raw T gx',
        'int ind[] = {i, t}; gx[ind] = gloss;',
        'getitem_bwd'
    )(t, gloss, gx)
    return gx, None
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    mean, ln_var = inputs
    if self.eps is None:
        self.eps = cupy.random.standard_normal(
            ln_var.shape, dtype=mean.dtype)

    self.noise = cuda.cupy.empty_like(mean)
    self.noise = cuda.elementwise(
        'T v, T e', 'T noise',
        'noise = exp(v / 2) * e',
        'gaussian_forward'
    )(ln_var, self.eps)
    return mean + self.noise,
def forward_gpu(self, inputs):
    x = inputs[0]
    return cuda.elementwise(
        'T x', 'T y',
        'y = min(1.0, max(0.0, x * 0.2 + 0.5))',
        'hard_sigmoid_fwd'
    )(x),
def backward_gpu(self, inputs, grads):
    x = inputs[0]
    g = grads[0]
    return cuda.elementwise(
        'T x, T g', 'T gx',
        'gx = fabs(x) < 2.5 ? 0.2 * g : 0',
        'hard_sigmoid_bwd'
    )(x, g),
def forward_gpu(self, inputs):
    x = inputs[0]
    if (cuda.cudnn_enabled and self.use_cudnn and
            (_cudnn_version >= 3000 or x.dtype != numpy.float16)):
        self.y = cuda.cupy.cudnn.activation_forward(x, _mode)
    else:
        self.y = cuda.elementwise(
            'T x', 'T y', 'y = 1 / (1 + exp(-x))',
            'sigmoid_fwd')(x)
    return self.y,
def backward_gpu(self, inputs, grads):
    x = inputs[0]
    gy = grads[0]
    if (cuda.cudnn_enabled and self.use_cudnn and
            (_cudnn_version >= 3000 or x.dtype != numpy.float16)):
        gx = cuda.cupy.cudnn.activation_backward(x, self.y, gy, _mode)
    else:
        gx = cuda.elementwise(
            'T y, T gy', 'T gx',
            'gx = gy * y * (1 - y)',
            'sigmoid_bwd')(self.y, gy)
    return gx,
def forward(self, inputs):
    c_prev1, c_prev2, x1, x2 = inputs
    a1, i1, f1, o1 = _extract_gates(x1)
    a2, i2, f2, o2 = _extract_gates(x2)

    if isinstance(x1, numpy.ndarray):
        self.a1 = numpy.tanh(a1)
        self.i1 = _sigmoid(i1)
        self.f1 = _sigmoid(f1)

        self.a2 = numpy.tanh(a2)
        self.i2 = _sigmoid(i2)
        self.f2 = _sigmoid(f2)

        self.o = _sigmoid(o1 + o2)
        self.c = self.a1 * self.i1 + self.a2 * self.i2 + \
            self.f1 * c_prev1 + self.f2 * c_prev2

        h = self.o * numpy.tanh(self.c)
    else:
        self.c, h = cuda.elementwise(
            '''T c_prev1, T a1, T i1, T f1, T o1,
               T c_prev2, T a2, T i2, T f2, T o2''',
            'T c, T h',
            '''
                COMMON_ROUTINE;
                c = aa1 * ai1 + af1 * c_prev1 + aa2 * ai2 + af2 * c_prev2;
                h = ao * tanh(c);
            ''',
            'slstm_fwd', preamble=_preamble)(
                c_prev1, a1, i1, f1, o1, c_prev2, a2, i2, f2, o2)

    return self.c, h
def forward_gpu(self, x):
    return cuda.elementwise(
        'T x, T cap', 'T y',
        'y = min(max(x, (T)0), cap)',
        'clipped_relu_fwd')(x[0], self.cap),
def backward_gpu(self, x, gy):
    gx = cuda.elementwise(
        'T x, T gy, T z', 'T gx',
        'gx = ((x > 0) & (x < z)) ? gy : (T)0',
        'clipped_relu_bwd')(x[0], gy[0], self.cap)
    return gx,
def _kern():
    return cuda.elementwise(
        'T cond, T x, T slope', 'T y',
        'y = cond >= 0 ? x : (T)(slope * x)',
        'lrelu')
def backward_gpu(self, inputs, grad_outputs):
    x, W = inputs
    gy = grad_outputs[0]
    masked = cuda.elementwise(
        'T x, T gy', 'T masked',
        'masked = x >= 0 ? (T)0 : (T)(x * gy)',
        'prelu_masked')(x, gy)
    axes = (0,) + tuple(six.moves.range(1 + W.ndim, gy.ndim))
    gW = masked.sum(axis=axes)

    gx = masked  # reuse buffer
    shape = _get_extended_shape(W, gx)
    _fwd_kern()(gy, x, W.reshape(shape), gx)
    return gx, gW
def prelu(x, W):
    """Parametric ReLU function.

    It accepts two arguments: an input ``x`` and a weight array ``W``, and
    computes the output as :math:`PReLU(x) = \\max(x, W*x)`, where
    :math:`*` is an elementwise multiplication for each sample in the batch.

    When the PReLU function is combined with two-dimensional convolution,
    the elements of the parameter :math:`W` are typically shared across the
    same filter at different pixel positions. To support such usage, this
    function allows the shape of the parameter array to match the leading
    dimensions of the input array, excluding the batch dimension. For
    example, if :math:`W` has the shape :math:`(2, 3, 4)`, then :math:`x`
    must have the shape :math:`(B, 2, 3, 4, S1, ..., SN)`, where :math:`B`
    is the batch size and the number of trailing dimensions
    :math:`S1, ..., SN` is an arbitrary non-negative integer.

    Args:
        x (~chainer.Variable): Input variable. Its first dimension is
            assumed to be the minibatch dimension.
        W (~chainer.Variable): Weight variable.

    Returns:
        ~chainer.Variable: Output variable.

    .. seealso:: :class:`~chainer.links.PReLU`

    """
    return PReLUFunction()(x, W)
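For context, a minimal usage sketch of the prelu function above (the shapes and values here are illustrative assumptions, not taken from the source project): W must match the leading non-batch dimensions of x, so a per-channel slope is a weight array of shape (channels,).

import numpy
import chainer

# Hypothetical example data: batch of 5 samples, 3 channels, 7x7 spatial extent.
x = chainer.Variable(
    numpy.random.randn(5, 3, 7, 7).astype(numpy.float32))
# One slope per channel; W.shape matches x.shape[1:2], trailing dims broadcast.
W = chainer.Variable(
    numpy.full((3,), 0.25, dtype=numpy.float32))

y = prelu(x, W)   # elementwise, so y.shape == (5, 3, 7, 7)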