The following 18 code examples, extracted from open-source Python projects, illustrate how to use chainer.cuda.reduce().
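Before the project examples, here is a minimal sketch of the call pattern they all share. It is an illustration only, assuming a CUDA-capable GPU with CuPy available; the squared_sum kernel and its input array are made up for this sketch and do not come from any of the projects below. cuda.reduce() takes the input/output parameter declarations, a map expression, a reduce expression, a post-map expression, the identity value, and a kernel name, and returns a callable reduction kernel.

    import numpy
    from chainer import cuda

    # Minimal sketch: build a reduction kernel that sums the squares of its input.
    # The seven positional arguments mirror the project examples below:
    # in_params, out_params, map_expr, reduce_expr, post_map_expr, identity, name.
    squared_sum = cuda.reduce(
        'T x',            # input parameter declaration
        'T out',          # output parameter declaration
        'x * x',          # map expression applied to every element
        'a + b',          # reduce expression combining mapped values
        'out = a',        # post-map expression writing the final result
        '0',              # identity element of the reduction
        'squared_sum')    # kernel name

    x = cuda.cupy.arange(5, dtype=numpy.float32)
    print(squared_sum(x))  # 0 + 1 + 4 + 9 + 16 = 30.0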
def __init__(self, use_cudnn=True, normalize=True, cache_score=True,
             class_weight=None, ignore_label=-1, reduce='mean'):
    self.use_cudnn = use_cudnn
    self.normalize = normalize
    self.cache_score = cache_score
    self.class_weight = class_weight
    if class_weight is not None:
        if self.class_weight.ndim != 1:
            raise ValueError('class_weight.ndim should be 1')
        if self.class_weight.dtype.kind != 'f':
            raise ValueError('The dtype of class_weight should be \'f\'')
        if isinstance(self.class_weight, chainer.Variable):
            raise ValueError('class_weight should be a numpy.ndarray or '
                             'cupy.ndarray, not a chainer.Variable')
    self.ignore_label = ignore_label
    if reduce not in ('mean', 'no'):
        raise ValueError(
            "only 'mean' and 'no' are valid for 'reduce', but '%s' is "
            'given' % reduce)
    self.reduce = reduce
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    log_y = cupy.log(x + 1e-5)
    self.y = x
    if(self.debug):
        ipdb.set_trace()
    if getattr(self, 'normalize', True):
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)

    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    ret = cuda.reduce(
        'S t, raw T log_y, int32 n_channel, raw T coeff', 'T out',
        't == -1 ? 0 : log_y[_j * n_channel + t]',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, log_y.reduced_view(), log_y.shape[-1], self._coeff)
    return ret,
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)

    log_y = softmax_log(x, self.use_cudnn)
    if self.cache_score:
        self.y = cupy.exp(log_y)
    if getattr(self, 'normalize', True):
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)

    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    ret = cuda.reduce(
        'S t, raw T log_y, int32 n_channel, raw T coeff', 'T out',
        't == -1 ? T(0) : log_y[_j * n_channel + t]',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, log_y.reduced_view(), log_y.shape[-1], self._coeff)
    return ret,
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    log_y = cupy.log(x + 1e-5)
    self.y = x
    if(self.debug):
        ipdb.set_trace()
    if getattr(self, 'normalize', True):
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)

    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    ret = cuda.reduce(
        'S t, raw T log_y, int32 n_channel, raw T coeff, raw T weights',
        'T out',
        't == -1 ? 0 : log_y[_j * n_channel + t] * weights[t]',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, log_y.reduced_view(), log_y.shape[-1], self._coeff,
      self.weights.reduced_view())
    return ret,
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)

    log_y = cupy.log(x)
    if self.cache_score:
        self.y = x
    if getattr(self, 'normalize', True):
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)

    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    ret = cuda.reduce(
        'S t, raw T log_y, int32 n_channel, raw T coeff', 'T out',
        't == -1 ? 0 : log_y[_j * n_channel + t]',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, log_y.reduced_view(), log_y.shape[-1], self._coeff)
    return ret,
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t, w = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)

    log_y = softmax_log(x, self.use_cudnn)
    if self.cache_score:
        self.y = cupy.exp(log_y)
    if getattr(self, 'normalize', True):
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)

    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    ret = cuda.reduce(
        'S t, T w, raw T log_y, int32 n_channel, raw T coeff', 'T out',
        't == -1 ? T(0) : log_y[_j * n_channel + t] * w',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, w, log_y.reduced_view(), log_y.shape[-1], self._coeff)
    return ret,
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)

    log_y = log_softmax._log_softmax(x, self.use_cudnn)
    if self.cache_score:
        self.y = cupy.exp(log_y)
    if self.class_weight is not None:
        shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
        log_y *= cupy.broadcast_to(
            self.class_weight.reshape(shape), x.shape)
    if self.normalize:
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)

    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    ret = cuda.reduce(
        'S t, raw T log_y, int32 n_channel, raw T coeff', 'T out',
        't == -1 ? T(0) : log_y[_j * n_channel + t]',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, log_y.reduced_view(), log_y.shape[-1], self._coeff)
    return ret,
def forward_cpu(self, inputs):
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)

    log_y = log_softmax._log_softmax(x)
    if self.cache_score:
        self.y = np.exp(log_y)
    if self.class_weight is not None:
        shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
        log_y *= _broadcast_to(self.class_weight.reshape(shape), x.shape)
    log_yd = np.rollaxis(log_y, 1)
    log_yd = log_yd.reshape(len(log_yd), -1)
    log_p = log_yd[np.maximum(t.ravel(), 0), np.arange(t.size)]

    log_p *= (t.ravel() != self.ignore_label)
    if self.reduce == 'mean':
        # deal with the case where the SoftmaxCrossEntropy is
        # unpickled from the old version
        if self.normalize:
            count = (t != self.ignore_label).sum()
        else:
            count = len(x)
        self._coeff = 1.0 / max(count, 1)

        y = log_p.sum(keepdims=True) * (-self._coeff)
        return y.reshape(()),
    else:
        return -log_p.reshape(t.shape),
def backward_cpu(self, inputs, grad_outputs):
    x, t = inputs
    gloss = grad_outputs[0]
    if hasattr(self, 'y'):
        y = self.y.copy()
    else:
        y = log_softmax._log_softmax(x)
        np.exp(y, out=y)
    if y.ndim == 2:
        gx = y
        gx[np.arange(len(t)), np.maximum(t, 0)] -= 1
        if self.class_weight is not None:
            shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
            c = _broadcast_to(self.class_weight.reshape(shape), x.shape)
            c = c[np.arange(len(t)), np.maximum(t, 0)]
            gx *= _broadcast_to(np.expand_dims(c, 1), gx.shape)
        gx *= (t != self.ignore_label).reshape((len(t), 1))
    else:
        n_unit = t.size // len(t)
        gx = y.reshape(y.shape[0], y.shape[1], -1)
        fst_index = np.arange(t.size) // n_unit
        trd_index = np.arange(t.size) % n_unit
        gx[fst_index, np.maximum(t.ravel(), 0), trd_index] -= 1
        if self.class_weight is not None:
            shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
            c = _broadcast_to(self.class_weight.reshape(shape), x.shape)
            c = c.reshape(gx.shape)
            c = c[fst_index, np.maximum(t.ravel(), 0), trd_index]
            c = c.reshape(y.shape[0], 1, -1)
            gx *= _broadcast_to(c, gx.shape)
        gx *= (t != self.ignore_label).reshape((len(t), 1, -1))
        gx = gx.reshape(y.shape)
    if self.reduce == 'mean':
        gx *= gloss * self._coeff
    else:
        gx *= gloss[:, None]
    return gx, None
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = log_softmax._log_softmax(x)
        cupy.exp(y, out=y)
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    if self.reduce == 'mean':
        coeff = gloss * self._coeff
    else:
        coeff = gloss[:, None, ...]

    if self.class_weight is None:
        gx = cuda.elementwise(
            'T y, S t, T coeff, S n_channel, S n_unit, S ignore_label',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = t == ignore_label ? 0 : coeff * (y - (c == t));
            ''',
            'softmax_crossent_bwd')(
                y, cupy.expand_dims(t, 1), coeff, x.shape[1],
                n_unit, self.ignore_label)
    else:
        gx = cuda.elementwise(
            'T y, raw T w, S t, T coeff, S n_channel, S n_unit, '
            'S ignore_label',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = t == ignore_label ? 0 : coeff * (y - (c == t)) * w[t];
            ''',
            'softmax_crossent_weight_bwd')(
                y, self.class_weight, cupy.expand_dims(t, 1), coeff,
                x.shape[1], n_unit, self.ignore_label)

    return gx, None
def backward(self, inputs, grad_outputs):
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]
    gy = grad_outputs[0]

    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        gW = numpy.einsum('ij,ik,il->jkl', e1, e2, gy)
        ge1 = numpy.einsum('ik,jkl,il->ij', e2, W, gy)
        ge2 = numpy.einsum('ij,jkl,il->ik', e1, W, gy)
    else:
        kern = cuda.reduce('T in0, T in1, T in2', 'T out',
                           'in0 * in1 * in2', 'a + b', 'out = a', 0,
                           'bilinear_product')

        e1_b = e1[:, :, None, None]  # ij
        e2_b = e2[:, None, :, None]  # ik
        gy_b = gy[:, None, None, :]  # il
        W_b = W[None, :, :, :]       # jkl

        gW = kern(e1_b, e2_b, gy_b, axis=0)       # 'ij,ik,il->jkl'
        ge1 = kern(e2_b, W_b, gy_b, axis=(2, 3))  # 'ik,jkl,il->ij'
        ge2 = kern(e1_b, W_b, gy_b, axis=(1, 3))  # 'ij,jkl,il->ik'

    ret = ge1.reshape(inputs[0].shape), ge2.reshape(inputs[1].shape), gW

    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        gV1 = e1.T.dot(gy)
        gV2 = e2.T.dot(gy)
        gb = gy.sum(0)
        ge1 += gy.dot(V1.T)
        ge2 += gy.dot(V2.T)
        ret += gV1, gV2, gb

    return ret
def forward_cpu(self, inputs):
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)

    log_y = log_softmax._log_softmax(x)
    if self.cache_score:
        self.y = np.exp(log_y)
    if self.class_weight is not None:
        shape = [1 if d != 1 else -1 for d in xrange(x.ndim)]
        log_y *= _broadcast_to(self.class_weight.reshape(shape), x.shape)
    log_yd = np.rollaxis(log_y, 1)
    log_yd = log_yd.reshape(len(log_yd), -1)
    log_p = log_yd[np.maximum(t.ravel(), 0), np.arange(t.size)]

    log_p *= (t.ravel() != self.ignore_label)
    if self.reduce == 'mean':
        # deal with the case where the SoftmaxCrossEntropy is
        # unpickled from the old version
        if self.normalize:
            count = (t != self.ignore_label).sum()
        else:
            count = len(x)
        self._coeff = 1.0 / max(count, 1)

        y = log_p.sum(keepdims=True) * (-self._coeff)
        return y.reshape(()),
    else:
        return -log_p.reshape(t.shape),
def backward_cpu(self, inputs, grad_outputs):
    x, t = inputs
    gloss = grad_outputs[0]
    if hasattr(self, 'y'):
        y = self.y.copy()
    else:
        y = log_softmax._log_softmax(x)
        np.exp(y, out=y)
    if y.ndim == 2:
        gx = y
        gx[np.arange(len(t)), np.maximum(t, 0)] -= 1
        if self.class_weight is not None:
            shape = [1 if d != 1 else -1 for d in xrange(x.ndim)]
            c = _broadcast_to(self.class_weight.reshape(shape), x.shape)
            c = c[np.arange(len(t)), np.maximum(t, 0)]
            gx *= _broadcast_to(np.expand_dims(c, 1), gx.shape)
        gx *= (t != self.ignore_label).reshape((len(t), 1))
    else:
        n_unit = t.size // len(t)
        gx = y.reshape(y.shape[0], y.shape[1], -1)
        fst_index = np.arange(t.size) // n_unit
        trd_index = np.arange(t.size) % n_unit
        gx[fst_index, np.maximum(t.ravel(), 0), trd_index] -= 1
        if self.class_weight is not None:
            shape = [1 if d != 1 else -1 for d in xrange(x.ndim)]
            c = _broadcast_to(self.class_weight.reshape(shape), x.shape)
            c = c.reshape(gx.shape)
            c = c[fst_index, np.maximum(t.ravel(), 0), trd_index]
            c = c.reshape(y.shape[0], 1, -1)
            gx *= _broadcast_to(c, gx.shape)
        gx *= (t != self.ignore_label).reshape((len(t), 1, -1))
        gx = gx.reshape(y.shape)
    if self.reduce == 'mean':
        gx *= gloss * self._coeff
    else:
        gx *= gloss[:, None]
    return gx, None
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)

    log_y = log_softmax._log_softmax(x)
    if self.cache_score:
        self.y = cupy.exp(log_y)
    if self.class_weight is not None:
        shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
        log_y *= cupy.broadcast_to(
            self.class_weight.reshape(shape), x.shape)
    if self.normalize:
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)

    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    if self.reduce == 'mean':
        ret = cuda.reduce(
            'S t, raw T log_y, int32 n_channel, raw T coeff, '
            'S ignore_label', 'T out',
            't == ignore_label ? T(0) : log_y[_j * n_channel + t]',
            'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
        )(t, log_y.reduced_view(), log_y.shape[-1], self._coeff,
          self.ignore_label)
    else:
        ret = cuda.elementwise(
            'S t, raw T log_y, int32 n_channel, T ignore', 'T out',
            '''
            if (t == ignore) {
              out = 0;
            } else {
              out = -log_y[i * n_channel + t];
            }
            ''',
            'softmax_crossent_no_reduce_fwd'
        )(t, log_y.reduced_view(), log_y.shape[-1], self.ignore_label)
        ret = ret.reshape(t.shape)
    return ret,
def forward_gpu(self, inputs):
    x, t, W = inputs
    max_length = cuda.reduce(
        'T t, raw T begins', 'T out',
        'begins[t + 1] - begins[t]', 'max(a, b)',
        'out = a', '0',
        'binary_hierarchical_softmax_max_length')(t, self.begins)
    max_length = cuda.to_cpu(max_length)[()]

    length = max_length * x.shape[0]
    ls = cuda.cupy.empty((length,), dtype=numpy.float32)
    n_in = x.shape[1]
    wxy = cuda.cupy.empty_like(ls)
    cuda.elementwise(
        '''raw T x, raw T w, raw int32 ts, raw int32 paths,
        raw T codes, raw int32 begins, int32 c, int32 max_length''',
        'T ls, T wxy',
        '''
        int ind = i / max_length;
        int offset = i - ind * max_length;
        int t = ts[ind];

        int begin = begins[t];
        int length = begins[t + 1] - begins[t];

        if (offset < length) {
          int p = begin + offset;
          int node = paths[p];

          T wx = 0;
          for (int j = 0; j < c; ++j) {
            int w_ind[] = {node, j};
            int x_ind[] = {ind, j};
            wx += w[w_ind] * x[x_ind];
          }
          wxy = wx * codes[p];
          ls = log(1 + exp(-wxy));
        } else {
          ls = 0;
        }
        ''',
        'binary_hierarchical_softmax_forward'
    )(x, W, t, self.paths, self.codes, self.begins, n_in, max_length,
      ls, wxy)
    self.max_length = max_length
    self.wxy = wxy
    return ls.sum(),
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)

    log_y = log_softmax._log_softmax(x)
    if self.cache_score:
        self.y = cupy.exp(log_y)
    if self.class_weight is not None:
        shape = [1 if d != 1 else -1 for d in xrange(x.ndim)]
        log_y *= cupy.broadcast_to(
            self.class_weight.reshape(shape), x.shape)
    if self.normalize:
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)

    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    if self.reduce == 'mean':
        ret = cuda.reduce(
            'S t, raw T log_y, int32 n_channel, raw T coeff, '
            'S ignore_label', 'T out',
            't == ignore_label ? T(0) : log_y[_j * n_channel + t]',
            'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
        )(t, log_y.reduced_view(), log_y.shape[-1], self._coeff,
          self.ignore_label)
    else:
        ret = cuda.elementwise(
            'S t, raw T log_y, int32 n_channel, T ignore', 'T out',
            '''
            if (t == ignore) {
              out = 0;
            } else {
              out = -log_y[i * n_channel + t];
            }
            ''',
            'softmax_crossent_no_reduce_fwd'
        )(t, log_y.reduced_view(), log_y.shape[-1], self.ignore_label)
        ret = ret.reshape(t.shape)
    return ret,