The following 50 code examples, extracted from open-source Python projects, illustrate how to use chainer.cuda.cupy.
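Most of the examples below follow the same idiom: pick cuda.cupy as the array module when a GPU is in use and fall back to numpy otherwise, then allocate every array through that module. Here is a minimal sketch of that pattern; it is not taken from any of the projects below, and the gpu_id variable and the toy array are illustrative only.

import numpy as np
from chainer import cuda

gpu_id = 0  # illustrative: use GPU 0; set to -1 to stay on the CPU
if gpu_id >= 0:
    cuda.get_device(gpu_id).use()  # make the device current
    xp = cuda.cupy                 # GPU-backed array module
else:
    xp = np                        # plain NumPy on the CPU

# arrays created through xp live on whichever device was selected
x = xp.zeros((2, 3), dtype=xp.float32)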
def set_by_sample(self, train=True):
    xp = self.xp
    use_gpu = (xp == cuda.cupy)
    for i in range(self.num_layers):
        # h
        mu, sigma = self.hmus[i], self.hsigmas[i]
        e = np.random.normal(0., 1., self.z_size).astype(np.float32)
        if use_gpu:
            e = cuda.to_gpu(e)
        self.decoder.set_h(i, self.get_zh(i)(mu + e * sigma))
        # c
        mu, sigma = self.cmus[i], self.csigmas[i]
        e = np.random.normal(0., 1., self.z_size).astype(np.float32)
        if use_gpu:
            e = cuda.to_gpu(e)
        self.decoder.set_c(i, self.get_zc(i)(mu + e * sigma))
def __init__(self, model, optimizer, content_weight, style_weight, tv_weight,
             content_layers, style_layers, resolution_num=1, device_id=-1,
             initial_image='random', keep_color=False):
    self.model = model
    self.optimizer = optimizer
    self.content_weight = content_weight
    self.style_weight = style_weight
    self.tv_weight = tv_weight
    self.device_id = device_id
    self.content_layer_names = content_layers
    self.style_layer_names = style_layers
    self.resolution_num = resolution_num
    self.initial_image = initial_image
    self.keep_color = keep_color
    if device_id >= 0:
        self.xp = cuda.cupy
        self.model.to_gpu(device_id)
    else:
        self.xp = np
def __init__(self, model, optimizer, content_weight, style_weight, tv_weight,
             content_layers, style_layers, resolution_num=1, device_id=-1,
             initial_image='content', keep_color=False):
    self.model = model
    self.optimizer = optimizer
    self.content_weight = content_weight
    self.style_weight = style_weight
    self.tv_weight = tv_weight
    self.device_id = device_id
    self.content_layer_names = content_layers
    self.style_layer_names = style_layers
    self.resolution_num = resolution_num
    self.initial_image = initial_image
    self.keep_color = keep_color
    if device_id >= 0:
        self.xp = cuda.cupy
        self.model.to_gpu(device_id)
    else:
        self.xp = np
def backprop_check():
    xp = cuda.cupy if config.use_gpu else np
    duel = DDQN()

    state = xp.random.uniform(-1.0, 1.0,
        (2, config.rl_agent_history_length * config.ale_screen_channels,
         config.ale_scaled_screen_size[1], config.ale_scaled_screen_size[0])).astype(xp.float32)
    reward = [1, 0]
    action = [3, 4]
    episode_ends = [0, 0]
    next_state = xp.random.uniform(-1.0, 1.0,
        (2, config.rl_agent_history_length * config.ale_screen_channels,
         config.ale_scaled_screen_size[1], config.ale_scaled_screen_size[0])).astype(xp.float32)

    optimizer_conv = optimizers.Adam(alpha=config.rl_learning_rate, beta1=config.rl_gradient_momentum)
    optimizer_conv.setup(duel.conv)
    optimizer_fc = optimizers.Adam(alpha=config.rl_learning_rate, beta1=config.rl_gradient_momentum)
    optimizer_fc.setup(duel.fc)

    for i in xrange(10000):
        optimizer_conv.zero_grads()
        optimizer_fc.zero_grads()
        loss, _ = duel.forward_one_step(state, action, reward, next_state, episode_ends)
        loss.backward()
        optimizer_conv.update()
        optimizer_fc.update()
        print loss.data,
        print duel.conv.layer_2.W.data[0, 0, 0, 0],
        print duel.fc.layer_2.W.data[0, 0],
def check_gpu(self, gpu):
    """Check cuda.cupy

    Example:

    ::

        gpu = 0
        self.check_gpu(gpu)

    Args:
        gpu (int): gpu id
    """
    if gpu >= 0:
        cuda.get_device(gpu).use()
        self.to_gpu(gpu)
        return True
    return False
def _check_cupy():
    """Set xp

    Note:
        cuda.cupy if gpu, numpy otherwise

    Example:

    ::

        self.xp = self._check_cupy()

    Returns:
        cuda.cupy if gpu, numpy otherwise
    """
    try:
        cuda.check_cuda_available()
        return cuda.cupy
    # if gpu is not available, RuntimeError arises
    except RuntimeError:
        return np
def __init__(self, use_cudnn=True, normalize=True, cache_score=True,
             class_weight=None, ignore_label=-1, reduce='mean'):
    self.use_cudnn = use_cudnn
    self.normalize = normalize
    self.cache_score = cache_score
    self.class_weight = class_weight
    if class_weight is not None:
        if self.class_weight.ndim != 1:
            raise ValueError('class_weight.ndim should be 1')
        if self.class_weight.dtype.kind != 'f':
            raise ValueError('The dtype of class_weight should be \'f\'')
        if isinstance(self.class_weight, chainer.Variable):
            raise ValueError('class_weight should be a numpy.ndarray or '
                             'cupy.ndarray, not a chainer.Variable')
    self.ignore_label = ignore_label
    if reduce not in ('mean', 'no'):
        raise ValueError(
            "only 'mean' and 'no' are valid for 'reduce', but '%s' is "
            'given' % reduce)
    self.reduce = reduce
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    coeff = gloss * self._coeff
    gx = cuda.elementwise(
        'T y, S t, raw T coeff, S n_channel, S n_unit',
        'T gx',
        '''
            const int c = (i / n_unit % n_channel);
            gx = ((t == -1) || (c != t)) ? 0 : (coeff[0] / max(y, 1e-5));
        ''',
        'softmax_crossent_bwd')(
            self.y, cupy.expand_dims(t, 1), -coeff, x.shape[1], n_unit)
    return gx, None
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)

    log_y = softmax_log(x, self.use_cudnn)
    if self.cache_score:
        self.y = cupy.exp(log_y)
    if getattr(self, 'normalize', True):
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)

    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    ret = cuda.reduce(
        'S t, raw T log_y, int32 n_channel, raw T coeff', 'T out',
        't == -1 ? T(0) : log_y[_j * n_channel + t]',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, log_y.reduced_view(), log_y.shape[-1], self._coeff)
    return ret,
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = softmax_log(x, self.use_cudnn)
        cupy.exp(y, out=y)
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    coeff = gloss * self._coeff
    gx = cuda.elementwise(
        'T y, S t, raw T coeff, S n_channel, S n_unit',
        'T gx',
        '''
            const int c = (i / n_unit % n_channel);
            gx = (t == -1) ? 0 : (coeff[0] * (y - (c == t)));
        ''',
        'softmax_crossent_bwd')(
            y, cupy.expand_dims(t, 1), coeff, x.shape[1], n_unit)
    return gx, None
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    log_y = cupy.log(x + 1e-5)
    self.y = x
    if self.debug:
        ipdb.set_trace()
    if getattr(self, 'normalize', True):
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)
    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    ret = cuda.reduce(
        'S t, raw T log_y, int32 n_channel, raw T coeff, raw T weights',
        'T out',
        't == -1 ? 0 : log_y[_j * n_channel + t] * weights[t]',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, log_y.reduced_view(), log_y.shape[-1], self._coeff,
      self.weights.reduced_view())
    return ret,
def test_copydata_gpu_to_another_gpu(self):
    cp = cuda.cupy
    with cuda.get_device(0):
        data1 = cp.zeros(3, dtype=np.float32)
        expect = cp.ones(3, dtype=np.float32)
    with cuda.get_device(1):
        data2 = cp.ones(3, dtype=np.float32)
    self.check_copydata(data1, data2, expect)
def supervised_update(self):
    # array backend
    xp = cuda.cupy if self.gpu >= 0 else numpy

    # read data
    data = self.train_data_queues[0].get()
    vx = tuple([chainer.Variable(xp.asarray(data[k]))
                for k in data.keys() if 'data' in k])
    vt = tuple([chainer.Variable(xp.asarray(data[k]))
                for k in data.keys() if 'target' in k])

    # forward and update
    self.optimizer.update(self.optimizer.target, vx, vt)

    # get result
    res = {'loss': float(self.optimizer.target.loss.data)}
    if self.optimizer.target.accuracy is not None:
        res['accuracy'] = float(self.optimizer.target.accuracy.data)
    return res
def supervised_update(self):
    # array backend
    xp = cuda.cupy if self.gpu >= 0 else numpy
    self.accuracy = None

    # read data
    data = self.train_data_queues[0].get()
    vx = tuple([chainer.Variable(xp.asarray(data[k]))
                for k in data.keys() if 'data' in k])
    vt = tuple([chainer.Variable(xp.asarray(data[k]))
                for k in data.keys() if 'target' in k])

    self.optimizer.update(self.adversarial_loss, vx, vt)

    # get result
    res = {'loss': float(self.loss.data),
           'adversarial_loss': float(self.adv_loss.data)}
    if self.accuracy is not None:
        res['accuracy'] = self.accuracy
    return res
def __init__(self, *, iterator, noise_iterator, optimizer_generator,
             optimizer_critic, device=-1):

    if optimizer_generator.target.name is None:
        optimizer_generator.target.name = 'generator'

    if optimizer_critic.target.name is None:
        optimizer_critic.target.name = 'critic'

    iterators = {'main': iterator, 'z': noise_iterator}
    optimizers = {'generator': optimizer_generator,
                  'critic': optimizer_critic}

    super().__init__(iterators, optimizers, device=device)

    if device >= 0:
        cuda.get_device(device).use()
        [optimizer.target.to_gpu() for optimizer in optimizers.values()]

    self.xp = cuda.cupy if device >= 0 else np
def __init__(self, gpu, batchsize, data_dir, dataset, net, mode, epochs,
             save_every, size, **kwargs):
    super(Network, self).__init__(epochs, save_every)
    print "building ..."
    self.input_height = size
    self.input_width = size
    self.net = net
    self.mode = mode
    self.dataset = dataset
    self.train_data, self.test_data = self.get_dataset(data_dir, dataset)
    print 'input_channel ==> %d using %s dataset' % (self.in_channel, self.dataset)

    self.enc = GoogLeNet()
    self.dec = Decoder(self.in_size)
    self.xp = cuda.cupy
    cuda.get_device(gpu).use()

    self.enc.to_gpu()
    self.dec.to_gpu()

    self.o_dec = optimizers.RMSpropGraves()
    self.o_dec.setup(self.dec)

    self.batchsize = batchsize
def __init__(self, net, optimizer, epoch_num=100, batch_size=100,
             device_id=-1, lr_shape='multistep', lr_decay=[0]):
    self.net = net
    self.optimizer = optimizer
    self.epoch_num = epoch_num
    self.batch_size = batch_size
    self.device_id = device_id
    if hasattr(optimizer, 'alpha'):
        self.initial_lr = optimizer.alpha
    else:
        self.initial_lr = optimizer.lr
    self.lr_shape = lr_shape
    self.lr_decay = lr_decay
    if device_id >= 0:
        self.xp = cuda.cupy
        self.net.to_gpu(device_id)
    else:
        self.xp = np
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)

    log_y = cupy.log(x)
    if self.cache_score:
        self.y = x
    if getattr(self, 'normalize', True):
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)

    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    ret = cuda.reduce(
        'S t, raw T log_y, int32 n_channel, raw T coeff', 'T out',
        't == -1 ? 0 : log_y[_j * n_channel + t]',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, log_y.reduced_view(), log_y.shape[-1], self._coeff)
    return ret,
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = x
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    coeff = gloss * self._coeff
    gx = cuda.elementwise(
        'T y, S t, raw T coeff, S n_channel, S n_unit',
        'T gx',
        '''
            const int c = (i / n_unit % n_channel);
            gx = (t == -1 || c != t) ? 0 : (coeff[0] * -1.0 / y);
        ''',
        'crossent_bwd')(
            y, cupy.expand_dims(t, 1), coeff, x.shape[1], n_unit)
    return gx, None
def calculate_accuracy(self, predictions, truths):
    if cuda.get_array_module(predictions.data) == cuda.cupy:
        with predictions.data.device:
            predictions = predictions.data.get()
        with truths.data.device:
            truths = truths.data.get()
    else:
        predictions = predictions.data
        truths = truths.data

    # we want to exclude labels with -1
    mask = truths != -1

    # reduce values along the class axis
    reduced_predictions = np.argmax(predictions, axis=1)

    # mask
    masked_reduced_predictions = reduced_predictions[mask]
    masked_truths = truths[mask]

    s = (masked_reduced_predictions == masked_truths).mean()
    return s
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t, w = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)

    log_y = softmax_log(x, self.use_cudnn)
    if self.cache_score:
        self.y = cupy.exp(log_y)
    if getattr(self, 'normalize', True):
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)

    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    ret = cuda.reduce(
        'S t, T w, raw T log_y, int32 n_channel, raw T coeff', 'T out',
        't == -1 ? T(0) : log_y[_j * n_channel + t] * w',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, w, log_y.reduced_view(), log_y.shape[-1], self._coeff)
    return ret,
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t, w = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = softmax_log(x, self.use_cudnn)
        cupy.exp(y, out=y)
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    coeff = gloss * self._coeff * w
    gx = cuda.elementwise(
        'T y, S t, raw T coeff, S n_channel, S n_unit',
        'T gx',
        '''
            const int c = (i / n_unit % n_channel);
            gx = (t == -1) ? 0 : (coeff[0] * (y - (c == t)));
        ''',
        'softmax_crossent_bwd')(
            y, cupy.expand_dims(t, 1), coeff, x.shape[1], n_unit)
    return gx, None, None
def __init__(self, in_channels, out_channels, ksize, stride=1, real=0, wscale=1.0):
    super(ConvolutionRBM, self).__init__(
        conv=L.Convolution2D(in_channels, out_channels, ksize, stride=stride, wscale=wscale),
    )
    # if gpu >= 0:
    #     cuda.check_cuda_available()
    # xp = cuda.cupy  # if gpu >= 0 else np
    self.conv.add_param("a", in_channels)  # dtype=xp.float32
    self.conv.a.data.fill(0.)
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.ksize = ksize
    self.real = real
    self.rbm_train = False  # default value is false
def sample_h_given_v(self, v0_sample):
    """Get a sample of the hiddens by gibbs sampling.

    :param v0_sample: Variable, see vis above
    :return: h1_mean: Variable Matrix(batch_size, out_channels, image_height_out, image_width_out)
             h1_sample: Variable Matrix(batch_size, out_channels, image_height_out, image_width_out)
                        - actual sample for hidden units, populated by 0 or 1.
    """
    h1_mean = self.propup(v0_sample)
    xp = cuda.get_array_module(h1_mean.data)
    if xp == cuda.cupy:
        h1_sample = cuda.cupy.random.random_sample(size=h1_mean.data.shape)
        h1_sample[:] = h1_sample[:] < h1_mean.data[:]
    else:  # xp == np
        h1_sample = np.random.binomial(size=h1_mean.data.shape, n=1, p=h1_mean.data)
    return h1_mean, Variable(h1_sample.astype(xp.float32))
def forward_check():
    xp = cuda.cupy if config.use_gpu else np
    out_head = 2
    in_head = 3
    n_x = 100
    state = xp.ones((2, n_x)).astype(xp.float32)
    state = Variable(state)

    initial_weight = np.ones((config.q_k_heads * in_head, n_x))
    shared = L.Linear(n_x, config.q_k_heads * in_head, initialW=initial_weight)
    initial_weight = np.ones((out_head * config.q_k_heads, in_head * config.q_k_heads))
    link1 = model.LinearHead(in_head, out_head, config.q_k_heads, initialW=initial_weight)
    initial_weight = np.ones((in_head * config.q_k_heads, out_head * config.q_k_heads))
    link2 = model.LinearHead(out_head, in_head, config.q_k_heads, initialW=initial_weight)

    if config.use_gpu:
        link1.to_gpu()
        link2.to_gpu()
        shared.to_gpu()

    output = link2(link1(shared(state)))
    print output.data
def update(self, s, i):
    """Update decoder state.

    Args:
        s (any): Current (hidden, cell) states. If ``None`` is specified
            zero-vector is used.
        i (int): input label.

    Return:
        (~chainer.Variable) updated decoder state
    """
    if cuda.get_device_from_array(s[0].data).id >= 0:
        xp = cuda.cupy
    else:
        xp = np
    v = chainer.Variable(xp.array([i], dtype=np.int32))
    x = self.embed(v)
    if s is not None:
        hy, cy, dy = self.lstm(s[0], s[1], [x])
    else:
        hy, cy, dy = self.lstm(None, None, [x])
    return hy, cy, dy
def train(model, batch, num_samples, word_keep_rate, UNK, alpha):
    xp = model.xp
    use_gpu = (xp == cuda.cupy)
    if use_gpu:
        batch = cuda.to_gpu(batch)
    KL, xents = forward(model, batch, num_samples=num_samples,
                        word_keep_rate=word_keep_rate, UNK=UNK, train=True)
    loss = alpha * KL + sum(xents) / num_samples
    loss.backward()
    optimizer.update()
    loss.unchain_backward()
    if alpha == 0:
        KL.unchain_backward()
def save_hdf5(filename, obj):
    gpu = (hasattr(obj, "xp") and obj.xp == cuda.cupy)
    if gpu:
        obj.to_cpu()
    serializers.save_hdf5(filename, obj)
    if gpu:
        obj.to_gpu()
def make_batch(datas, train=True):
    allconcat = np.concatenate(datas, axis=0)
    if args.gpu >= 0:
        allconcat = cuda.cupy.array(allconcat)
    batch = xp.split(allconcat, allconcat.shape[1], axis=1)
    batch = [xp.reshape(x, (x.shape[0], x.shape[2])) for x in batch]
    return batch
def forward(self, inputs):
    xp = cuda.get_array_module(*inputs)
    x0, x1 = inputs
    self.diff = self.inside_weights * (x0 - x1)
    abs_diff = xp.abs(self.diff)
    flag = abs_diff < 1.0 / self.sigma2
    y = (flag * 0.5 * xp.square(self.diff) * self.sigma2 +
         (~flag) * (abs_diff - 0.5 / self.sigma2))
    if xp == cuda.cupy:
        with cuda.Device(cuda.get_device(y)):
            num = xp.prod(xp.asarray(y.shape))
    else:
        num = xp.prod(y.shape)
    return xp.array(y.sum() / num).astype(numpy.float32),
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)

    log_y = log_softmax._log_softmax(x, self.use_cudnn)
    if self.cache_score:
        self.y = cupy.exp(log_y)
    if self.class_weight is not None:
        shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
        log_y *= cupy.broadcast_to(
            self.class_weight.reshape(shape), x.shape)
    if self.normalize:
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)

    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    ret = cuda.reduce(
        'S t, raw T log_y, int32 n_channel, raw T coeff', 'T out',
        't == -1 ? T(0) : log_y[_j * n_channel + t]',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, log_y.reduced_view(), log_y.shape[-1], self._coeff)
    return ret,
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = log_softmax._log_softmax(x, self.use_cudnn)
        cupy.exp(y, out=y)
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    coeff = gloss * self._coeff
    if self.class_weight is None:
        gx = cuda.elementwise(
            'T y, S t, raw T coeff, S n_channel, S n_unit',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = (t == -1) ? 0 : (coeff[0] * (y - (c == t)));
            ''',
            'softmax_crossent_bwd')(
                y, cupy.expand_dims(t, 1), coeff, x.shape[1], n_unit)
    else:
        gx = cuda.elementwise(
            'T y, raw T w, S t, raw T coeff, S n_channel, S n_unit',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = t == -1 ? 0 : coeff[0] * (y - (c == t)) * w[t];
            ''',
            'softmax_crossent_bwd')(
                y, self.class_weight, cupy.expand_dims(t, 1), coeff,
                x.shape[1], n_unit)
    return gx, None
def softmax_cross_entropy(
        x, t, use_cudnn=True, normalize=True, cache_score=True,
        class_weight=None):
    """Computes cross entropy loss for pre-softmax activations.

    Args:
        x (~chainer.Variable): Variable holding a multidimensional array whose
            element indicates unnormalized log probability: the first axis of
            the variable represents the number of samples, and the second axis
            represents the number of classes. While this function computes a
            usual softmax cross entropy if the number of dimensions is equal
            to 2, it computes a cross entropy of the replicated softmax if the
            number of dimensions is greater than 2.
        t (~chainer.Variable): Variable holding an int32 vector of ground
            truth labels. If ``t[i] == -1``, corresponding ``x[i]`` is
            ignored.
        normalize (bool): If ``True``, this function normalizes the cross
            entropy loss across all instances. If ``False``, it only
            normalizes along a batch size.
        cache_score (bool): When it is ``True``, the function stores result of
            forward computation to use it on backward computation. It reduces
            computational cost though consumes more memory.
        class_weight (~numpy.ndarray or ~cupy.ndarray): An array that contains
            constant weights that will be multiplied with the loss values
            along with the second dimension. The shape of this array should be
            ``(x.shape[1],)``.

    Returns:
        Variable: A variable holding a scalar array of the cross entropy loss.

    .. note::

       This function is differentiable only by ``x``.

    """
    return SoftmaxCrossEntropy(
        use_cudnn, normalize, cache_score, class_weight)(x, t)
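A short usage sketch of the wrapper defined above; the logits and labels are made up for illustration, and the -1 label marks an ignored row as described in the docstring.

import numpy as np
import chainer

# (batch, n_classes) unnormalized log probabilities
x = chainer.Variable(np.array([[2.0, 0.5, -1.0],
                               [0.1, 1.2,  0.3]], dtype=np.float32))
t = np.array([0, -1], dtype=np.int32)   # second sample is ignored
loss = softmax_cross_entropy(x, t)      # scalar chainer.Variable
loss.backward()
# x.grad now holds d(loss)/d(logits); the ignored row contributes zero gradient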
def forward_gpu(self, x):
    xp = cuda.cupy
    n, c, h, w = x[0].shape
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
    up_y = xp.zeros((n, c, self.outh, self.outw), dtype=numpy.float32)
    up_y = conv.im2col_gpu(
        up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all)
    up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
    n, c, oy, ox, ky, kx = up_y.shape
    indexes = xp.asarray(self.indexes, dtype=numpy.int32)
    xp.ElementwiseKernel(
        'int32 index, float32 x, int32 n, int32 c, int32 oy, int32 ox,'
        'int32 ky, int32 kx',
        'raw float32 up_y',
        '''
        int yn = i / c / oy / ox;
        int yc = (i / oy / ox) % c;
        int yoy = (i / ox) % oy;
        int yox = i % ox;
        up_y[yn * c * oy * ox * ky * kx +
             yc * oy * ox * ky * kx +
             yoy * ox * ky * kx +
             yox * ky * kx +
             index] = x;
        ''',
        'upsampling_2d_fwd')(indexes, x[0], n, c, oy, ox, ky, kx, up_y)
    up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
    up_y = conv.col2im_gpu(up_y, self.sy, self.sx, self.ph, self.pw,
                           self.outh, self.outw)
    return up_y,
def backward_gpu(self, x, gy):
    xp = cuda.cupy
    gcol = conv.im2col_gpu(
        gy[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all)
    gcol = gcol.transpose(0, 1, 4, 5, 2, 3)
    n, c, oy, ox, ky, kx = gcol.shape
    gcol = gcol.reshape((n, c, oy, ox, ky * kx))
    indexes = xp.asarray(self.indexes, dtype=numpy.int32)
    gx = xp.empty((n, c, oy, ox), dtype=x[0].dtype)
    xp.ElementwiseKernel(
        'int32 indexes, raw float32 gcol, int32 n, int32 c, int32 oy,'
        'int32 ox, int32 ky, int32 kx',
        'raw float32 gx',
        '''
        int ind_n = i / c / oy / ox;
        int ind_c = (i / oy / ox) % c;
        int ind_oy = (i / ox) % oy;
        int ind_ox = i % ox;
        int gcol_ky = indexes / kx;
        int gcol_kx = indexes % kx;
        float top_gx = gcol[ind_n * c * oy * ox * ky * kx +
                            ind_c * oy * ox * ky * kx +
                            ind_oy * ox * ky * kx +
                            ind_ox * ky * kx +
                            gcol_ky * kx +
                            gcol_kx];
        gx[ind_n * c * oy * ox +
           ind_c * oy * ox +
           ind_oy * ox +
           ind_ox] = top_gx;
        ''',
        'upsampling_2d_bwd')(indexes, gcol, n, c, oy, ox, ky, kx, gx)
    return gx,
def eps_greedy(self, state, exploration_rate):
    prop = np.random.uniform()
    q_max = None
    q_min = None
    if prop < exploration_rate:
        # Select a random action
        action_index = np.random.randint(0, len(config.ale_actions))
    else:
        # Select a greedy action
        state = Variable(state)
        if config.use_gpu:
            state.to_gpu()
        q = self.compute_q_variable(state, test=True)
        if config.use_gpu:
            action_index = cuda.to_cpu(cuda.cupy.argmax(q.data))
            q_max = cuda.to_cpu(cuda.cupy.max(q.data))
            q_min = cuda.to_cpu(cuda.cupy.min(q.data))
        else:
            action_index = np.argmax(q.data)
            q_max = np.max(q.data)
            q_min = np.min(q.data)

    action = self.get_action_with_index(action_index)

    # No-op
    self.no_op_count = self.no_op_count + 1 if action == 0 else 0
    if self.no_op_count > config.rl_no_op_max:
        no_op_index = np.argmin(np.asarray(config.ale_actions))
        actions_without_no_op = []
        for i in range(len(config.ale_actions)):
            if i == no_op_index:
                continue
            actions_without_no_op.append(config.ale_actions[i])
        action_index = np.random.randint(0, len(actions_without_no_op))
        action = actions_without_no_op[action_index]
        print "Reached no_op_max.", "New action:", action

    return action, q_max, q_min
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = log_softmax._log_softmax(x)
        cupy.exp(y, out=y)
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    if self.reduce == 'mean':
        coeff = gloss * self._coeff
    else:
        coeff = gloss[:, None, ...]

    if self.class_weight is None:
        gx = cuda.elementwise(
            'T y, S t, T coeff, S n_channel, S n_unit, S ignore_label',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = t == ignore_label ? 0 : coeff * (y - (c == t));
            ''',
            'softmax_crossent_bwd')(
                y, cupy.expand_dims(t, 1), coeff, x.shape[1],
                n_unit, self.ignore_label)
    else:
        gx = cuda.elementwise(
            'T y, raw T w, S t, T coeff, S n_channel, S n_unit, '
            'S ignore_label',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = t == ignore_label ? 0 : coeff * (y - (c == t)) * w[t];
            ''',
            'softmax_crossent_weight_bwd')(
                y, self.class_weight, cupy.expand_dims(t, 1), coeff,
                x.shape[1], n_unit, self.ignore_label)

    return gx, None
def compute_accuracy_batch(model, batch):
    source, target = make_source_target_pair(batch)
    if model.xp is cuda.cupy:
        source = cuda.to_gpu(source)
        target = cuda.to_gpu(target)
    model.reset_state()
    Y = model(source)
    return float(F.accuracy(Y, target, ignore_label=ID_PAD).data)
def forward_one_step(self, state, action, reward, next_state, episode_ends, test=False):
    xp = cuda.cupy if config.use_gpu else np
    n_batch = state.shape[0]
    state = Variable(state)
    next_state = Variable(next_state)
    if config.use_gpu:
        state.to_gpu()
        next_state.to_gpu()
    q = self.compute_q_variable(state, test=test)

    max_target_q = self.compute_target_q_variable(next_state, test=test)
    max_target_q = xp.amax(max_target_q.data, axis=1)

    target = q.data.copy()

    for i in xrange(n_batch):
        if episode_ends[i] is True:
            target_value = np.sign(reward[i])
        else:
            target_value = np.sign(reward[i]) + config.rl_discount_factor * max_target_q[i]
        action_index = self.get_index_with_action(action[i])
        old_value = target[i, action_index]
        diff = target_value - old_value
        if diff > 1.0:
            target_value = 1.0 + old_value
        elif diff < -1.0:
            target_value = -1.0 + old_value
        target[i, action_index] = target_value

    target = Variable(target)
    loss = F.mean_squared_error(target, q)
    return loss, q
def forward_one_step(self, state, action, reward, next_state, test=False):
    xp = cuda.cupy if config.use_gpu else np
    n_batch = state.shape[0]
    state = Variable(state.reshape((n_batch, config.rl_history_length * 34)))
    next_state = Variable(next_state.reshape((n_batch, config.rl_history_length * 34)))
    if config.use_gpu:
        state.to_gpu()
        next_state.to_gpu()
    q = self.compute_q_variable(state, test=test)

    q_ = self.compute_q_variable(next_state, test=test)
    max_action_indices = xp.argmax(q_.data, axis=1)
    if config.use_gpu:
        max_action_indices = cuda.to_cpu(max_action_indices)

    target_q = self.compute_target_q_variable(next_state, test=test)

    target = q.data.copy()

    for i in xrange(n_batch):
        max_action_index = max_action_indices[i]
        target_value = reward[i] + config.rl_discount_factor * target_q.data[i][max_action_indices[i]]
        action_index = self.get_index_for_action(action[i])
        old_value = target[i, action_index]
        diff = target_value - old_value
        if diff > 1.0:
            target_value = 1.0 + old_value
        elif diff < -1.0:
            target_value = -1.0 + old_value
        target[i, action_index] = target_value

    target = Variable(target)
    loss = F.mean_squared_error(target, q)
    return loss, q
def xp(self):
    """Array module for this link.

    Depending on which of CPU/GPU this link is on, this property returns
    :mod:`numpy` or :mod:`cupy`.

    """
    return numpy if self._cpu else cuda.cupy
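A short sketch of how the xp property above is typically used; L.Linear stands in for any link, and the shapes are illustrative only.

import numpy as np
import chainer.links as L

link = L.Linear(3, 2)
# link.xp is numpy while the link lives on the CPU;
# after link.to_gpu() it becomes cuda.cupy.
x = link.xp.ones((1, 3), dtype=np.float32)
y = link(x)  # the input array is allocated on the same device as the link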
def prepare(self):
    """Prepares for an update.

    This method initializes missing optimizer states (e.g. for newly added
    parameters after the set up), and copies arrays in each state
    dictionary to CPU or GPU according to the corresponding parameter
    array.

    """
    states = self._states
    for name, param in self.target.namedparams():
        if name not in states:
            state = {}
            self.init_state(param, state)
            states[name] = state
        else:
            state = states[name]
            with cuda.get_device(param.data) as dev:
                if int(dev) == -1:  # cpu
                    for key, value in six.iteritems(state):
                        if isinstance(value, cuda.ndarray):
                            state[key] = value.get()
                else:  # gpu
                    cupy = cuda.cupy
                    for key, value in six.iteritems(state):
                        if isinstance(value, numpy.ndarray):
                            state[key] = cuda.to_gpu(value)
                        elif (isinstance(value, cupy.ndarray) and
                              value.device != dev):
                            state[key] = cupy.copy(value)
def init_state_gpu(self, param, state):
    """Initializes the optimizer state on GPU.

    This method is called from :meth:`init_state` by default.

    Args:
        param (~chainer.Variable): Parameter variable. Its data array is
            of type :class:`cupy.ndarray`.
        state (dict): State dictionary.

    .. seealso:: :meth:`init_state`

    """
    pass
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    mean, ln_var = inputs
    if self.eps is None:
        self.eps = cupy.random.standard_normal(
            ln_var.shape, dtype=mean.dtype)

    self.noise = cuda.cupy.empty_like(mean)
    self.noise = cuda.elementwise(
        'T v, T e', 'T noise',
        'noise = exp(v / 2) * e',
        'gaussian_forward'
    )(ln_var, self.eps)
    return mean + self.noise,
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    coeff = gloss * self._coeff
    gx = cuda.elementwise(
        'T y, S t, raw T coeff, S n_channel, S n_unit, raw T weights',
        'T gx',
        '''
            const int c = (i / n_unit % n_channel);
            gx = ((t == -1) || (c != t)) ? 0 : ((weights[t] * coeff[0]) / max(y, 1e-5));
        ''',
        'crossent_bwd')(self.y, cupy.expand_dims(t, 1), -coeff,
                        x.shape[1], n_unit, self.weights.reduced_view())
    return gx, None
def debug_print(self):
    """Display a summary of the stored data and location of the Variable."""
    msg = """{summary}
- device: {device}
- volatile: {volatile}
- backend: {background}
- shape: {shape}
- dtype: {dtype}
- statistics: {stats}
- grad: {grad}"""

    stats_msg = 'mean={0:.8f}, std={1:.8f}'

    try:
        device = self.data.device
    except AttributeError:
        device = 'CPU'

    with cuda.get_device(self.data) as dev:
        xp = numpy if int(dev) == -1 else cuda.cupy

        if self.grad is None:
            grad = None
        elif xp.all(self.grad == 0):
            grad = 0
        else:
            grad = stats_msg.format(float(xp.mean(self.grad)),
                                    float(xp.std(self.grad)))

        stats = stats_msg.format(float(xp.mean(self.data)),
                                 float(xp.std(self.data)))

    return msg.format(summary=repr(self), volatile=self.volatile, grad=grad,
                      shape=self.data.shape, background=type(self.data),
                      dtype=self.data.dtype, device=device, stats=stats)