我们从 Python 开源项目中，提取了以下 50 个代码示例，用于说明如何使用 chainer.cuda.to_gpu()。
def set_by_sample(self, train=True):
    """Set every decoder layer's h and c state from a noisy sample.

    For each layer a standard-normal noise vector of length ``self.z_size``
    is drawn with NumPy (float32), copied to the GPU when ``self.xp`` is
    CuPy, and combined with the stored mean and sigma as
    ``mu + noise * sigma``.  The result is passed through the layer's
    ``get_zh`` / ``get_zc`` callable and handed to ``decoder.set_h`` /
    ``decoder.set_c``.

    ``train`` is accepted but not used by this method.
    """
    on_gpu = (self.xp == cuda.cupy)

    def _draw_noise():
        # Always sampled on the host, then transferred if required.
        eps = np.random.normal(0., 1., self.z_size).astype(np.float32)
        return cuda.to_gpu(eps) if on_gpu else eps

    for layer in range(self.num_layers):
        # h -- sampled first so the random stream order is h, then c.
        h_mean, h_sigma = self.hmus[layer], self.hsigmas[layer]
        h_eps = _draw_noise()
        self.decoder.set_h(
            layer, self.get_zh(layer)(h_mean + h_eps * h_sigma))

        # c
        c_mean, c_sigma = self.cmus[layer], self.csigmas[layer]
        c_eps = _draw_noise()
        self.decoder.set_c(
            layer, self.get_zc(layer)(c_mean + c_eps * c_sigma))
def get_model_and_optimizer(result_dir, modelfn, opt, opt_kwargs, net_kwargs, gpu):
    """Load a model class from a source file and pair it with an optimizer.

    The class is looked up in ``modelfn`` under the file's own base name
    (the part before the first dot).  ``modelfn`` and this script are copied
    into ``result_dir`` unless a file of that name already exists there.

    Args:
        result_dir: Directory that receives the copied source files.
        modelfn: Path of the Python file defining the model class.
        opt: Optimizer name, looked up in ``optimizers.__dict__``.
        opt_kwargs: Keyword arguments for the optimizer constructor.
        net_kwargs: Keyword arguments for the model constructor.
        gpu: Device id; the model is moved to that GPU when it is >= 0.

    Returns:
        Tuple ``(model, optimizer)``; the optimizer is already set up on
        the model.
    """
    script_name = os.path.basename(modelfn)
    class_name = script_name.split('.')[0]
    net_cls = getattr(imp.load_source(class_name, modelfn), class_name)

    # Copy model definition and this train script to the result dir
    for source_path in (modelfn, __file__):
        target = '%s/%s' % (result_dir, os.path.basename(source_path))
        if not os.path.exists(target):
            shutil.copy(source_path, target)

    # Create model
    model = net_cls(**net_kwargs)
    if gpu >= 0:
        model.to_gpu(gpu)

    # Create optimizer
    optimizer_cls = optimizers.__dict__[opt]
    optimizer = optimizer_cls(**opt_kwargs)
    optimizer.setup(model)

    return model, optimizer
def get_model_and_optimizer(result_dir, modelfn, opt, opt_kwargs, net_kwargs, gpu):
    """Instantiate the model defined in ``modelfn`` together with its optimizer.

    The model class must carry the same name as the file (text before the
    first dot of its base name).  The model file and this script are copied
    into ``result_dir`` when no file of the same name is present there yet.

    Args:
        result_dir: Directory that receives the copied source files.
        modelfn: Path of the Python file defining the model class.
        opt: Optimizer name, looked up in ``optimizers.__dict__``.
        opt_kwargs: Keyword arguments for the optimizer constructor.
        net_kwargs: Keyword arguments for the model constructor.
        gpu: When >= 0 the model is moved with ``to_gpu()``; the id itself
            is not forwarded.

    Returns:
        Tuple ``(model, optimizer)``; the optimizer is already set up on
        the model.
    """
    def _archive(source_path):
        # Keep an existing copy untouched; only fill in missing files.
        target = '%s/%s' % (result_dir, os.path.basename(source_path))
        if not os.path.exists(target):
            shutil.copy(source_path, target)

    file_name = os.path.basename(modelfn)
    class_name = file_name.split('.')[0]
    loaded = imp.load_source(class_name, modelfn)
    net_cls = getattr(loaded, class_name)

    _archive(modelfn)
    _archive(__file__)

    # prepare model
    model = net_cls(**net_kwargs)
    if gpu >= 0:
        model.to_gpu()

    optimizer = optimizers.__dict__[opt](**opt_kwargs)
    optimizer.setup(model)
    return model, optimizer
def test_index_group_func():
    """Compare ``index_group_func_kernel`` with NumPy fancy indexing.

    A random 5-D array is indexed on its last three axes with random
    integer index arrays, then axes 1 and 2 are swapped.  The kernel is
    run on GPU copies of the same inputs and the summed absolute
    difference to the NumPy result must be close to zero.
    """
    import numpy as np
    import cupy as cp
    from chainer import cuda

    index_shape = (7, 8, 9, 10)
    source = np.random.randn(2, 3, 4, 5, 6)
    # Upper bounds match the sizes of the last three axes of ``source``.
    idx_i = np.random.randint(0, 4, index_shape)
    idx_j = np.random.randint(0, 5, index_shape)
    idx_k = np.random.randint(0, 6, index_shape)

    expected = source[..., idx_i, idx_j, idx_k].swapaxes(1, 2)

    gpu_result = cp.zeros(expected.shape)
    gpu_source = cuda.to_gpu(source)
    gpu_i = cuda.to_gpu(idx_i)
    gpu_j = cuda.to_gpu(idx_j)
    gpu_k = cuda.to_gpu(idx_k)
    index_group_func_kernel(gpu_source, gpu_i, gpu_j, gpu_k, gpu_result)

    actual = cuda.to_cpu(gpu_result)
    error = np.abs(actual - expected).sum()
    print(error)
    assert np.isclose(error, 0.)
def check_transform_grad(inds, w, transformer, dtype, toll):
    """Check a transformer's analytic gradient against a numerical one.

    ``transformer(inds)`` is applied to ``w`` (cast to ``dtype``), a random
    output gradient is back-propagated, and the resulting ``W.grad`` is
    compared with ``gradient_check.numerical_grad``.  The maximum relative
    error is printed together with ``dtype`` and ``toll`` and must be
    below ``toll``.
    """
    from chainer import gradient_check

    gpu_inds = cuda.to_gpu(inds)
    weights = Variable(w.astype(dtype))
    transformed = transformer(gpu_inds)(weights)

    transformed.grad = cp.random.randn(*transformed.data.shape).astype(dtype)
    transformed.backward(retain_grad=True)
    creator = transformed.creator

    def _forward():
        return creator.forward((weights.data,))

    (numeric,) = gradient_check.numerical_grad(
        _forward, (weights.data,), (transformed.grad,))

    grad_numeric = cuda.to_cpu(numeric)
    grad_analytic = cuda.to_cpu(weights.grad)
    scale = np.maximum(np.abs(grad_numeric), np.abs(grad_analytic))
    relerr = np.max(np.abs(grad_numeric - grad_analytic) / scale)

    # Printed as a single tuple, exactly like the original statement.
    print((dtype, toll, relerr))
    assert relerr < toll
def check_equivariance(im, layers, input_array, output_array, point_group):
    """Assert that ``layers`` commute with a random ``point_group`` element.

    The image is transformed by a random group element, both the original
    and the transformed image are pushed through every layer on the GPU,
    and the feature map of the transformed image is mapped back with the
    inverse element.  Both feature maps must then agree within
    ``rtol=1e-5`` / ``atol=1e-3``.
    """
    # Transform the image
    wrapped = input_array(im)
    element = point_group.rand()
    transformed_im = (element * wrapped).v

    # Apply layers to both images
    fmap = Variable(cuda.to_gpu(im))
    fmap_t = Variable(cuda.to_gpu(transformed_im))
    for layer in layers:
        layer.to_gpu()
        fmap = layer(fmap)
        fmap_t = layer(fmap_t)

    # Transform the computed feature maps
    fmap_t_array = output_array(cuda.to_cpu(fmap_t.data))
    restored = (element.inv() * fmap_t_array).v
    reference = cuda.to_cpu(fmap.data)

    assert np.allclose(reference, restored, rtol=1e-5, atol=1e-3)
def concat_examples(batch, device=None):
    """Collate a list of example tuples into per-field batches.

    Fields 0-2 (labelled ws, ps, ss in the original) are concatenated with
    ``_concat_arrays(..., -1)`` and sent to ``device``; field 3 (ls) is
    returned as a plain list.  When the examples have seven fields, fields
    4 and 5 (cat_ts, dep_ts) are sent element-wise and field 6 (weights)
    is concatenated with ``_concat_arrays(..., None)``.

    Args:
        batch: Non-empty sequence of example tuples.
        device: ``None`` leaves arrays where they are, a negative value
            moves them to the CPU, anything else is used as a GPU id.

    Returns:
        Tuple of the collated fields.

    Raises:
        ValueError: If ``batch`` is empty.
    """
    if len(batch) == 0:
        raise ValueError('batch is empty')

    if device is None:
        def to_device(x):
            return x
    elif device < 0:
        to_device = cuda.to_cpu
    else:
        def to_device(x):
            return cuda.to_gpu(x, device, cuda.Stream.null)

    def _field(index):
        return [example[index] for example in batch]

    # ws, ps, ss
    collated = [to_device(_concat_arrays(_field(index), -1))
                for index in range(3)]
    # ls
    collated.append(_field(3))

    if len(batch[0]) == 7:
        collated.append([to_device(item) for item in _field(4)])  # cat_ts
        collated.append([to_device(item) for item in _field(5)])  # dep_ts
        collated.append(to_device(_concat_arrays(_field(6), None)))  # weights

    return tuple(collated)
def setup_workers(self):
    """Start one worker process per extra GPU and set up NCCL; runs once.

    Later calls return immediately.  Gradients of ``self.model`` are
    cleared, a ``_Worker`` is started for every GPU index from 1 upwards
    (each connected through a ``multiprocessing.Pipe``), and the model is
    moved to the first GPU.  With more than one GPU a NCCL unique id is
    sent to the workers and this process joins the communicator as rank 0.
    """
    # work only once
    if self._initialized:
        return
    self._initialized = True

    self.model.cleargrads()

    gpu_count = len(self.gpus)
    for rank in six.moves.range(1, gpu_count):
        parent_end, child_end = multiprocessing.Pipe()
        per_worker_batch = int(
            float(self.batch) / len(self.gpus) / self.train_batch_divide)
        worker = _Worker(rank, child_end, self.model, self.gpus, self.da,
                         per_worker_batch, self)
        worker.start()
        self._workers.append(worker)
        self._pipes.append(parent_end)

    master_gpu = self.gpus[0]
    with cuda.Device(master_gpu):
        self.model.to_gpu(master_gpu)
        if len(self.gpus) > 1:
            communication_id = nccl.get_unique_id()
            self._send_message(("set comm_id", communication_id))
            self.communication = nccl.NcclCommunicator(
                len(self.gpus), communication_id, 0)
def setup_workers(self):
    """Start one worker process per extra GPU and set up NCCL; runs once.

    Later calls return immediately.  Gradients of ``self.model`` are reset
    with ``zerograds()``, a ``_Worker`` is started for every GPU index from
    1 upwards (each connected through a ``multiprocessing.Pipe``), and the
    model is moved to the first GPU.  With more than one GPU a NCCL unique
    id is sent to the workers and this process joins the communicator as
    rank 0.
    """
    # work only once
    if self._initialized:
        return
    self._initialized = True

    self.model.zerograds()

    for rank in six.moves.range(1, len(self.gpus)):
        parent_end, child_end = multiprocessing.Pipe()
        # Division result depends on the types of the operands, exactly as
        # in the original expression; only the final int() is guaranteed.
        per_worker_batch = int(
            self.batch / len(self.gpus) / self.train_batch_divide)
        worker = _Worker(rank, child_end, self.model, self.gpus, self.da,
                         per_worker_batch, self)
        worker.start()
        self._workers.append(worker)
        self._pipes.append(parent_end)

    first_gpu = self.gpus[0]
    with cuda.Device(first_gpu):
        self.model.to_gpu(first_gpu)
        if len(self.gpus) > 1:
            communication_id = nccl.get_unique_id()
            self._send_message(("set comm_id", communication_id))
            self.communication = nccl.NcclCommunicator(
                len(self.gpus), communication_id, 0)
def features_to_minibatch(self, features, sentences, max_feature_length, max_sentence_length, gpu=True):
    """Build a minibatch through ``self.processor`` and optionally move it to GPU.

    The processor returns five items: features, feature lengths, targets,
    target lengths and bigrams.  When ``self.stats_total`` is positive the
    running statistics are updated with the valid part of every feature
    and the features are normalised with ``get_mean_and_std()``.

    Args:
        features, sentences, max_feature_length, max_sentence_length:
            Forwarded unchanged to ``self.processor.features_to_minibatch``.
        gpu: When true, every returned item is cast (float32 for features,
            int32 for the rest) and copied to the GPU.

    Returns:
        Tuple ``(x_batch, x_length_batch, t_batch, t_length_batch,
        bigram_batch)``.
    """
    minibatch = self.processor.features_to_minibatch(
        features, sentences, max_feature_length, max_sentence_length,
        self.token_ids, self.id_blank)
    x_batch, x_length_batch, t_batch, t_length_batch, bigram_batch = minibatch

    # NOTE(review): the normalisation is assumed to belong inside this
    # guard (no statistics -> no normalisation) -- confirm against upstream.
    if self.stats_total > 0:
        for feature, valid_length in zip(x_batch, x_length_batch):
            self._update_stats_recursively(feature[..., :valid_length])
        mean, std = self.get_mean_and_std()
        x_batch = (x_batch - mean) / std

    if gpu:
        x_batch = cuda.to_gpu(x_batch.astype(np.float32))
        t_batch = cuda.to_gpu(t_batch.astype(np.int32))
        bigram_batch = cuda.to_gpu(bigram_batch.astype(np.int32))
        x_length_batch = cuda.to_gpu(
            np.asarray(x_length_batch).astype(np.int32))
        t_length_batch = cuda.to_gpu(
            np.asarray(t_length_batch).astype(np.int32))

    return x_batch, x_length_batch, t_batch, t_length_batch, bigram_batch
def __init__(self, d, f, R, gpu):
    """Create the g/H/W linear layer stacks, the Adam optimizer and buffers.

    Args:
        d: Output size of every ``W`` layer.
        f: Output size of the ``g`` layers and in/out size of ``H`` layers.
        R: Number of ``H`` layers; ``W`` gets ``R + 1`` layers.
        gpu: When truthy the model is moved to GPU 0.
    """
    self.d, self.f, self.R, self.gpu = d, f, R, gpu

    def _linear_stack(n_in, n_out, count):
        # ``count`` independent Linear links wrapped in one ChainList.
        return ChainList(
            *[L.Linear(n_in, n_out) for _ in six.moves.range(count)])

    # Built in the order g, H, W so that weight initialisation consumes
    # random numbers in the same sequence as before.
    g = _linear_stack(1, f, AtomIdMax)
    H = _linear_stack(f, f, R)
    W = _linear_stack(f, d, R + 1)

    self.optimizer = optimizers.Adam()
    self.model = Chain(H=H, W=W, g=g)
    if gpu:
        self.model.to_gpu(0)
    self.optimizer.setup(self.model)

    # Two independent empty lists per buffer.
    self.to = [[], []]
    self.atom_sid = [[], []]
    self.anum = [[], []]
def test_forward_gpu(self):
    """Check that the GPU forward pass matches the CPU forward pass.

    The link is first run on the CPU.  The samples it drew are then pinned
    on ``NegativeSamplingFunction`` (as a GPU array) so that the GPU run
    uses the same samples, and both scalar outputs are compared.
    """
    x = chainer.Variable(self.x)
    t = chainer.Variable(self.t)
    y = self.link(x, t)

    self.assertEqual(y.data.dtype, numpy.float32)
    self.assertEqual(y.data.shape, ())

    # fix samples
    function_cls = negative_sampling.NegativeSamplingFunction
    function_cls.samples = cuda.to_gpu(y.creator.samples)
    try:
        self.link.to_gpu()
        y_g = self.link(chainer.Variable(cuda.to_gpu(self.x)),
                        chainer.Variable(cuda.to_gpu(self.t)))
    finally:
        # Always remove the class-level override, even when the GPU run
        # raises, so it cannot leak into other tests.
        del function_cls.samples

    self.assertEqual(y_g.data.dtype, numpy.float32)
    self.assertEqual(y_g.data.shape, ())

    gradient_check.assert_allclose(y.data, y_g.data, atol=1.e-4)
def __init__(self, n_history, n_action, on_gpu=False):
    """Build three convolution layers, one hidden linear layer and an output layer.

    Args:
        n_history: Number of input channels of the first convolution.
        n_action: Number of outputs of the final linear layer, whose
            weights start at zero.
        on_gpu: When true the whole chain is moved to the GPU.
    """
    self.n_history = n_history
    self.n_action = n_action
    self.on_gpu = on_gpu

    w_scale = np.sqrt(2)
    # Layers are created in the order l1..out, as in the original call.
    conv1 = F.Convolution2D(n_history, 32, ksize=8, stride=4,
                            nobias=False, wscale=w_scale)
    conv2 = F.Convolution2D(32, 64, ksize=3, stride=2,
                            nobias=False, wscale=w_scale)
    conv3 = F.Convolution2D(64, 64, ksize=3, stride=1,
                            nobias=False, wscale=w_scale)
    hidden = F.Linear(3136, 512, wscale=w_scale)
    zero_weights = np.zeros((n_action, 512), dtype=np.float32)
    output = F.Linear(512, self.n_action, initialW=zero_weights)

    super(Q, self).__init__(
        l1=conv1, l2=conv2, l3=conv3, l4=hidden, out=output)

    if on_gpu:
        self.to_gpu()
def arr_to_gpu(self, arr):
    """Return ``arr`` copied to the GPU when ``self.on_gpu`` is set, else ``arr`` itself."""
    if self.on_gpu:
        return cuda.to_gpu(arr)
    return arr
def gan_sampling(gen, eval_folder, gpu, rows=6, cols=6, latent_len=128):
    """Build a trainer extension that saves a grid of generated samples.

    Args:
        gen: Generator, called as ``gen(z, test=True)``.
        eval_folder: Output directory; created on first use if missing.
        gpu: The latent batch is copied to the GPU when this is >= 0.
        rows, cols: Grid size; ``rows * cols`` samples are drawn.
        latent_len: Length of each latent vector.

    Returns:
        The extension function; it takes the trainer and writes
        ``<eval_folder>/iter_<iteration>.jpg``.
    """
    @chainer.training.make_extension()
    def samples_generation(trainer):
        if not os.path.exists(eval_folder):
            os.makedirs(eval_folder)

        latent = np.random.normal(size=(rows*cols, latent_len)).astype("f")
        if gpu >= 0:
            latent = cuda.to_gpu(latent)
        latent = Variable(latent, volatile=True)

        imgs = gen(latent, test=True)
        out_path = eval_folder+"/iter_"+str(trainer.updater.iteration)+".jpg"
        save_images_grid(imgs, path=out_path, grid_w=rows, grid_h=cols)

    return samples_generation
def gan_sampling_tags(gen, eval_folder, gpu, rows=6, cols=6, latent_len=128, attr_len=38, threshold=0.25):
    """Build a trainer extension that saves a grid of tag-conditioned samples.

    Each sample is generated from a random latent vector concatenated with
    a random tag vector whose entries are -1.0 or 1.0.

    Args:
        gen: Generator, called as ``gen(F.concat([z, tags]), test=True)``;
            its ``xp`` attribute decides where the tag batch is allocated.
        eval_folder: Output directory; created on first use if missing.
        gpu: The latent batch is copied to the GPU when this is >= 0.
        rows, cols: Grid size; ``rows * cols`` samples are drawn.
        latent_len: Length of each latent vector.
        attr_len: Length of each tag vector.
        threshold: Entries 13..26 of a tag vector become 1.0 when their
            uniform random draw is >= ``threshold``, otherwise -1.0.

    Returns:
        The extension function; it takes the trainer and writes
        ``<eval_folder>/iter_<iteration>.jpg``.
    """
    def get_fake_tag():
        # One random tag vector: exactly one 1.0 among indices 0..12 and
        # one among indices 27.., thresholded entries in between.
        prob2 = np.random.rand(attr_len)
        tags = np.zeros((attr_len)).astype("f")
        tags[:] = -1.0
        tags[np.argmax(prob2[0:13])] = 1.0
        tags[27 + np.argmax(prob2[27:])] = 1.0
        prob2[prob2 < threshold] = -1.0
        prob2[prob2 >= threshold] = 1.0
        tags[13:27] = prob2[13:27]
        return tags

    def get_fake_tag_batch():
        xp = gen.xp
        batch = rows*cols
        tags = xp.zeros((batch, attr_len)).astype("f")
        for i in range(batch):
            tags[i] = xp.asarray(get_fake_tag())
        return tags

    # The decorator belongs on the function that is returned and registered
    # with the trainer, not on the tag helper.
    @chainer.training.make_extension()
    def samples_generation(trainer):
        if not os.path.exists(eval_folder):
            os.makedirs(eval_folder)
        z = np.random.normal(size=(rows*cols, latent_len)).astype("f")
        if gpu >= 0:
            z = cuda.to_gpu(z)
        tags = get_fake_tag_batch()
        z = Variable(z, volatile=True)
        tags = Variable(tags, volatile=True)
        imgs = gen(F.concat([z, tags]), test=True)
        save_images_grid(imgs, path=eval_folder+"/iter_"+str(trainer.updater.iteration)+".jpg",
                         grid_w=rows, grid_h=cols)

    return samples_generation
def to_gpu(self):
    """Move the encoder and every per-layer h/c state that is set to the GPU."""
    super(Encoder, self).to_gpu()
    for layer in range(self.num_layers):
        # Both states are fetched before either one is moved.
        states = (self.get_h(layer), self.get_c(layer))
        for state in states:
            if state is not None:
                state.to_gpu()
def to_gpu(self):
    """Move the language model and every per-layer h/c state that is set to the GPU."""
    super(RNNLM, self).to_gpu()
    for layer in range(self.num_layers):
        hidden, cell = self.get_h(layer), self.get_c(layer)
        if hidden is not None:
            hidden.to_gpu()
        if cell is not None:
            cell.to_gpu()
def to_gpu(self):
    """Move the model, its encoder/decoder and all per-layer h/c transforms to the GPU."""
    super(VAELM, self).to_gpu()
    for part in (self.encoder, self.decoder):
        part.to_gpu()
    for layer in range(self.num_layers):
        for getter in (self.get_htrans, self.get_ctrans):
            getter(layer).to_gpu()
def train(model, batch, num_samples, word_keep_rate, UNK, alpha):
    """Run one optimisation step on ``batch``.

    The loss is ``alpha * KL + sum(xents) / num_samples`` with ``KL`` and
    ``xents`` returned by ``forward``.  The step is applied through the
    module-level ``optimizer``.  After the update the graph is cut from the
    loss, and additionally from ``KL`` when ``alpha`` is 0.

    The batch is copied to the GPU first when ``model.xp`` is CuPy.
    """
    if model.xp == cuda.cupy:
        batch = cuda.to_gpu(batch)

    KL, xents = forward(
        model, batch,
        num_samples=num_samples,
        word_keep_rate=word_keep_rate,
        UNK=UNK,
        train=True)

    reconstruction = sum(xents) / num_samples
    loss = alpha * KL + reconstruction
    loss.backward()
    optimizer.update()

    loss.unchain_backward()
    if alpha == 0:
        KL.unchain_backward()
def batch_to_vars(batch, device=-1):
    """Transpose a batch of example tuples into a list of ``chainer.Variable``.

    Args:
        batch: Iterable of equally long example tuples.
        device: GPU id; arrays are copied to that GPU when it is >= 0.

    Returns:
        One Variable per field, each stacking that field over the batch.
    """
    import chainer
    from chainer import cuda

    columns = [np.asarray(field) for field in zip(*batch)]
    if device >= 0:
        columns = [cuda.to_gpu(column, device=device) for column in columns]
    return [chainer.Variable(column) for column in columns]


# -----------------------------------------------------------------------------
# Color Util
# -----------------------------------------------------------------------------
def _diagonal_idx_array(batch_size, n):
    """Return flat indices of the diagonal of every ``n x n`` matrix in a batch.

    The result is an int32 GPU array of shape ``(batch_size, n)``; row
    ``b`` holds the diagonal's flat indices shifted by ``b * n * n``.
    """
    matrix_size = n * n
    batch_offsets = np.arange(
        start=0, stop=batch_size * matrix_size, step=matrix_size,
        dtype=np.int32)
    batch_offsets = batch_offsets.reshape((batch_size, 1))

    flat_diag = np.ravel_multi_index(np.diag_indices(n), (n, n))
    flat_diag = flat_diag.reshape((1, n)).astype(np.int32)

    return cuda.to_gpu(flat_diag + batch_offsets)
def _non_diagonal_idx_array(batch_size, n):
    """Return flat indices of the strict lower triangle of every matrix in a batch.

    The result is an int32 GPU array with one row per batch item; row
    ``b`` holds the flat indices of ``np.tril_indices(n, -1)`` shifted by
    ``b * n * n``.
    """
    matrix_size = n * n
    batch_offsets = np.arange(
        start=0, stop=batch_size * matrix_size, step=matrix_size,
        dtype=np.int32)
    batch_offsets = batch_offsets.reshape((batch_size, 1))

    flat_lower = np.ravel_multi_index(np.tril_indices(n, -1), (n, n))
    flat_lower = flat_lower.reshape((1, -1)).astype(np.int32)

    return cuda.to_gpu(flat_lower + batch_offsets)
def test_forward_gpu(self):
    """Run ``check_forward`` on a GPU copy of ``self.x``."""
    x_gpu = cuda.to_gpu(self.x)
    self.check_forward(x_gpu)
def test_forward_gpu(self):
    """Run ``check_forward`` on GPU copies of every array in ``self.xs``."""
    self.check_forward(list(map(chainer.cuda.to_gpu, self.xs)))
def test_backward_gpu(self):
    """Run ``check_backward`` on GPU copies of ``self.xs`` and ``self.gy``."""
    inputs_gpu = list(map(chainer.cuda.to_gpu, self.xs))
    grad_gpu = cuda.to_gpu(self.gy)
    self.check_backward(inputs_gpu, grad_gpu)
def test_forward_gpu(self):
    """Run ``check_forward`` on GPU copies of ``self.diag`` and ``self.non_diag``."""
    diag_gpu = cuda.to_gpu(self.diag)
    non_diag_gpu = cuda.to_gpu(self.non_diag)
    self.check_forward(diag_gpu, non_diag_gpu)
def test_backward_gpu(self):
    """Run ``check_backward`` with the inputs as a ``(diag, non_diag)`` GPU tuple plus ``gy``."""
    diag_gpu = cuda.to_gpu(self.diag)
    non_diag_gpu = cuda.to_gpu(self.non_diag)
    grad_gpu = cuda.to_gpu(self.gy)
    self.check_backward((diag_gpu, non_diag_gpu), grad_gpu)
def testplot_p4m(im=None, m=0, r=0):
    """Transform a small test image and convolve it with a ``P4MConvZ2`` layer.

    Args:
        im: 2-D image; when ``None`` a 5x5 float32 test pattern is used.
            The image is reshaped to ``(1, 1, 5, 5)`` before the
            convolution, so it must hold 25 elements.
        m: Flip component passed to ``D4Array``.
        r: Rotation index passed to ``D4Array``.

    Returns:
        Tuple ``(im, out)``: the transformed image and the convolution
        output copied back to the CPU.
    """
    if im is None:
        im = np.zeros((5, 5), dtype='float32')
        im[0:5, 1] = 1.
        im[0, 1:4] = 1.
        im[2, 1:3] = 1.

    from groupy.gfunc.z2func_array import Z2FuncArray
    from groupy.garray.D4_array import D4Array

    def rotate_flip_z2_func(im, flip, theta_index):
        imf = Z2FuncArray(im)
        rot = D4Array([flip, theta_index], 'int')
        rot_imf = rot * imf
        return rot_imf.v

    im = rotate_flip_z2_func(im, m, r)

    filter_e = np.array([[-1., -4., 1.],
                         [-2., 0., 2.],
                         [-1., 0., 1.]])

    from groupy.gconv.chainer_gconv.p4m_conv import P4MConvZ2
    from chainer import Variable
    from chainer import cuda

    # Single-argument print() calls emit the same text on Python 2 and 3.
    print(im.shape)

    imv = Variable(cuda.to_gpu(im.astype('float32').reshape(1, 1, 5, 5)))
    conv = P4MConvZ2(in_channels=1, out_channels=1, ksize=3, pad=2,
                     flat_channels=True,
                     initialW=filter_e.reshape(1, 1, 1, 3, 3))
    conv.to_gpu()
    conv_imv = conv(imv)

    print('{} {}'.format(im.shape, conv_imv.data.shape))
    return im, cuda.to_cpu(conv_imv.data)
def train_epoch(train_data, train_labels, model, optimizer, batchsize, transformations, silent, gpu=0, finetune=False):
    """Run one pass over the training set in random order.

    Args:
        train_data, train_labels: Arrays indexed along their first axis.
        model: Called as ``model(x, t, train=True, finetune=finetune)``
            and expected to return ``(loss, accuracy)``.
        optimizer: Its gradients are zeroed before every batch.
        batchsize: Number of examples per batch.
        transformations: When equal to ``'rotation'`` each batch goes
            through ``rotate_transform_batch(..., rotation=2 * np.pi)``.
        silent: When true the progress bar is not updated.
        gpu: Batches are cast and copied to the GPU when this is >= 0.
        finetune: When true the backward pass and the update are skipped.

    Returns:
        Tuple ``(sum_loss, sum_accuracy)``, each weighted by batch size.
    """
    n_examples = train_data.shape[0]
    progress = ProgressBar(0, n_examples)
    order = np.random.permutation(n_examples)

    total_loss = 0
    total_accuracy = 0

    for start in range(0, n_examples, batchsize):
        chosen = order[start:start + batchsize]
        x_batch = train_data[chosen]
        y_batch = train_labels[chosen]

        if transformations is not None and 'rotation' == transformations:
            x_batch = rotate_transform_batch(x_batch, rotation=2 * np.pi)

        if gpu >= 0:
            x_batch = cuda.to_gpu(x_batch.astype(np.float32))
            y_batch = cuda.to_gpu(y_batch.astype(np.int32))

        optimizer.zero_grads()
        loss, acc = model(Variable(x_batch), Variable(y_batch),
                          train=True, finetune=finetune)

        if not finetune:
            loss.backward()
            optimizer.update()

        n_in_batch = y_batch.size
        total_loss += float(cuda.to_cpu(loss.data)) * n_in_batch
        total_accuracy += float(cuda.to_cpu(acc.data)) * n_in_batch

        if not silent:
            progress.update(start + n_in_batch)

    return total_loss, total_accuracy
def test_forward_gpu(self):
    """Run ``check_forward`` on GPU copies of ``self.x`` and ``self.rois``."""
    x_gpu = cuda.to_gpu(self.x)
    rois_gpu = cuda.to_gpu(self.rois)
    self.check_forward(x_gpu, rois_gpu)
def test_backward_gpu(self):
    """Run ``check_backward`` on GPU copies of ``self.x``, ``self.rois`` and ``self.gy``."""
    x_gpu = cuda.to_gpu(self.x)
    rois_gpu = cuda.to_gpu(self.rois)
    grad_gpu = cuda.to_gpu(self.gy)
    self.check_backward(x_gpu, rois_gpu, grad_gpu)
def ch_trans_filter(w, inds):
    """Apply ``TransformGFilter(inds)`` to ``w`` on the GPU and return the result on the CPU."""
    from chainer import cuda, Variable
    from groupy.gconv.chainer_gconv.transform_filter import TransformGFilter

    weights = Variable(cuda.to_gpu(w))
    transform = TransformGFilter(cuda.to_gpu(inds))
    return cuda.to_cpu(transform(weights).data)
def __call__(self, x, t):
    """Compute, store, report and return the softmax cross-entropy loss.

    The prediction ``self.predictor(x, self.train_depth)`` is stored in
    ``self.y``.  Without a ``class_weight`` attribute the loss comes from
    ``F.softmax_cross_entropy``; otherwise the weighted
    ``softmax_cross_entropy`` is used, after the weights have been copied
    to the device of ``x`` if ``x`` is a CuPy array and they are not.
    """
    self.y = self.predictor(x, self.train_depth)

    if not hasattr(self, 'class_weight'):
        self.loss = F.softmax_cross_entropy(self.y, t)
    else:
        gpu_array = cuda.cupy.ndarray
        if isinstance(x.data, gpu_array) \
                and not isinstance(self.class_weight, gpu_array):
            # One-time transfer: the attribute is replaced by the GPU copy.
            self.class_weight = cuda.to_gpu(
                self.class_weight, device=x.data.device)
        self.loss = softmax_cross_entropy(
            self.y, t, class_weight=self.class_weight)

    reporter.report({'loss': self.loss}, self)
    return self.loss
def test_forward_gpu(self):
    """Move ``self.pooled_y`` to the GPU in place and run ``check_forward`` on it."""
    pooled = self.pooled_y
    pooled.to_gpu()
    self.check_forward(pooled)
def test_backward_gpu(self):
    """Run ``check_backward`` on GPU copies of ``self.pooled_y.data`` and ``self.gy``."""
    pooled_gpu = cuda.to_gpu(self.pooled_y.data)
    grad_gpu = cuda.to_gpu(self.gy)
    self.check_backward(pooled_gpu, grad_gpu)
def test_forward_gpu(self):
    """Run ``check_forward`` on GPU inputs; class weights are passed only if ``weight_apply`` is set."""
    x_gpu = cuda.to_gpu(self.x)
    t_gpu = cuda.to_gpu(self.t)
    weight_gpu = cuda.to_gpu(self.class_weight) if self.weight_apply else None
    self.check_forward(x_gpu, t_gpu, weight_gpu)
def test_forward_gpu_no_cudnn(self):
    """Run ``check_forward`` on GPU inputs with ``False`` as the final argument.

    Class weights are passed only if ``weight_apply`` is set.
    """
    x_gpu = cuda.to_gpu(self.x)
    t_gpu = cuda.to_gpu(self.t)
    weight_gpu = cuda.to_gpu(self.class_weight) if self.weight_apply else None
    self.check_forward(x_gpu, t_gpu, weight_gpu, False)
def test_backward_gpu_no_cudnn(self):
    """Run ``check_backward`` on GPU inputs with ``False`` as the final argument.

    Class weights are passed only if ``weight_apply`` is set.
    """
    x_gpu = cuda.to_gpu(self.x)
    t_gpu = cuda.to_gpu(self.t)
    weight_gpu = cuda.to_gpu(self.class_weight) if self.weight_apply else None
    self.check_backward(x_gpu, t_gpu, weight_gpu, False)
def test_value_check_gpu_cudnn(self):
    """Run ``check_value_check`` on GPU copies of ``self.x`` and ``self.t`` with ``True`` as the final argument."""
    x_gpu = cuda.to_gpu(self.x)
    t_gpu = cuda.to_gpu(self.t)
    self.check_value_check(x_gpu, t_gpu, True)
def __init__(self, model, target, device_id=-1, learning_rate=0.00025, momentum=.9, minibatch_size=32, update_interval=10000):
    """Set up the action-value model, its target copy and an Adam optimizer.

    Args:
        model: ``ChainerModel`` instance that gets trained.
        target: Network that receives a copy of ``model``'s parameters.
        device_id: GPU id; both networks are moved there when it is >= 0.
        learning_rate: First positional argument of ``Adam``.
        momentum: Second positional argument of ``Adam``.
        minibatch_size: Stored as ``_minibatch_size``.
        update_interval: Stored as ``_target_update_interval``.
    """
    assert isinstance(model, ChainerModel), \
        'model should inherit from ChainerModel'

    super(QNeuralNetwork, self).__init__(model.input_shape,
                                         model.output_shape)

    self._gpu_device = None
    self._loss_val = 0

    # Target model update method
    self._steps = 0
    self._target_update_interval = update_interval

    # Setup model and target network
    self._minibatch_size = minibatch_size
    self._model = model
    self._target = target
    self._target.copyparams(self._model)

    # If GPU move to GPU memory
    if device_id >= 0:
        with cuda.get_device(device_id) as device:
            self._gpu_device = device
            for network in (self._model, self._target):
                network.to_gpu(device)

    # Setup optimizer
    self._optimizer = Adam(learning_rate, momentum, 0.999)
    self._optimizer.setup(self._model)
def evaluate(self, environment, model=QModel.ACTION_VALUE_NETWORK):
    """Run ``environment`` through the selected network and return the output on the CPU.

    Args:
        environment: Input array.  A leading axis of length 1 is added
            when ``check_rank`` accepts its shape against the rank of
            ``self._input_shape``.
        model: ``QModel.ACTION_VALUE_NETWORK`` selects ``self._model``;
            any other value selects ``self._target``.

    Returns:
        The network output data copied to the CPU.
    """
    if check_rank(environment.shape, get_rank(self._input_shape)):
        environment = environment.reshape((1,) + environment.shape)

    # Move data if necessary
    if self._gpu_device is not None:
        environment = cuda.to_gpu(environment, self._gpu_device)

    use_main = (model == QModel.ACTION_VALUE_NETWORK)
    network = self._model if use_main else self._target
    return cuda.to_cpu(network(environment).data)
def train(self, x, y, actions=None):
    """Run one optimisation step on the action-value model.

    The loss is the summed Huber loss (delta 1.0) between the model's
    output for the chosen ``actions`` and ``y``.  Every
    ``_target_update_interval`` calls the target network receives a copy
    of the model parameters.

    Args:
        x: Input batch for the model.
        y: Targets; reshaped to ``(batch_size, 1)``.
        actions: Array of chosen action indices, one per batch item.
            Required despite the ``None`` default kept for signature
            compatibility.

    Raises:
        ValueError: If ``actions`` is ``None``.
    """
    if actions is None:
        raise ValueError('actions must be provided')

    actions = actions.astype(np.int32)
    batch_size = len(actions)

    # Same "is not None" test as used elsewhere for the device handle, so
    # the decision does not depend on the truthiness of the device object.
    if self._gpu_device is not None:
        x = cuda.to_gpu(x, self._gpu_device)
        y = cuda.to_gpu(y, self._gpu_device)
        actions = cuda.to_gpu(actions, self._gpu_device)

    q = self._model(x)
    q_subset = F.reshape(F.select_item(q, actions), (batch_size, 1))
    y = y.reshape(batch_size, 1)

    loss = F.sum(F.huber_loss(q_subset, y, 1.0))

    self._model.cleargrads()
    loss.backward()
    self._optimizer.update()

    # ndarray.item() replaces np.asscalar, which newer NumPy no longer has.
    self._loss_val = cuda.to_cpu(loss.data).item()

    # Keeps track of the number of train() calls
    self._steps += 1
    if self._steps % self._target_update_interval == 0:
        # copy weights
        self._target.copyparams(self._model)
def converter(xs, device):
    """Send every array of a nested batch to ``device``.

    Args:
        xs: Iterable of iterables of arrays.
        device: ``None`` returns ``xs`` unchanged, a negative value copies
            every array to the CPU, anything else is used as a GPU id.

    Returns:
        ``xs`` itself when ``device`` is ``None``; otherwise a list of
        lists of the transferred arrays.  Concrete lists are returned so
        the result can be indexed and iterated more than once on Python 3,
        where ``map`` would give single-pass iterators.
    """
    if device is None:
        return xs
    if device < 0:
        return [[cuda.to_cpu(m) for m in x] for x in xs]
    return [[cuda.to_gpu(m, device, cuda.Stream.null) for m in x]
            for x in xs]