We have extracted the following 50 code examples from open-source Python projects to illustrate how to use chainer.cuda.get_device().
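Before the extracted examples, here is a minimal, hypothetical sketch of the two patterns that recur below: selecting a GPU by id with .use(), and opening a device context for an array. It assumes an older Chainer release in which cuda.get_device() is still available (newer releases split it into cuda.get_device_from_id() and cuda.get_device_from_array()).

import numpy as np
from chainer import cuda

# Pattern 1: select a GPU by id and make it current (only when CUDA is
# actually available), mirroring the cuda.get_device(gpu).use() calls below.
gpu = 0
if cuda.available and gpu >= 0:
    cuda.get_device(gpu).use()

# Pattern 2: open a device context for an existing array. For a NumPy array
# (or a negative id) get_device() returns cuda.DummyDevice, so this block is
# a no-op on CPU and switches to the array's GPU otherwise.
x = np.zeros(3, dtype=np.float32)
with cuda.get_device(x):
    xp = cuda.get_array_module(x)  # numpy or cupy, matching x
    y = xp.zeros_like(x)           # allocated on the same device as x
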
def check_gpu(self, gpu):
    """Check cuda.cupy

    Example:

    ::

        gpu = 0
        self.check_gpu(gpu)

    Args:
        gpu (int): gpu id
    """
    if gpu >= 0:
        cuda.get_device(gpu).use()
        self.to_gpu(gpu)
        return True
    return False

def __init__(self, *, iterator, noise_iterator, optimizer_generator,
             optimizer_discriminator, generator_lr_decay_interval,
             discriminator_lr_decay_interval, gamma, k_0, lambda_k,
             loss_norm, device=-1):
    iterators = {'main': iterator, 'z': noise_iterator}
    optimizers = {'gen': optimizer_generator,
                  'dis': optimizer_discriminator}
    super().__init__(iterators, optimizers, device=device)

    self.gen_lr_decay_interval = generator_lr_decay_interval
    self.dis_lr_decay_interval = discriminator_lr_decay_interval
    self.k = k_0
    self.lambda_k = lambda_k
    self.gamma = gamma
    self.loss_norm = loss_norm

    if device >= 0:
        cuda.get_device(device).use()
        for optimizer in optimizers.values():
            optimizer.target.to_gpu()

def init_state(self, param, state):
    """Initializes the optimizer state corresponding to the parameter.

    This method should add needed items to the ``state`` dictionary. Each
    optimizer implementation that uses its own states should override this
    method or CPU/GPU dedicated versions (:meth:`init_state_cpu` and
    :meth:`init_state_gpu`).

    Args:
        param (~chainer.Variable): Parameter variable.
        state (dict): State dictionary.

    .. seealso:: :meth:`init_state_cpu`, :meth:`init_state_gpu`

    """
    with cuda.get_device(param.data) as dev:
        if int(dev) == -1:
            self.init_state_cpu(param, state)
        else:
            self.init_state_gpu(param, state)

def test_copydata_gpu_to_another_gpu(self):
    cp = cuda.cupy
    with cuda.get_device(0):
        data1 = cp.zeros(3, dtype=np.float32)
        expect = cp.ones(3, dtype=np.float32)
    with cuda.get_device(1):
        data2 = cp.ones(3, dtype=np.float32)
    self.check_copydata(data1, data2, expect)

def __init__(self, *, iterator, noise_iterator, optimizer_generator,
             optimizer_critic, device=-1):
    if optimizer_generator.target.name is None:
        optimizer_generator.target.name = 'generator'
    if optimizer_critic.target.name is None:
        optimizer_critic.target.name = 'critic'

    iterators = {'main': iterator, 'z': noise_iterator}
    optimizers = {'generator': optimizer_generator,
                  'critic': optimizer_critic}

    super().__init__(iterators, optimizers, device=device)

    if device >= 0:
        cuda.get_device(device).use()
        [optimizer.target.to_gpu() for optimizer in optimizers.values()]

    self.xp = cuda.cupy if device >= 0 else np

def __init__(self, gpu, batchsize, data_dir, dataset, net, mode, epochs,
             save_every, size, **kwargs):
    super(Network, self).__init__(epochs, save_every)
    print "building ..."
    self.input_height = size
    self.input_width = size
    self.net = net
    self.mode = mode
    self.dataset = dataset
    self.train_data, self.test_data = self.get_dataset(data_dir, dataset)
    print 'input_channel ==> %d using %s dataset' % (self.in_channel, self.dataset)

    self.enc = GoogLeNet()
    self.dec = Decoder(self.in_size)
    self.xp = cuda.cupy
    cuda.get_device(gpu).use()

    self.enc.to_gpu()
    self.dec.to_gpu()

    self.o_dec = optimizers.RMSpropGraves()
    self.o_dec.setup(self.dec)

    self.batchsize = batchsize

def __call__(self, opt):
    if cuda.available:
        kernel = cuda.elementwise(
            'T low, T high',
            'T p',
            'p = (p < low) ? low : (p > high) ? high : p',
            'weight_clip')

    for link in opt.target.links():
        # only apply to binary layers
        if getattr(link, 'cname', False):
            for param in link.params():
                p = param.data
                with cuda.get_device(p) as dev:
                    if int(dev) == -1:
                        numpy.clip(p, self.low, self.high)
                    else:
                        kernel(self.low, self.high, p)

def entropy_filter(self, x, b, ent_T):
    xp = cuda.get_array_module(b)
    eb = entropy(F.softmax(b)) / np.log(b.shape[1])
    eb.to_cpu()
    if hasattr(eb.data, 'get'):
        with cuda.get_device(eb.data):
            exited = eb.data < ent_T
            exited = exited.get()
    else:
        exited = eb.data < ent_T

    y_exit = []
    y_cont = []
    for i, idx in enumerate(exited):
        if idx:
            y_exit.append(b[i:i+1])
        else:
            y_cont.append(x[i:i+1])

    if len(y_exit) > 0:
        y_exit = F.vstack(y_exit)
    if len(y_cont) > 0:
        y_cont = F.vstack(y_cont)
    return y_exit, y_cont, exited

def printAllParameters(self, optimizer, init_type="***", init_scale=1.0):
    total_norm = 0
    total_param = 0
    named_params = sorted(
        optimizer.target.namedparams(),
        key=lambda x: x[0])
    for n, p in named_params:
        t_norm = chainer.optimizer._sum_sqnorm(p.data)
        sys.stdout.write(
            '### {} {} {} {} {}\n'.format(
                p.name, p.data.ndim, p.data.shape, p.data.size, t_norm))
        total_norm += t_norm
        total_param += p.data.size
    with cuda.get_device(total_norm):
        sys.stdout.write(
            '# param size= [{}] norm = [{}] scale=[{}, {}]\n'.format(
                total_param, self.model.xp.sqrt(total_norm),
                init_type, init_scale))

def __call__(self, x):
    """Applies the graph convolutional layer.

    Args:
        x: (~chainer.Variable): Input graph signal.

    Returns:
        ~chainer.Variable: Output of the graph convolution.

    """
    if self.has_uninitialized_params:
        with cuda.get_device(self._device_id):
            self._initialize_params(x.shape[1])
    if self.b is None:
        return self.func(x, self.W)
    else:
        return self.func(x, self.W, self.b)

def forward_gpu(self, inputs):
    x = inputs[0]
    xp = cuda.get_array_module(x)
    n_batch, c, N = x.shape
    N_coarse = len(self.pooling_inds)

    with cuda.get_device(x.data):
        x = x.transpose((2, 1, 0))
        p_dim = self.pooling_inds.shape[1]
        y = xp.empty((N_coarse, c, n_batch), dtype=x.dtype)
        self.max_inds = xp.empty((N_coarse, c, n_batch), dtype=np.int32)
        pooling_inds = cuda.to_gpu(self.pooling_inds)
        gpu_graphpool_fwd(N_coarse, p_dim, pooling_inds, x, y, self.max_inds)
        y = y.transpose((2, 1, 0))

    return y,

def __call__(self, x):
    """Applies the linear layer.

    Args:
        x (~chainer.Variable): Batch of input vectors.

    Returns:
        ~chainer.Variable: Output of the linear layer.

    """
    if self.has_uninitialized_params:
        with cuda.get_device(self._device_id):
            self._initialize_params(x.size // x.shape[0])
    return linear.linear(x, self.W, self.b)

def setup_model(args):
    if args.model_type.lower() == "lstm":
        model = LSTM(args)
    else:
        print('set valid model type name')
        exit()
    optimizer = model.setup_optimizer()
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()
    return model, optimizer

def multi_box_iou(a, b):
    with cuda.get_device(a.x):
        return multi_box_intersection(a, b) / multi_box_union(a, b)

def predict(self, input_x):
    if isinstance(input_x, chainer.Variable):
        device = cuda.get_device(input_x.data)
    else:
        device = cuda.get_device(input_x)
    xp = self.predictor.xp
    with device:
        output = self.predictor(input_x)
        batch_size, input_channel, input_h, input_w = input_x.shape
        batch_size, _, grid_h, grid_w = output.shape
        x, y, w, h, conf, prob = F.split_axis(F.reshape(output, (batch_size, self.predictor.n_boxes, self.predictor.n_classes+5, grid_h, grid_w)), (1, 2, 3, 4, 5), axis=2)
        x = F.sigmoid(x)
        y = F.sigmoid(y)
        conf = F.sigmoid(conf)
        prob = F.transpose(prob, (0, 2, 1, 3, 4))
        prob = F.softmax(prob)
        prob = F.transpose(prob, (0, 2, 1, 3, 4))

        # convert coordinates to those on the image
        x_shift = xp.asarray(np.broadcast_to(np.arange(grid_w, dtype=np.float32), x.shape))
        y_shift = xp.asarray(np.broadcast_to(np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1), y.shape))
        w_anchor = xp.asarray(np.broadcast_to(np.reshape(np.array(self.anchors, dtype=np.float32)[:, 0], (self.predictor.n_boxes, 1, 1, 1)), w.shape))
        h_anchor = xp.asarray(np.broadcast_to(np.reshape(np.array(self.anchors, dtype=np.float32)[:, 1], (self.predictor.n_boxes, 1, 1, 1)), h.shape))
        box_x = (x + x_shift) / grid_w
        box_y = (y + y_shift) / grid_h
        box_w = F.exp(w) * w_anchor / grid_w
        box_h = F.exp(h) * h_anchor / grid_h

        return box_x, box_y, box_w, box_h, conf, prob

def __call__(self, opt):
    if cuda.available:
        kernel = cuda.elementwise(
            'T low, T high',
            'T p',
            'p = (p < low) ? low : (p > high) ? high : p',
            'weight_clip')

    for param in opt.target.params():
        p = param.data
        with cuda.get_device(p) as dev:
            if int(dev) == -1:
                numpy.clip(p, self.low, self.high)
            else:
                kernel(self.low, self.high, p)

def __call__(self, opt):
    if cuda.available:
        kernel = cuda.elementwise(
            'T p, T decay', 'T g', 'g += decay * p', 'weight_decay')

    rate = self.rate
    for name, param in opt.target.namedparams():
        if name == 'b' or name.endswith('/b'):
            continue
        p, g = param.data, param.grad
        with cuda.get_device(p) as dev:
            if int(dev) == -1:
                g += rate * p
            else:
                kernel(p, rate, g)

def __init__(self, model, target, device_id=-1,
             learning_rate=0.00025, momentum=.9,
             minibatch_size=32, update_interval=10000):

    assert isinstance(model, ChainerModel), \
        'model should inherit from ChainerModel'

    super(QNeuralNetwork, self).__init__(model.input_shape,
                                         model.output_shape)

    self._gpu_device = None
    self._loss_val = 0

    # Target model update method
    self._steps = 0
    self._target_update_interval = update_interval

    # Setup model and target network
    self._minibatch_size = minibatch_size
    self._model = model
    self._target = target
    self._target.copyparams(self._model)

    # If GPU move to GPU memory
    if device_id >= 0:
        with cuda.get_device(device_id) as device:
            self._gpu_device = device
            self._model.to_gpu(device)
            self._target.to_gpu(device)

    # Setup optimizer
    self._optimizer = Adam(learning_rate, momentum, 0.999)
    self._optimizer.setup(self._model)

def _concat_arrays(arrays):
    xp = cuda.get_array_module(arrays[0])
    with cuda.get_device(arrays[0]):
        return xp.concatenate([array[None] for array in arrays])

def main():
    args = parse_args()
    gen1 = net.Generator1()
    chainer.serializers.load_npz(args.model_path, gen1)
    device_id = None
    if args.gpu >= 0:
        device_id = args.gpu
        cuda.get_device(device_id).use()
        gen1.to_gpu(device_id)

    out_vector_path = None
    np.random.seed(1)
    if args.vector_file1 and args.vector_index1 >= 0 and args.vector_file2 and args.vector_index2 >= 0:
        with open(args.vector_file1, 'rb') as f:
            z = np.load(f)
            z1 = z[args.vector_index1]
        with open(args.vector_file2, 'rb') as f:
            z = np.load(f)
            z2 = z[args.vector_index2]
        w = np.arange(10).astype(np.float32).reshape((-1, 1)) / 9
        z = (1 - w) * z1 + w * z2
        z = z / (np.linalg.norm(z, axis=1, keepdims=True) + 1e-12)
    else:
        z = np.random.normal(0, 1, (100, latent_size)).astype(np.float32)
        out_vector_path = '{}.npy'.format(args.output)
        z = z / (np.linalg.norm(z, axis=1, keepdims=True) + 1e-12)

    with chainer.no_backprop_mode():
        if device_id is None:
            x = gen1(z, train=False)
        else:
            x = gen1(cuda.to_gpu(z, device_id), train=False)
    x = cuda.to_cpu(x.data)

    batch, ch, h, w = x.shape
    x = x.reshape((-1, 10, ch, h, w)).transpose((0, 3, 1, 4, 2)).reshape((-1, 10 * w, ch))
    x = ((x + 1) * 127.5).clip(0, 255).astype(np.uint8)
    Image.fromarray(x).save('{}.jpg'.format(args.output))
    if out_vector_path:
        with open(out_vector_path, 'wb') as f:
            np.save(f, z)

def init_state(self, param):
    with cuda.get_device(param.data):
        self.state['s'] = []

def to_gpu(self, device=None):
    """Copies parameter variables and persistent values to GPU.

    This method does not handle non-registered attributes. If some of such
    attributes must be copied to GPU, the link implementation must
    override this method to do so.

    Args:
        device: Target device specifier. If omitted, the current device is
            used.

    Returns: self

    """
    cuda.check_cuda_available()
    if not self._cpu:
        return self
    d = self.__dict__
    with cuda.get_device(device):
        for name in self._params:
            d[name].to_gpu()
        for name in self._persistent:
            value = d[name]
            if isinstance(value, numpy.ndarray):
                d[name] = cuda.to_gpu(value)
    self._cpu = False
    return self

def to_gpu(self, device=None):
    with cuda.get_device(device):
        super(Chain, self).to_gpu()
        d = self.__dict__
        for name in self._children:
            d[name].to_gpu()
    return self

def to_gpu(self, device=None):
    with cuda.get_device(device):
        super(ChainList, self).to_gpu()
        for link in self._children:
            link.to_gpu()
    return self

def to_gpu(self, device=None):
    with cuda.get_device(device):
        self.paths = cuda.to_gpu(self.paths)
        self.codes = cuda.to_gpu(self.codes)
        self.begins = cuda.to_gpu(self.begins)

def to_gpu(self, device=None):
    with cuda.get_device(device):
        super(NegativeSampling, self).to_gpu()
        self.sampler.to_gpu()

def init_state(self, param, state):
    xp = cuda.get_array_module(param.data)
    with cuda.get_device(param.data):
        state['ms'] = xp.zeros_like(param.data)

def init_state(self, param, state):
    xp = cuda.get_array_module(param.data)
    with cuda.get_device(param.data):
        state['n'] = xp.zeros_like(param.data)
        state['g'] = xp.zeros_like(param.data)
        state['delta'] = xp.zeros_like(param.data)

def init_state(self, param, state):
    xp = cuda.get_array_module(param.data)
    with cuda.get_device(param.data):
        state['v'] = xp.zeros_like(param.data)

def init_state(self, param, state):
    xp = cuda.get_array_module(param.data)
    with cuda.get_device(param.data):
        state['m'] = xp.zeros_like(param.data)
        state['v'] = xp.zeros_like(param.data)

def init_state(self, param, state):
    xp = cuda.get_array_module(param.data)
    with cuda.get_device(param.data):
        state['h'] = xp.zeros_like(param.data)

def init_state(self, param, state):
    data = param.data
    xp = cuda.get_array_module(data)
    with cuda.get_device(data):
        state['msg'] = xp.zeros_like(data)
        state['msdx'] = xp.zeros_like(data)

def _sum_sqnorm(arr):
    sq_sum = collections.defaultdict(float)
    for x in arr:
        with cuda.get_device(x) as dev:
            x = x.ravel()
            s = x.dot(x)
            sq_sum[int(dev)] += s
    return sum([float(i) for i in six.itervalues(sq_sum)])

def prepare(self):
    """Prepares for an update.

    This method initializes missing optimizer states (e.g. for newly added
    parameters after the set up), and copies arrays in each state
    dictionary to CPU or GPU according to the corresponding parameter
    array.

    """
    states = self._states
    for name, param in self.target.namedparams():
        if name not in states:
            state = {}
            self.init_state(param, state)
            states[name] = state
        else:
            state = states[name]
            with cuda.get_device(param.data) as dev:
                if int(dev) == -1:  # cpu
                    for key, value in six.iteritems(state):
                        if isinstance(value, cuda.ndarray):
                            state[key] = value.get()
                else:  # gpu
                    cupy = cuda.cupy
                    for key, value in six.iteritems(state):
                        if isinstance(value, numpy.ndarray):
                            state[key] = cuda.to_gpu(value)
                        elif (isinstance(value, cupy.ndarray) and
                              value.device != dev):
                            state[key] = cupy.copy(value)

def update(self, lossfun=None, *args, **kwds):
    """Updates parameters based on a loss function or computed gradients.

    This method runs in two ways.

    - If ``lossfun`` is given, then use it as a loss function to compute
      gradients.
    - Otherwise, this method assumes that the gradients are already
      computed.

    In both cases, the computed gradients are used to update parameters.
    The actual update routines are defined by the :meth:`update_one`
    method (or its CPU/GPU versions, :meth:`update_one_cpu` and
    :meth:`update_one_gpu`).

    """
    if lossfun is not None:
        self.target.zerograds()
        loss = lossfun(*args, **kwds)
        loss.backward()
        del loss

    self.call_hooks()
    self.prepare()

    self.t += 1
    states = self._states
    for name, param in self.target.namedparams():
        with cuda.get_device(param.data):
            self.update_one(param, states[name])

def __call__(self, opt):
    if cuda.available:
        kernel = cuda.elementwise(
            'T p, T decay', 'T g', 'g += decay * p', 'weight_decay')

    rate = self.rate
    for param in opt.target.params():
        p, g = param.data, param.grad
        with cuda.get_device(p) as dev:
            if int(dev) == -1:
                g += rate * p
            else:
                kernel(p, rate, g)

def __call__(self, opt):
    if cuda.available:
        kernel = cuda.elementwise(
            'T s, T decay', 'T g', 'g += decay * s', 'lasso')

    rate = self.rate
    for param in opt.target.params():
        p, g = param.data, param.grad
        xp = cuda.get_array_module(p)
        sign = xp.sign(p)
        with cuda.get_device(p) as dev:
            if int(dev) == -1:
                g += rate * sign
            else:
                kernel(sign, rate, g)

def __call__(self, opt):
    norm = numpy.sqrt(_sum_sqnorm([p.grad for p in opt.target.params()]))
    rate = self.threshold / norm
    if rate < 1:
        for param in opt.target.params():
            grad = param.grad
            with cuda.get_device(grad):
                grad *= rate

def backward_gpu(self, x, gy):
    if self.out_device == -1:
        return cuda.to_gpu(gy[0], device=cuda.get_device(x[0])),
    else:
        return cuda.copy(gy[0], out_device=cuda.get_device(x[0])),

def to_gpu(self, device=None):
    """Copies the data and gradient arrays to specified GPU.

    Args:
        device: Target device specifier. If omitted, the current device is
            used.

    """
    with cuda.get_device(device):
        self.data = cuda.to_gpu(self.data)
        if self._grad is not None:
            self._grad = cuda.to_gpu(self._grad)

def zerograd(self):
    """Initializes the gradient array by zeros."""
    with cuda.get_device(self.data) as dev:
        if self._grad is None:
            xp = numpy if int(dev) == -1 else cuda.cupy
            self._grad = xp.zeros_like(self.data)
        else:
            self._grad.fill(0)

def accuracy_gpu(self, device=None):
    model = self.model
    optimizer = self.optimizer
    model.to_gpu(device=device)
    optimizer.setup(model)
    with cuda.get_device(device):
        return self._train_linear_classifier(model, optimizer, True)

def test_get_dummy_device(self):
    if not cuda.available:
        self.assertIs(cuda.get_device(), cuda.DummyDevice)

def test_get_dummy_device_for_empty_array(self):
    x = cuda.cupy.array([]).reshape((0, 10))
    self.assertIs(cuda.get_device(x), cuda.DummyDevice)

def test_to_gpu_from_another_gpu(self):
    cp = cuda.cupy
    a = chainer.Variable(cp.zeros(3, dtype=np.float32))
    a.grad = cuda.cupy.ones_like(a.data)
    b = a.data.copy()
    gb = a.grad.copy()
    a.to_gpu(1)

    self.assertEqual(int(cuda.get_device(a.data)), 1)
    self.assertEqual(int(cuda.get_device(a.grad)), 1)
    cp.testing.assert_array_equal(a.data, b)
    cp.testing.assert_array_equal(a.grad, gb)

def test_zerograds_multi_gpu(self):
    cupy = cuda.cupy
    with cuda.get_device(1):
        a = chainer.Variable(cupy.empty(3, dtype=np.float32))
    a.zerograd()
    self.assertIsNot(a.grad, None)
    self.assertEqual(int(a.grad.device), 1)
    with cuda.get_device(1):
        g_expect = cupy.zeros_like(a.data)
        cupy.testing.assert_array_equal(a.grad, g_expect)

def test_zerograds_fill_multi_gpu(self):
    cupy = cuda.cupy
    with cuda.get_device(1):
        a = chainer.Variable(cupy.empty(3, dtype=np.float32))
        a.grad = cupy.empty_like(a.data)
    a.zerograd()
    self.assertEqual(int(a.grad.device), 1)
    with cuda.get_device(1):
        g_expect = cupy.zeros_like(a.data)
        cupy.testing.assert_array_equal(a.grad, g_expect)