The following 35 code examples, extracted from open-source Python projects, illustrate how to use chainer.cuda().
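Before the examples, here is a minimal sketch of the chainer.cuda calls that recur throughout: get_device(...).use() to select a GPU, to_gpu/to_cpu to move arrays between devices, and get_array_module to write device-agnostic code. This sketch is not taken from any of the projects below, and the helper name to_device is ours for illustration; it assumes Chainer is installed, and with gpu=-1 it stays on NumPy and needs no CUDA.

import numpy as np
import chainer

def to_device(x, gpu=-1):
    # Mirror the pattern used in the examples below: make the chosen GPU
    # current, then move the array; with gpu < 0, keep everything on the CPU.
    if gpu >= 0:
        chainer.cuda.get_device(gpu).use()
        return chainer.cuda.to_gpu(x, device=gpu)
    return chainer.cuda.to_cpu(x)

x = to_device(np.random.randn(4, 3).astype(np.float32), gpu=-1)  # pass gpu=0 to use the first GPU
xp = chainer.cuda.get_array_module(x)  # numpy on CPU, cupy on GPU
y = xp.tanh(x)                         # the same code runs on either device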
def __init__(self, gpu=-1, optimizer=None, model=None, content_weight=1, texture_weight=1, average_pooling=False):
    self.content_weight = content_weight
    self.texture_weight = texture_weight
    self.average_pooling = average_pooling
    if optimizer is None:
        self.optimizer = chainer.optimizers.Adam(alpha=4.0)
    else:
        self.optimizer = optimizer
    if model is None:
        self.model = neural_art.utility.load_nn("vgg")
    else:
        self.model = model
    if gpu >= 0:
        chainer.cuda.get_device(gpu).use()
        self.xp = chainer.cuda.cupy
        self.model.model.to_gpu()
    else:
        self.xp = numpy
def __init__(self, texture_imgs, model, gpu, optimizer, content_weight=1, texture_weight=1):
    """
    :type converter: multi_reference_image_converter.MultiReferenceImageConverter
    """
    self.converter = neural_art.image_converters.MultiReferenceImageConverter(
        texture_imgs, gpu=gpu, content_weight=content_weight, texture_weight=1,
        model=model, average_pooling=True)
    self.model = model
    self.optimizer = optimizer
    self.content_weight = content_weight
    self.texture_weight = texture_weight
    if gpu >= 0:
        chainer.cuda.get_device(gpu).use()
        self.xp = chainer.cuda.cupy
        self.model.model.to_gpu()
    else:
        self.xp = numpy
def classify(self, x=None):
    if x is None:
        x = Tensor.context
    if not isinstance(x, ImageTensor):
        x = Input(x)
    xp = Deel.xp
    x_data = xp.asarray(self.x_batch)
    xv = chainer.Variable(x.value, volatile=True)
    h, w = xv.data.shape[2:]
    cls_score, bbox_pred = self.func(xv, np.array([[h, w, x.im_scale]]))
    draw_rois(x.content, x.im_scale, self.func.rois, bbox_pred, cls_score.data)
    if Deel.gpu >= 0:
        cls_score = chainer.cuda.cupy.asnumpy(cls_score)
        bbox_pred = chainer.cuda.cupy.asnumpy(bbox_pred)
    result = draw_result(x.content, 1.0, cls_score.data, bbox_pred, 0.3, 0.8)
    cv.imshow("res", result)
    cv.waitKey(0)
def out_generated_image(gen, dis, rows, cols, seed, dst):
    @chainer.training.make_extension()
    def make_image(trainer):
        np.random.seed(seed)
        n_images = rows * cols
        xp = gen.xp
        z = Variable(xp.asarray(gen.make_hidden(n_images)))
        with chainer.using_config('train', False):
            x = gen(z)
        x = chainer.cuda.to_cpu(x.data)
        np.random.seed()

        x = np.asarray(np.clip(x * 255, 0.0, 255.0), dtype=np.uint8)
        _, _, H, W = x.shape
        x = x.reshape((rows, cols, 3, H, W))
        x = x.transpose(0, 3, 1, 4, 2)
        x = x.reshape((rows * H, cols * W, 3))

        preview_dir = '{}/preview'.format(dst)
        preview_path = preview_dir +\
            '/image{:0>8}.png'.format(trainer.updater.iteration)
        if not os.path.exists(preview_dir):
            os.makedirs(preview_dir)
        Image.fromarray(x).save(preview_path)
    return make_image
def check_crossing_model(gpu):
    communicator, rank_next, rank_prev = create_communicator(gpu)

    n, d = 100, 10
    X = np.random.randn(n, d).astype(np.float32)
    Y = (np.random.rand(n) * 2).astype(np.int32)

    if communicator.rank == 0:
        model = L.Classifier(Cross0(
            d, communicator, rank_next, rank_prev))
    else:
        model = L.Classifier(Cross1(
            d, communicator, rank_next, rank_prev))

    if gpu:
        model.to_gpu()
        X = chainer.cuda.to_gpu(X)
        Y = chainer.cuda.to_gpu(Y)

    for i in range(n):
        err = model(X[i:i + 1], Y[i:i + 1])
        err.backward()
def create_communicator(param, use_gpu):
    if not param.multi_node:
        ranks = _communication_utility.init_ranks(mpi_comm)
        inter_size = ranks[4]
        if inter_size > 1:
            pytest.skip('This test is for single node only')

    if use_gpu and not param.nccl1 and nccl.get_version() < 2000:
        pytest.skip('This test requires NCCL version >= 2.0')

    communicator = param.communicator_class(mpi_comm)

    if hasattr(communicator, 'intra_rank'):
        chainer.cuda.get_device(communicator.intra_rank).use()

    return communicator
def forward_gpu(self, inputs):
    x = inputs[0]
    xp = cuda.get_array_module(x)
    n_batch, c, N = x.shape
    N_coarse = len(self.pooling_inds)

    with cuda.get_device(x.data):
        x = x.transpose((2, 1, 0))
        p_dim = self.pooling_inds.shape[1]
        y = xp.empty((N_coarse, c, n_batch), dtype=x.dtype)
        self.max_inds = xp.empty((N_coarse, c, n_batch), dtype=np.int32)
        pooling_inds = cuda.to_gpu(self.pooling_inds)
        gpu_graphpool_fwd(N_coarse, p_dim, pooling_inds, x, y, self.max_inds)
        y = y.transpose((2, 1, 0))

    return y,
def forward_gpu(self, inputs):
    x, W = inputs[:2]
    n_batch, c_in, N = x.shape
    b = inputs[2] if len(inputs) == 3 else None

    xp = cuda.get_array_module(x)
    with cuda.get_device(x.data):
        K = self.K
        LmI_data, LmI_indices, LmI_indptr = self.LmI_tuple

        if x.dtype != LmI_data.dtype:
            LmI_data = LmI_data.astype(x.dtype)

        C = xp.empty((K, N, c_in, n_batch), dtype=x.dtype)
        chebyshev_matvec_gpu(C, x, K, n_batch,
                             LmI_data, LmI_indices, LmI_indptr)
        C = C.transpose((3, 2, 0, 1))
        self.C = C
        y = xp.tensordot(C, W, ((1, 2), (1, 2)))

        if b is not None:
            y += b

        return xp.rollaxis(y, 2, 1),  # y.shape = (n_batch, c_out, N)
def test_forward_consistency(self, nobias=False):
    x_cpu = chainer.Variable(self.x)
    W_cpu = chainer.Variable(self.W)
    b_cpu = None if nobias else chainer.Variable(self.b)
    func_cpu = graph_convolution.GraphConvolutionFunction(self.L, self.K)
    func_cpu.to_cpu()
    args_cpu = (x_cpu, W_cpu)
    if b_cpu is not None:
        args_cpu += (b_cpu, )
    y_cpu = func_cpu(*args_cpu)

    x_gpu = chainer.Variable(cuda.to_gpu(self.x))
    W_gpu = chainer.Variable(cuda.to_gpu(self.W))
    b_gpu = None if nobias else chainer.Variable(cuda.to_gpu(self.b))
    func_gpu = graph_convolution.GraphConvolutionFunction(self.L, self.K)
    func_gpu.to_gpu()
    args_gpu = (x_gpu, W_gpu)
    if b_gpu is not None:
        args_gpu += (b_gpu, )
    y_gpu = func_gpu(*args_gpu)

    testing.assert_allclose(
        y_cpu.data, y_gpu.data.get(), **self.check_forward_options)
def convert_debug(self, content_img, init_img, output_directory,
                  max_iteration=1000, debug_span=100, random_init=False):
    init_array = self.xp.array(neural_art.utility.img2array(init_img))
    if random_init:
        init_array = self.xp.array(self.xp.random.uniform(-20, 20, init_array.shape),
                                   dtype=init_array.dtype)
    content_array = self.xp.array(neural_art.utility.img2array(content_img))
    content_layers = self.model.forward_layers(chainer.Variable(content_array),
                                               average_pooling=self.average_pooling)

    parameter_now = chainer.links.Parameter(init_array)
    self.optimizer.setup(parameter_now)
    for i in range(max_iteration + 1):
        neural_art.utility.print_ltsv({"iteration": i})
        if i % debug_span == 0 and i > 0:
            print("dump to {}".format(os.path.join(output_directory, "{}.png".format(i))))
            neural_art.utility.array2img(chainer.cuda.to_cpu(parameter_now.W.data)).save(
                os.path.join(output_directory, "{}.png".format(i)))
        parameter_now.zerograds()
        x = parameter_now.W
        layers = self.model.forward_layers(x, average_pooling=self.average_pooling)

        loss_texture = self._texture_loss(layers)
        loss_content = self._contents_loss(layers, content_layers)
        loss = self.texture_weight * loss_texture + self.content_weight * loss_content
        loss.backward()
        parameter_now.W.grad = x.grad
        self.optimizer.update()
    return neural_art.utility.array2img(chainer.cuda.to_cpu(parameter_now.W.data))
def __init__(self, config, Network):
    InitOpt = config.get('network', 'init_opt')
    InitOpt = [int(x) for x in InitOpt.split(';')]
    if config.getboolean('gpu', 'use'):
        list_gpu = config.get('gpu', 'index')
        print('Configuring the training for GPU calculation:')
        print(' using gpus: {}'.format(list_gpu))
        self.list_gpu = [int(x) for x in list_gpu.split(';')]
        chainer.cuda.get_device(self.list_gpu[0]).use()
        self.xp = chainer.cuda
        self.Networks = [Network(InitOpt)]
    else:
        print('Configuring the training for CPU calculation:')
        self.xp = np
        self.list_gpu = []
    self.Networks[0].train = True
    self.Optimizer = optimizers.Adam(alpha=config.getfloat('train', 'learning_rate'))  # TODO: Set type of Optimizer on Config File
    _inputs = config.get('data', 'labels')
    _inputs = [x for x in _inputs.split(';')]
    self._inputs = len(_inputs)
    self._gaussian = config.getboolean('train', 'gaussian')
    if self._gaussian:
        self.eta = config.getfloat('train', 'eta_gn')
    self._lasso = config.getboolean('train', 'lasso')
    if self._lasso:
        self.lasso_dy = config.getfloat('train', 'decay_lasso')
    try:
        # only set on Recurrent Network
        self.sequence = config.getint('data', 'sequence')
        self.clip_threshold = config.getfloat('train', 'clip_threshold')
        self._use_clip = config.getboolean('train', 'use_clip')
        self._lstm = True
        print(' Setting Network for Sequential Training...')
    except:
        self._use_clip = False
        self._lstm = False
    self.train = False
    return
def test_gpu0(self):
    environment.init_gpu(0)
    self.assertEqual(environment.array_module(), chainer.cuda.cupy)
def test_nogpu(self):
    cupy = None
    if hasattr(chainer.cuda, "cupy"):
        cupy = getattr(chainer.cuda, "cupy")
        delattr(chainer.cuda, "cupy")
    environment.init_gpu(0)
    self.assertEqual(environment.array_module(), numpy)
    if cupy is not None:
        setattr(chainer.cuda, "cupy", cupy)
def test_random_gpu(self):
    environment.init_gpu(0)
    environment.init_random(self.seed)
    self.assertAlmostEqual(float(environment.array_module().random.random()),
                           0.5405254640354904)

# TODO(philip30):
# Seems that the cuda.cupy.random draws from a different distribution than
# the one in numpy. For now it is important to first init all model using
# one of the modules and convert to the other to ensure that it is producing
# the same result every time training is conducted.
def test_forward_gpu(self):
    self.link.to_gpu()
    self.check_forward(cuda.to_gpu(self.x1), cuda.to_gpu(self.x2))
def test_backward_gpu(self):
    self.link.to_gpu()
    self.check_backward(cuda.to_gpu(self.x1), cuda.to_gpu(self.x2),
                        cuda.to_gpu(self.gy))
def init_random(seed):
    if seed != 0:
        if hasattr(chainer.cuda, "cupy"):
            chainer.cuda.cupy.random.seed(seed)
        numpy.random.seed(seed)
def create_communicator(gpu):
    if gpu:
        communicator = chainermn.create_communicator('hierarchical')
        chainer.cuda.get_device(communicator.intra_rank).use()
    else:
        communicator = chainermn.create_communicator('naive')

    if communicator.size < 2:
        pytest.skip("This test is for multinode only")

    rank_next = (communicator.rank + 1) % communicator.size
    rank_prev = (communicator.rank - 1) % communicator.size
    return communicator, rank_next, rank_prev
def check_cycle_model(gpu):
    communicator, rank_next, rank_prev = create_communicator(gpu)

    n, d = 100, 10

    if communicator.rank == 0:
        X = np.random.randn(n, d).astype(np.float32)
        Y = (np.random.rand(n) * 2).astype(np.int32)
        model = L.Classifier(
            Cycle0(d, communicator, rank_next, rank_prev))
        if gpu:
            model.to_gpu()
            X = chainer.cuda.to_gpu(X)
            Y = chainer.cuda.to_gpu(Y)
        for i in range(n):
            err = model(X[i:i + 1], Y[i:i + 1])
            err.backward()
    else:
        model = Cycle1(
            d, communicator, rank_next, rank_prev)
        if gpu:
            model.to_gpu()
        for i in range(n):
            err = model()
            err.backward()
def check_branching_model(gpu, communicator, rank_next, rank_prev,
                          parent_model):
    n, d = 100, 10
    X = np.random.randn(n, d).astype(np.float32)
    Y = (np.random.rand(n) * 2).astype(np.int32)

    if communicator.rank == 0:
        rank_children = [rank for rank in range(1, communicator.size)]
        model = L.Classifier(parent_model(
            d, communicator, rank_children))
        if gpu:
            model.to_gpu()
            X = chainer.cuda.to_gpu(X)
            Y = chainer.cuda.to_gpu(Y)
        for i in range(n):
            err = model(X[i:i + 1], Y[i:i + 1])
            err.backward()
    else:
        model = BranchChild(d, communicator, 0)
        if gpu:
            model.to_gpu()
        for i in range(n):
            err = model()
            err.backward()
def check_tuple_data_model(gpu):
    # This test only uses pairs (0, 1), (2, 3), ... (2m, 2m+1)
    communicator, rank_next, rank_prev = create_communicator(gpu)

    n, d = 100, 10
    X = np.random.randn(n, d).astype(np.float32)
    Y = (np.random.rand(n) * 2).astype(np.int32)

    if communicator.rank % 2 == 0:
        if communicator.rank == communicator.size - 1:
            # in case 2m is the right end with odd number of nodes
            return
        model = L.Classifier(
            TupleDataParent(communicator, d, rank_next))
    elif communicator.rank % 2 == 1:
        model = TupleDataChild(communicator, d, rank_prev)

    assert model is not None
    if gpu:
        model.to_gpu()
        X = chainer.cuda.to_gpu(X)
        Y = chainer.cuda.to_gpu(Y)

    for i in range(n):
        if communicator.rank % 2 == 0:
            err = model(X[i:i + 1], Y[i:i + 1])
        elif communicator.rank % 2 == 1:
            err = model()
        assert err is not None
        err.backward()
def backward_gpu(self, inputs, grad_outputs):
    x = inputs[0]
    xp = cuda.get_array_module(x)
    n_batch, c_in, N = x.shape

    with cuda.get_device(x.data):
        x = x.transpose((2, 1, 0))
        gy = grad_outputs[0]
        N_coarse = gy.shape[2]
        gy = gy.transpose((2, 1, 0))
        gx = xp.zeros((N, c_in, n_batch), dtype=x.dtype)
        gpu_graphpool_bwd(N, N_coarse, self.max_inds, gy, gx)
        gx = gx.transpose((2, 1, 0))

    return gx,
def to_cpu(self):
    self.LmI_tuple = tuple(map(cuda.to_cpu, self.LmI_tuple))
def to_gpu(self, device=None):
    with cuda.get_device(device):
        self.LmI_tuple = tuple(map(cuda.to_gpu, self.LmI_tuple))
def check_backward(self, x_data, W_data, b_data, y_grad, use_gpu=False):
    xp = cuda.get_array_module(x_data)
    if not self.c_contiguous:
        x_data = xp.asfortranarray(x_data)
        W_data = xp.asfortranarray(W_data)
        y_grad = xp.asfortranarray(y_grad)
        self.assertFalse(x_data.flags.c_contiguous)
        self.assertFalse(W_data.flags.c_contiguous)
        self.assertFalse(y_grad.flags.c_contiguous)
        if b_data is not None:
            b = xp.empty((len(b_data) * 2,), dtype=self.b.dtype)
            b[::2] = b_data
            b_data = b[::2]
            self.assertFalse(b_data.flags.c_contiguous)

    func = graph_convolution.GraphConvolutionFunction(self.L, self.K)
    if use_gpu:
        func.to_gpu()

    args = (x_data, W_data)
    if b_data is not None:
        args = args + (b_data,)

    gradient_check.check_backward(
        func, args, y_grad, **self.check_backward_options)
def test_backward_gpu(self):
    self.check_backward(cuda.to_gpu(self.x), cuda.to_gpu(self.W),
                        cuda.to_gpu(self.b), cuda.to_gpu(self.gy),
                        use_gpu=True)
def test_backward_gpu_nobias(self):
    self.check_backward(cuda.to_gpu(self.x), cuda.to_gpu(self.W),
                        None, cuda.to_gpu(self.gy), use_gpu=True)
def test_csr_matvec(self):
    import scipy.sparse
    x = np.random.randn(5)
    A = scipy.sparse.csr_matrix(
        np.random.randn(4, 5).reshape((4, 5)), dtype=x.dtype)
    y = A.dot(x)
    y_gpu = cupy.empty((4,), dtype=x.dtype)
    A_data = cuda.to_gpu(A.data)
    A_indices = cuda.to_gpu(A.indices)
    A_indptr = cuda.to_gpu(A.indptr)
    x_gpu = cuda.to_gpu(x)
    graph_convolution.csr_matvec(y.shape[0], A_data, A_indices, A_indptr,
                                 x_gpu, y_gpu)
    testing.assert_allclose(
        y, cuda.to_cpu(y_gpu), **self.check_forward_options)
def out_generated_image(gen, dis, rows, cols, seed, dst):
    @chainer.training.make_extension()
    def make_image(trainer):
        np.random.seed(seed)
        n_images = rows * cols
        xp = gen.xp
        z = Variable(xp.asarray(gen.make_hidden(n_images)))
        with chainer.using_config('train', False):
            x = gen(z)
        x = chainer.cuda.to_cpu(x.data)
        np.random.seed()

        # gen_output_activation_func is sigmoid
        x = np.asarray(np.clip(x * 255, 0.0, 255.0), dtype=np.uint8)
        # gen output_activation_func is tanh
        #x = np.asarray(np.clip((x+1) * 0.5 * 255, 0.0, 255.0), dtype=np.uint8)
        _, _, H, W = x.shape
        x = x.reshape((rows, cols, 1, H, W))
        x = x.transpose(0, 3, 1, 4, 2)
        x = x.reshape((rows * H, cols * W))

        preview_dir = '{}/preview_LSGAN_pixel_shuffler'.format(dst)
        preview_path = preview_dir +\
            '/image{:0>8}.png'.format(trainer.updater.iteration)
        if not os.path.exists(preview_dir):
            os.makedirs(preview_dir)
        Image.fromarray(x).save(preview_path)
    return make_image
def out_generated_image(gen, dis, rows, cols, seed, dst):
    @chainer.training.make_extension()
    def make_image(trainer):
        np.random.seed(seed)
        n_images = rows * cols
        xp = gen.xp
        z = Variable(xp.asarray(gen.make_hidden(n_images)))
        #z = Variable(xp.zeros((n_images, 100, 1), dtype=xp.float32))
        label = [i for i in range(rows) for j in range(cols)]
        with chainer.using_config('train', False):
            x = gen(z, label)
        x = chainer.cuda.to_cpu(x.data)
        np.random.seed()

        # gen_output_activation_func is sigmoid
        x = np.asarray(np.clip(x * 255, 0.0, 255.0), dtype=np.uint8)
        # gen output_activation_func is tanh
        #x = np.asarray(np.clip((x+1) * 0.5 * 255, 0.0, 255.0), dtype=np.uint8)
        _, _, H, W = x.shape
        x = x.reshape((rows, cols, 1, H, W))
        x = x.transpose(0, 3, 1, 4, 2)
        x = x.reshape((rows * H, cols * W))

        preview_dir = '{}/preview'.format(dst)
        preview_path = preview_dir +\
            '/image{:0>6}.png'.format(trainer.updater.iteration)
        if not os.path.exists(preview_dir):
            os.makedirs(preview_dir)
        Image.fromarray(x).save(preview_path)
    return make_image
def convert_debug(self, content_img, init_img, output_directory,
                  max_iteration=1000, debug_span=100, random_init=False,
                  xsplit=3, ysplit=3, overwrap=50, average_pooling=False):
    init_array = self.xp.array(neural_art.utility.img2array(init_img))
    content_array = neural_art.utility.img2array(content_img)
    if random_init:
        init_array = self.xp.random.uniform(-20, 20, init_array.shape, dtype=init_array.dtype)

    subrects = []
    ### (step-wrap)*(split-1) = w-step
    xstep = (init_array.shape[2]+(xsplit-1)*overwrap-1) / xsplit
    ystep = (init_array.shape[3]+(ysplit-1)*overwrap-1) / ysplit
    for x in range(0, init_array.shape[2]-xstep, xstep-overwrap):
        for y in range(0, init_array.shape[3]-ystep, ystep-overwrap):
            subrects.append((x, y, x+xstep, y+ystep))

    rects_content_layers = []
    target_texture_ratios = []
    for x1, y1, x2, y2 in subrects:
        subimg = self.xp.asarray(content_array[:, :, x1:x2, y1:y2])
        layers = self.model.forward_layers(chainer.Variable(subimg, volatile=True))
        texture_feature = self.converter._to_texture_feature(layers)
        target_texture_ratio = self.converter.optimize_texture_feature(texture_feature)
        target_texture_ratios.append(target_texture_ratio)

    parameter_now = chainer.links.Parameter(init_array)
    self.optimizer.setup(parameter_now)
    for i in xrange(max_iteration+1):
        neural_art.utility.print_ltsv({"iteration": i})
        if i % debug_span == 0 and i > 0:
            print("save")
            neural_art.utility.array2img(chainer.cuda.to_cpu(parameter_now.W.data)).save(
                os.path.join(output_directory, "{}.png".format(i)))
        parameter_now.zerograds()
        for (x1, y1, x2, y2), target_texture_ratio in zip(subrects, target_texture_ratios):
            subimg = self.xp.asarray(content_array[:, :, x1:x2, y1:y2])
            contents_layers = self.model.forward_layers(chainer.Variable(subimg, volatile=True))
            contents_layers = [
                chainer.Variable(layer.data) for layer in contents_layers
            ]

            x = chainer.Variable(self.xp.ascontiguousarray(parameter_now.W.data[:, :, x1:x2, y1:y2]))
            layers = self.model.forward_layers(x, average_pooling=average_pooling)
            texture_feature = self.converter._to_texture_feature(layers)
            target_texture_feature = self.converter._constructed_feature(target_texture_ratio)
            loss_texture = self.converter.squared_error(
                texture_feature,
                target_texture_feature
            )
            loss_content = self.converter._contents_loss(layers, contents_layers)
            loss = self.texture_weight * loss_texture + self.content_weight * loss_content
            loss.backward()
            parameter_now.W.grad[:, :, x1:x2, y1:y2] += x.grad
        self.optimizer.update()
    return neural_art.utility.array2img(chainer.cuda.to_cpu(parameter_now.W.data))
def __call__(self, inputs, outputs, disable=(), train=True, tuning_layer='fc1000'):
    """Executes a sub-network of the network.

    This function acts as an interpreter of the network definition for
    Caffe. On execution, it interprets each layer one by one, and if the
    bottom blobs are already computed, then emulates the layer and stores
    output blobs as :class:`~chainer.Variable` objects.

    Args:
        inputs (dict): A dictionary whose key-value pairs indicate initial
            correspondences between blob names and
            :class:`~chainer.Variable` objects.
        outputs (Iterable): A list of blob names whose corresponding
            :class:`~chainer.Variable` objects are returned.
        disable (Iterable): A list of layer names that will be ignored
            during the forward computation.
        train (bool): If ``True``, this function emulates the TRAIN phase
            of the Caffe layers. Otherwise, it emulates the TEST phase.

    Returns:
        tuple: A tuple of output :class:`~chainer.Variable` objects
            corresponding to elements of the `outputs` argument.

    """
    self.train = False
    variables = dict(inputs)
    #exit()
    cnt = 1
    self.cleargrads()
    for func_name, bottom, top in self.layers:
        cnt += 1
        if (func_name in disable or
                func_name not in self.forwards or
                any(blob not in variables for blob in bottom)):
            continue
        #import cupy.cuda.runtime as rt
        #print cnt,func_name,rt.memGetInfo()[0]/1024
        #print cnt,func_name
        func = self.forwards[func_name]
        input_vars = tuple(variables[blob] for blob in bottom)
        if func_name == tuning_layer:
            volatile = 'off' if train else 'on'
            new_input_vars = []
            for blob in input_vars:
                new_input_vars.append(
                    chainer.Variable(blob.data, volatile=volatile))
            input_vars = new_input_vars
            self.train = True
        output_vars = func(*input_vars)
        #if cnt==tuning_layer:
        if not isinstance(output_vars, collections.Iterable):
            output_vars = output_vars,
        for var, name in zip(output_vars, top):
            variables[name] = var
    self.variables = variables
    #print variables
    return tuple(variables[blob] for blob in outputs)
def __call__(self, x, im_info):
    h, n = self.trunk(x), x.data.shape[0]
    rpn_cls_score = self.rpn_cls_score(h)
    c, hh, ww = rpn_cls_score.data.shape[1:]
    rpn_bbox_pred = self.rpn_bbox_pred(h)
    rpn_cls_score = F.reshape(rpn_cls_score, (n, 2, -1))

    # RoI Proposal
    rpn_cls_prob = F.softmax(rpn_cls_score)
    rpn_cls_prob_reshape = F.reshape(rpn_cls_prob, (n, c, hh, ww))
    rois = self.proposal_layer(
        rpn_cls_prob_reshape, rpn_bbox_pred, im_info, self.train)
    if self.gpu >= 0:
        rois = to_gpu(rois, device=self.gpu)
        im_info = to_gpu(im_info, device=self.gpu)
        with chainer.cuda.Device(self.gpu):
            boxes = rois[:, 1:5] / im_info[0][2]
    else:
        boxes = rois[:, 1:5] / im_info[0][2]
    self.rois = rois
    rois = chainer.Variable(rois, volatile=not self.train)

    # RCNN
    pool5 = roi_pooling_2d(self.trunk.feature, rois, 7, 7, 0.0625)
    fc6 = F.relu(self.fc6(pool5))
    fc7 = F.relu(self.fc7(fc6))
    self.score_fc7 = self.cls_score(fc7)
    self.scores = F.softmax(self.score_fc7)
    #print "score",self.score_fc7.shape

    box_deltas = self.bbox_pred(fc7).data
    self.deltas = box_deltas
    #print "box_delta",box_deltas.shape
    pred_boxes = bbox_transform_inv(boxes, box_deltas, self.gpu)
    self.pred_boxes = clip_boxes(pred_boxes, im_info[0][:2], self.gpu)

    if self.train:
        # loss_cls = F.softmax_cross_entropy(cls_score, labels)
        # huber loss with delta=1 means SmoothL1Loss
        return None
    else:
        return self.scores, self.pred_boxes