We extracted the following 14 code examples from open-source Python projects to illustrate how to use mxnet.Context().
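Before the project examples, here is a minimal sketch of what mx.Context does: it names the device (CPU, pinned CPU memory, or a GPU) that an NDArray is allocated on. This snippet is illustrative only and not taken from the projects below; it assumes MXNet and NumPy are installed and that mx.context.num_gpus() is available (MXNet 1.2+).

import mxnet as mx
import numpy as np

# A context describes the device an NDArray lives on.
cpu_ctx = mx.Context('cpu', 0)      # equivalent to mx.cpu(0)

# Allocate an array directly on that context.
a = mx.nd.array(np.arange(6).reshape(2, 3), ctx=cpu_ctx)
print(a.context)                    # cpu(0)

# Copy it to another context, e.g. the first GPU if one is available.
if mx.context.num_gpus() > 0:
    b = a.copyto(mx.gpu(0))
    print(b.context)                # gpu(0)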
def check_elementwise_sum_with_shape(shape, n):
    # forward
    inputs = [mx.symbol.Variable('arg%d' % i) for i in range(n)]
    out = mx.symbol.ElementWiseSum(*inputs, name='esum')
    arr = [mx.nd.empty(shape) for i in range(n)]
    arr_grad = [mx.nd.empty(shape) for i in range(n)]
    for i in range(n):
        arr[i][:] = np.random.uniform(-10, 10, shape)
    exec1 = out.bind(mx.Context('cpu'),
                     args=arr,
                     args_grad=arr_grad)
    exec1.forward()
    out1 = exec1.outputs[0].asnumpy()
    out = sum(a.asnumpy() for a in arr)
    assert reldiff(out, out1) < 1e-6

    out_grad = mx.nd.empty(shape)
    out_grad[:] = np.random.uniform(-10, 10, shape)
    # backward
    exec1.backward([out_grad])
    for a in arr_grad:
        assert same(a.asnumpy(), out_grad.asnumpy())
def __init__(self,
             config: model.ModelConfig,
             context: List[mx.context.Context],
             train_iter: data_io.BaseParallelSampleIter,
             bucketing: bool,
             lr_scheduler,
             gradient_compression_params: Optional[Dict[str, Any]] = None) -> None:
    super().__init__(config)
    self.context = context
    self.lr_scheduler = lr_scheduler
    self.bucketing = bucketing
    self.gradient_compression_params = gradient_compression_params
    self._build_model_components()
    self.module = self._build_module(train_iter)
    self.training_monitor = None  # type: Optional[callback.TrainingMonitor]
def determine_context(args: argparse.Namespace, exit_stack: ExitStack) -> List[mx.Context]:
    """
    Determine the context we should run on (CPU or GPU).

    :param args: Arguments as returned by argparse.
    :param exit_stack: An ExitStack from contextlib.
    :return: A list with the context(s) to run on.
    """
    if args.use_cpu:
        logger.info("Training Device: CPU")
        context = [mx.cpu()]
    else:
        num_gpus = utils.get_num_gpus()
        check_condition(num_gpus >= 1,
                        "No GPUs found, consider running on the CPU with --use-cpu "
                        "(note: check depends on nvidia-smi and this could also mean that the nvidia-smi "
                        "binary isn't on the path).")
        if args.disable_device_locking:
            context = utils.expand_requested_device_ids(args.device_ids)
        else:
            context = exit_stack.enter_context(utils.acquire_gpus(args.device_ids, lock_dir=args.lock_dir))
        if args.batch_type == C.BATCH_TYPE_SENTENCE:
            check_condition(args.batch_size % len(context) == 0,
                            "When using multiple devices the batch size must be "
                            "divisible by the number of devices. Choose a batch "
                            "size that is a multiple of %d." % len(context))
        logger.info("Training Device(s): GPU %s", context)
        context = [mx.gpu(gpu_id) for gpu_id in context]
    return context
def create_training_model(model_config: model.ModelConfig,
                          args: argparse.Namespace,
                          context: List[mx.Context],
                          train_iter: data_io.BaseParallelSampleIter,
                          lr_scheduler_instance: lr_scheduler.LearningRateScheduler,
                          resume_training: bool,
                          training_state_dir: str) -> training.TrainingModel:
    """
    Create a training model and load the parameters from disk if needed.

    :param model_config: The configuration for the model.
    :param args: Arguments as returned by argparse.
    :param context: The context(s) to run on.
    :param train_iter: The training data iterator.
    :param lr_scheduler_instance: The learning rate scheduler.
    :param resume_training: When True, the model will be loaded from disk.
    :param training_state_dir: Directory where the training state is stored.
    :return: The training model.
    """
    training_model = training.TrainingModel(config=model_config,
                                            context=context,
                                            train_iter=train_iter,
                                            bucketing=not args.no_bucketing,
                                            lr_scheduler=lr_scheduler_instance,
                                            gradient_compression_params=gradient_compression_params(args))

    # We may consider loading the params in TrainingModule, for consistency
    # with the training state saving
    if resume_training:
        logger.info("Found partial training in directory %s. Resuming from saved state.", training_state_dir)
        training_model.load_params_from_file(os.path.join(training_state_dir, C.TRAINING_STATE_PARAMS_NAME))
    elif args.params:
        logger.info("Training will initialize from parameters loaded from '%s'", args.params)
        training_model.load_params_from_file(args.params)

    return training_model
def __init__(self, num_classes, data_shape, max_iter, dtype):
    self.batch_size = data_shape[0]
    self.cur_iter = 0
    self.max_iter = max_iter
    self.dtype = dtype
    label = np.random.randint(0, num_classes, [self.batch_size, ])
    data = np.random.uniform(-1, 1, data_shape)
    self.data = mx.nd.array(data, dtype=self.dtype, ctx=mx.Context('cpu_pinned', 0))
    self.label = mx.nd.array(label, dtype=self.dtype, ctx=mx.Context('cpu_pinned', 0))
def test_aggregator():
    """aggregate value on multiple devices"""
    kv = init_kv()

    # devices
    num_devs = 4
    devs = [mx.Context('cpu', i) for i in range(num_devs)]

    # single
    vals = [mx.nd.ones(shape, d) for d in devs]
    kv.push(3, vals)
    kv.pull(3, out=vals)
    for v in vals:
        check_diff_to_scalar(v, num_devs)

    # list
    vals = [[mx.nd.ones(shape, d) * 2.0 for d in devs]] * len(keys)
    kv.push(keys, vals)
    kv.pull(keys, out=vals)
    for vv in vals:
        for v in vv:
            check_diff_to_scalar(v, num_devs * 2.0)
def test_updater(dev='cpu'):
    """updater"""
    kv = init_kv()
    kv._set_updater(updater)

    # devices
    num_devs = 4
    devs = [mx.Context(dev, i) for i in range(num_devs)]

    # single
    vals = [mx.nd.ones(shape, d) for d in devs]
    kv.push(3, vals)
    kv.pull(3, out=vals)
    for v in vals:
        check_diff_to_scalar(v, num_devs)

    # list
    vals = [[mx.nd.ones(shape, d) for d in devs]] * len(keys)
    num_push = 4
    for i in range(num_push):
        kv.push(keys, vals)
    kv.pull(keys, out=vals)
    for vv in vals:
        for v in vv:
            check_diff_to_scalar(v, num_devs * num_push)
def test_ndarray_copy():
    c = mx.nd.array(np.random.uniform(-10, 10, (10, 10)))
    d = c.copyto(mx.Context('cpu', 0))
    assert np.sum(np.abs(c.asnumpy() != d.asnumpy())) == 0.0
def test_chain():
    n = 2
    data1 = mx.sym.Variable('data1')
    data2 = mx.sym.Variable('data2')
    with mx.AttrScope(ctx_group='dev1'):
        net = data1 + data2
        net = net * 3

    with mx.AttrScope(ctx_group='dev2'):
        net = net + data1

    with mx.Context(mx.cpu(0)):
        shape = (4, 5)
        arr = [mx.nd.empty(shape) for i in range(n)]
        arr_grad = [mx.nd.empty(shape) for i in range(n)]

    exec1 = net.bind(mx.cpu(),
                     args=arr,
                     args_grad=arr_grad,
                     group2ctx={'dev1': mx.cpu(0), 'dev2': mx.cpu(1)})
    arr[0][:] = 1.0
    arr[1][:] = 2.0
    arr2 = [a.copyto(mx.cpu()) for a in arr]
    arr_grad2 = [a.copyto(mx.cpu()) for a in arr_grad]
    exec2 = net.bind(mx.cpu(),
                     args=arr2,
                     args_grad=arr_grad2)

    # Show the execution plan that involves copynode
    print(exec1.debug_str())

    exec1.forward()
    exec2.forward()
    assert reldiff(exec1.outputs[0].asnumpy(), exec2.outputs[0].asnumpy()) < 1e-6

    out_grad = mx.nd.empty(shape, mx.cpu(1))
    out_grad[:] = 1.0
    exec1.backward([out_grad])
    exec2.backward([out_grad.copyto(mx.cpu())])
    for a, b in zip(arr_grad, arr_grad2):
        assert reldiff(a.asnumpy(), b.asnumpy()) < 1e-6
def determine_decode_and_evaluate_context(args: argparse.Namespace,
                                          exit_stack: ExitStack,
                                          train_context: List[mx.Context]) -> Tuple[int, Optional[mx.Context]]:
    """
    Determine the number of sentences to decode and the context we should run on (CPU or GPU).

    :param args: Arguments as returned by argparse.
    :param exit_stack: An ExitStack from contextlib.
    :param train_context: Context for training.
    :return: The number of sentences to decode and a list with the context(s) to run on.
    """
    num_to_decode = args.decode_and_evaluate
    if args.optimized_metric == C.BLEU and num_to_decode == 0:
        logger.info("You chose BLEU as the optimized metric, will turn on BLEU monitoring during training. "
                    "To control how many validation sentences are used for calculating bleu use "
                    "the --decode-and-evaluate argument.")
        num_to_decode = -1

    if num_to_decode == 0:
        return 0, None

    if args.use_cpu or args.decode_and_evaluate_use_cpu:
        context = mx.cpu()
    elif args.decode_and_evaluate_device_id is not None:
        # decode device is defined from the commandline
        num_gpus = utils.get_num_gpus()
        check_condition(num_gpus >= 1,
                        "No GPUs found, consider running on the CPU with --use-cpu "
                        "(note: check depends on nvidia-smi and this could also mean that the nvidia-smi "
                        "binary isn't on the path).")

        if args.disable_device_locking:
            context = utils.expand_requested_device_ids([args.decode_and_evaluate_device_id])
        else:
            context = exit_stack.enter_context(utils.acquire_gpus([args.decode_and_evaluate_device_id],
                                                                  lock_dir=args.lock_dir))
        context = mx.gpu(context[0])
    else:
        # default decode context is the last training device
        context = train_context[-1]

    logger.info("Decode and Evaluate Device(s): %s", context)
    return num_to_decode, context
def check_concat_with_shape(shapes, dimension, skip_second):
    # if skip_second is True, second argument will not have gradient.
    # it is to test #1130
    n = len(shapes)
    # forward
    target_dim = 0
    for shape in shapes:
        target_dim += shape[dimension]

    inputs = [mx.symbol.Variable('arg%d' % i) for i in range(n)]
    out = mx.symbol.Concat(*inputs, name='conc', dim=dimension)
    arr = [mx.nd.empty(shape) for shape in shapes]
    for i in range(n):
        arr[i][:] = shapes[i][dimension]
    arr_np = [np.copy(narray.asnumpy()) for narray in arr]
    arr_grad = [mx.nd.empty(shape) for shape in shapes]
    dict_grad = {}
    arg_names = out.list_arguments()

    for name, g in zip(arg_names, arr_grad):
        if not skip_second or name != 'arg1':
            dict_grad[name] = g

    args = out.list_arguments()
    arg_shapes, out_shapes, aux_shapes = out.infer_shape(**dict(zip(args, shapes)))
    out_grad = mx.nd.empty(out_shapes[0])
    exec1 = out.bind(mx.Context('cpu'),
                     args=arr,
                     args_grad=dict_grad)
    exec1.forward()
    out1 = exec1.outputs[0]
    ret = np.concatenate([narray.asnumpy() for narray in arr], axis=dimension)
    assert same(out1.asnumpy(), ret)

    # backward
    out1.copyto(out_grad)
    out_grad[:] += 1
    exec1.backward([out_grad])

    for i, name in enumerate(arg_names):
        if not skip_second or name != 'arg1':
            grad = dict_grad[name]
            np_grad = arr_np[i]
            assert same(grad.asnumpy(), np_grad + 1)
def check_bind_with_uniform(uf, gf, dim, sf=None, lshape=None, rshape=None):
    """check function consistency with uniform random numbers"""
    shape = tuple(np.random.randint(1, int(1000**(1.0/dim)), size=dim))
    lhs = mx.symbol.Variable('lhs')
    rhs = mx.symbol.Variable('rhs')
    if sf is not None:
        ret = sf(lhs, rhs)
    else:
        ret = uf(lhs, rhs)

    assert ret.list_arguments() == ['lhs', 'rhs']

    lshape = shape if lshape is None else lshape
    rshape = shape if rshape is None else rshape

    lhs_arr = mx.nd.array(np.random.uniform(-1, 1, lshape))
    rhs_arr = mx.nd.array(np.random.uniform(-1, 1, rshape))
    lhs_grad = mx.nd.empty(lshape)
    rhs_grad = mx.nd.empty(rshape)
    executor = ret.bind(mx.Context('cpu'),
                        args=[lhs_arr, rhs_arr],
                        args_grad=[lhs_grad, rhs_grad])

    exec3 = ret.bind(mx.Context('cpu'),
                     args=[lhs_arr, rhs_arr])

    exec4 = ret.bind(mx.Context('cpu'),
                     args={'rhs': rhs_arr, 'lhs': lhs_arr},
                     args_grad={'lhs': lhs_grad, 'rhs': rhs_grad})
    executor.forward()
    exec3.forward()
    exec4.forward()
    out2 = executor.outputs[0].asnumpy()
    out1 = uf(lhs_arr.asnumpy(), rhs_arr.asnumpy())
    out3 = exec3.outputs[0].asnumpy()
    out4 = exec4.outputs[0].asnumpy()
    assert reldiff(out1, out2) < 1e-6
    assert reldiff(out1, out3) < 1e-6
    assert reldiff(out1, out4) < 1e-6

    # test gradient
    out_grad = mx.nd.array(np.ones(out2.shape))
    lhs_grad2, rhs_grad2 = gf(out_grad.asnumpy(),
                              lhs_arr.asnumpy(),
                              rhs_arr.asnumpy())
    executor.backward([out_grad])

    assert reldiff(lhs_grad.asnumpy(), lhs_grad2) < 1e-6
    assert reldiff(rhs_grad.asnumpy(), rhs_grad2) < 1e-6