The following code examples, extracted from open-source Python projects, illustrate how the theano.sandbox module is used.
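Most of the examples below follow one of two recurring patterns: importing a GPU-specific submodule such as theano.sandbox.cuda behind an availability check, or drawing random numbers with theano.sandbox.rng_mrg.MRG_RandomStreams. Here is a minimal sketch of both patterns (my own illustration, not taken from the projects below; the seed and shape are arbitrary):

import theano
from theano.sandbox.rng_mrg import MRG_RandomStreams

# Pattern 1: guard GPU-specific code behind the availability flag.
from theano.sandbox import cuda
if cuda.cuda_available:
    print("CUDA backend available")

# Pattern 2: build and compile a graph that samples from the MRG generator.
srng = MRG_RandomStreams(seed=1234)
u = srng.uniform((2,), low=-1, high=1)  # symbolic vector of 2 uniform draws
f = theano.function([], u)
print(f())  # two floats in [-1, 1)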
def print_graph_linker(print_prog=True):
    if 1:
        imap = {None: '-'}

        def blah(i, node, thunk):
            imap[node] = str(i)
            if print_prog:  # and node.op.__class__ is T.DimShuffle:
                if False and node.op == T.DimShuffle((), ['x', 'x'],
                                                     inplace=True):
                    print(node.op == T.DimShuffle((), ['x', 'x'],
                                                  inplace=True), end=' ')
                    print(node.inputs[0], type(node.inputs[0]), end=' ')
                    print(node.inputs[0].equals(T.constant(2)), end=' ')
                outputs = node.outputs
                inputs = theano.gof.graph.inputs(outputs)
                print('node ', i, node, end=' ')
                print(':'.join([imap[inp.owner] for inp in node.inputs]))
                # print theano.sandbox.pprint.pp.process_graph(inputs, outputs)

        return theano.sandbox.wraplinker.WrapLinkerMany(
            [theano.gof.OpWiseCLinker()],
            [theano.sandbox.wraplinker.run_all,
             blah
             # , theano.sandbox.wraplinker.numpy_notall_isfinite
             ])
    else:
        return theano.gof.OpWiseCLinker()
def test_output_broadcast_cuda(self):
    from theano.sandbox import cuda
    if not cuda.cuda_available:
        raise SkipTest("Optional package Cuda disabled")
    if cuda.use.device_number is None:
        # We would normally register VecAsRowAndCol as a GPU op, but we
        # don't want to do that here because it would disable other
        # tests in this file. So we manually init the GPU if needed
        # to remove the warning.
        cuda.use("gpu",
                 force=True,
                 default_to_move_computation_to_gpu=False,
                 move_shared_float32_to_gpu=False,
                 enable_cuda=False)
    v = cuda.fvector('v')
    c, r = VecAsRowAndCol()(v)
    f = theano.function([v], [c, r])

    v_val = cuda.CudaNdarray(self.rng.randn(5).astype('float32'))
    f(v_val)
def test_simple_shared_mrg_random(self):
    theano_rng = theano.sandbox.rng_mrg.MRG_RandomStreams(utt.fetch_seed())

    values, updates = theano.scan(lambda: theano_rng.uniform((2,), -1, 1),
                                  [], [], [],
                                  n_steps=5,
                                  truncate_gradient=-1,
                                  go_backwards=False)
    my_f = theano.function([], values,
                           updates=updates,
                           allow_input_downcast=True)

    # Just check for run-time errors
    theano_v = my_f()
    theano_v = my_f()
def compile_sampling(self, data_train, data_valid, data_test,
                     training_n_samples):
    X = tt.matrix('X')
    batch = tt.iscalar('batch')
    n_samples = tt.iscalar('n_samples')
    n_layers = len(self.layers)

    samples = [None] * n_layers
    samples[0] = replicate_batch(X, n_samples)
    if "gpu" in theano.config.device:
        from theano.sandbox import rng_mrg
        srng = rng_mrg.MRG_RandomStreams(seed=42)
    else:
        srng = tt.shared_randomstreams.RandomStreams(seed=42)
    for layer in range(n_layers - 1):
        samples[layer + 1] = self.compute_samples(srng, samples[layer], layer)

    givens = dict()
    givens[X] = data_valid[batch * self.batch_size:(batch + 1) * self.batch_size]
    self.sample_convergence = theano.function([batch, n_samples], samples,
                                              givens=givens)

    givens[n_samples] = np.int32(training_n_samples)
    givens[X] = data_train[batch * self.batch_size:(batch + 1) * self.batch_size]
    self.sample_train = theano.function([batch], samples, givens=givens)

    givens[X] = data_valid[batch * self.batch_size:(batch + 1) * self.batch_size]
    self.sample_valid = theano.function([batch], samples, givens=givens)

    givens[X] = data_test[batch * self.batch_size:(batch + 1) * self.batch_size]
    self.sample_test = theano.function([batch], samples, givens=givens)
def time_theano_fn(fn, index, GPU_bool):
    # GPU kernel launches are asynchronous, so synchronize before and
    # after the call to measure the actual execution time.
    if GPU_bool:
        theano.sandbox.cuda.synchronize()
    start = time.time() * 1000
    fn(index)
    if GPU_bool:
        theano.sandbox.cuda.synchronize()
    elapsed_time = time.time() * 1000 - start
    return elapsed_time
def print_mem(context=None):
    if theano.sandbox.cuda.cuda_enabled:
        rvals = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.mem_info()
        # Available memory in Mb
        available = float(rvals[0]) / 1024. / 1024.
        # Total memory in Mb
        total = float(rvals[1]) / 1024. / 1024.
        if context is None:
            print('Used %.3f Mb Free %.3f Mb, total %.3f Mb' %
                  (total - available, available, total))
        else:
            info = str(context)
            print(('GPU status : Used %.3f Mb Free %.3f Mb, '
                   'total %.3f Mb [context %s]') %
                  (total - available, available, total, info))
def gpu_mem_free():
    """
    Memory free on the GPU.

    Returns
    -------
    megs_free : float
        Number of megabytes of memory free on the GPU used by Theano.
    """
    global cuda
    if cuda is None:
        # Binds the module to the global name, so the import runs only once.
        from theano.sandbox import cuda
    return cuda.mem_info()[0] / 1024. / 1024.
def test_multinomial_0():
    # This tests the MultinomialFromUniform Op directly, not going through the
    # multinomial() call in GPU random generation.
    p = tensor.fmatrix()
    u = tensor.fvector()

    for dtype in ['int64', 'float32', 'auto']:
        m = theano.sandbox.multinomial.MultinomialFromUniform(dtype)(p, u)

        # the m*2 allows the multinomial to reuse output
        f = function([p, u], m * 2, allow_input_downcast=True,
                     mode=mode_with_gpu)
        assert any([type(node.op) is GPUAMultinomialFromUniform
                    for node in f.maker.fgraph.toposort()])

        # test that both first and second samples can be drawn
        utt.assert_allclose(f([[1, 0], [0, 1]], [.1, .1]),
                            [[2, 0], [0, 2]])

        # test that both second labels can be drawn
        r = f([[.2, .8], [.3, .7]], [.31, .31])
        utt.assert_allclose(r, [[0, 2], [0, 2]])

        # test that both first labels can be drawn
        r = f([[.2, .8], [.3, .7]], [.21, .21])
        utt.assert_allclose(r, [[0, 2], [2, 0]])

        # change the size to make sure output gets reallocated ok
        # and also make sure that the GPU version doesn't screw up the
        # transposed-ness
        r = f([[.2, .8]], [.25])
        utt.assert_allclose(r, [[0, 2]])

    # TODO: check a bigger example (make sure blocking on GPU is handled
    #       correctly)
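The expected outputs above follow from the op's selection rule: for each row, the sampled label is the first index whose cumulative probability exceeds the uniform draw. A plain-numpy illustration of that rule (this is my reading of the test cases, not the op's actual implementation):

import numpy as np

def pick_label(p_row, u):
    # First index where the running sum of probabilities passes the draw.
    return int(np.searchsorted(np.cumsum(p_row), u, side='right'))

# Matches the expectations in the test above (outputs are doubled by m*2):
assert pick_label([.2, .8], .31) == 1   # row [0, 2]
assert pick_label([.3, .7], .31) == 1   # row [0, 2]
assert pick_label([.2, .8], .21) == 1   # row [0, 2]
assert pick_label([.3, .7], .21) == 0   # .21 < .3, so row [2, 0]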
def test_multinomial_large():
    # DEBUG_MODE will test this on GPU
    p = tensor.fmatrix()
    u = tensor.fvector()
    m = theano.sandbox.multinomial.MultinomialFromUniform('auto')(p, u)
    f = function([p, u], m * 2, allow_input_downcast=True, mode=mode_with_gpu)
    assert any([type(node.op) is GPUAMultinomialFromUniform
                for node in f.maker.fgraph.toposort()])

    pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = numpy.ones_like(pval[:, 0]) * 0.5
    mval = f(pval, uval)

    assert mval.shape == pval.shape
    if config.cast_policy == 'custom':
        assert mval.dtype == pval.dtype
    elif config.cast_policy == 'numpy+floatX':
        assert mval.dtype == config.floatX
    elif config.cast_policy == 'numpy':
        assert mval.dtype == 'float64'
    else:
        raise NotImplementedError(config.cast_policy)
    utt.assert_allclose(mval.sum(axis=1), 2)
    asdf = numpy.asarray([0, 0, 2, 0]) + 0 * pval
    utt.assert_allclose(mval, asdf)  # broadcast over all rows
def test_gpu_opt_dtypes():
    # Test if the returned samples are of the datatype specified
    for dtype in ['uint32', 'float32', 'int64', 'float64']:
        p = tensor.fmatrix()
        u = tensor.fvector()
        m = theano.sandbox.multinomial.MultinomialFromUniform(dtype)(p, u)

        f = function([p, u], m, allow_input_downcast=True, mode=mode_with_gpu)
        assert any([type(node.op) is GPUAMultinomialFromUniform
                    for node in f.maker.fgraph.toposort()])

        pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
        uval = numpy.ones_like(pval[:, 0]) * 0.5
        samples = f(pval, uval)
        assert samples.dtype == dtype, "%s != %s" % (samples.dtype, dtype)
def test_gpu_opt():
    # Does have some overlap with test_multinomial_0

    # We test the case where we put the op on the gpu when the output
    # is moved to the gpu.
    p = tensor.fmatrix()
    u = tensor.fvector()
    m = theano.sandbox.multinomial.MultinomialFromUniform('auto')(p, u)
    assert m.dtype == 'float32', m.dtype

    f = function([p, u], m, allow_input_downcast=True, mode=mode_with_gpu)
    assert any([type(node.op) is GPUAMultinomialFromUniform
                for node in f.maker.fgraph.toposort()])
    pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = numpy.ones_like(pval[:, 0]) * 0.5
    f(pval, uval)

    # Test with a row, it was failing in the past.
    r = tensor.frow()
    m = theano.sandbox.multinomial.MultinomialFromUniform('auto')(r, u)
    assert m.dtype == 'float32', m.dtype

    f = function([r, u], m, allow_input_downcast=True, mode=mode_with_gpu)
    assert any([type(node.op) is GPUAMultinomialFromUniform
                for node in f.maker.fgraph.toposort()])
    pval = numpy.arange(1 * 4, dtype='float32').reshape((1, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = numpy.ones_like(pval[:, 0]) * 0.5
    f(pval, uval)
def __init__(self, atexit_print=True, flag_time_thunks=None, **kwargs):
    if (hasattr(theano, 'sandbox') and
            hasattr(theano.sandbox, 'cuda') and
            theano.sandbox.cuda.cuda_enabled):
        if os.environ.get('CUDA_LAUNCH_BLOCKING', '0') != '1':
            raise Exception(
                "You are running the Theano profiler with CUDA enabled."
                " Theano GPU ops execution is asynchronous by default."
                " So by default, the profile is useless."
                " You must set the environment variable"
                " CUDA_LAUNCH_BLOCKING to 1 to tell the CUDA driver to"
                " synchronize the execution to get a meaningful profile.")

    self.apply_callcount = {}
    self.output_size = {}
    self.apply_time = {}
    self.apply_cimpl = {}
    self.variable_shape = {}
    self.variable_strides = {}
    if flag_time_thunks is None:
        self.flag_time_thunks = config.profiling.time_thunks
    else:
        self.flag_time_thunks = flag_time_thunks

    self.__dict__.update(kwargs)
    if atexit_print:
        global _atexit_print_list
        _atexit_print_list.append(self)
        global _atexit_registered
        if not _atexit_registered:
            atexit.register(_atexit_print_fn)
            _atexit_registered = True
    self.ignore_first_call = theano.config.profiling.ignore_first_call
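As the exception text explains, profiling GPU ops requires synchronous kernel launches. One way to satisfy the check (a sketch; the variable must be set before Theano initialises the CUDA device, so setting it in the launching shell is the safer option):

import os
# Must be set before Theano/CUDA initialises the device.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
import theano  # import only after setting the environment variable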
def __init__(self, *args, **kwargs):
    from theano.sandbox import cuda
    self.gpu_backend = cuda
    self.mode_with_gpu = mode_with_gpu
    self.mode_with_gpu_nodebug = mode_with_gpu_nodebug
    super(T_Scan_Cuda, self).__init__(*args, **kwargs)
def test_consistent_inner_fct(self):
    # Test that scan does not falsely detect inconsistencies in a valid
    # inner graph
    rs = theano.sandbox.rng_mrg.MRG_RandomStreams(use_cuda=True)
    output, _ = theano.scan(lambda: rs.uniform((3,), dtype="float32"),
                            n_steps=3)
    pickle.loads(pickle.dumps(output))

    # Also ensure that, after compilation, the Scan has been moved
    # to the gpu
    fct = theano.function([], output, mode=self.mode_with_gpu)
    scan_nodes = scan_nodes_from_fct(fct)
    assert len(scan_nodes) == 1
    assert self.is_scan_on_gpu(scan_nodes[0])
def test_n_samples_compatibility():
    """
    This test checks that the recent change to MultinomialFromUniform is
    still compatible with the old interface. Here I load a graph that was
    created (using the old interface) as follows:

        RandomStreams = theano.sandbox.rng_mrg.MRG_RandomStreams
        th_rng = RandomStreams(12345)
        X = T.matrix('X')
        pvals = T.exp(X)
        pvals = pvals / pvals.sum(axis=1, keepdims=True)
        samples = th_rng.multinomial(pvals=pvals)
        pickle.dump([X, samples], open("multinomial_test_graph.pkl", "w"))
    """
    folder = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(folder, "multinomial_test_graph.pkl"),
              "rb") as pkl_file:
        if PY3:
            u = CompatUnpickler(pkl_file, encoding="latin1")
        else:
            u = CompatUnpickler(pkl_file)
        try:
            X, samples = u.load()
        except ImportError:
            # Windows sometimes fails with nonsensical errors like:
            #   ImportError: No module named type
            #   ImportError: No module named copy_reg
            # when "type" and "copy_reg" are builtin modules.
            if sys.platform == 'win32':
                exc_type, exc_value, exc_trace = sys.exc_info()
                reraise(SkipTest, exc_value, exc_trace)
            raise

        f = theano.function([X], samples)
        res = f(numpy.random.randn(20, 10))
        assert numpy.all(res.sum(axis=1) == 1)
def t_binomial(mean, size, const_size, var_input, input, steps, rtol):
    R = MRG_RandomStreams(234, use_cuda=False)
    u = R.binomial(size=size, p=mean)
    f = theano.function(var_input, u, mode=mode)
    out = f(*input)

    # Increase the number of steps if the sizes imply only a few samples
    if numpy.prod(const_size) < 10:
        steps_ = steps * 100
    else:
        steps_ = steps
    basictest(f, steps_, const_size, prefix='mrg cpu',
              inputs=input, allow_01=True,
              target_avg=mean, mean_rtol=rtol)

    if mode != 'FAST_COMPILE' and cuda_available:
        R = MRG_RandomStreams(234, use_cuda=True)
        u = R.binomial(size=size, p=mean, dtype='float32')
        # the GPU version of this test only makes sense with float32 output
        assert u.dtype == 'float32'
        f = theano.function(
            var_input,
            theano.Out(theano.sandbox.cuda.basic_ops.gpu_from_host(u),
                       borrow=True),
            mode=mode_with_gpu)
        gpu_out = numpy.asarray(f(*input))

        basictest(f, steps_, const_size, prefix='mrg gpu',
                  inputs=input, allow_01=True,
                  target_avg=mean, mean_rtol=rtol)
        numpy.testing.assert_array_almost_equal(out, gpu_out, decimal=6)

    RR = theano.tensor.shared_randomstreams.RandomStreams(234)
    uu = RR.binomial(size=size, p=mean)
    ff = theano.function(var_input, uu, mode=mode)
    # It's not our problem if numpy generates 0 or 1
    basictest(ff, steps_, const_size, prefix='numpy', allow_01=True,
              inputs=input, target_avg=mean, mean_rtol=rtol)
def gemm_conv_op(img, kern, border_mode):
    kern = theano.sandbox.cuda.basic_ops.gpu_contiguous(
        kern[:, :, ::-1, ::-1])
    y = theano.sandbox.cuda.blas.GpuCorrMM(border_mode=border_mode)(
        img, kern)
    return y
def gemm_op(mode, subsample):
    return theano.sandbox.cuda.blas.GpuCorrMM(mode, subsample)
def test_viewop_gpu():
    from theano.sandbox import cuda
    if cuda.cuda_available is False:
        raise SkipTest('Optional package cuda disabled')
    _x = theano.tensor.fvector('x')
    x = cuda.gpu_from_host(_x)
    _out = theano.compile.ViewOp()(x)
    out = cuda.host_from_gpu(_out)
    # _x is the free input of the graph; x has an owner (gpu_from_host)
    # and so cannot itself be a function input.
    f = theano.function([_x], out, mode=mode_with_gpu)
    data = numpy.array([1, 2, 3], dtype='float32')
    assert numpy.allclose(f(data), data)
def contains_inf(arr, node=None, var=None):
    """
    Test whether a numpy.ndarray contains any `np.inf` values.

    Parameters
    ----------
    arr : np.ndarray or output of any Theano op
    node : None or an Apply instance.
        If arr is the output of a Theano op, the node associated to it.
    var : The Theano symbolic variable.

    Returns
    -------
    contains_inf : bool
        `True` if the array contains any `np.inf` values, `False` otherwise.

    Notes
    -----
    Tests for the presence of `np.inf`'s by determining whether the
    values returned by `np.nanmin(arr)` and `np.nanmax(arr)` are finite.
    This approach is more memory efficient than the obvious alternative,
    calling `np.any(np.isinf(ndarray))`, which requires the construction
    of a boolean array with the same shape as the input array.
    """
    if isinstance(arr, theano.gof.type._cdata_type):
        return False
    elif isinstance(arr, np.random.mtrand.RandomState):
        return False
    elif var and getattr(var.tag, 'is_rng', False):
        return False
    elif isinstance(arr, slice):
        return False
    elif arr.size == 0:
        return False
    elif cuda.cuda_available and isinstance(arr, cuda.CudaNdarray):
        if (node and hasattr(theano.sandbox, 'rng_mrg') and
            isinstance(
                node.op,
                # It stores ints in a float container
                theano.sandbox.rng_mrg.GPU_mrg_uniform)):
            return False
        else:
            compile_gpu_func(False, True, False)
            return (np.isinf(f_gpumin(arr.reshape(arr.size))) or
                    np.isinf(f_gpumax(arr.reshape(arr.size))))
    elif pygpu_available and isinstance(arr, GpuArray):
        return (np.isinf(f_gpua_min(arr.reshape(arr.size))) or
                np.isinf(f_gpua_max(arr.reshape(arr.size))))

    return np.isinf(np.nanmax(arr)) or np.isinf(np.nanmin(arr))
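The trick described in the Notes section is worth spelling out: any inf in the array forces np.nanmax (or np.nanmin, for -inf) to be infinite, so two scalar reductions replace a full boolean mask. A quick standalone demonstration in plain numpy:

import numpy as np

arr = np.array([1.0, np.nan, -np.inf])
# The nan-aware reductions ignore NaN but propagate infinities...
print(np.nanmin(arr), np.nanmax(arr))  # -inf 1.0
# ...so this detects inf without materialising a boolean array:
print(np.isinf(np.nanmax(arr)) or np.isinf(np.nanmin(arr)))  # True
# Equivalent result, but allocates a boolean mask the same size as arr:
print(np.any(np.isinf(arr)))  # True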
def traverse(out, x, x_copy, d, visited=None):
    """
    Function used by scan to parse the tree and figure out which nodes
    it needs to replace.

    There are two options:
        1) x and x_copy are both on the host; then you would replace x
           with x_copy.
        2) x is on the gpu and x_copy is on the host; then you need to
           replace host_from_gpu(x) with x_copy.

    This happens because initially shared variables are on GPU, which is
    fine for the main computational graph but confuses things a bit for
    the inner graph of scan.
    """
    # ``visited`` is a set of nodes that are already known and don't need
    # to be checked again, speeding up the traversal of multiply-connected
    # graphs. If a ``visited`` set is given, it will be updated in-place
    # so the caller knows which nodes we have seen.
    if visited is None:
        visited = set()
    if out in visited:
        return d
    visited.add(out)
    from theano.sandbox import cuda
    from theano.gpuarray.basic_ops import gpu_from_host, host_from_gpu
    from theano.gpuarray import pygpu_activated
    from theano.gpuarray.type import GpuArrayType
    if out == x:
        if isinstance(x.type, cuda.CudaNdarrayType):
            d[out] = cuda.gpu_from_host(x_copy)
        else:
            assert isinstance(x.type, GpuArrayType)
            d[out] = gpu_from_host(x.type.context_name)(x_copy)
        return d
    elif out.owner is None:
        return d
    elif (cuda.cuda_available and
          out.owner.op == cuda.host_from_gpu and
          out.owner.inputs == [x]):
        d[out] = tensor.as_tensor_variable(x_copy)
        return d
    elif (pygpu_activated and
          out.owner.op == host_from_gpu and
          out.owner.inputs == [x]):
        d[out] = tensor.as_tensor_variable(x_copy)
        return d
    else:
        for inp in out.owner.inputs:
            d = traverse(inp, x, x_copy, d, visited)
        return d


# Hashing a dictionary/list/tuple by xoring the hash of each element
def test_multinomial():
    steps = 100
    mode_ = mode
    if mode == 'FAST_COMPILE':
        mode_ = 'FAST_RUN'

    if (mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE'] or
            mode == 'Mode' and config.linker in ['py']):
        sample_size = (49, 5)
    else:
        sample_size = (450, 6)
    mode_ = theano.compile.mode.get_mode(mode_)
    # print ''
    # print 'ON CPU:'

    pvals = numpy.asarray(numpy.random.uniform(size=sample_size))
    pvals = numpy.apply_along_axis(lambda row: row / numpy.sum(row), 1, pvals)
    R = MRG_RandomStreams(234, use_cuda=False)
    # Note: we specify `nstreams` to avoid a warning.
    m = R.multinomial(pvals=pvals, dtype=config.floatX, nstreams=30 * 256)
    f = theano.function([], m, mode=mode_)
    # theano.printing.debugprint(f)
    out = f()
    basic_multinomialtest(f, steps, sample_size, pvals, n_samples=1,
                          prefix='mrg ')

    sys.stdout.flush()

    if mode != 'FAST_COMPILE' and cuda_available:
        # print ''
        # print 'ON GPU:'
        R = MRG_RandomStreams(234, use_cuda=True)
        pvals = numpy.asarray(pvals, dtype='float32')
        # We give the number of streams to avoid a warning.
        n = R.multinomial(pvals=pvals, dtype='float32', nstreams=30 * 256)
        # the GPU version of this test only makes sense with float32 output
        assert n.dtype == 'float32'
        f = theano.function(
            [],
            theano.sandbox.cuda.basic_ops.gpu_from_host(n),
            mode=mode_.including('gpu'))

        # theano.printing.debugprint(f)
        gpu_out = f()
        sys.stdout.flush()
        basic_multinomialtest(f, steps, sample_size, pvals, n_samples=1,
                              prefix='gpu mrg ')
        numpy.testing.assert_array_almost_equal(out, gpu_out, decimal=6)
def gemm_directly(bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2, subsx, subsy,
                  direction):
    ishape = (bs, ch, rImg1, rImg2)
    kshape = (nf, ch, rFlt1, rFlt2)
    subsample = (subsx, subsy)

    npy_img = theano._asarray(numpy.random.rand(*ishape), dtype='float32')
    npy_kern = theano._asarray(numpy.random.rand(*kshape), dtype='float32')

    if direction == 'fprop':
        i = cuda.CudaNdarrayType(
            broadcastable=[sh == 1 for sh in npy_img.shape])()
        k = cuda.CudaNdarrayType(
            broadcastable=[sh == 1 for sh in npy_kern.shape])()

        cpuval = py_conv(npy_img, npy_kern, 'valid', subsample)
        op = theano.sandbox.cuda.blas.GpuCorrMM(border_mode='valid',
                                                subsample=subsample)(i, k)
        f = theano.function([i, k], op, mode=theano_mode)
        gpuval = f(npy_img, npy_kern[:, :, ::-1, ::-1])
    elif direction == 'bprop img':
        i = cuda.CudaNdarrayType(
            broadcastable=[sh == 1
                           for sh in npy_kern.transpose(1, 0, 2, 3).shape])()
        k = cuda.CudaNdarrayType(
            broadcastable=[sh == 1 for sh in npy_img.shape])()

        cpuval = py_conv(npy_img, npy_kern, 'full', subsample)
        op = theano.sandbox.cuda.blas.GpuCorrMM_gradInputs(
            border_mode='valid', subsample=subsample)(i, k)
        f = theano.function([i, k], op, mode=theano_mode)
        gpuval = f(npy_kern.transpose(1, 0, 2, 3), npy_img)
    elif direction == 'bprop kern':
        i = cuda.CudaNdarrayType(
            broadcastable=[sh == 1
                           for sh in npy_img.transpose(1, 0, 2, 3).shape])()
        k = cuda.CudaNdarrayType(
            broadcastable=[sh == 1
                           for sh in npy_kern.transpose(1, 0, 2, 3).shape])()

        cpuval = py_conv(npy_img, npy_kern, 'valid', subsample)
        op = theano.sandbox.cuda.blas.GpuCorrMM_gradWeights(
            border_mode='valid', subsample=subsample)(i, k)
        f = theano.function([i, k], op, mode=theano_mode)
        gpuval = numpy.array(f(
            npy_img.transpose(1, 0, 2, 3),
            npy_kern.transpose(1, 0, 2, 3)[:, :, ::-1, ::-1])
        ).transpose(1, 0, 2, 3)

    assert_allclose(cpuval, gpuval, rtol=1e-4)