The following code examples, extracted from open-source Python projects, illustrate how to use torch.backends.cudnn.get_handle().
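All of the examples share the same pattern: switch to the device of the tensor being operated on with torch.cuda.device_of, fetch the cuDNN handle for that device with cudnn.get_handle(), and pass the handle as the first argument of the raw cudnn.lib.* calls. A minimal sketch of that pattern, assuming the legacy Python-level cuDNN bindings used in these examples (get_handle() is not exposed by recent PyTorch releases):

import torch
from torch.backends import cudnn

x = torch.cuda.FloatTensor(4, 8)      # any CUDA tensor; used only to select the device
with torch.cuda.device_of(x):         # make sure the handle matches x's device
    handle = cudnn.get_handle()       # cuDNN handle for the current device
    # The handle is then passed as the first argument of raw cuDNN calls,
    # e.g. cudnn.lib.cudnnRNNBackwardWeights(handle, ...), as in the examples below.
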
def backward_weight(fn, input, hx, output, weight, grad_weight):
    with torch.cuda.device_of(input):
        handle = cudnn.get_handle()
        if fn.mode == cudnn.CUDNN_LSTM:
            hx, cx = hx
        else:
            cx = None

        if fn.batch_first:
            input = input.transpose(1, 2)
            output = output.transpose(1, 2)

        input_size = _input_size(fn)
        hidden_size = _hidden_size(fn)
        if not fn.train:
            raise RuntimeError('backward_weight can only be called when training!')
        if fn.dropout != 0 and lib.version < 5103:
            raise RuntimeError('dropout supported only in cudnn v 5.1 and above')
        if tuple(input.size()) != input_size:
            raise RuntimeError('Expected input size {}, got {}'.format(
                input_size, tuple(input.size())))
        if not fn.train:
            raise RuntimeError('backward_weight can only be called when training!')
        if tuple(hx.size()) != hidden_size:
            raise RuntimeError('Expected input size {}, got {}'.format(
                hidden_size, hx.size()))

        x = input.contiguous()
        y = output
        dw = fn.weight_buf.new().resize_as_(fn.weight_buf).zero_()

        check_error(cudnn.lib.cudnnRNNBackwardWeights(
            handle,
            fn.rnn_desc,
            fn.seq_length,
            fn.x_descs, ctypes.c_void_p(x.data_ptr()),
            fn.hx_desc, ctypes.c_void_p(hx.data_ptr()),
            fn.y_descs, ctypes.c_void_p(y.data_ptr()),
            ctypes.c_void_p(fn.workspace.data_ptr()), fn.workspace.size(0),
            fn.w_desc, ctypes.c_void_p(dw.data_ptr()),
            ctypes.c_void_p(fn.reserve.data_ptr()), fn.reserve.size(0)
        ))

        # copy the weights from the weight_buf into grad_weight
        grad_params = get_parameters(fn, handle, dw)
        _copyParams(grad_params, grad_weight)
        return grad_weight

def backward_weight(fn, input, hx, output, weight, grad_weight):
    with torch.cuda.device_of(input):
        is_input_packed = fn.batch_sizes is not None
        handle = cudnn.get_handle()
        if fn.mode == cudnn.CUDNN_LSTM:
            hx, cx = hx
        else:
            cx = None

        if fn.batch_first and not is_input_packed:
            input = input.transpose(0, 1)
            output = output.transpose(0, 1)

        input_size = _input_size(fn, input)
        hidden_size = _hidden_size(fn)
        if not fn.requires_grad:
            raise RuntimeError('backward_weight can only be called when the function requires grad!')
        if fn.dropout != 0 and cudnn.version() < 5103:
            raise RuntimeError('dropout supported only in cudnn v 5.1 and above')
        if tuple(input.size()) != input_size:
            raise RuntimeError('Expected input size {}, got {}'.format(
                input_size, tuple(input.size())))
        if tuple(hx.size()) != hidden_size:
            raise RuntimeError('Expected input size {}, got {}'.format(
                hidden_size, hx.size()))

        assert hx.is_contiguous()
        assert cx is None or cx.is_contiguous()
        x = input.contiguous()
        y = output
        dw = fn.weight_buf.new().resize_as_(fn.weight_buf).zero_()

        check_error(cudnn.lib.cudnnRNNBackwardWeights(
            handle,
            fn.rnn_desc,
            fn.seq_length,
            fn.x_descs, ctypes.c_void_p(x.data_ptr()),
            fn.hx_desc, ctypes.c_void_p(hx.data_ptr()),
            fn.y_descs, ctypes.c_void_p(y.data_ptr()),
            ctypes.c_void_p(fn.workspace.data_ptr()), fn.workspace.size(0),
            fn.w_desc, ctypes.c_void_p(dw.data_ptr()),
            ctypes.c_void_p(fn.reserve.data_ptr()), fn.reserve.size(0)
        ))

        # copy the weights from the weight_buf into grad_weight
        grad_params = get_parameters(fn, handle, dw)
        _copyParams(grad_params, grad_weight)
        return grad_weight

def backward_weight(fn, input, hx, output, weight, grad_weight):
    with torch.cuda.device_of(input):
        is_input_packed = fn.batch_sizes is not None
        handle = cudnn.get_handle()
        if fn.mode == cudnn.CUDNN_LSTM:
            hx, cx = hx
        else:
            cx = None

        if fn.batch_first and not is_input_packed:
            input = input.transpose(0, 1)
            output = output.transpose(0, 1)

        input_size = _input_size(fn, input)
        hidden_size = _hidden_size(fn)
        if not fn.requires_grad:
            raise RuntimeError('backward_weight can only be called when the function requires grad!')
        if fn.dropout != 0 and cudnn.version() < 5103:
            raise RuntimeError('dropout supported only in cudnn v 5.1 and above')
        if tuple(input.size()) != input_size:
            raise RuntimeError('Expected input size {}, got {}'.format(
                input_size, tuple(input.size())))
        if tuple(hx.size()) != hidden_size:
            raise RuntimeError('Expected input size {}, got {}'.format(
                hidden_size, hx.size()))

        assert hx.is_contiguous()
        assert cx is None or cx.is_contiguous()
        x = input.contiguous()
        y = output
        dw = fn.weight_buf.new().resize_as_(fn.weight_buf).zero_()

        with torch.cuda.device_of(input):
            workspace = torch.cuda.ByteTensor(fn.workspace_size)
        check_error(cudnn.lib.cudnnRNNBackwardWeights(
            handle,
            fn.rnn_desc,
            fn.seq_length,
            fn.x_descs, ctypes.c_void_p(x.data_ptr()),
            fn.hx_desc, ctypes.c_void_p(hx.data_ptr()),
            fn.y_descs, ctypes.c_void_p(y.data_ptr()),
            ctypes.c_void_p(workspace.data_ptr()), workspace.size(0),
            fn.w_desc, ctypes.c_void_p(dw.data_ptr()),
            ctypes.c_void_p(fn.reserve.data_ptr()), fn.reserve.size(0)
        ))

        # copy the weights from the weight_buf into grad_weight
        grad_params = get_parameters(fn, handle, dw)
        _copyParams(grad_params, grad_weight)
        return grad_weight

def flatten_parameters(self):
    """Resets parameter data pointer so that they can use faster code paths.

    Right now, this works only if the module is on the GPU and cuDNN is enabled.
    Otherwise, it's a no-op.
    """
    any_param = next(self.parameters()).data
    if not any_param.is_cuda or not torch.backends.cudnn.is_acceptable(any_param):
        self._data_ptrs = []
        return

    with torch.cuda.device_of(any_param):
        # This is quite ugly, but it allows us to reuse the cuDNN code without larger
        # modifications. It's really a low-level API that doesn't belong in here, but
        # let's make this exception.
        from torch.backends.cudnn import rnn
        from torch.backends import cudnn
        from torch.nn._functions.rnn import CudnnRNN
        handle = cudnn.get_handle()
        with warnings.catch_warnings(record=True):
            fn = CudnnRNN(
                self.mode,
                self.input_size,
                self.hidden_size,
                num_layers=self.num_layers,
                batch_first=self.batch_first,
                dropout=self.dropout,
                train=self.training,
                bidirectional=self.bidirectional,
                dropout_state=self.dropout_state,
            )

        # Initialize descriptors
        fn.datatype = cudnn._typemap[any_param.type()]
        fn.x_descs = cudnn.descriptor(any_param.new(1, self.input_size), 1)
        fn.rnn_desc = rnn.init_rnn_descriptor(fn, handle)

        # Allocate buffer to hold the weights
        self._param_buf_size = rnn.get_num_weights(handle, fn.rnn_desc, fn.x_descs[0], fn.datatype)
        fn.weight_buf = any_param.new(self._param_buf_size).zero_()
        fn.w_desc = rnn.init_weight_descriptor(fn, fn.weight_buf)

        # Slice off views into weight_buf
        params = rnn.get_parameters(fn, handle, fn.weight_buf)
        all_weights = [[p.data for p in l] for l in self.all_weights]

        # Copy weights and update their storage
        rnn._copyParams(all_weights, params)
        for orig_layer_param, new_layer_param in zip(all_weights, params):
            for orig_param, new_param in zip(orig_layer_param, new_layer_param):
                orig_param.set_(new_param.view_as(orig_param))

        self._data_ptrs = list(p.data.data_ptr() for p in self.parameters())
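
For context, flatten_parameters() is also exposed as a public method on the RNN modules themselves. A minimal usage sketch (assumes a CUDA device and a cuDNN-enabled build; otherwise the call is a no-op, as the docstring above notes):

import torch
import torch.nn as nn

# Sizes below are arbitrary, for illustration only.
rnn = nn.LSTM(input_size=10, hidden_size=20, num_layers=2).cuda()
rnn.flatten_parameters()   # repacks the weights into one contiguous buffer for cuDNN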