我们从Python开源项目中,提取了以下10个代码示例,用于说明如何使用torch.Variable()。
def get_dropout_mask(dropout_probability: float, tensor_for_masking: torch.autograd.Variable): """ Computes and returns an element-wise dropout mask for a given tensor, where each element in the mask is dropped out with probability dropout_probability. Note that the mask is NOT applied to the tensor - the tensor is passed to retain the correct CUDA tensor type for the mask. Parameters ---------- dropout_probability : float, required. Probability of dropping a dimension of the input. tensor_for_masking : torch.Variable, required. Returns ------- A torch.FloatTensor consisting of the binary mask scaled by 1/ (1 - dropout_probability). This scaling ensures expected values and variances of the output of applying this mask and the original tensor are the same. """ binary_mask = tensor_for_masking.clone() binary_mask.data.copy_(torch.rand(tensor_for_masking.size()) > dropout_probability) # Scale mask by 1/keep_prob to preserve output statistics. dropout_mask = binary_mask.float().div(1.0 - dropout_probability) return dropout_mask
def add(self, v): if isinstance(v, Variable): count = v.data.numel() v = v.data.sum() elif isinstance(v, torch.Tensor): count = v.numel() v = v.sum() self.n_count += count self.sum += v
def get_text_field_mask(text_field_tensors: Dict[str, torch.Tensor], num_wrapping_dims: int = 0) -> torch.LongTensor: """ Takes the dictionary of tensors produced by a ``TextField`` and returns a mask with 0 where the tokens are padding, and 1 otherwise. We also handle ``TextFields`` wrapped by an arbitrary number of ``ListFields``, where the number of wrapping ``ListFields`` is given by ``num_wrapping_dims``. If ``num_wrapping_dims == 0``, the returned mask has shape ``(batch_size, num_tokens)``. If ``num_wrapping_dims > 0`` then the returned mask has ``num_wrapping_dims`` extra dimensions, so the shape will be ``(batch_size, ..., num_tokens)``. There could be several entries in the tensor dictionary with different shapes (e.g., one for word ids, one for character ids). In order to get a token mask, we use the tensor in the dictionary with the lowest number of dimensions. After subtracting ``num_wrapping_dims``, if this tensor has two dimensions we assume it has shape ``(batch_size, ..., num_tokens)``, and use it for the mask. If instead it has three dimensions, we assume it has shape ``(batch_size, ..., num_tokens, num_features)``, and sum over the last dimension to produce the mask. Most frequently this will be a character id tensor, but it could also be a featurized representation of each token, etc. NOTE: Our functions for generating masks create torch.LongTensors, because using torch.ByteTensors inside Variables makes it easy to run into overflow errors when doing mask manipulation, such as summing to get the lengths of sequences - see below. >>> mask = torch.ones([260]).byte() >>> mask.sum() # equals 260. >>> var_mask = torch.autograd.Variable(mask) >>> var_mask.sum() # equals 4, due to 8 bit precision - the sum overflows. """ tensor_dims = [(tensor.dim(), tensor) for tensor in text_field_tensors.values()] tensor_dims.sort(key=lambda x: x[0]) smallest_dim = tensor_dims[0][0] - num_wrapping_dims if smallest_dim == 2: token_tensor = tensor_dims[0][1] return (token_tensor != 0).long() elif smallest_dim == 3: character_tensor = tensor_dims[0][1] return ((character_tensor > 0).long().sum(dim=-1) > 0).long() else: raise ValueError("Expected a tensor with dimension 2 or 3, found {}".format(smallest_dim))
def replace_masked_values(tensor: Variable, mask: Variable, replace_with: float) -> Variable: """ Replaces all masked values in ``tensor`` with ``replace_with``. ``mask`` must be broadcastable to the same shape as ``tensor``. We require that ``tensor.dim() == mask.dim()``, as otherwise we won't know which dimensions of the mask to unsqueeze. """ # We'll build a tensor of the same shape as `tensor`, zero out masked values, then add back in # the `replace_with` value. if tensor.dim() != mask.dim(): raise ConfigurationError("tensor.dim() (%d) != mask.dim() (%d)" % (tensor.dim(), mask.dim())) one_minus_mask = 1.0 - mask values_to_add = replace_with * one_minus_mask return tensor * mask + values_to_add
def get_range_vector(size: int, is_cuda: bool) -> torch.Tensor: """ Returns a range vector with the desired size, starting at 0. The CUDA implementation is meant to avoid copy data from CPU to GPU. """ if is_cuda: indices = torch.cuda.LongTensor(size).fill_(1).cumsum(0) - 1 else: indices = torch.arange(0, size).long() return Variable(indices, requires_grad=False)
def remove_sentence_boundaries(tensor: torch.Tensor, mask: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: """ Remove begin/end of sentence embeddings from the batch of sentences. Given a batch of sentences with size ``(batch_size, timesteps, dim)`` this returns a tensor of shape ``(batch_size, timesteps - 2, dim)`` after removing the beginning and end sentence markers. The sentences are assumed to be padded on the right, with the beginning of each sentence assumed to occur at index 0 (i.e., ``mask[:, 0]`` is assumed to be 1). Returns both the new tensor and updated mask. This function is the inverse of ``add_sentence_boundary_token_ids``. Parameters ---------- tensor : ``torch.Tensor`` A tensor of shape ``(batch_size, timesteps, dim)`` mask : ``torch.Tensor`` A tensor of shape ``(batch_size, timesteps)`` Returns ------- tensor_without_boundary_tokens : ``torch.Tensor`` The tensor after removing the boundary tokens of shape ``(batch_size, timesteps - 2, dim)`` new_mask : ``torch.Tensor`` The new mask for the tensor of shape ``(batch_size, timesteps - 2)``. """ # TODO: matthewp, profile this transfer sequence_lengths = mask.sum(dim=1).data.cpu().numpy() tensor_shape = list(tensor.data.shape) new_shape = list(tensor_shape) new_shape[1] = tensor_shape[1] - 2 tensor_without_boundary_tokens = Variable(tensor.data.new(*new_shape).fill_(0)) new_mask = Variable(tensor.data.new(new_shape[0], new_shape[1]).fill_(0)).long() for i, j in enumerate(sequence_lengths): if j > 2: tensor_without_boundary_tokens[i, :(j - 2), :] = tensor[i, 1:(j - 1), :] new_mask[i, :(j - 2)] = 1 return tensor_without_boundary_tokens, new_mask
def query(self, images): # images: torch.Variable of size [batch_size, channel * 2, w, h] if self.pool_size == 0: return images return_images = [] for image in images.data: # traverse data in batch dimension image = torch.unsqueeze(image, 0) if self.num_imgs < self.pool_size: self.num_imgs = self.num_imgs + 1 self.images.append(image) return_images.append(image) else: p = random.uniform(0, 1) # randomly substitute if p > 0.5: random_id = random.randint(0, self.pool_size-1) tmp = self.images[random_id].clone() self.images[random_id] = image return_images.append(tmp) else: return_images.append(image) return_images = Variable(torch.cat(return_images, 0)) return return_images
def sort_batch_by_length(tensor: torch.autograd.Variable, sequence_lengths: torch.autograd.Variable): """ Sort a batch first tensor by some specified lengths. Parameters ---------- tensor : Variable(torch.FloatTensor), required. A batch first Pytorch tensor. sequence_lengths : Variable(torch.LongTensor), required. A tensor representing the lengths of some dimension of the tensor which we want to sort by. Returns ------- sorted_tensor : Variable(torch.FloatTensor) The original tensor sorted along the batch dimension with respect to sequence_lengths. sorted_sequence_lengths : Variable(torch.LongTensor) The original sequence_lengths sorted by decreasing size. restoration_indices : Variable(torch.LongTensor) Indices into the sorted_tensor such that ``sorted_tensor.index_select(0, restoration_indices) == original_tensor`` permuation_index : Variable(torch.LongTensor) The indices used to sort the tensor. This is useful if you want to sort many tensors using the same ordering. """ if not isinstance(tensor, Variable) or not isinstance(sequence_lengths, Variable): raise ConfigurationError("Both the tensor and sequence lengths must be torch.autograd.Variables.") sorted_sequence_lengths, permutation_index = sequence_lengths.sort(0, descending=True) sorted_tensor = tensor.index_select(0, permutation_index) # This is ugly, but required - we are creating a new variable at runtime, so we # must ensure it has the correct CUDA vs non-CUDA type. We do this by cloning and # refilling one of the inputs to the function. index_range = sequence_lengths.data.clone().copy_(torch.arange(0, len(sequence_lengths))) # This is the equivalent of zipping with index, sorting by the original # sequence lengths and returning the now sorted indices. index_range = Variable(index_range.long()) _, reverse_mapping = permutation_index.sort(0, descending=False) restoration_indices = index_range.index_select(0, reverse_mapping) return sorted_tensor, sorted_sequence_lengths, restoration_indices, permutation_index
def add_positional_features(tensor: torch.Tensor, min_timescale: float = 1.0, max_timescale: float = 1.0e4): # pylint: disable=line-too-long """ Implements the frequency-based positional encoding described in `Attention is all you Need <https://www.semanticscholar.org/paper/Attention-Is-All-You-Need-Vaswani-Shazeer/0737da0767d77606169cbf4187b83e1ab62f6077>`_ . Adds sinusoids of different frequencies to a ``Tensor``. A sinusoid of a different frequency and phase is added to each dimension of the input ``Tensor``. This allows the attention heads to use absolute and relative positions. The number of timescales is equal to hidden_dim / 2 within the range (min_timescale, max_timescale). For each timescale, the two sinusoidal signals sin(timestep / timescale) and cos(timestep / timescale) are generated and concatenated along the hidden_dim dimension. Parameters ---------- tensor : ``torch.Tensor`` a Tensor with shape (batch_size, timesteps, hidden_dim). min_timescale : ``float``, optional (default = 1.0) The smallest timescale to use. max_timescale : ``float``, optional (default = 1.0e4) The largest timescale to use. Returns ------- The input tensor augmented with the sinusoidal frequencies. """ _, timesteps, hidden_dim = tensor.size() timestep_range = get_range_vector(timesteps, tensor.is_cuda).data.float() # We're generating both cos and sin frequencies, # so half for each. num_timescales = hidden_dim // 2 timescale_range = get_range_vector(num_timescales, tensor.is_cuda).data.float() log_timescale_increments = math.log(float(max_timescale) / float(min_timescale)) / float(num_timescales - 1) inverse_timescales = min_timescale * torch.exp(timescale_range * -log_timescale_increments) # Broadcasted multiplication - shape (timesteps, num_timescales) scaled_time = timestep_range.unsqueeze(1) * inverse_timescales.unsqueeze(0) # shape (timesteps, 2 * num_timescales) sinusoids = Variable(torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], 1)) if hidden_dim % 2 != 0: # if the number of dimensions is odd, the cos and sin # timescales had size (hidden_dim - 1) / 2, so we need # to add a row of zeros to make up the difference. sinusoids = torch.cat([sinusoids, Variable(sinusoids.data.new(timesteps, 1).fill_(0))], 1) return tensor + sinusoids.unsqueeze(0)