The following 41 code examples, extracted from open-source Python projects, illustrate how to use torch.multinomial().
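Before the project examples, here is a minimal, self-contained sketch of the basic call; the weight values and variable names are illustrative only and do not come from any of the listed projects. torch.multinomial(input, num_samples, replacement=False) treats each row of a non-negative (not necessarily normalized) weight tensor as an unnormalized distribution and returns sampled indices.

import torch

# 1-D weights define a single distribution. The weights need not sum to 1;
# torch.multinomial normalizes them internally. Index 3 has weight 0,
# so it can never be drawn.
weights = torch.tensor([10.0, 3.0, 1.0, 0.0])
idx = torch.multinomial(weights, 2, replacement=False)     # e.g. tensor([0, 1])

# 2-D weights: one independent draw per row; the result has shape
# (num_rows, num_samples).
batch_weights = torch.rand(3, 5)
batch_idx = torch.multinomial(batch_weights, 4, replacement=True)
print(idx, batch_idx.shape)                                 # torch.Size([3, 4])

Most of the examples below follow this pattern: turn model scores into (possibly temperature-scaled) probabilities, then call torch.multinomial to sample the next token, class, or index.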
def sample(self, seed, maximumLength, T=1):
    h = self.h0(seed).view(self.layers, 1, self.H)
    accumulator = ["START"]
    for _ in range(maximumLength):
        i = self.targetsOfSymbols([accumulator[-1]])[:, 0]
        output, h = self(i, h)
        distribution = output.data.view(-1)/T
        distribution = F.log_softmax(distribution).data
        distribution = distribution.exp()

        c = torch.multinomial(distribution, 1)[0]
        if self.lexicon[c] == "END":
            break

        accumulator.append(self.lexicon[c])

    return accumulator[1:]
def sample(self, features):
    result = ["START"]

    # (1,1,F)
    features = features.view(-1).unsqueeze(0).unsqueeze(0)  # features: 1x1x2560
    states = None

    while True:
        e = self.embedding(variable([symbolToIndex[result[-1]]]).view((1, -1)))
        recurrentInput = torch.cat((features, e), 2)
        output, states = self.rnn(recurrentInput, states)
        distribution = self.tokenPrediction(output).view(-1)
        distribution = F.log_softmax(distribution).data.exp()
        draw = torch.multinomial(distribution, 1)[0]
        c = LEXICON[draw]
        if len(result) > 20 or c == "END":
            return result[1:]
        else:
            result.append(c)
def generate(self, prime_str, predict_len=100, temperature=0.8):
    predicted = prime_str
    hidden = self.decoder.init_hidden()
    prime_input = char_tensor(prime_str, self.decoder.gpu)

    # Use prime string to build up hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = self.decoder(prime_input[p], hidden)
    inp = prime_input[-1]

    for p in range(predict_len):
        out, hidden = self.decoder(inp, hidden)

        # sample from network as a multinomial distribution
        out_dist = out.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(out_dist, 1)[0]

        # Add predicted character to string and use as next input
        predicted_char = all_characters[top_i]
        predicted += predicted_char
        inp = char_tensor(predicted_char, self.decoder.gpu)

    return predicted
def sample(self, num_samples, start_letter=0):
    """
    Samples the network and returns num_samples samples of length max_seq_len.

    Outputs: samples, hidden
        - samples: num_samples x max_seq_length (a sampled sequence in each row)
    """
    samples = torch.zeros(num_samples, self.max_seq_len).type(torch.LongTensor)

    h = self.init_hidden(num_samples)
    inp = autograd.Variable(torch.LongTensor([start_letter]*num_samples))

    if self.gpu:
        samples = samples.cuda()
        inp = inp.cuda()

    for i in range(self.max_seq_len):
        out, h = self.forward(inp, h)               # out: num_samples x vocab_size
        out = torch.multinomial(torch.exp(out), 1)  # num_samples x 1 (sampling from each row)
        samples[:, i] = out.data

        inp = out.view(-1)

    return samples
def test_multinomial(self):
    # with replacement
    n_row = 3
    for n_col in range(4, 5+1):
        prob_dist = torch.rand(n_row, n_col)
        prob_dist.select(1, n_col-1).fill_(0)  # index n_col shouldn't be sampled
        n_sample = n_col
        sample_indices = torch.multinomial(prob_dist, n_sample, True)
        self.assertEqual(prob_dist.dim(), 2)
        self.assertEqual(sample_indices.size(1), n_sample)
        for index in product(range(n_row), range(n_sample)):
            self.assertNotEqual(sample_indices[index], n_col, "sampled an index with zero probability")

    # without replacement
    n_row = 3
    for n_col in range(4, 5+1):
        prob_dist = torch.rand(n_row, n_col)
        prob_dist.select(1, n_col-1).fill_(0)  # index n_col shouldn't be sampled
        n_sample = 3
        sample_indices = torch.multinomial(prob_dist, n_sample, False)
        self.assertEqual(prob_dist.dim(), 2)
        self.assertEqual(sample_indices.size(1), n_sample)
        for i in range(n_row):
            row_samples = {}
            for j in range(n_sample):
                sample_idx = sample_indices[i, j]
                self.assertNotEqual(sample_idx, n_col-1, "sampled an index with zero probability")
                self.assertNotIn(sample_idx, row_samples, "sampled an index twice")
                row_samples[sample_idx] = True

    # vector
    n_col = 4
    prob_dist = torch.rand(n_col)
    n_sample = n_col
    sample_indices = torch.multinomial(prob_dist, n_sample, True)
    s_dim = sample_indices.dim()
    self.assertEqual(sample_indices.dim(), 1, "wrong number of dimensions")
    self.assertEqual(prob_dist.dim(), 1, "wrong number of prob_dist dimensions")
    self.assertEqual(sample_indices.size(0), n_sample, "wrong number of samples")
def torch_multinomial(input, num_samples, replacement=False):
    """
    Like `torch.multinomial()` but works with cuda tensors.
    Does not support keyword argument `out`.
    """
    if input.is_cuda:
        return torch_multinomial(input.cpu(), num_samples, replacement).cuda()
    else:
        return torch.multinomial(input, num_samples, replacement)
def generate(model, prime_str='A', predict_len=100, temperature=0.8, cuda=False):
    hidden = model.init_hidden(1)
    tensor = char_tensor(prime_str, model.mapping)
    prime_input = Variable(tensor.unsqueeze(0))
    #print(prime_input)
    if cuda:
        hidden = tuple(h.cuda() for h in hidden)
        prime_input = prime_input.cuda()
    predicted = prime_str
    model.seq_length = 1
    #print(hidden)
    #print(prime_input[:,0])

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = model(prime_input[:, p], hidden)
    inp = prime_input[:, -1]

    for p in range(predict_len):
        output, hidden = model(inp, hidden)

        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]

        # Add predicted character to string and use as next input
        predicted_char = model.mapping[top_i]
        predicted += predicted_char
        inp = Variable(char_tensor(predicted_char, model.mapping).unsqueeze(0))
        if cuda:
            inp = inp.cuda()

    return predicted

# Run as standalone script
def generate(self, prime_str='int ', predict_len=100, temperature=0.1, cuda=False, args=None, hidden=None):
    prime_input = Variable(char_tensor(prime_str).unsqueeze(0))
    if not hidden:
        hidden = decoder.init_hidden(1)
        prime_input = Variable(char_tensor(prime_str).unsqueeze(0))

    if cuda:
        hidden = hidden.cuda()
        prime_input = prime_input.cuda()

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = decoder(prime_input[:, p], hidden)

    predicted = ''
    inp = prime_input[:, -1]
    p_list = []
    for p in range(predict_len):
        output, hidden = decoder(inp, hidden)

        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        p_list.append(top_i)

        # Add predicted character to string and use as next input
        predicted_char = all_characters[top_i]
        predicted += predicted_char
        inp = Variable(char_tensor(predicted_char).unsqueeze(0))
        if cuda:
            inp = inp.cuda()
    # print (p_list)
    return predicted, hidden
def generate(decoder, prime_str='int ', predict_len=100, temperature=0.35, cuda=False, args=None, hidden=None):
    prime_input = Variable(char_tensor(prime_str).unsqueeze(0))
    if not hidden:
        hidden = decoder.init_hidden(1)
        prime_input = Variable(char_tensor(prime_str).unsqueeze(0))

    if cuda:
        hidden = hidden.cuda()
        prime_input = prime_input.cuda()

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = decoder(prime_input[:, p], hidden)

    predicted = ''
    inp = prime_input[:, -1]
    p_list = []
    for p in range(predict_len):
        output, hidden = decoder(inp, hidden)

        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        p_list.append(top_i)

        # Add predicted character to string and use as next input
        predicted_char = all_characters[top_i]
        predicted += predicted_char
        inp = Variable(char_tensor(predicted_char).unsqueeze(0))
        if cuda:
            inp = inp.cuda()
    # print (p_list)
    return predicted, hidden
def sample_from_probs(probs, top_n=10):
    """ truncated weighted random choice. """
    _, indices = torch.sort(probs)
    # set probabilities after top_n to 0
    probs[indices.data[:-top_n]] = 0
    sampled_index = torch.multinomial(probs, 1)
    return sampled_index
def node_forward(self, inputs, child_c, child_h, training):
    child_h_sum = F.torch.sum(torch.squeeze(child_h, 1), 0, keepdim=True)

    i = F.sigmoid(self.ix(inputs) + self.ih(child_h_sum))
    o = F.sigmoid(self.ox(inputs) + self.oh(child_h_sum))
    u = F.tanh(self.ux(inputs) + self.uh(child_h_sum))

    # add extra singleton dimension
    fx = F.torch.unsqueeze(self.fx(inputs), 1)
    f = F.torch.cat([self.fh(child_hi) + torch.squeeze(fx, 1) for child_hi in child_h], 0)
    # f = torch.squeeze(f, 0)
    f = F.sigmoid(f)
    # removing extra singleton dimension
    f = F.torch.unsqueeze(f, 1)
    fc = F.torch.squeeze(F.torch.mul(f, child_c), 1)

    idx = Var(torch.multinomial(torch.ones(child_c.size(0)), 1), requires_grad=False)
    if self.cuda_flag:
        idx = idx.cuda()

    c = zoneout(
        current_input=F.torch.mul(i, u) + F.torch.sum(fc, 0, keepdim=True),
        previous_input=F.torch.squeeze(child_c.index_select(0, idx), 0) if self.zoneout_choose_child else F.torch.sum(torch.squeeze(child_c, 1), 0, keepdim=True),
        p=self.recurrent_dropout_c,
        training=training,
        mask=self.mask if self.commons_mask else None
    )
    h = zoneout(
        current_input=F.torch.mul(o, F.tanh(c)),
        previous_input=F.torch.squeeze(child_h.index_select(0, idx), 0) if self.zoneout_choose_child else child_h_sum,
        p=self.recurrent_dropout_h,
        training=training,
        mask=self.mask if self.commons_mask else None
    )
    return c, h
def sample(self, num_sample):
    """
    draws a sample from classes based on weights
    """
    return t.multinomial(self.weights, num_sample, True)
def __iter__(self):
    return iter(torch.multinomial(self.weights, self.num_samples, self.replacement))
def test_multinomial(self):
    # with replacement
    n_row = 3
    for n_col in range(4, 5 + 1):
        prob_dist = torch.rand(n_row, n_col)
        prob_dist.select(1, n_col - 1).fill_(0)  # index n_col shouldn't be sampled
        n_sample = n_col
        sample_indices = torch.multinomial(prob_dist, n_sample, True)
        self.assertEqual(prob_dist.dim(), 2)
        self.assertEqual(sample_indices.size(1), n_sample)
        for index in product(range(n_row), range(n_sample)):
            self.assertNotEqual(sample_indices[index], n_col, "sampled an index with zero probability")

    # without replacement
    n_row = 3
    for n_col in range(4, 5 + 1):
        prob_dist = torch.rand(n_row, n_col)
        prob_dist.select(1, n_col - 1).fill_(0)  # index n_col shouldn't be sampled
        n_sample = 3
        sample_indices = torch.multinomial(prob_dist, n_sample, False)
        self.assertEqual(prob_dist.dim(), 2)
        self.assertEqual(sample_indices.size(1), n_sample)
        for i in range(n_row):
            row_samples = {}
            for j in range(n_sample):
                sample_idx = sample_indices[i, j]
                self.assertNotEqual(sample_idx, n_col - 1, "sampled an index with zero probability")
                self.assertNotIn(sample_idx, row_samples, "sampled an index twice")
                row_samples[sample_idx] = True

    # vector
    n_col = 4
    prob_dist = torch.rand(n_col)
    n_sample = n_col
    sample_indices = torch.multinomial(prob_dist, n_sample, True)
    s_dim = sample_indices.dim()
    self.assertEqual(sample_indices.dim(), 1, "wrong number of dimensions")
    self.assertEqual(prob_dist.dim(), 1, "wrong number of prob_dist dimensions")
    self.assertEqual(sample_indices.size(0), n_sample, "wrong number of samples")
def neg_samples(self, batch_size: int):
    n_samples = batch_size * self.n_neg_samples
    return multinomial(self.output_dist, num_samples=n_samples, replacement=True)
def sample(self, n_samples=1):
    mass_function = self.mass_function.data
    res = torch.multinomial(mass_function, n_samples, replacement=True)

    # Sample dimension is first
    if res.ndimension() == 2:
        res = res.t()
    return res
def prepare_mixture_gm_data(arguments):
    dataset = []

    arguments.L2 = 2
    arguments.L1 = 2
    arguments.K = 200

    sig0 = 5
    sig = 0.1
    num_means = arguments.num_means
    means = 5*torch.randn(num_means, arguments.L2)
    arguments.means = means.numpy()

    N = 2000
    mixinds = torch.multinomial(torch.ones(num_means), N, replacement=True)
    obsnoise = torch.randn(N, arguments.L2)

    data = means[mixinds] + obsnoise
    inp = torch.randn(N, arguments.L1)

    dataset1 = TensorDataset(inp, data, [1]*N)
    datasetmix = dataset1

    kwargs = {'num_workers': 1, 'pin_memory': True} if arguments.cuda else {}
    loader1 = data_utils.DataLoader(dataset1, batch_size=arguments.batch_size, shuffle=False, **kwargs)
    loader_mix = data_utils.DataLoader(datasetmix, batch_size=arguments.batch_size, shuffle=False, **kwargs)

    return loader1, loader_mix
def forward(self, fc_feats, att_feats, seq):
    batch_size = fc_feats.size(0)
    state = self.init_hidden(batch_size)
    outputs = []

    for i in range(seq.size(1)):
        if i == 0:
            xt = self.img_embed(fc_feats)
        else:
            if self.training and i >= 2 and self.ss_prob > 0.0: # otherwise no need to sample
                sample_prob = fc_feats.data.new(batch_size).uniform_(0, 1)
                sample_mask = sample_prob < self.ss_prob
                if sample_mask.sum() == 0:
                    it = seq[:, i-1].clone()
                else:
                    sample_ind = sample_mask.nonzero().view(-1)
                    it = seq[:, i-1].data.clone()
                    #prob_prev = torch.exp(outputs[-1].data.index_select(0, sample_ind)) # fetch prev distribution: shape Nx(M+1)
                    #it.index_copy_(0, sample_ind, torch.multinomial(prob_prev, 1).view(-1))
                    prob_prev = torch.exp(outputs[-1].data) # fetch prev distribution: shape Nx(M+1)
                    it.index_copy_(0, sample_ind, torch.multinomial(prob_prev, 1).view(-1).index_select(0, sample_ind))
                    it = Variable(it, requires_grad=False)
            else:
                it = seq[:, i-1].clone()
            # break if all the sequences end
            if i >= 2 and seq[:, i-1].data.sum() == 0:
                break
            xt = self.embed(it)

        output, state = self.core(xt, state)
        output = F.log_softmax(self.logit(output))
        outputs.append(output)

    return torch.cat([_.unsqueeze(1) for _ in outputs[1:]], 1).contiguous()
def forward(self, fc_feats, att_feats, seq):
    batch_size = fc_feats.size(0)
    state = self.init_hidden(fc_feats)
    outputs = []

    for i in range(seq.size(1) - 1):
        if self.training and i >= 1 and self.ss_prob > 0.0: # otherwise no need to sample
            sample_prob = fc_feats.data.new(batch_size).uniform_(0, 1)
            sample_mask = sample_prob < self.ss_prob
            if sample_mask.sum() == 0:
                it = seq[:, i].clone()
            else:
                sample_ind = sample_mask.nonzero().view(-1)
                it = seq[:, i].data.clone()
                #prob_prev = torch.exp(outputs[-1].data.index_select(0, sample_ind)) # fetch prev distribution: shape Nx(M+1)
                #it.index_copy_(0, sample_ind, torch.multinomial(prob_prev, 1).view(-1))
                prob_prev = torch.exp(outputs[-1].data) # fetch prev distribution: shape Nx(M+1)
                it.index_copy_(0, sample_ind, torch.multinomial(prob_prev, 1).view(-1).index_select(0, sample_ind))
                it = Variable(it, requires_grad=False)
        else:
            it = seq[:, i].clone()
        # break if all the sequences end
        if i >= 1 and seq[:, i].data.sum() == 0:
            break

        xt = self.embed(it)

        output, state = self.core(xt, fc_feats, att_feats, state)
        output = F.log_softmax(self.logit(self.dropout(output)))
        outputs.append(output)

    return torch.cat([_.unsqueeze(1) for _ in outputs], 1)
def forward(self, fc_feats, att_feats, seq):
    batch_size = fc_feats.size(0)
    state = self.init_hidden(batch_size)
    outputs = []

    for i in range(seq.size(1)):
        if i == 0:
            xt = self.img_embed(fc_feats)
        else:
            if self.training and i >= 2 and self.ss_prob > 0.0: # otherwise no need to sample
                sample_prob = fc_feats.data.new(batch_size).uniform_(0, 1)
                sample_mask = sample_prob < self.ss_prob
                if sample_mask.sum() == 0:
                    it = seq[:, i-1].clone()
                else:
                    sample_ind = sample_mask.nonzero().view(-1)
                    it = seq[:, i-1].data.clone()
                    #prob_prev = torch.exp(outputs[-1].data.index_select(0, sample_ind)) # fetch prev distribution: shape Nx(M+1)
                    #it.index_copy_(0, sample_ind, torch.multinomial(prob_prev, 1).view(-1))
                    prob_prev = torch.exp(outputs[-1].data) # fetch prev distribution: shape Nx(M+1)
                    it.index_copy_(0, sample_ind, torch.multinomial(prob_prev, 1).view(-1).index_select(0, sample_ind))
                    it = Variable(it, requires_grad=False)
            else:
                it = seq[:, i-1].clone()
            # break if all the sequences end
            if i >= 2 and seq[:, i-1].data.sum() == 0:
                break
            xt = self.embed(it)

        output, state = self.core(xt.unsqueeze(0), state)
        output = F.log_softmax(self.logit(self.dropout(output.squeeze(0))))
        outputs.append(output)

    return torch.cat([_.unsqueeze(1) for _ in outputs[1:]], 1).contiguous()
def forward(self, fc_feats, att_feats, seq):
    batch_size = fc_feats.size(0)
    state = self.init_hidden(batch_size)

    outputs = []

    # Project the attention feats first to reduce memory and computation consumption.
    p_att_feats = self.ctx2att(att_feats.view(-1, self.att_feat_size))
    p_att_feats = p_att_feats.view(*(att_feats.size()[:-1] + (self.att_hid_size,)))

    for i in range(seq.size(1) - 1):
        if self.training and i >= 1 and self.ss_prob > 0.0: # otherwise no need to sample
            sample_prob = fc_feats.data.new(batch_size).uniform_(0, 1)
            sample_mask = sample_prob < self.ss_prob
            if sample_mask.sum() == 0:
                it = seq[:, i].clone()
            else:
                sample_ind = sample_mask.nonzero().view(-1)
                it = seq[:, i].data.clone()
                #prob_prev = torch.exp(outputs[-1].data.index_select(0, sample_ind)) # fetch prev distribution: shape Nx(M+1)
                #it.index_copy_(0, sample_ind, torch.multinomial(prob_prev, 1).view(-1))
                prob_prev = torch.exp(outputs[-1].data) # fetch prev distribution: shape Nx(M+1)
                it.index_copy_(0, sample_ind, torch.multinomial(prob_prev, 1).view(-1).index_select(0, sample_ind))
                it = Variable(it, requires_grad=False)
        else:
            it = seq[:, i].clone()
        # break if all the sequences end
        if i >= 1 and seq[:, i].data.sum() == 0:
            break

        xt = self.embed(it)

        output, state = self.core(xt, fc_feats, att_feats, p_att_feats, state)
        output = F.log_softmax(self.logit(output))
        outputs.append(output)

    return torch.cat([_.unsqueeze(1) for _ in outputs], 1)
def __iter__(self):
    base_samples = torch.arange(0, len(self.weights)).long()
    remaining = self.num_samples - len(self.weights)
    over_samples = torch.multinomial(self.weights, remaining, True)
    samples = torch.cat((base_samples, over_samples), dim=0)
    print('num samples', len(samples))
    return (samples[i] for i in torch.randperm(len(samples)))
def sampleEnvironment(self, s, environments, T=1):
    problem = self.encodeProblem(s).view(1, -1)
    environmentScores = self.environmentLogLikelihoods(environments, problem)
    distribution = (environmentScores/T).exp()
    i = torch.multinomial(distribution.data, 1)[0]
    return environments[i]
def sample(self, sample_shape=torch.Size()):
    num_events = self.probs.size()[-1]
    sample_shape = self._extended_shape(sample_shape)
    param_shape = sample_shape + self.probs.size()[-1:]
    probs = self.probs.expand(param_shape)
    probs_2d = probs.contiguous().view(-1, num_events)
    sample_2d = torch.multinomial(probs_2d, 1, True)
    return sample_2d.contiguous().view(sample_shape)
def sample(self, input, temperature=1., hidden=None):
    hidden = self.module_.init_hidden(1) if hidden is None else hidden
    output, hidden = self.module_(input, hidden)
    probas = output.squeeze().data.div(temperature).exp()
    sample = torch.multinomial(probas, 1)[-1]
    if probas.dim() > 1:
        sample = sample[0]
    return sample, self.repackage_hidden(hidden)
def generate(decoder, prime_str='A', predict_len=100, temperature=0.8, cuda=False):
    hidden = decoder.init_hidden(1)
    prime_input = torch.autograd.Variable(char_tensor(prime_str).unsqueeze(0))

    if cuda:
        hidden = hidden.cuda()
        prime_input = prime_input.cuda()
    predicted = prime_str

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = decoder(prime_input[:, p], hidden)
    inp = prime_input[:, -1]

    for p in range(predict_len):
        output, hidden = decoder(inp, hidden)

        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]

        # Add predicted character to string and use as next input
        predicted_char = chars[top_i]
        predicted += predicted_char
        inp = torch.autograd.Variable(char_tensor(predicted_char).unsqueeze(0))
        if cuda:
            inp = inp.cuda()

    return predicted

# Run as standalone script
def vis_generate(decoder, input_str, temperature=0.8):
    hidden = decoder.init_hidden(1)
    test_len = len(input_str)
    prime_input = torch.autograd.Variable(char_tensor(input_str[0]).unsqueeze(0))

    for p in range(len(prime_input) - 1):
        _, hidden = decoder(prime_input[:, p], hidden)

    hidden_matrix = np.copy(hidden.unsqueeze(0).data.numpy())
    hidden_matrix = hidden_matrix.reshape((1, hidden_matrix.size))
    inp = prime_input[:, -1]

    for p in range(1, test_len):
        output, hidden = decoder(inp, hidden)
        hidden_matrix = np.vstack((hidden_matrix, hidden[0, 0, :].data.numpy()))
        # print hidden[0,0,:].data.numpy()

        # Sample from the network as a multinomial distribution
        # output_dist = output.data.view(-1).div(temperature).exp()
        # top_i = torch.multinomial(output_dist, 1)[0]
        # predicted_char = chars[top_i]

        inp = torch.autograd.Variable(char_tensor(input_str[p]).unsqueeze(0))

    hidden_matrix = np.delete(hidden_matrix, 0, 0)
    df = pd.DataFrame(hidden_matrix)
    df.to_csv('paran-data-df.csv')
    np.savetxt("paren-data.csv", hidden_matrix, delimiter=",")
    np.savetxt("paren-data.tsv", hidden_matrix, delimiter="\t")

# Run as standalone script
def write_batch(self, bsz, lang_h, ctx_h, temperature, max_words=100):
    """Generate sentences for a batch simultaneously."""
    eod = self.word_dict.get_idx('<selection>')

    # resize the language hidden and context hidden states
    lang_h = lang_h.squeeze(0).expand(bsz, lang_h.size(2))
    ctx_h = ctx_h.squeeze(0).expand(bsz, ctx_h.size(2))

    # start the conversation with 'YOU:'
    inpt = torch.LongTensor(bsz).fill_(self.word_dict.get_idx('YOU:'))
    inpt = Variable(self.to_device(inpt))

    outs, lang_hs = [], [lang_h.unsqueeze(0)]
    done = set()
    # generate until max_words are generated, or all the dialogues are done
    for _ in range(max_words):
        # embed the input
        inpt_emb = torch.cat([self.word_encoder(inpt), ctx_h], 1)
        # pass it through the writer and get new hidden state
        lang_h = self.writer(inpt_emb, lang_h)
        out = self.decoder(lang_h)
        # tie weights with encoder
        scores = F.linear(out, self.word_encoder.weight).div(temperature)
        # subtract max to make softmax more stable
        scores.sub_(scores.max(1, keepdim=True)[0].expand(scores.size(0), scores.size(1)))
        out = torch.multinomial(scores.exp(), 1).squeeze(1)

        # save outputs and hidden states
        outs.append(out.unsqueeze(0))
        lang_hs.append(lang_h.unsqueeze(0))
        inpt = out

        data = out.data.cpu()
        # check if all the dialogues in the batch are done
        for i in range(bsz):
            if data[i] == eod:
                done.add(i)
        if len(done) == bsz:
            break

    # run it for the last word to get correct hidden states
    inpt_emb = torch.cat([self.word_encoder(inpt), ctx_h], 1)
    lang_h = self.writer(inpt_emb, lang_h)
    lang_hs.append(lang_h.unsqueeze(0))

    # concatenate outputs and hidden states into single tensors
    return torch.cat(outs, 0), torch.cat(lang_hs, 0)
def show_examples_pytorch(model, es, rlut1, rlut2, embed2, mxlen, sample, prob_clip, max_examples, reverse):
    si = np.random.randint(0, len(es))

    batch_dict = es[si]

    src_array = batch_dict['src']
    tgt_array = batch_dict['dst']
    src_len = batch_dict['src_len']
    #src_array, tgt_array, src_len, _ = es[si]

    if max_examples > 0:
        max_examples = min(max_examples, src_array.size(0))
        src_array = src_array[0:max_examples]
        tgt_array = tgt_array[0:max_examples]
        src_len = src_len[0:max_examples]

    GO = embed2.vocab['<GO>']
    EOS = embed2.vocab['<EOS>']

    # TODO: fix this, check for GPU first
    src_array = src_array.cuda()

    for src_len, src_i, tgt_i in zip(src_len, src_array, tgt_array):

        print('========================================================================')
        sent = lookup_sentence(rlut1, src_i.cpu().numpy(), reverse=reverse)
        print('[OP] %s' % sent)
        sent = lookup_sentence(rlut2, tgt_i)
        print('[Actual] %s' % sent)
        dst_i = torch.zeros(1, mxlen).long()
        #if use_gpu:
        dst_i = dst_i.cuda()

        next_value = GO
        src_i = src_i.view(1, -1)
        for j in range(mxlen):
            dst_i[0, j] = next_value
            probv = model((torch.autograd.Variable(src_i), torch.autograd.Variable(dst_i)))
            output = probv.squeeze()[j]
            if sample is False:
                _, next_value = torch.max(output, 0)
                next_value = int(next_value.data[0])
            else:
                probs = output.data.exp()
                # This is going to zero out low prob. events so they are not
                # sampled from
                best, ids = probs.topk(prob_clip, 0, largest=True, sorted=True)
                probs.zero_()
                probs.index_copy_(0, ids, best)
                probs.div_(torch.sum(probs))
                fv = torch.multinomial(probs, 1)[0]
                next_value = fv

            if next_value == EOS:
                break

        sent = lookup_sentence(rlut2, dst_i.squeeze())
        print('Guess: %s' % sent)
        print('------------------------------------------------------------------------')
def sample(self, fc_feats, att_feats, opt={}):
    sample_max = opt.get('sample_max', 1)
    beam_size = opt.get('beam_size', 1)
    temperature = opt.get('temperature', 1.0)
    if beam_size > 1:
        return self.sample_beam(fc_feats, att_feats, opt)

    batch_size = fc_feats.size(0)
    state = self.init_hidden(batch_size)
    seq = []
    seqLogprobs = []
    for t in range(self.seq_length + 2):
        if t == 0:
            xt = self.img_embed(fc_feats)
        else:
            if t == 1: # input <bos>
                it = fc_feats.data.new(batch_size).long().zero_()
            elif sample_max:
                sampleLogprobs, it = torch.max(logprobs.data, 1)
                it = it.view(-1).long()
            else:
                if temperature == 1.0:
                    prob_prev = torch.exp(logprobs.data).cpu() # fetch prev distribution: shape Nx(M+1)
                else:
                    # scale logprobs by temperature
                    prob_prev = torch.exp(torch.div(logprobs.data, temperature)).cpu()
                it = torch.multinomial(prob_prev, 1).cuda()
                sampleLogprobs = logprobs.gather(1, Variable(it, requires_grad=False)) # gather the logprobs at sampled positions
                it = it.view(-1).long() # and flatten indices for downstream processing

            xt = self.embed(Variable(it, requires_grad=False))

        if t >= 2:
            # stop when all finished
            if t == 2:
                unfinished = it > 0
            else:
                unfinished = unfinished * (it > 0)
            if unfinished.sum() == 0:
                break
            it = it * unfinished.type_as(it)
            seq.append(it) #seq[t] the input of t+2 time step
            seqLogprobs.append(sampleLogprobs.view(-1))

        output, state = self.core(xt, state)
        logprobs = F.log_softmax(self.logit(output))

    return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1)
def sample(self, fc_feats, att_feats, opt={}):
    sample_max = opt.get('sample_max', 1)
    beam_size = opt.get('beam_size', 1)
    temperature = opt.get('temperature', 1.0)
    if beam_size > 1:
        return self.sample_beam(fc_feats, att_feats, opt)

    batch_size = fc_feats.size(0)
    state = self.init_hidden(fc_feats)
    seq = []
    seqLogprobs = []
    for t in range(self.seq_length + 1):
        if t == 0: # input <bos>
            it = fc_feats.data.new(batch_size).long().zero_()
        elif sample_max:
            sampleLogprobs, it = torch.max(logprobs.data, 1)
            it = it.view(-1).long()
        else:
            if temperature == 1.0:
                prob_prev = torch.exp(logprobs.data).cpu() # fetch prev distribution: shape Nx(M+1)
            else:
                # scale logprobs by temperature
                prob_prev = torch.exp(torch.div(logprobs.data, temperature)).cpu()
            it = torch.multinomial(prob_prev, 1).cuda()
            sampleLogprobs = logprobs.gather(1, Variable(it, requires_grad=False)) # gather the logprobs at sampled positions
            it = it.view(-1).long() # and flatten indices for downstream processing

        xt = self.embed(Variable(it, requires_grad=False))

        if t >= 1:
            # stop when all finished
            if t == 1:
                unfinished = it > 0
            else:
                unfinished = unfinished * (it > 0)
            if unfinished.sum() == 0:
                break
            it = it * unfinished.type_as(it)
            seq.append(it) #seq[t] the input of t+2 time step
            seqLogprobs.append(sampleLogprobs.view(-1))

        output, state = self.core(xt, fc_feats, att_feats, state)
        logprobs = F.log_softmax(self.logit(self.dropout(output)))

    return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1)
def forward(self, fc_feats, att_feats, seq):
    batch_size = fc_feats.size(0)
    state = self.init_hidden(batch_size)

    outputs = []

    # embed fc and att feats
    fc_feats = self.fc_embed(fc_feats)
    _att_feats = self.att_embed(att_feats.view(-1, self.att_feat_size))
    att_feats = _att_feats.view(*(att_feats.size()[:-1] + (self.rnn_size,)))

    # Project the attention feats first to reduce memory and computation consumption.
    p_att_feats = self.ctx2att(att_feats.view(-1, self.rnn_size))
    p_att_feats = p_att_feats.view(*(att_feats.size()[:-1] + (self.att_hid_size,)))

    for i in range(seq.size(1) - 1):
        if self.training and i >= 1 and self.ss_prob > 0.0: # otherwise no need to sample
            sample_prob = fc_feats.data.new(batch_size).uniform_(0, 1)
            sample_mask = sample_prob < self.ss_prob
            if sample_mask.sum() == 0:
                it = seq[:, i].clone()
            else:
                sample_ind = sample_mask.nonzero().view(-1)
                it = seq[:, i].data.clone()
                #prob_prev = torch.exp(outputs[-1].data.index_select(0, sample_ind)) # fetch prev distribution: shape Nx(M+1)
                #it.index_copy_(0, sample_ind, torch.multinomial(prob_prev, 1).view(-1))
                prob_prev = torch.exp(outputs[-1].data) # fetch prev distribution: shape Nx(M+1)
                it.index_copy_(0, sample_ind, torch.multinomial(prob_prev, 1).view(-1).index_select(0, sample_ind))
                it = Variable(it, requires_grad=False)
        else:
            it = seq[:, i].clone()
        # break if all the sequences end
        if i >= 1 and seq[:, i].data.sum() == 0:
            break

        xt = self.embed(it)

        output, state = self.core(xt, fc_feats, att_feats, p_att_feats, state)
        output = F.log_softmax(self.logit(output))
        outputs.append(output)

    return torch.cat([_.unsqueeze(1) for _ in outputs], 1)
def sample(self, fc_feats, att_feats, opt={}):
    sample_max = opt.get('sample_max', 1)
    beam_size = opt.get('beam_size', 1)
    temperature = opt.get('temperature', 1.0)
    if beam_size > 1:
        return self.sample_beam(fc_feats, att_feats, opt)

    batch_size = fc_feats.size(0)
    state = self.init_hidden(batch_size)
    seq = []
    seqLogprobs = []
    for t in range(self.seq_length + 2):
        if t == 0:
            xt = self.img_embed(fc_feats)
        else:
            if t == 1: # input <bos>
                it = fc_feats.data.new(batch_size).long().zero_()
            elif sample_max:
                sampleLogprobs, it = torch.max(logprobs.data, 1)
                it = it.view(-1).long()
            else:
                if temperature == 1.0:
                    prob_prev = torch.exp(logprobs.data).cpu() # fetch prev distribution: shape Nx(M+1)
                else:
                    # scale logprobs by temperature
                    prob_prev = torch.exp(torch.div(logprobs.data, temperature)).cpu()
                it = torch.multinomial(prob_prev, 1).cuda()
                sampleLogprobs = logprobs.gather(1, Variable(it, requires_grad=False)) # gather the logprobs at sampled positions
                it = it.view(-1).long() # and flatten indices for downstream processing

            xt = self.embed(Variable(it, requires_grad=False))

        if t >= 2:
            # stop when all finished
            if t == 2:
                unfinished = it > 0
            else:
                unfinished = unfinished * (it > 0)
            if unfinished.sum() == 0:
                break
            it = it * unfinished.type_as(it)
            seq.append(it) #seq[t] the input of t+2 time step
            seqLogprobs.append(sampleLogprobs.view(-1))

        output, state = self.core(xt.unsqueeze(0), state)
        logprobs = F.log_softmax(self.logit(self.dropout(output.squeeze(0))))

    return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1)
def sample(self, fc_feats, att_feats, opt={}):
    sample_max = opt.get('sample_max', 1)
    beam_size = opt.get('beam_size', 1)
    temperature = opt.get('temperature', 1.0)
    if beam_size > 1:
        return self.sample_beam(fc_feats, att_feats, opt)

    batch_size = fc_feats.size(0)
    state = self.init_hidden(batch_size)

    # Project the attention feats first to reduce memory and computation consumption.
    p_att_feats = self.ctx2att(att_feats.view(-1, self.att_feat_size))
    p_att_feats = p_att_feats.view(*(att_feats.size()[:-1] + (self.att_hid_size,)))

    seq = []
    seqLogprobs = []
    for t in range(self.seq_length + 1):
        if t == 0: # input <bos>
            it = fc_feats.data.new(batch_size).long().zero_()
        elif sample_max:
            sampleLogprobs, it = torch.max(logprobs.data, 1)
            it = it.view(-1).long()
        else:
            if temperature == 1.0:
                prob_prev = torch.exp(logprobs.data).cpu() # fetch prev distribution: shape Nx(M+1)
            else:
                # scale logprobs by temperature
                prob_prev = torch.exp(torch.div(logprobs.data, temperature)).cpu()
            it = torch.multinomial(prob_prev, 1).cuda()
            sampleLogprobs = logprobs.gather(1, Variable(it, requires_grad=False)) # gather the logprobs at sampled positions
            it = it.view(-1).long() # and flatten indices for downstream processing

        xt = self.embed(Variable(it, requires_grad=False))

        if t >= 1:
            # stop when all finished
            if t == 1:
                unfinished = it > 0
            else:
                unfinished = unfinished * (it > 0)
            if unfinished.sum() == 0:
                break
            it = it * unfinished.type_as(it)
            seq.append(it) #seq[t] the input of t+2 time step
            seqLogprobs.append(sampleLogprobs.view(-1))

        output, state = self.core(xt, fc_feats, att_feats, p_att_feats, state)
        logprobs = F.log_softmax(self.logit(output))

    return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1)