The following code examples, extracted from open-source Python projects, illustrate how to use chainer.functions.stack().
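As a quick orientation before the extracted examples, here is a minimal sketch of the basic call: chainer.functions.stack() takes a sequence of equally shaped arrays or Variables and joins them along a new axis. The array contents and variable names below are illustrative, not taken from any of the projects.

import numpy as np
import chainer.functions as F

# three equally shaped (2, 3) arrays
xs = [np.arange(6, dtype=np.float32).reshape(2, 3) + i for i in range(3)]

y0 = F.stack(xs)          # new leading axis: y0.shape == (3, 2, 3)
y1 = F.stack(xs, axis=1)  # new middle axis:  y1.shape == (2, 3, 3)
print(y0.shape, y1.shape)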
def predict(self, xs):
    """
    batch: list of splitted sentences
    """
    batchsize = len(xs)
    fs = [self.extractor.process(x)[:2] for x in xs]
    ws, cs = concat_examples(fs, padding=IGNORE)
    cat_ys, dep_ys = self.forward(ws, cs)
    cat_ys = F.transpose(F.stack(cat_ys, 2), (0, 2, 1))
    # dep_ys = F.transpose(F.stack(dep_ys, 2), (0, 2, 1))
    cat_ys = [F.log_softmax(
        F.reshape(y, (y.shape[1], -1))[1:len(x) + 1]).data
        for x, y in zip(xs, F.split_axis(cat_ys, batchsize, 0))]
    dep_ys = [F.log_softmax(y[1:len(x) + 1, :len(x) + 1]).data
              for x, y in zip(xs, dep_ys)]
    assert len(cat_ys) == len(dep_ys)
    return zip(cat_ys, dep_ys)
def forward(self, ws, cs):
    batchsize, length, max_word_len = cs.shape
    ws = self.emb_word(ws)  # (batch, length, word_dim)
    cs = F.reshape(
        F.max_pooling_2d(
            self.conv_char(
                F.reshape(
                    self.emb_char(cs),
                    (batchsize * length, 1, max_word_len, 50))),
            (max_word_len, 1)),
        (batchsize, length, self.char_dim))
    hs = F.transpose(F.concat([ws, cs], 2), (1, 0, 2))
    hs = F.dropout(hs, self.dropout_ratio, train=self.train)
    hs = F.split_axis(hs, length, 0)
    hs_f = []
    hs_b = []
    self._init_state()
    for h_in_f, h_in_b in zip(hs, reversed(hs)):
        h_f = self.lstm_f2(self.lstm_f1(F.reshape(h_in_f, (batchsize, -1))))
        hs_f.append(h_f)
        h_b = self.lstm_b2(self.lstm_b1(F.reshape(h_in_b, (batchsize, -1))))
        hs_b.append(h_b)
    hs = [F.concat([h_f, h_b]) for h_f, h_b in zip(hs_f, reversed(hs_b))]
    cat_ys = [self.linear_cat2(F.dropout(
        F.elu(self.linear_cat1(h)), 0.5, train=self.train)) for h in hs]
    hs = [F.reshape(h, (length, -1)) for h in
          F.split_axis(F.transpose(F.stack(hs, 2), (0, 2, 1)), batchsize, 0)]
    dep_ys = [self.biaffine(
        F.relu(F.dropout(self.linear_dep(h), 0.32, train=self.train)),
        F.relu(F.dropout(self.linear_head(h), 0.32, train=self.train)))
        for h in hs]
    return cat_ys, dep_ys
def getAllLSTMStates(self):
    lstm_state_list_out = [0] * len(self) * 2
    for z in six.moves.range(len(self)):
        lstm_state_list_out[2 * z] = self[z].c
        lstm_state_list_out[2 * z + 1] = self[z].h
    # Stack the per-layer cell/hidden states into a single Chainer Variable
    return chaFunc.stack(lstm_state_list_out)
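A small companion sketch (shapes and names are ours): stacking a list of per-layer states, as getAllLSTMStates does above, can be undone with chainer.functions.separate, which is useful when the stacked Variable has to be unpacked into individual states again.

import numpy as np
import chainer.functions as F

# four state arrays (e.g. c and h for two layers), each of shape (batch, hidden)
states = [np.random.randn(8, 16).astype(np.float32) for _ in range(4)]

stacked = F.stack(states)               # one Variable of shape (4, 8, 16)
restored = F.separate(stacked, axis=0)  # tuple of four (8, 16) Variables
print(stacked.shape, len(restored), restored[0].shape)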
def __call__(self, hx, cx, xs, flag_train, args):
    if hx is None:
        hx = self.init_hx(xs)
    if cx is None:
        cx = self.init_hx(xs)
    # hx, cx: tensors of shape (n_layers, minibatch size, hidden dim)
    # xs: one entry per token position, each of shape (minibatch size, input dim)
    # Note: dropout is handled inside chaFunc.n_step_lstm()
    if args.chainer_version_check[0] == 2:
        hy, cy, ys = chaFunc.n_step_lstm(
            self.n_layers, self.dropout_rate, hx, cx, self.ws, self.bs, xs)
    else:
        hy, cy, ys = chaFunc.n_step_lstm(
            self.n_layers, self.dropout_rate, hx, cx, self.ws, self.bs, xs,
            train=flag_train, use_cudnn=self.use_cudnn)
    # hy, cy: tensors of shape (n_layers, minibatch size, hidden dim)
    # ys: a list with one element per token position,
    #     each of shape (minibatch size, hidden dim)
    # Stack the list into a single chainer.Variable,
    # a tensor of shape (sentence length, minibatch size, hidden dim)
    hlist = chaFunc.stack(ys)
    return hy, cy, hlist
def read(self, h):
    # M_key = F.swapaxes(F.stack(self.key_buff, axis=0), axis1=0, axis2=1)  # (B, M, m)
    M_key = F.stack(self.key_buff, axis=1)  # (B, M, m)
    self.p = F.softmax(F.reshape(
        F.batch_matmul(M_key, h, transa=False, transb=False),
        (h.shape[0], M_key.shape[1])))  # (B, M)
    # p = F.reshape(p, (h.shape[0], 1, M_key.shape[1]))  # (B, 1, M)
    # print("p", p.shape)
    # M_val = F.swapaxes(F.stack(self.val_buff, axis=0), axis1=0, axis2=1)  # (B, M, m)
    M_val = F.stack(self.val_buff, axis=1)  # (B, M, m)
    # print("M_val", M_val.shape)
    o = F.batch_matmul(self.p, M_val, transa=True, transb=False)  # (B, 1, m)
    o = F.reshape(o, (o.shape[0], o.shape[2]))  # (B, m)
    # print("o", o.shape)
    return o, self.p
def __call__(self, x, train=True):
    h_x = self.embed(x)
    h_x = [h for h in h_x]
    self.nstep_lstm.reset_state()
    h_x = self.nstep_lstm(h_x, train)
    h_x = [h[-1] for h in h_x]
    h_x = F.stack(h_x, 0)
    return self.l1(F.dropout(h_x, train=train))
def __call__(self, x):
    # Obtain parameters for q(z|x)
    encoding_time = time.time()
    self.encode(x)
    encoding_time = float(time.time() - encoding_time)

    decoding_time_average = 0.

    xp = cuda.cupy
    self.importance_weights = 0
    self.w_holder = []
    self.kl = 0
    self.logp = 0

    for j in xrange(self.num_zsamples):
        # Sample z ~ q(z|x)
        z = F.gaussian(self.qmu, self.qln_var)

        # Compute log q(z|x)
        encoder_log = gaussian_logp(z, self.qmu, self.qln_var)

        # Obtain parameters for p(x|z)
        decoding_time = time.time()
        self.decode(z)
        decoding_time = time.time() - decoding_time
        decoding_time_average += decoding_time

        # Compute log p(x|z)
        decoder_log = bernoulli_logp(x, self.p_ber_prob_logit)

        # Compute log p(z)
        prior_log = gaussian_logp0(z)

        # Store the latest log weight
        current_temperature = min(self.temperature['value'], 1.0)
        self.w_holder.append(decoder_log + current_temperature * (prior_log - encoder_log))

        # Store the KL and Logp equivalents. They are not used for computation
        # but for recording and reporting.
        self.kl += (encoder_log - prior_log)
        self.logp += (decoder_log)

        self.temperature['value'] += self.temperature['increment']

    # Compute w' for this sample (batch)
    logps = F.stack(self.w_holder)
    self.obj_batch = F.logsumexp(logps, axis=0) - np.log(self.num_zsamples)
    self.kl /= self.num_zsamples
    self.logp /= self.num_zsamples
    decoding_time_average /= self.num_zsamples

    batch_size = self.obj_batch.shape[0]
    self.obj = -F.sum(self.obj_batch) / batch_size
    self.timing_info = np.array([encoding_time, decoding_time_average])
    return self.obj
def __call__(self, x):
    # Obtain parameters for q(z|x)
    encoding_time = time.time()
    self.encode(x)
    encoding_time = float(time.time() - encoding_time)

    decoding_time_average = 0.

    xp = cuda.cupy
    self.importance_weights = 0
    self.w_holder = []
    self.kl = 0
    self.logp = 0

    for j in xrange(self.num_zsamples):
        # Sample z ~ q(z|x)
        z = F.gaussian(self.qmu, self.qln_var)

        # Compute log q(z|x)
        encoder_log = gaussian_logp(z, self.qmu, self.qln_var)

        # Obtain parameters for p(x|z)
        decoding_time = time.time()
        self.decode(z)
        decoding_time = time.time() - decoding_time
        decoding_time_average += decoding_time

        # Compute log p(x|z)
        decoder_log = gaussian_logp(x, self.pmu, self.pln_var)

        # Compute log p(z). The odd notation being used is to supply
        # a mean of 0 and covariance of 1.
        prior_log = gaussian_logp(z, self.qmu * 0, self.qln_var / self.qln_var)

        # Store the latest log weight
        current_temperature = min(self.temperature['value'], 1.0)
        self.w_holder.append(decoder_log + current_temperature * (prior_log - encoder_log))

        # Store the KL and Logp equivalents. They are not used for computation
        # but for recording and reporting.
        self.kl += (encoder_log - prior_log)
        self.logp += (decoder_log)

        self.temperature['value'] += self.temperature['increment']

    # Compute w' for this sample (batch)
    logps = F.stack(self.w_holder)
    self.obj_batch = F.logsumexp(logps, axis=0) - np.log(self.num_zsamples)
    self.kl /= self.num_zsamples
    self.logp /= self.num_zsamples
    decoding_time_average /= self.num_zsamples

    batch_size = self.obj_batch.shape[0]
    self.obj = -F.sum(self.obj_batch) / batch_size
    self.timing_info = np.array([encoding_time, decoding_time_average])
    return self.obj
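The objective in the two examples above reduces to a logsumexp over the stacked per-sample log weights. A minimal sketch with made-up shapes (K importance samples, w_holder filled with dummy data) shows the same reduction in isolation:

import numpy as np
import chainer.functions as F

K, batch = 5, 8
# stand-ins for the K per-example log importance weights, each of shape (batch,)
w_holder = [np.random.randn(batch).astype(np.float32) for _ in range(K)]

logps = F.stack(w_holder)                           # (K, batch)
obj_batch = F.logsumexp(logps, axis=0) - np.log(K)  # log(mean_k exp(w_k)) per example
print(obj_batch.shape)  # (batch,)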
def beam_search(dec, state, y, data, beam_width, mydict_inv):
    beam_width = beam_width
    xp = cuda.cupy
    batchsize = data.shape[0]
    vocab_size = len(mydict_inv)
    topk = 20
    route = np.zeros((batchsize, beam_width, 50)).astype(np.int32)

    for j in range(50):
        if j == 0:
            y = Variable(xp.array(np.argmax(y.data.get(), axis=1)).astype(xp.int32))
            state, y = dec(y, state, train=False)
            h = state['h1'].data
            c = state['c1'].data
            h = xp.tile(h.reshape(batchsize, 1, -1), (1, beam_width, 1))
            c = xp.tile(c.reshape(batchsize, 1, -1), (1, beam_width, 1))
            ptr = F.log_softmax(y).data.get()
            pred_total_city = np.argsort(ptr)[:, ::-1][:, :beam_width]
            pred_total_score = np.sort(ptr)[:, ::-1][:, :beam_width]
            route[:, :, j] = pred_total_city
            pred_total_city = pred_total_city.reshape(batchsize, beam_width, 1)
        else:
            pred_next_score = np.zeros((batchsize, beam_width, topk))
            pred_next_city = np.zeros((batchsize, beam_width, topk)).astype(np.int32)
            score2idx = np.zeros((batchsize, beam_width, topk)).astype(np.int32)
            for b in range(beam_width):
                state = {'c1': Variable(c[:, b, :]), 'h1': Variable(h[:, b, :])}
                cur_city = xp.array(
                    [pred_total_city[i, b, j - 1] for i in range(batchsize)]).astype(xp.int32)
                state, y = dec(cur_city, state, train=False)
                h[:, b, :] = state['h1'].data
                c[:, b, :] = state['c1'].data
                ptr = F.log_softmax(y).data.get()
                pred_next_score[:, b, :] = np.sort(ptr, axis=1)[:, ::-1][:, :topk]
                pred_next_city[:, b, :] = np.argsort(ptr, axis=1)[:, ::-1][:, :topk]

            h = F.stack([h for i in range(topk)], axis=2).data
            c = F.stack([c for i in range(topk)], axis=2).data

            pred_total_city = np.tile(route[:, :, :j], (1, 1, topk)).reshape(batchsize, beam_width, topk, j)
            pred_next_city = pred_next_city.reshape(batchsize, beam_width, topk, 1)
            pred_total_city = np.concatenate((pred_total_city, pred_next_city), axis=3)
            pred_total_score = np.tile(pred_total_score.reshape(batchsize, beam_width, 1),
                                       (1, 1, topk)).reshape(batchsize, beam_width, topk, 1)
            pred_next_score = pred_next_score.reshape(batchsize, beam_width, topk, 1)
            pred_total_score += pred_next_score

            idx = pred_total_score.reshape(batchsize, beam_width * topk).argsort(axis=1)[:, ::-1][:, :beam_width]

            pred_total_city = pred_total_city[:, idx // topk, np.mod(idx, topk), :][
                np.diag_indices(batchsize, ndim=2)].reshape(batchsize, beam_width, j + 1)
            pred_total_score = pred_total_score[:, idx // topk, np.mod(idx, topk), :][
                np.diag_indices(batchsize, ndim=2)].reshape(batchsize, beam_width, 1)
            h = h[:, idx // topk, np.mod(idx, topk), :][
                np.diag_indices(batchsize, ndim=2)].reshape(batchsize, beam_width, -1)
            c = c[:, idx // topk, np.mod(idx, topk), :][
                np.diag_indices(batchsize, ndim=2)].reshape(batchsize, beam_width, -1)

            route[:, :, :j + 1] = pred_total_city

            if (pred_total_city[:, :, j] == 15).all():
                break

    return route[:, 0, :j + 1].tolist()
def encodeSentenceFWD(self, train_mode, sentence, args, dropout_rate):
    if args.gpu_enc != args.gpu_dec:  # encoder and decoder live on different GPUs
        chainer.cuda.get_device(args.gpu_enc).use()
    encLen = len(sentence)  # sentence length
    cMBSize = len(sentence[0])  # minibatch size

    # get the embeddings for the encoder input
    encEmbList = self.getEncoderInputEmbeddings(sentence, args)

    flag_train = (train_mode > 0)
    lstmVars = [0] * self.n_layers * 2
    if self.flag_merge_encfwbw == 0:  # run the forward and backward LSTMs independently
        hyf, cyf, fwHout = self.model.encLSTM_f(
            None, None, encEmbList, flag_train, args)  # forward
        hyb, cyb, bkHout = self.model.encLSTM_b(
            None, None, encEmbList[::-1], flag_train, args)  # backward
        for z in six.moves.range(self.n_layers):
            lstmVars[2 * z] = cyf[z] + cyb[z]
            lstmVars[2 * z + 1] = hyf[z] + hyb[z]
    elif self.flag_merge_encfwbw == 1:  # merge forward and backward at every layer
        sp = (cMBSize, self.hDim)
        for z in six.moves.range(self.n_layers):
            if z == 0:  # first layer: feed the embeddings
                biH = encEmbList
            else:  # upper layers: sum of the forward output and the reversed backward output
                biH = fwHout + bkHout[::-1]
            # forward LSTM for layer z
            hyf, cyf, fwHout = self.model.encLSTM_f(
                z, biH, flag_train, dropout_rate, args)
            # backward LSTM for layer z
            hyb, cyb, bkHout = self.model.encLSTM_b(
                z, biH[::-1], flag_train, dropout_rate, args)
            # sum the forward and backward cell/hidden states and reshape them
            lstmVars[2 * z] = chaFunc.reshape(cyf + cyb, sp)
            lstmVars[2 * z + 1] = chaFunc.reshape(hyf + hyb, sp)
    else:
        assert 0, "ERROR"

    if self.flag_enc_boseos == 0:  # default
        # the [:, ] slice on fwHout is effectively a no-op here
        biHiddenStack = fwHout[:, ] + bkHout[::-1]
    elif self.flag_enc_boseos == 1:
        bkHout2 = bkHout[::-1]
        biHiddenStack = fwHout[1:encLen - 1, ] + bkHout2[1:encLen - 1, ]
        # exclude BOS and EOS  TODO
        encLen -= 2
    else:
        assert 0, "ERROR"

    # (enc length, minibatch size, hidden dim)
    #   => (minibatch size, enc length, hidden dim)
    biHiddenStackSW01 = chaFunc.swapaxes(biHiddenStack, 0, 1)
    # stack the per-layer LSTM states so they can initialize the decoder LSTM
    lstmVars = chaFunc.stack(lstmVars)
    # pack the encoder results into an encInfoObject
    retO = self.encInfoObject(biHiddenStackSW01, lstmVars, encLen, cMBSize)
    return retO