The following 50 code examples, extracted from open-source Python projects, illustrate how to use chainer.functions.softmax().
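Before the project examples, here is a minimal editorial sketch of the basic call (not taken from any of the projects below; the input array and printed checks are purely illustrative):

import numpy as np
import chainer.functions as F

# A batch of two score vectors; softmax is taken along axis 1 by default,
# so each row is normalized into a probability distribution.
x = np.array([[1.0, 2.0, 3.0],
              [1.0, 1.0, 1.0]], dtype=np.float32)
y = F.softmax(x)
print(y.data)               # per-row probabilities
print(y.data.sum(axis=1))   # each row sums to 1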
def mellowmax(values, omega=1., axis=1):
    """Mellowmax function.

    This is a kind of softmax function that is, unlike the Boltzmann softmax,
    a non-expansion.

    See: http://arxiv.org/abs/1612.05628

    Args:
        values (Variable or ndarray): Input values. Mellowmax is taken along
            the second axis.
        omega (float): Parameter of mellowmax.
        axis (int): Axis along which mellowmax is taken.
    Returns:
        outputs (Variable)
    """
    n = values.shape[axis]
    return (F.logsumexp(omega * values, axis=axis) - np.log(n)) / omega
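A quick numerical check of mellowmax (an editorial sketch, not part of the original project; it assumes the mellowmax function defined above plus numpy):

import numpy as np

values = np.array([[1.0, 2.0, 3.0]], dtype=np.float32)
# With the default omega=1 the result lies between the mean (2.0) and the max (3.0).
print(mellowmax(values).data)
# A large omega pushes the result toward the maximum, a small one toward the mean.
print(mellowmax(values, omega=100.0).data)   # close to 3.0
print(mellowmax(values, omega=0.01).data)    # close to 2.0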
def get_index_a(_model):
    _model.predictor.reset_state()
    _sentence_index_a = []
    index = BOS_INDEX
    while index != EOS_INDEX:
        y = _model.predictor(xp.array([index], dtype=xp.int32))
        probability = F.softmax(y)
        probability.data[0] /= sum(probability.data[0])
        try:
            # Sample the next token from the predicted distribution
            # instead of taking the argmax.
            #index = np.argmax(probability.data[0])
            index = xp.random.choice(range(len(probability.data[0])),
                                     p=probability.data[0])
            if index != EOS_INDEX:
                # Do not append the <EOS> token itself.
                _sentence_index_a.append(index)
        except Exception as e:
            print('probability error')
            break
    return _sentence_index_a
def get_next_word_prob(_model, word, next_word, needModelStateReset=False):
    if needModelStateReset:
        _model.predictor.reset_state()
    _sentence_index_a = []
    index = vocab[word]
    while index != EOS_INDEX:
        y = _model.predictor(xp.array([index], dtype=xp.int32))
        probability = F.softmax(y)
        next_probs = probability.data[0]
        m = np.argsort(probability.data[0])
        break
    # In this case, the input could be an unknown word.
    if next_word not in vocab:
        return (0.0, 0.0)
    next_index = vocab[next_word]
    k, = np.where(m == next_index)
    order_prob = k[0] / len(m)
    next_prob = next_probs[k[0]]
    return (order_prob, next_prob, k[0])
def get_next_word_prob(_model, word, next_word, needModelStateReset=False):
    if needModelStateReset:
        _model.predictor.reset_state()
    _sentence_index_a = []
    index = vocab[word]
    while index != EOS_INDEX:
        y = _model.predictor(xp.array([index], dtype=xp.int32))
        probability = F.softmax(y)
        next_probs = probability.data[0]
        m = np.argsort(probability.data[0])
        break
    # In this case, the input could be an unknown word.
    if next_word not in vocab:
        return (0.0, 0.0, -1)
    next_index = vocab[next_word]
    k, = np.where(m == next_index)
    order_prob = k[0] / len(m)
    next_prob = next_probs[k[0]]
    return (order_prob, next_prob, k[0])
def _context(self, p, fb_mat, fbe_mat):
    batch_size, source_length, _ = fb_mat.data.shape
    # {pe,e}_mat: shape = [batch * srclen, atten]
    pe_mat = F.reshape(
        F.broadcast_to(
            F.expand_dims(self.p_e(p), 1),
            [batch_size, source_length, self.atten_size]),
        [batch_size * source_length, self.atten_size])
    e_mat = F.tanh(fbe_mat + pe_mat)
    # a_mat: shape = [batch, srclen]
    a_mat = F.softmax(F.reshape(self.e_a(e_mat), [batch_size, source_length]))
    # q: shape = [batch, 2 * hidden]
    q = F.reshape(
        F.batch_matmul(a_mat, fb_mat, transa=True),
        [batch_size, 2 * self.hidden_size])
    return q
def attend(self, query, key, value, mask, minfs=None):
    """
    Input shapes:
        q=(b, units, dec_l), k=(b, units, enc_l),
        v=(b, units, dec_l, enc_l), m=(b, dec_l, enc_l)
    """
    # Calculate attention scores with a mask for zero-padded areas
    pre_a = F.batch_matmul(query, key, transa=True)  # (b, dec_l, enc_l)
    minfs = self.xp.full(pre_a.shape, -np.inf, pre_a.dtype) \
        if minfs is None else minfs
    pre_a = F.where(mask, pre_a, minfs)
    a = F.softmax(pre_a, axis=2)
    # If the values along axis=2 are all -inf, softmax produces NaN, so re-mask.
    a = F.where(self.xp.isnan(a.data),
                self.xp.zeros(a.shape, dtype=a.dtype), a)
    reshaped_a = a[:, None]  # (b, 1, dec_xl, enc_l)

    # Calculate the weighted sum
    pre_c = F.broadcast_to(reshaped_a, value.shape) * value
    c = F.sum(pre_c, axis=3, keepdims=True)  # (b, units, dec_xl, 1)
    return c
def test_decode(self, start, eos, limit):
    output = []
    y = chainer.Variable(np.array([[start]], dtype=np.int32))
    for i in range(limit):
        decode0 = self.output_embed(y)
        decode1 = self.decode1(decode0)
        decode2 = self.decode2(decode1)
        z = self.output(decode2)
        prob = F.softmax(z)
        index = np.argmax(cuda.to_cpu(prob.data))
        if index == eos:
            break
        output.append(index)
        y = chainer.Variable(np.array([index], dtype=np.int32))
    return output
def test(model, xs, ts, uss=None):
    model.reset_state()
    tags = model([Variable(
        np.array([x], dtype=np.int32)
    ) for x in xs])
    zss = []
    y_mat = np.zeros((2, 2))
    zs_mat = tuple(
        np.zeros((clf.n_output, clf.n_output))
        for clf in model.tagger.classifiers
    )
    for t, (y, zs) in zip(ts, tags):
        y_mat[t, int(cf.sigmoid(y).data[0, 0] > 0.5)] += 1.0
        if t:
            zss.append(zs)
    if uss:
        assert len(uss) == len(zss)
        for us, zs in zip(uss, zss):
            for m, u, z in zip(zs_mat, us, zs):
                m[u, cf.softmax(z).data.argmax(1)[0]] += 1
    return y_mat, zs_mat
def generate(model, xs):
    model.reset_state()
    tags = model([Variable(
        np.array([x], dtype=np.int32)
    ) for x in xs])
    buf = bytearray()
    for x, (y, zs) in zip(xs, tags):
        buf.append(x)
        if cf.sigmoid(y).data[0, 0] > 0.5:
            yield (
                buf.decode('utf-8', 'replace'),
                tuple(
                    cf.softmax(z).data.argmax(1)[0]
                    for z in zs
                )
            )
            buf = bytearray()
def entropy_filter(self, x, b, ent_T):
    xp = cuda.get_array_module(b)
    eb = entropy(F.softmax(b)) / np.log(b.shape[1])
    eb.to_cpu()
    if hasattr(eb.data, 'get'):
        with cuda.get_device(eb.data):
            exited = eb.data < ent_T
        exited = exited.get()
    else:
        exited = eb.data < ent_T

    y_exit = []
    y_cont = []
    for i, idx in enumerate(exited):
        if idx:
            y_exit.append(b[i:i + 1])
        else:
            y_cont.append(x[i:i + 1])

    if len(y_exit) > 0:
        y_exit = F.vstack(y_exit)
    if len(y_cont) > 0:
        y_cont = F.vstack(y_cont)
    return y_exit, y_cont, exited
def to_function(self):
    if self.nonlinearity.lower() == "clipped_relu":
        return clipped_relu()
    if self.nonlinearity.lower() == "crelu":
        return crelu()
    if self.nonlinearity.lower() == "elu":
        return elu()
    if self.nonlinearity.lower() == "hard_sigmoid":
        return hard_sigmoid()
    if self.nonlinearity.lower() == "leaky_relu":
        return leaky_relu()
    if self.nonlinearity.lower() == "relu":
        return relu()
    if self.nonlinearity.lower() == "sigmoid":
        return sigmoid()
    if self.nonlinearity.lower() == "softmax":
        return softmax()
    if self.nonlinearity.lower() == "softplus":
        return softplus()
    if self.nonlinearity.lower() == "tanh":
        return tanh()
    if self.nonlinearity.lower() == "bst":
        return bst()
    raise NotImplementedError()
def __call__(self, y, a, ht, y_lex):
    y_dict = F.squeeze(F.batch_matmul(y_lex, a, transa=True), axis=2)
    return (y + F.log(y_dict + self.alpha))

#class LinearInterpolationLexicon(chainer.Chain):
#    def __init__(self, hidden_size):
#        super(LinearInterpolationLexicon, self).__init__(
#            perceptron = chainer.links.Linear(hidden_size, 1)
#        )
#
#    def __call__(self, y, a, ht, y_lex):
#        y      = F.softmax(y)
#        y_dict = F.squeeze(F.batch_matmul(y_lex, a, transa=True), axis=2)
#        gamma  = F.broadcast_to(F.sigmoid(self.perceptron(ht)), y_dict.data.shape)
#        return (gamma * y_dict + (1 - gamma) * y)
#
def classify(self, x=None, train=False):
    if x is None:
        x = Tensor.context

    image = x.value
    self.x_batch = image
    xp = Deel.xp
    x_data = xp.asarray(self.x_batch, dtype=Deel.xp.float32)
    x = chainer.Variable(x_data, volatile='off' if train else 'on')

    score = self.forward(x)
    score = F.softmax(score)
    score = Variable(score.data)  # Unchain

    t = ChainerTensor(score)
    t.owner = self
    t.use()
    return t
def classify(self, x=None):
    if x is None:
        x = Tensor.context
    if not isinstance(x, ImageTensor):
        x = Input(x)

    image = x.value
    self.x_batch = image
    xp = Deel.xp
    x_data = xp.asarray(self.x_batch)
    x = chainer.Variable(x_data, volatile=True)

    score = self.predict(x)
    score = F.softmax(score)
    score = Variable(score.data)  # Unchain

    t = ChainerTensor(score)
    t.owner = self
    t.use()
    return t
def classify(self, x=None):
    if x is None:
        x = Tensor.context

    image = x.value
    self.x_batch = image
    xp = Deel.xp
    x_data = xp.asarray(self.x_batch)
    x = chainer.Variable(x_data, volatile=True)

    score = self.forward(x)
    score = F.softmax(score)
    score = Variable(score.data)  # Unchain

    t = ChainerTensor(score)
    t.owner = self
    t.use()
    return t
def __call__(self, X):
    # generate random values
    R = np.random.randn(X.data.shape[0], self.rand_sz)
    R = Variable(R.astype("float32"))

    # attach random to the inputs
    h = F.concat([R, X])
    #h = R

    h = self.ipt(h)
    #h = F.dropout(h)
    y = self.out(h)

    # prior knowledge: environment observation is a one-hot vector
    obs = F.softmax(y[:, :-2])
    # prior knowledge: reward is in [0, 1]
    rew = F.sigmoid(y[:, [-2]])
    fin = F.sigmoid(y[:, [-1]])

    y = F.concat([obs, rew, fin])
    return y
def forward(self, batch):
    label_onehot_batch = [self._onehot_encode(pair[1]) for pair in batch]

    input_img, ground_truth = self.converter(batch, self.device)
    ground_truth_onehot = self.converter(label_onehot_batch, self.device)
    input_img = Variable(input_img, volatile=not self.gen.train)
    ground_truth = Variable(ground_truth, volatile=not self.gen.train)
    ground_truth_onehot = Variable(ground_truth_onehot, volatile=not self.gen.train)

    x_real = self._make_dis_input(input_img, ground_truth_onehot)
    y_real = self.dis(x_real)

    pred_label_map = self.gen(input_img)
    x_fake = self._make_dis_input(input_img, F.softmax(pred_label_map))
    y_fake = self.dis(x_fake)

    self.y_fake = y_fake
    self.y_real = y_real
    self.pred_label_map = pred_label_map
    self.ground_truth = ground_truth
def get_boxes(ans, block_x, block_y, bb_num, class_num, th, im_w, im_h, biases):
    sorted_boxes = []
    for by in range(block_y):
        for bx in range(block_x):
            for j in range(bb_num):
                box = ans[by, bx, j, 0:4]
                conf = sigmoid(ans[by, bx, j, 4])
                probs = softmax(ans[by, bx, j, 5:(5 + class_num)])[0]
                p_class = probs * conf
                if np.max(p_class) < th:
                    continue
                class_id = np.argmax(p_class)
                x = (bx + sigmoid(box[0])) * (im_w / float(block_x))
                y = (by + sigmoid(box[1])) * (im_h / float(block_y))
                w = np.exp(box[2]) * biases[j][0] * (im_w / float(block_x))
                h = np.exp(box[3]) * biases[j][1] * (im_h / float(block_y))
                b = Box(x, y, w, h)
                sorted_boxes.append([b, j, class_id, max(p_class)])
    return sorted_boxes
def to_function(self):
    if self.nonlinearity.lower() == "clipped_relu":
        return clipped_relu()
    if self.nonlinearity.lower() == "crelu":
        return crelu()
    if self.nonlinearity.lower() == "elu":
        return elu()
    if self.nonlinearity.lower() == "hard_sigmoid":
        return hard_sigmoid()
    if self.nonlinearity.lower() == "leaky_relu":
        return leaky_relu()
    if self.nonlinearity.lower() == "relu":
        return relu()
    if self.nonlinearity.lower() == "sigmoid":
        return sigmoid()
    if self.nonlinearity.lower() == "softmax":
        return softmax()
    if self.nonlinearity.lower() == "softplus":
        return softplus()
    if self.nonlinearity.lower() == "tanh":
        return tanh()
    raise NotImplementedError()
def generate_and_save_samples(sample_fn, length, count, dir, rate, levels):
    def save_samples(data):
        data = (data * np.reshape(np.arange(levels) / (levels - 1),
                                  [levels, 1, 1])).sum(axis=1, keepdims=True)
        value = np.iinfo(np.int16).max
        audio = (utils.inverse_mulaw(data * 2 - 1) * value).astype(np.int16)
        for idx, sample in enumerate(audio):
            filename = os.path.join(dir, 'sample_{}.wav'.format(idx))
            wavfile.write(filename, rate, np.squeeze(sample))

    samples = chainer.Variable(
        chainer.cuda.cupy.zeros([count, levels, 1, length], dtype='float32'))
    one_hot_ref = chainer.cuda.cupy.eye(levels).astype('float32')

    with tqdm.tqdm(total=length) as bar:
        for i in range(length):
            probs = F.softmax(sample_fn(samples))[:, :, 0, 0, i]
            samples.data[:, :, 0, i] = one_hot_ref[utils.sample_from(probs.data.get())]
            bar.update()

    samples.to_cpu()
    save_samples(samples.data)
def generate_and_save_samples(sample_fn, height, width, channels, count, filename):
    def save_images(images):
        images = images.reshape((count, count, channels, height, width))
        images = images.transpose(1, 3, 0, 4, 2)
        images = images.reshape((height * count, width * count, channels))
        scipy.misc.toimage(images, cmin=0.0, cmax=255.0).save(filename)

    samples = chainer.Variable(
        chainer.cuda.cupy.zeros((count ** 2, channels, height, width), dtype='float32'))

    with tqdm.tqdm(total=height * width * channels) as bar:
        for i in range(height):
            for j in range(width):
                for k in range(channels):
                    probs = F.softmax(sample_fn(samples))[:, :, k, i, j]
                    _, level_count = probs.shape
                    samples.data[:, k, i, j] = chainer.cuda.to_gpu(
                        utils.sample_from(probs.data.get()) / (level_count - 1))
                    bar.update()

    samples.to_cpu()
    save_images(samples.data * 255.0)
def compute_loss(self, dist_pos, dist_neg, margin_factor=1.0):
    """
    Use softmax on the distances as a ratio measure and compare it to a
    vector of [[0, 0, ...], [1, 1, ...]] (mean squared error).

    This function also computes the accuracy and the 'max_distance'.
    """
    # apply the margin factor and take the square root
    dist = sqrt(F.concat((dist_pos * margin_factor, dist_neg)))
    sm = F.softmax(dist)
    self.loss = mse_zero_one(sm)
    self.accuracy = self._accuracy(dist_pos, dist_neg)
    self.mean_diff = self._mean_difference(dist_pos, dist_neg)
    self.max_diff = self._max_difference(dist_pos, dist_neg)
    return self.loss
def generate(net, image_model, image_path):
    feature = image_model.feature(image_path)
    net.initialize(feature)
    candidates = [(net, [bos], 0)]

    for i in range(max_length):
        next_candidates = []
        for prev_net, tokens, likelihood in candidates:
            if tokens[-1] == eos:
                next_candidates.append((None, tokens, likelihood))
                continue
            net = prev_net.copy()
            x = xp.asarray([tokens[-1]]).astype(np.int32)
            y = F.softmax(net(x))
            token_likelihood = np.log(cuda.to_cpu(y.data[0]))
            order = token_likelihood.argsort()[-beam_width:][::-1]
            next_candidates.extend(
                [(net, tokens + [i], likelihood + token_likelihood[i])
                 for i in order])
        candidates = sorted(next_candidates, key=lambda x: -x[2])[:beam_width]
        if all([candidate[1][-1] == eos for candidate in candidates]):
            break
    return [candidate[1] for candidate in candidates]
def compute_fisher(self, dataset):
    fisher_accum_list = [
        np.zeros(var[1].shape) for var in self.variable_list]

    for _ in range(self.num_samples):
        x, _ = dataset[np.random.randint(len(dataset))]
        y = self.predictor(np.array([x]))
        prob_list = F.softmax(y)[0].data
        class_index = np.random.choice(len(prob_list), p=prob_list)
        loss = F.log_softmax(y)[0, class_index]
        self.cleargrads()
        loss.backward()
        for i in range(len(self.variable_list)):
            fisher_accum_list[i] += np.square(
                self.variable_list[i][1].grad)

    self.fisher_list = [
        F_accum / self.num_samples for F_accum in fisher_accum_list]
    return self.fisher_list
def sample_ax_label(self, a, x, argmax=True, test=False):
    a = self.to_variable(a)
    x = self.to_variable(x)
    batchsize = x.data.shape[0]
    y_distribution = self.q_y_ax(a, x, test=test, softmax=True).data
    n_labels = y_distribution.shape[1]
    if self.gpu_enabled:
        y_distribution = cuda.to_cpu(y_distribution)
    if argmax:
        sampled_label = np.argmax(y_distribution, axis=1)
    else:
        sampled_label = np.zeros((batchsize,), dtype=np.int32)
        labels = np.arange(n_labels)
        for b in xrange(batchsize):
            label_id = np.random.choice(labels, p=y_distribution[b])
            sampled_label[b] = label_id
    return sampled_label
def sample_x_y(self, x, argmax=False, test=False):
    batchsize = x.data.shape[0]
    y_distribution = self.encoder_x_y(x, test=test, softmax=True).data
    n_labels = y_distribution.shape[1]
    if self.gpu:
        y_distribution = cuda.to_cpu(y_distribution)
    sampled_y = np.zeros((batchsize, n_labels), dtype=np.float32)
    if argmax:
        args = np.argmax(y_distribution, axis=1)
        for b in xrange(batchsize):
            sampled_y[b, args[b]] = 1
    else:
        for b in xrange(batchsize):
            label_id = np.random.choice(np.arange(n_labels),
                                        p=y_distribution[b])
            sampled_y[b, label_id] = 1
    sampled_y = Variable(sampled_y)
    if self.gpu:
        sampled_y.to_gpu()
    return sampled_y
def encode(self, x_input, x_query, answer):
    m = self.encode_input(x_input)
    u = self.encode_query(x_query)
    # print "m.data.shape", m.data.shape
    # print "u.data.shape", u.data.shape

    mu = functions.matmul(m, u, transb=True)
    # print "mu.data.shape", mu.data.shape
    p = functions.softmax(mu)
    c = self.encode_output(x_input)
    # print "p.data.shape:", p.data.shape
    # print "c.data.shape:", c.data.shape

    o = functions.matmul(functions.swapaxes(c, 1, 0), p)  # (2, 50, 1)
    o = functions.swapaxes(o, 1, 0)  # (2, 50)
    # print "u.data.shape:", u.data.shape
    # print "o.data.shape:", o.data.shape
    # print (u + o).data.shape

    predict = self.W(u + o)
    # print predict.data.shape
    loss = functions.softmax_cross_entropy(predict, answer)
    return loss
def use_model(model):
    data = np.loadtxt('data/human_test_1_32_32_32.txt').reshape(
        1, 1, 32, 32, 32).astype(np.float32)
    y = model.fwd(data)
    A = F.softmax(y).data
    print(A.argmax(axis=1))
    print(A[0, A.argmax(axis=1)])
def __call__(self, x):
    """Return a softmax probability distribution over predicted classes."""
    # Convolutional layers
    hs, _ = self.feature_map_activations(x)
    h = hs[-1]

    # Fully connected layers
    h = F.dropout(F.relu(self.fc6(h)))
    h = F.dropout(F.relu(self.fc7(h)))
    h = self.fc8(h)

    return F.softmax(h)
def predict(self, input_x):
    if isinstance(input_x, chainer.Variable):
        device = cuda.get_device(input_x.data)
    else:
        device = cuda.get_device(input_x)
    xp = self.predictor.xp
    with device:
        output = self.predictor(input_x)
        batch_size, input_channel, input_h, input_w = input_x.shape
        batch_size, _, grid_h, grid_w = output.shape
        x, y, w, h, conf, prob = F.split_axis(
            F.reshape(output, (batch_size, self.predictor.n_boxes,
                               self.predictor.n_classes + 5, grid_h, grid_w)),
            (1, 2, 3, 4, 5), axis=2)
        x = F.sigmoid(x)
        y = F.sigmoid(y)
        conf = F.sigmoid(conf)
        prob = F.transpose(prob, (0, 2, 1, 3, 4))
        prob = F.softmax(prob)
        prob = F.transpose(prob, (0, 2, 1, 3, 4))

        # convert coordinates to those on the image
        x_shift = xp.asarray(np.broadcast_to(
            np.arange(grid_w, dtype=np.float32), x.shape))
        y_shift = xp.asarray(np.broadcast_to(
            np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1), y.shape))
        w_anchor = xp.asarray(np.broadcast_to(np.reshape(
            np.array(self.anchors, dtype=np.float32)[:, 0],
            (self.predictor.n_boxes, 1, 1, 1)), w.shape))
        h_anchor = xp.asarray(np.broadcast_to(np.reshape(
            np.array(self.anchors, dtype=np.float32)[:, 1],
            (self.predictor.n_boxes, 1, 1, 1)), h.shape))
        box_x = (x + x_shift) / grid_w
        box_y = (y + y_shift) / grid_h
        box_w = F.exp(w) * w_anchor / grid_w
        box_h = F.exp(h) * h_anchor / grid_h

        return box_x, box_y, box_w, box_h, conf, prob
def listnet(x, t):
    """
    The Top-1 approximated ListNet loss as in Cao et al. (2007), Learning to
    Rank: From Pairwise Approach to Listwise Approach.

    :param x: The activation of the previous layer
    :param t: The target labels
    :return: The loss
    """
    # ListNet top-1 reduces to a softmax and simple cross entropy
    st = F.softmax(t, axis=0)
    sx = F.softmax(x, axis=0)
    return -F.mean(st * F.log(sx))
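For illustration, a small editorial sketch of calling listnet on one query's documents (the shapes and values are assumptions, not from the original project; scores and labels are column vectors of shape (n_docs, 1) so that softmax over axis=0 normalizes across the documents):

import numpy as np
from chainer import Variable

# Five documents for one query: predicted scores and graded relevance labels.
x = Variable(np.array([[0.5], [2.0], [0.1], [1.2], [0.3]], dtype=np.float32))
t = Variable(np.array([[0.0], [2.0], [0.0], [1.0], [0.0]], dtype=np.float32))
loss = listnet(x, t)
print(loss.data)  # scalar loss; smaller when the score ranking matches the labels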
def select_action(self, t, greedy_action_func, action_value=None):
    assert action_value is not None
    assert isinstance(action_value,
                      chainerrl.action_value.DiscreteActionValue)
    n_actions = action_value.q_values.shape[1]
    with chainer.no_backprop_mode():
        probs = chainer.cuda.to_cpu(
            F.softmax(action_value.q_values / self.T).data).ravel()
    return np.random.choice(np.arange(n_actions), p=probs)
def all_prob(self):
    with chainer.force_backprop_mode():
        if self.min_prob > 0:
            return (F.softmax(self.beta * self.logits)
                    * (1 - self.min_prob * self.n)) + self.min_prob
        else:
            return F.softmax(self.beta * self.logits)
def predict(self, batch, randFlag):
    t = [[bi] for bi in [1] * batch]
    t = self.makeEmbedBatch(t)

    ys_d = self.dec(t, train=False)
    ys_w = [self.h2w(y) for y in ys_d]
    name_arr_arr = []
    if randFlag:
        t = [predictRandom(F.softmax(y_each)) for y_each in ys_w]
    else:
        t = [y_each.data[-1].argmax(0) for y_each in ys_w]
    name_arr_arr.append(t)
    t = [self.embed(xp.array([t_each], dtype=xp.int32)) for t_each in t]

    count_len = 0
    while count_len < 50:
        ys_d = self.dec(t, train=False)
        ys_w = [self.h2w(y) for y in ys_d]
        if randFlag:
            t = [predictRandom(F.softmax(y_each)) for y_each in ys_w]
        else:
            t = [y_each.data[-1].argmax(0) for y_each in ys_w]
        name_arr_arr.append(t)
        t = [self.embed(xp.array([t_each], dtype=xp.int32)) for t_each in t]
        count_len += 1

    tenti = xp.array(name_arr_arr).T
    for name in tenti:
        name = [self.vocab.itos(nint) for nint in name]
        if "</s>" in name:
            print(" Gen:{}".format("".join(name[:name.index("</s>")])))
def weighted_cross_entropy(p, t, weight_arr, sec_arr, weigh_flag=True):
    print("p:{}".format(p.data.shape))
    b = np.zeros(p.shape, dtype=np.float32)
    b[np.arange(p.shape[0]), t] = 1
    soft_arr = F.softmax(p)
    log_arr = -F.log(soft_arr)
    xent = b * log_arr
    # print("sec_arr:{}".format(sec_arr))
    # print("xent_shape:{}".format(xent.data.shape))
    xent = F.split_axis(xent, sec_arr, axis=0)
    print([xent_e.data.shape[0] for xent_e in xent])
    x_sum = [F.reshape(F.sum(xent_e) / xent_e.data.shape[0], (1, 1))
             for xent_e in xent]
    # print("x_sum:{}".format([x_e.data for x_e in x_sum]))
    xent = F.concat(x_sum, axis=0)
    # print("xent1:{}".format(xent.data))
    xent = F.max(xent, axis=1) / p.shape[0]
    # print("xent2:{}".format(xent.data))
    if not weigh_flag:
        return F.sum(xent)
    # print("wei_arr:{}".format(weight_arr))
    # print("wei_arr:{}".format(weight_arr.data.shape))
    print("xent3:{}".format(xent.data.shape))
    wxent = F.matmul(weight_arr, xent, transa=True)
    wxent = F.sum(F.sum(wxent, axis=0), axis=0)
    print("wxent:{}".format(wxent.data))
    return wxent
def probs(self):
    return F.softmax(self.logits)
def attention_sum(encoding, query):
    alpha = F.softmax(F.batch_matmul(encoding, query, transb=True))
    alpha, encoding = F.broadcast(alpha[:, :, :, None],
                                  encoding[:, :, None, :])
    return F.sum(alpha * encoding, axis=1)
def _attend(self, p):
    weight = F.batch_matmul(self.source_hiddens, p)
    weight = F.where(self.mask, weight, self.minf)
    attention = F.softmax(weight)
    return attention
def _attend(self, p):
    p = self.xh(p)
    p = F.expand_dims(p, 1)
    p = F.broadcast_to(p, self.shape2)
    h = F.tanh(self.h + p)
    shape3 = (self.batchsize * self.src_len, self.dim_hid)
    h_reshaped = F.reshape(h, shape3)
    weight_reshaped = self.hw(h_reshaped)
    weight = F.reshape(weight_reshaped, (self.batchsize, self.src_len, 1))
    weight = F.where(self.mask, weight, self.minf)
    attention = F.softmax(weight)
    return attention
def __call__(self, x):
    return functions.softmax(x, self.axis)
def main():
    model = load_model(args.model_dir)
    assert model is not None
    vocab, vocab_inv = load_vocab(args.model_dir)
    assert vocab is not None
    assert vocab_inv is not None
    vocab_size = model.vocab_size

    with chainer.using_config("train", False):
        for n in range(args.num_generate):
            word_ids = np.arange(0, vocab_size, dtype=np.int32)
            token = ID_BOS
            x = np.asarray([[token]]).astype(np.int32)
            model.reset_state()
            while token != ID_EOS and x.shape[1] < args.max_sentence_length:
                u = model.forward_one_step(x)
                p = F.softmax(u).data[-1]
                token = np.random.choice(word_ids, size=1, p=p)
                x = np.append(x, np.asarray([token]).astype(np.int32), axis=1)
            sentence = []
            for token in x[0]:
                word = vocab_inv[token]
                sentence.append(word)
            print(" ".join(sentence))
def check_forward(self, x_data, use_cudnn=True):
    x = chainer.Variable(x_data)
    y = functions.softmax(x, use_cudnn)
    self.assertEqual(y.data.dtype, self.dtype)

    y_expect = numpy.exp(self.x)
    y_roll = numpy.rollaxis(y_expect, 1, y_expect.ndim)
    for i in numpy.ndindex(y_roll.shape[:-1]):
        y_roll[i] /= y_roll[i].sum()

    gradient_check.assert_allclose(
        y_expect, y.data, **self.check_forward_options)
def forward(self):
    x = chainer.Variable(self.x)
    return functions.softmax(x, use_cudnn=self.use_cudnn)
def __call__(self, input_blob, test_mode=False):
    # explicit and very flexible DAG!
    #################################
    data = input_blob[0]
    labels = input_blob[1]

    if len(input_blob) >= 3:
        weights_classes = input_blob[2]
    else:
        weights_classes = chainer.Variable(
            cuda.cupy.ones((self.classes, 1), dtype='float32'))

    # ---- CONTRACTION BLOCKS ---- #
    blob_b0 = self.bnorm0(data)
    (blob_b1, indices_b1, size_b1) = F.max_pooling_2dIndices(
        self.bnorm1(F.relu(self.conv1(blob_b0)), test=test_mode),
        (2, 2), stride=(2, 2), pad=(0, 0))
    (blob_b2, indices_b2, size_b2) = F.max_pooling_2dIndices(
        self.bnorm2(F.relu(self.conv2(blob_b1)), test=test_mode),
        (2, 2), stride=(2, 2), pad=(0, 0))
    (blob_b3, indices_b3, size_b3) = F.max_pooling_2dIndices(
        self.bnorm3(F.relu(self.conv3(blob_b2)), test=test_mode),
        (2, 2), stride=(2, 2), pad=(0, 0))
    (blob_b4, indices_b4, size_b4) = F.max_pooling_2dIndices(
        self.bnorm4(F.relu(self.conv4(blob_b3)), test=test_mode),
        (2, 2), stride=(2, 2), pad=(0, 0))

    # ---- EXPANSION BLOCKS ---- #
    blob_b5 = self.bnorm5(F.relu(self.conv5(F.unpooling_2d(blob_b4, indices_b4, size_b4))), test=test_mode)
    blob_b6 = self.bnorm6(F.relu(self.conv6(F.unpooling_2d(blob_b5, indices_b3, size_b3))), test=test_mode)
    blob_b7 = self.bnorm7(F.relu(self.conv7(F.unpooling_2d(blob_b6, indices_b2, size_b2))), test=test_mode)
    blob_b8 = self.bnorm8(F.relu(self.conv8(F.unpooling_2d(blob_b7, indices_b1, size_b1))), test=test_mode)
    #ipdb.set_trace()

    # ---- SOFTMAX CLASSIFIER ---- #
    self.blob_class = self.classi(blob_b8)
    self.probs = F.softmax(self.blob_class)

    # ---- CROSS-ENTROPY LOSS ---- #
    #ipdb.set_trace()
    self.loss = F.weighted_cross_entropy(self.probs, labels, weights_classes,
                                         normalize=True)
    self.output_point = self.probs
    return self.loss