def _cascade_evaluation(self, X_test, y_test):
    """Score the cascade on a labelled test set and report the accuracy.

    :param X_test: np.array
        Test input samples. Must be of the same shape as the training data.

    :param y_test: np.array
        Test target values.

    :return: float
        Accuracy of the cascade predictions against ``y_test``.
    """
    # Average the outputs returned by cascade_forest over axis 0, then pick
    # the highest-scoring column of every row as the predicted class.
    forest_outputs = self.cascade_forest(X_test)
    mean_proba = np.mean(forest_outputs, axis=0)
    predicted = np.argmax(mean_proba, axis=1)
    casc_accuracy = accuracy_score(y_true=y_test, y_pred=predicted)
    print('Layer validation accuracy = {}'.format(casc_accuracy))
    return casc_accuracy
def generate(self, src, sampled=False):
    """Greedily decode an output sequence for ``src``.

    The source is embedded and encoded, the last encoder output is fed to the
    decoder LSTM, and at every step the highest-probability target entry is
    emitted.  Decoding stops once the END token is produced or after
    ``5 * len(src)`` steps.

    NOTE(review): ``sampled`` is accepted but never read; decoding is always
    argmax.

    :param src: source sequence, passed to ``self.embed_seq``.
    :param sampled: unused.
    :return: list of ``self.tgt_vocab`` entries (the END entry is included
        when it was produced).
    """
    dynet.renew_cg()
    encoded_last = self.encode_seq(self.embed_seq(src))[-1]
    weight = dynet.parameter(self.decoder_w)
    bias = dynet.parameter(self.decoder_b)
    state = self.dec_lstm.initial_state().add_input(encoded_last)

    decoded = []
    max_steps = 5 * len(src)
    for _ in range(max_steps):
        scores = dynet.affine_transform([bias, weight, state.output()])
        best = np.argmax(dynet.softmax(scores).value())
        token = self.tgt_vocab[best]
        decoded.append(token)
        if token.s == self.tgt_vocab.END_TOK:
            break
        # Feed the embedding of the emitted entry back into the decoder.
        state = state.add_input(self.tgt_lookup[best])
    return decoded
def gen_sent_on_topic(idxvocab, vocabxid, start_symbol, end_symbol, cf):
    """Write each topic's words and sentences generated for it to a file.

    The output path is ``args.gen_sent_on_topic``.  For every topic index in
    ``range(cf.topic_number)`` the function writes the topic words, one
    greedy sentence (temperature 0) and ``gen_num`` sampled sentences for
    each temperature in ``gen_temps``.

    Relies on module-level ``args``, ``tm``, ``sess``, ``topn``,
    ``initializer``, ``LM``, ``gen_temps`` and ``gen_num``.

    :param idxvocab: maps a word id to its word.
    :param vocabxid: maps a word to its id.
    :param start_symbol: word whose id starts every generated sentence.
    :param end_symbol: word whose id is passed as the end marker.
    :param cf: config; ``topic_number`` and ``lm_sent_len`` are read here and
        the object is forwarded to ``LM``.
    """
    # The context manager guarantees the file is flushed and closed even if
    # generation raises part-way through.
    with codecs.open(args.gen_sent_on_topic, "w", "utf-8") as output:
        topics, _ = tm.get_topics(sess, topn=topn)

        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mgen = LM(is_training=False, vocab_size=len(idxvocab), batch_size=1, num_steps=1,
                      config=cf, reuse_conv_variables=True)

        for t in range(cf.topic_number):
            output.write("\n" + "="*100 + "\n")
            output.write("Topic " + str(t) + ":\n")
            output.write(" ".join([idxvocab[item] for item in topics[t]]) + "\n\n")

            output.write("\nSentence generation (greedy; argmax):" + "\n")
            s = mgen.generate_on_topic(sess, t, vocabxid[start_symbol], 0, cf.lm_sent_len+10,
                                       vocabxid[end_symbol])
            output.write("[0] " + " ".join([idxvocab[item] for item in s]) + "\n")

            for temp in gen_temps:
                output.write("\nSentence generation (random; temperature = " + str(temp) + "):\n")
                # range() instead of the Python-2-only xrange().
                for i in range(gen_num):
                    s = mgen.generate_on_topic(sess, t, vocabxid[start_symbol], temp,
                                               cf.lm_sent_len+10, vocabxid[end_symbol])
                    output.write("[" + str(i) + "] " + " ".join([idxvocab[item] for item in s]) + "\n")
def write_predictions(self, inputs):
    '''
    Outputs predictions in a file named <model_name_prefix>.predictions.

    One line is written per input.  The model's argmax (axis 1) is an index
    into the padded input, so the number of leading all-zero entries is
    subtracted and 1 is added to obtain a 1-based index.
    '''
    predictions = numpy.argmax(self.model.predict(inputs), axis=1)
    # "with" closes the file even if a write fails; file.write replaces the
    # Python-2-only "print >>file" statement.
    with open("%s.predictions" % self.model_name_prefix, "w") as test_output_file:
        for input_indices, prediction in zip(inputs, predictions):
            # The predictions are indices of words in padded sentences. We need to readjust them.
            padding_length = 0
            for index in input_indices:
                if not numpy.all(index == 0):
                    break
                padding_length += 1
            adjusted = prediction - padding_length + 1  # +1 because the indices start at 1.
            test_output_file.write(str(adjusted) + "\n")
def _infer_multiplets_from_observed(n_obs_multiplets, n_cells0, n_cells1):
    """ Given a number of observed multiplets and cell counts for two transcriptomes,
        infer the total number of multiplets (observed + unobserved).

        Returns 0 when either cell count is 0 or no multiplets were observed;
        otherwise returns the binomial ``n`` that maximises the likelihood of
        the observed count. """
    if n_cells0 == 0 or n_cells1 == 0:
        return 0

    # Prior probability of a doublet given counts for each cell type (ignore N_cells > 2)
    total = float(n_cells0 + n_cells1)
    p_obs_multiplet = 2 * (float(n_cells0) / total) * (float(n_cells1) / total)

    # Brute force MLE of binomial n
    n_mle = 0
    if n_obs_multiplets > 0:
        # np.arange replaces the Python-2-only xrange.  Candidates start at 0,
        # so the argmax position is the candidate n itself.
        # NOTE(review): the candidate range stops at n_cells0 + n_cells1 - 1 - confirm
        # that excluding the total itself is intended.
        candidate_n = np.arange(0, n_cells0 + n_cells1)
        likelihood = scipy.stats.binom.pmf(n_obs_multiplets, candidate_n, p_obs_multiplet)
        n_mle = np.argmax(likelihood)
    return n_mle
def has_tomatoes(self, im_path):
    """Classify one image file and print the verdict.

    The image is loaded as float32, passed through ``self.prepare_image``,
    evaluated in ``self.sess`` and fed to ``self.output_logits``.  Index 0 of
    the prediction is reported as "not a tomato", anything else as "tomato".
    Nothing is returned.

    :param im_path: path of the image to open.
    """
    pixels = np.asarray(Image.open(im_path), dtype=np.float32)
    prepared = self.prepare_image(pixels)

    # prepare_image returns something with .eval(); evaluate it before feeding.
    feed = {self.img_feed: prepared.eval(session=self.sess)}
    pred = self.sess.run(self.output_logits, feed_dict=feed)

    if np.argmax(pred) != 0:
        print("We have a tomato ! (confidence : ", pred[0, 1], "%)")
    else:
        print("NOT a tomato ! (confidence : ", pred[0, 0], "%)")
def play(self, nb_rounds):
    """Let the model play ``nb_rounds`` games and send every frame to the image saver.

    Each game is a generator created by ``self.game``: it yields
    ``(screen, _)`` pairs and receives actions through ``send``.  The state
    fed to the model is a stack of the ``self.nb_frames`` most recent
    screens.  A game ends when its generator raises ``StopIteration``.

    :param nb_rounds: number of games to play.
    """
    img_saver = save_image()
    # next(gen) works on Python 2.6+ and 3, unlike the Python-2-only gen.next().
    next(img_saver)
    game_cnt = it.count(1)
    for _round in range(nb_rounds):
        game = self.game(width=self.width, height=self.height)
        screen, _ = next(game)
        img_saver.send(screen)
        frame_cnt = it.count()
        try:
            state = np.asarray([screen] * self.nb_frames)
            while True:
                next(frame_cnt)
                act_idx = np.argmax(
                    self.model.predict(state[np.newaxis]), axis=-1)[0]
                screen, _ = game.send(self.actions[act_idx])
                # Shift the frame stack by one and put the newest screen first.
                state = np.roll(state, 1, axis=0)
                state[0] = screen
                img_saver.send(screen)
        except StopIteration:
            # Parenthesised single-argument print is valid on Python 2 and 3.
            print('Saved %4i frames for game %3i' % (
                next(frame_cnt), next(game_cnt)))
    img_saver.close()
def test(self, input_path, output_path):
    """Run the trained model on every input file and save results as HDF5.

    For each path in ``input_path`` one file named
    ``<self.roi[1]>_<basename>`` is written into ``output_path``.  When
    ``self.roi[0] < 0`` it holds a ``predictions`` dataset (argmax over the
    last axis of the probabilities); otherwise a ``probs`` dataset (channel 1
    of the probabilities).

    :param input_path: iterable of input file paths.
    :param output_path: directory for the result files.
    :raises Exception: when ``self.load()`` reports that no model was found.
    """
    if not self.load()[0]:
        raise Exception("No model is found, please train first")

    mean, std = self.sess.run([self.mean, self.std])

    images = np.empty((1, self.im_size[0], self.im_size[1], self.im_size[2], 1), dtype=np.float32)
    for f in input_path:
        images[0, ..., 0], read_info = read_testing_inputs(f, self.roi[0], self.im_size, output_path)

        # NOTE(review): is_training is fed as True at test time - confirm this is intended.
        probs = self.sess.run(self.probs, feed_dict={self.images: (images - mean) / std,
                                                     self.is_training: True,
                                                     self.keep_prob: 1})

        output_file = os.path.join(output_path, self.roi[1] + '_' + os.path.basename(f))
        # The context manager closes the HDF5 file even if restoring or
        # writing the labels raises.
        with h5py.File(output_file, 'w') as f_h5:
            if self.roi[0] < 0:
                f_h5['predictions'] = restore_labels(np.argmax(probs[0], 3), self.roi[0], read_info)
            else:
                f_h5['probs'] = restore_labels(probs[0, ..., 1], self.roi[0], read_info)
def __init__(self, channels=3, n_class=2, cost="cross_entropy", cost_kwargs=None, **kwargs):
    """Build the network graph together with its cost and evaluation tensors.

    :param channels: size of the last axis of the input placeholder
    :param n_class: size of the last axis of the label placeholder
    :param cost: (optional) cost name forwarded to ``self._get_cost``
    :param cost_kwargs: (optional) dict forwarded to ``self._get_cost``;
        ``None`` (the default) stands for an empty dict
    :param kwargs: forwarded to ``create_conv_net``; the ``"summaries"`` key
        (default True) is additionally stored on the instance
    """
    # A literal ``{}`` default would be a single dict shared by every call.
    if cost_kwargs is None:
        cost_kwargs = {}

    tf.reset_default_graph()

    self.n_class = n_class
    self.summaries = kwargs.get("summaries", True)

    self.x = tf.placeholder("float", shape=[None, None, None, channels])
    self.y = tf.placeholder("float", shape=[None, None, None, n_class])
    self.keep_prob = tf.placeholder(tf.float32)  # dropout (keep probability)

    logits, self.variables, self.offset = create_conv_net(self.x, self.keep_prob, channels, n_class, **kwargs)

    self.cost = self._get_cost(logits, cost, cost_kwargs)

    self.gradients_node = tf.gradients(self.cost, self.variables)

    self.cross_entropy = tf.reduce_mean(cross_entropy(tf.reshape(self.y, [-1, n_class]),
                                                      tf.reshape(pixel_wise_softmax_2(logits), [-1, n_class])))

    self.predicter = pixel_wise_softmax_2(logits)
    # A position counts as correct when the argmax over axis 3 matches the label's.
    self.correct_pred = tf.equal(tf.argmax(self.predicter, 3), tf.argmax(self.y, 3))
    self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))
def validate(model):
    """Return the per-class Dice coefficients averaged over the validation set.

    Every validation image is covered by a grid of ``args.n_tiles`` patches
    per axis.  The model output for each patch is accumulated into a
    full-size score volume, whose argmax over the class axis is compared
    with the label volume.

    Relies on module-level ``df_val``, ``args``, ``dataset``, ``xp``,
    ``load_nifti``, ``crop_patch`` and ``dice_coefficients``.

    :param model: callable applied to a patch batch; ``.data`` of its result is read.
    :return: array of Dice coefficients averaged over images (axis 0).
    """
    dice_coefs = []
    for image_path, label_path in zip(df_val["image"], df_val["label"]):
        image = load_nifti(image_path)
        label = load_nifti(label_path)

        # Patch centres along each axis.  Floor division keeps them integral:
        # they become slice bounds, which cannot be floats under true division.
        centers = [[], [], []]
        for img_len, len_out, center, n_tile in zip(image.shape, args.output_shape, centers, args.n_tiles):
            assert img_len < len_out * n_tile, "{} must be smaller than {} x {}".format(img_len, len_out, n_tile)
            stride = int((img_len - len_out) // (n_tile - 1))
            center.append(len_out // 2)
            for _ in range(n_tile - 2):
                center.append(center[-1] + stride)
            center.append(img_len - len_out // 2)

        output = np.zeros((dataset["n_classes"],) + image.shape[:-1])

        # The crop taken from the model output is the same for every patch.
        slices_in = [slice((len_in - len_out) // 2, len_in - (len_in - len_out) // 2)
                     for len_out, len_in in zip(args.output_shape, args.input_shape)]

        for x, y, z in itertools.product(*centers):
            patch = crop_patch(image, [x, y, z], args.input_shape)
            patch = np.expand_dims(patch, 0)
            patch = xp.asarray(patch)
            slices_out = [slice(center - len_out // 2, center + len_out // 2)
                          for len_out, center in zip(args.output_shape, [x, y, z])]
            output[:, slices_out[0], slices_out[1], slices_out[2]] += chainer.cuda.to_cpu(
                model(patch).data[0, :, slices_in[0], slices_in[1], slices_in[2]])

        predicted = np.argmax(output, axis=0).astype(np.int32)
        dice_coefs.append(dice_coefficients(predicted, label, labels=range(dataset["n_classes"])))

    dice_coefs = np.array(dice_coefs)
    return np.mean(dice_coefs, axis=0)
def correct_for_multipol(pol):
    """
    Inputs are:
        pol, suspected MultiPolygon

    Returns the largest-area member of a MultiPolygon; any other geometry is
    returned unchanged.  Typically used to solve the problem of
    non-overlapping polygons being subtracted.
    """
    if pol.geom_type != 'MultiPolygon':
        return pol

    parts = pol.geoms
    areas = np.zeros(len(parts))
    for position, part in enumerate(parts):
        areas[position] = part.area
    # argmax returns the first position holding the maximum area.
    return parts[np.argmax(areas)]
def eval(flags):
    """Print the cross entropy of a prediction file against the true labels.

    Predictions are read from ``flags.pred_path``; their ``class1..classN``
    columns define the number of classes.  Labels come from the stage-1 test
    set when the path contains "stage1", otherwise from the validation part
    of fold ``flags.fold``.  When a label exceeds the number of predicted
    classes the labels are remapped through ``to4c``.

    :param flags: options object; ``pred_path`` and ``fold`` are read here and
        the object is forwarded to ``personalDB``.
    """
    name = flags.pred_path
    frame = pd.read_csv(name)
    classes = sum(1 for column in frame.columns.values if 'class' in column)
    wanted = ['class%d' % k for k in range(1, classes + 1)]
    yp = frame[wanted].values

    myDB = personalDB(flags, name="full")
    if "stage1" not in name:
        myDB.get_split()
        va = myDB.split[flags.fold][1]
        y = np.argmax(myDB.y[va], axis=1)
    else:
        y = myDB.data['test_variants_filter']['Class'] - 1

    if np.max(y) > classes:
        y = np.argmax(to4c(onehot_encode(y)), axis=1)

    score = cross_entropy(y, yp)
    print(name,score,'\n')
def eval(name,clip=False,bar=0.9):
    """Print overall and per-class losses of a submission against the stage-1 solution.

    :param name: path of the submission CSV (columns ``class1``..``class9``).
    :param clip: when true, clip probabilities to ``[(1-bar)/8, bar]`` and
        renormalise every row before scoring.
    :param bar: upper clipping bound.
    """
    columns = ['class%d' % k for k in range(1, 10)]

    base = pd.read_csv('../input/stage1_solution_filtered.csv')
    base['Class'] = np.argmax(base[columns].values, axis=1)
    sub = pd.read_csv(name)

    y = base['Class'].values
    yp = sub[columns].values
    if clip:
        yp = np.clip(yp, (1.0 - bar) / 8, bar)
        yp = yp / np.sum(yp, axis=1).reshape([yp.shape[0], 1])

    print(name,cross_entropy(y,yp),multiclass_log_loss(y,yp))
    for i in range(9):
        selected = y == i
        y1 = y[selected]
        yp1 = yp[selected]
        print(i,y1.shape,cross_entropy(y1,yp1),multiclass_log_loss(y1,yp1))
def post(self):
    """Average the per-epoch predictions and write a submission file.

    ``self.flags.pred_path`` is read as ``self.flags.epochs`` consecutive
    blocks of ``nrows`` header-less rows with 9 columns each.  The blocks are
    averaged, an ``ID`` column numbered from 1 is added, and the result is
    written next to the input with a ``_sub.csv`` suffix.  For tasks
    containing ``_cv`` the cross entropy on the fold's validation rows is
    printed as well.
    """
    task = self.flags.task
    if task == "test_cnn_stage1":
        docs = self.DB.clean_doc['test_text_filter']
    elif task == "test_cnn_stage2":
        docs = self.DB.clean_doc['stage2_test_text']
    else:
        self.mDB.get_split()
        docs = self.mDB.split[self.flags.fold][1]

    nrows = len(docs)
    p = np.zeros([nrows, 9])
    for epoch in range(self.flags.epochs):
        # Block `epoch` starts after the rows of all earlier blocks.
        skiprows = nrows * epoch if epoch != 0 else None
        block_values = pd.read_csv(self.flags.pred_path, header=None, nrows=nrows, skiprows=skiprows).values
        p = p + block_values
    p = p / self.flags.epochs

    if '_cv' in self.flags.task:
        from utils.np_utils.utils import cross_entropy
        y = np.argmax(self.mDB.y, axis=1)
        print("cross entropy", cross_entropy(y[self.mDB.split[self.flags.fold][1]],p))

    s = pd.DataFrame(p, columns=['class%d' % k for k in range(1, 10)])
    s['ID'] = np.arange(nrows) + 1
    s.to_csv(self.flags.pred_path.replace(".csv", "_sub.csv"), index=False, float_format="%.5f")
def cv(flags):
    """Train on one fold, save the validation predictions and print their loss.

    Features come from ``build_feature``.  For tasks containing "4c" both
    label vectors are remapped through ``to4c``.  Predictions are written to
    ``<flags.data_path>/cv_<flags.fold>.csv`` with ``real`` and ``ID``
    columns.  The module-level ``params`` dict is modified in place.

    :param flags: options object; ``task``, ``data_path`` and ``fold`` are read here.
    """
    X, y, Xt, yt, idx = build_feature(flags)
    params['verbose_eval'] = 10

    if '4c' in flags.task:
        y = np.argmax(to4c(onehot_encode(y)), axis=1)
        yt = np.argmax(to4c(onehot_encode(yt)), axis=1)
    params['num_class'] = np.max(y) + 1

    model = xgb_model(params)
    print(X.shape,Xt.shape,y.shape,yt.shape)
    model.fit(X, y, Xt, yt, print_fscore=False)
    yp = model.predict(Xt)

    column_names = ['class%d' % k for k in range(1, yp.shape[1] + 1)]
    s = pd.DataFrame(yp, columns=column_names)
    s['real'] = np.array(yt)
    s['ID'] = idx
    s.to_csv('%s/cv_%d.csv' % (flags.data_path, flags.fold), index=False)

    from utils.np_utils.utils import cross_entropy
    print(cross_entropy(yt,yp))
def sub(flags):
    """Train on all data and write averaged test predictions to ``flags.pred_path``.

    For tasks containing "4c" the labels are remapped through ``to4c``.  For
    tasks containing "bag" five models with different seeds are trained and
    their predictions averaged; otherwise a single model is used.  The
    module-level ``params`` dict is modified in place.

    :param flags: options object; ``task`` and ``pred_path`` are read here.
    """
    X, y, Xt, _, _ = build_feature(flags)
    if '4c' in flags.task:
        y = np.argmax(to4c(onehot_encode(y)), axis=1)
    print(X.shape,Xt.shape,y.shape)

    params['num_class'] = np.max(y) + 1
    params['num_round'] = 90
    params["early_stopping_rounds"] = None
    params['verbose_eval'] = 100

    m = 5 if 'bag' in flags.task else 1
    # The accumulator is sized from the first prediction instead of being
    # hard-coded to 9 columns, which cannot match a model trained on the
    # remapped ('4c') labels.
    yp = None
    for i in range(m):
        params['seed'] = i * 9
        model = xgb_model(params)
        model.fit(X, y, print_fscore=False)
        tmp = model.predict(Xt)
        print(i,np.mean(tmp))
        if yp is None:
            yp = np.zeros(tmp.shape)
        yp += tmp
    yp /= m

    s = pd.DataFrame(yp, columns=["class%d" % k for k in range(1, yp.shape[1] + 1)])
    s['ID'] = 1 + np.arange(yp.shape[0])
    s.to_csv(flags.pred_path, index=False)
def label_test_file(self):
    """Write every misclassified validation pair to ``pred_vld.txt``.

    ``validation.json`` is read as three parallel sequences (premises,
    hypotheses, labels).  For each pair whose predicted class differs from
    its label, the two sentences, the true label name and the three class
    scores (as percentages) are written out.
    """
    def prep_alfa(X):
        # Index the texts and pad them to the model's sentence length.
        return pad_sequences(sequences=self.indexer.texts_to_sequences(X), maxlen=self.SentMaxLen)

    # Both files are handled by context managers so they are closed even if
    # prediction fails part-way through.
    with open("pred_vld.txt", "w") as outfile:
        with open('validation.json', 'r') as vld_file:
            vld = json.loads(vld_file.read())

        for prem, hypo, label in zip(vld[0], vld[1], vld[2]):
            prem_pad, hypo_pad = prep_alfa([prem]), prep_alfa([hypo])
            ans = np.reshape(self.model.predict(x=[prem_pad, hypo_pad], batch_size=1), -1)  # PREDICTION
            if np.argmax(ans) != label:
                outfile.write(prem + "\n" + hypo + "\n")
                outfile.write("Truth: " + self.rLabels[label] + "\n")
                outfile.write('Contradiction \t{:.1f}%\n'.format(float(ans[0]) * 100) +
                              'Neutral \t\t{:.1f}%\n'.format(float(ans[1]) * 100) +
                              'Entailment \t{:.1f}%\n'.format(float(ans[2]) * 100))
                outfile.write("-"*15 + "\n")
def print_result(input,model,i2wi,maxlen_input):
    """Decode an answer one position at a time and return it as text.

    The partial answer starts with ``BOS`` and, for every following position,
    is filled with the flat argmax of ``model.predict([input, partial])``.

    NOTE(review): the ``i2wi`` parameter is never read; words are looked up
    in the module-level ``i2w`` - confirm which one is intended.

    :param input: encoded input passed to the model.
    :param model: object with a ``predict`` method.
    :param i2wi: unused (see note above).
    :param maxlen_input: length of the answer buffer.
    :return: the decoded words joined by single spaces.
    """
    ans_partial = np.zeros((1, maxlen_input))
    ans_partial[0, 0] = BOS  # the index of the symbol BOS (begin of sentence)
    for position in range(1, maxlen_input):
        scores = model.predict([input, ans_partial])
        ans_partial[0, position] = np.argmax(scores)

    words = [i2w[index.astype(int)] for index in ans_partial[0]]
    return " ".join(words)

# Main function (command-line interface)
def fit(self,X,verbose=False):
    """Train ``self.n_repeat`` detectors and keep the labelling with the best likelihood.

    Each repetition creates a detector, trains a clustering with it and
    scores the resulting labels with the detector's ``loglikelihood``.  The
    highest-scoring labels are passed to ``self._detector_fit``.

    :param X: training data.
    :param verbose: forwarded to ``self._train_clf``.
    :return: ``self``.
    """
    ss = []
    labels_list = []
    # range() instead of the Python-2-only xrange().
    for _ in range(self.n_repeat):
        od = self._create_detector(*self.ad_parms0, **self.ad_parms1)
        labels = self._train_clf(od, X, self.n_clusters, verbose=verbose)
        ss.append(od.loglikelihood(X, labels))
        labels_list.append(labels)

    best_labels = labels_list[np.argmax(ss)]
    self._detector_fit(X, np.array(best_labels))

    self.clf_ = SklearnClassifier.clf(self)

    return self
def sample(self, sess, chars, vocab, num, prime, temperature):
    """Yield ``num`` characters sampled from the model after priming it.

    All but the last character of ``prime`` are run through the network to
    warm up the state.  Then, starting from the last prime character, each
    step feeds the current character, samples the next one from the
    temperature-scaled output distribution and yields it.

    :param sess: session used to run the network.
    :param chars: maps a sampled index to its character.
    :param vocab: maps a character to its index.
    :param num: number of characters to generate.
    :param prime: priming string.
    :param temperature: divisor applied to the log-probabilities before renormalising.
    """
    def as_input(symbol):
        # 1x1 array holding the vocabulary index of `symbol`.
        cell = np.zeros((1, 1))
        cell[0, 0] = vocab[symbol]
        return cell

    def weighted_pick(a):
        a = a.astype(np.float64)
        a = a.clip(min=1e-20)
        scaled = np.log(a) / temperature
        rescaled = np.exp(scaled) / (np.sum(np.exp(scaled)))
        return np.argmax(np.random.multinomial(1, rescaled, 1))

    state = self.cell.zero_state(1, tf.float32).eval()
    for char in prime[:-1]:
        feed = {self.input_data: as_input(char), self.initial_state: state}
        [state] = sess.run([self.final_state], feed)

    char = prime[-1]
    for _ in range(num):
        feed = {self.input_data: as_input(char), self.initial_state: state}
        [probs, state] = sess.run([self.probs, self.final_state], feed)
        char = chars[weighted_pick(probs[0])]
        yield char
def test_data_ann_rnn(feats, target, groups, ann, rnn):
    """
    Take two already trained models (``ann`` and ``rnn``), test them on the
    input data and return accuracy and macro F1 for both.

    The RNN predictions are scored against the sequence targets rolled by 4
    positions.

    :returns: ``[cnn_acc, cnn_f1, rnn_acc, rnn_f1, confmat,
        (rnn_pred, target_seq, groups_seq)]``
    """
    # Two-dimensional targets are reduced to class indices.
    if target.ndim == 2:
        target = np.argmax(target, 1)

    cnn_pred = ann.predict_classes(feats, 1024, verbose=0)
    cnn_scores = [accuracy_score(target, cnn_pred),
                  f1_score(target, cnn_pred, average='macro')]

    seqlen = rnn.input_shape[1]
    features_seq, target_seq, groups_seq = tools.to_sequences(feats, target, seqlen=seqlen, groups=groups)
    new_targ_seq = np.roll(target_seq, 4)
    rnn_pred = rnn.predict_classes(features_seq, 1024, verbose=0)
    rnn_scores = [accuracy_score(new_targ_seq, rnn_pred),
                  f1_score(new_targ_seq, rnn_pred, average='macro')]
    confmat = confusion_matrix(new_targ_seq, rnn_pred)

    return cnn_scores + rnn_scores + [confmat, (rnn_pred, target_seq, groups_seq)]
def reset(self):
    """ Resets the state of the generator.

    Recomputes the per-class sample indices (``self.idx``), the class labels
    (``self.labels``), the position of the smallest class (``self.slabel``),
    the number of samples drawn per class and the number of batches, then
    calls ``self.update_probabilities()``.
    """
    self.step = 0
    class_ids = np.argmax(self.Y, 1)
    labels = np.unique(class_ids)

    members = []
    fewest = len(class_ids)
    for position, label in enumerate(labels):
        hits = np.where(class_ids == label)[0]
        # Track the class with the fewest samples.
        if len(hits) < fewest:
            self.slabel = position
            fewest = len(hits)
        members.append(hits)

    self.idx = members
    self.labels = labels
    self.n_per_class = int(self.batch_size // len(labels))
    self.n_batches = int(np.ceil((fewest // self.n_per_class))) + 1
    self.update_probabilities()
def __init__(self, X, Y, batch_size,cropsize=0, truncate=False, sequential=False, random=True, val=False, class_weights=None):
    """Store the data and batching options of the generator.

    :param X: samples.
    :param Y: labels, indexed along axis 1 by class; must be as long as ``X``.
    :param batch_size: samples per batch (stored as int).
    :param cropsize: stored as ``self.cropsize``.
    :param truncate: when true the last partial batch is not counted.
    :param sequential: sequential mode; disables randomisation.
    :param random: randomise order (ignored when ``sequential`` or ``val``).
    :param val: validation mode; disables randomisation.
    :param class_weights: optional mapping class -> weight; defaults to a
        weight of 1 for every class present in ``Y``.
    """
    assert len(X) == len(Y), 'X and Y must be the same length {}!={}'.format(len(X),len(Y))
    if sequential:
        print('Using sequential mode')
    print ('starting normal generator')

    self.X = X
    self.Y = Y
    self.rnd_idx = np.arange(len(Y))
    self.Y_last_epoch = []
    self.val = val
    self.step = 0
    self.i = 0
    self.cropsize = cropsize
    self.truncate = truncate
    self.random = False if sequential or val else random
    self.batch_size = int(batch_size)
    self.sequential = sequential

    class_ids = np.argmax(Y, 1)
    if class_weights:
        self.c_weights = class_weights
    else:
        self.c_weights = dict(zip(np.unique(class_ids), np.ones(len(class_ids))))
    assert set(class_ids) == set([int(x) for x in self.c_weights.keys()]), 'not all labels in class weights'

    if truncate:
        batch_count = len(X) // batch_size
    else:
        batch_count = np.ceil(len(X) / batch_size)
    self.n_batches = int(batch_count)

    if self.random:
        self.randomize()
def next_normal(self):
    """Return the next batch and advance ``self.step``.

    With a non-zero ``self.cropsize`` every sample is cropped along its first
    axis: at a random offset (multiple of 5) when training, centred when
    validating.  In validation mode the labels are appended to
    ``self.Y_last_epoch`` and only the samples are returned; otherwise
    ``(x_batch, y_batch)`` is returned.

    NOTE(review): the per-sample ``weights`` are computed but not returned.
    """
    first = self.step * self.batch_size
    last = (self.step + 1) * self.batch_size
    x_batch = self.X[first:last]
    y_batch = self.Y[first:last]

    diff = len(x_batch[0]) - self.cropsize
    if self.cropsize != 0:
        if self.val:
            offset = diff // 2
            x_batch = [x[offset:offset + self.cropsize] for x in x_batch]
        else:
            start = np.random.choice(np.arange(0, diff + 5, 5), len(x_batch))
            x_batch = [x[start[i]:start[i] + self.cropsize, :] for i, x in enumerate(x_batch)]

    x_batch = np.array(x_batch, dtype=np.float32)
    y_batch = np.array(y_batch, dtype=np.int32)
    self.step += 1

    if self.val:
        # for validation generator, save the new y_labels
        self.Y_last_epoch.extend(y_batch)
        return x_batch

    weights = np.ones(len(y_batch))
    batch_classes = np.argmax(y_batch, 1)
    for t in np.unique(batch_classes):
        weights[batch_classes == t] = self.c_weights[t]
    return (x_batch, y_batch)
def get_batch_idx(self, idx, **kwargs):
    """Fetch a batch from the data provider, resampling by label when training.

    Outside 'train' mode ``idx`` is forwarded unchanged.  In 'train' mode
    every entry of ``idx`` is first turned into a label id (``ii %
    self._real_size`` without a stats provider, otherwise the argmax of the
    stats provider's ``y_gt`` row) and then replaced by a uniformly drawn
    member of ``self.data_provider.label_idx[label]``.

    :param idx: sequence of requested indices.
    :param kwargs: accepted for interface compatibility; not used.
    :return: whatever ``self.data_provider.get_batch_idx`` returns.
    """
    if self.mode != 'train':
        return self.data_provider.get_batch_idx(idx)

    if self.stats_provider is None:
        label_ids = [ii % self._real_size for ii in idx]
    else:
        stats_batch = self.stats_provider.get_batch_idx(idx)
        # range() instead of the Python-2-only xrange().
        label_ids = [np.argmax(stats_batch['y_gt'][ii]) for ii in range(len(idx))]

    new_idx = []
    for label in label_ids:
        data_group = self.data_provider.label_idx[label]
        # Uniform draw of one position inside the label's group.
        kk = int(np.floor(self.rnd.uniform(0, len(data_group))))
        new_idx.append(data_group[kk])
    return self.data_provider.get_batch_idx(new_idx)
def listen(self, results):
    """Accumulate top-k hits from one batch of results.

    A sample counts as correct when the argmax of its ``y_gt`` row is among
    the ``self.top_k`` highest entries of its ``score_out`` row.  Updates
    ``self.correct``, ``self.count`` and ``self.step``.

    :param results: mapping with ``'score_out'``, ``'y_gt'`` and ``'step'``.
    """
    ranked = np.argsort(results['score_out'], axis=-1)
    truth = np.argmax(results['y_gt'], axis=-1)

    hits = 0
    seen = 0
    for row, gt_class in enumerate(truth):
        # argsort is ascending: reverse, then keep the k best candidates.
        best_candidates = ranked[row][::-1][:self.top_k]
        if any(gt_class == candidate for candidate in best_candidates):
            hits += 1
        seen += 1

    self.correct += hits
    self.count += seen
    self.step = int(results['step'])
def viterbi_decode(score, transition_params):
    """
    Adapted from the TensorFlow implementation.
    Decode the highest scoring sequence of tags outside of TensorFlow.
    This should only be used at test time.

    Args:
      score: A [seq_len, num_tags] matrix of unary potentials.
      transition_params: A [num_tags, num_tags] matrix of binary potentials.

    Returns:
      viterbi: A [seq_len] list of integers containing the highest scoring tag
          indices.
      viterbi_score: A float containing the score for the Viterbi sequence.
    """
    seq_len = score.shape[0]
    trellis = numpy.zeros_like(score)
    backpointers = numpy.zeros_like(score, dtype=numpy.int32)
    trellis[0] = score[0]

    for step in range(1, seq_len):
        # candidates[i, j]: best score ending in tag i at step-1, then moving to tag j.
        candidates = numpy.expand_dims(trellis[step - 1], 1) + transition_params
        backpointers[step] = numpy.argmax(candidates, 0)
        trellis[step] = score[step] + numpy.max(candidates, 0)

    final_scores = trellis[-1]
    path = [numpy.argmax(final_scores)]
    # Follow the back-pointers from the last step down to step 1.
    for pointers in backpointers[:0:-1]:
        path.append(pointers[path[-1]])
    path.reverse()

    return path, numpy.max(final_scores)
def add(self, outputs, targets):
    """Accumulate top-k correct counts for one batch.

    :param outputs: 2D scores (one row per sample), converted with ``to_numpy``.
    :param targets: 1D class indices, or 2D rows that are reduced with argmax.

    Updates ``self.size`` and ``self.corrects[k]`` for every ``k`` in
    ``self.top_k``; the last entry of ``self.top_k`` is used as the largest k.
    """
    outputs = to_numpy(outputs)
    targets = to_numpy(targets)

    if np.ndim(targets) == 2:
        targets = np.argmax(targets, 1)

    assert np.ndim(outputs) == 2, 'wrong output size (2D expected)'
    assert np.ndim(targets) == 1, 'wrong target size (1D or 2D expected)'
    assert targets.shape[0] == outputs.shape[0], 'number of outputs and targets do not match'

    top_k = self.top_k
    max_k = int(top_k[-1])

    # Indices of the max_k largest scores per row, largest first.
    _, ranked = torch.from_numpy(outputs).topk(max_k, 1, True, True)
    predict = ranked.numpy()
    expanded_targets = targets[:, np.newaxis].repeat(predict.shape[1], 1)
    correct = (predict == expanded_targets)

    self.size += targets.shape[0]
    for k in top_k:
        self.corrects[k] += correct[:, :k].sum()
def __init__(
    self,
    normalization_parameters,
    parameters,
):
    """Set up the action-budget bookkeeping, then initialise the parent class.

    :param normalization_parameters: forwarded to the parent constructor.
    :param parameters: configuration object; its ``action_budget``,
        ``actions`` and ``rl`` fields are read here and the whole object is
        forwarded to the parent constructor.
    """
    self._quantile_states = collections.deque(
        maxlen=parameters.action_budget.window_size
    )
    self._quantile = 100 - parameters.action_budget.action_limit
    self.quantile_value = 0
    # Position of the limited action inside parameters.actions.
    # NOTE(review): argmax yields 0 when the action is not present at all.
    self._limited_action = np.argmax(
        np.array(parameters.actions) ==
        parameters.action_budget.limited_action
    )
    self._discount_factor = parameters.rl.gamma
    self._quantile_update_rate = \
        parameters.action_budget.quantile_update_rate
    self._quantile_update_frequency = \
        parameters.action_budget.quantile_update_frequency
    self._update_counter = 0
    # Zero-argument super() resolves against the defining class;
    # super(self.__class__, self) would recurse forever in a subclass.
    super().__init__(normalization_parameters, parameters)
    self._max_q = parameters.rl.maxq_learning
def _build_policy(env, predictor, epsilon):
    """Build an epsilon-greedy policy from a predictor's Q-values.

    The predictor is queried once with a one-hot feature vector per state
    (keyed by the state index as a string).  The returned callable picks a
    random action with probability ``epsilon`` and otherwise the action with
    the highest Q-value for the given state.

    :param env: object exposing ``num_states`` and ``ACTIONS``.
    :param predictor: object whose ``predict`` returns ``{action: per-state values}``.
    :param epsilon: exploration probability.
    :return: ``policy(state) -> action``.
    """
    eye = np.eye(env.num_states)
    one_hot_states = {str(i): eye[i] for i in range(env.num_states)}
    q_values = predictor.predict(one_hot_states)

    policy_vector = []
    for state_index in range(env.num_states):
        per_action = [q_values[action][state_index] for action in env.ACTIONS]
        policy_vector.append(env.ACTIONS[np.argmax(per_action)])

    def policy(state) -> str:
        if np.random.random() < epsilon:
            return np.random.choice(env.ACTIONS)
        return policy_vector[state]

    return policy
def choose_action(d, c, q_table):
    """Pick an action for state ``(d, c)`` with an epsilon-greedy rule.

    A random action is taken with probability ``epsilon`` (module-level) or
    when the state's Q-values sum to zero; each random move multiplies
    ``epsilon`` by 0.9 while it is above 0.1.  Otherwise the action with the
    highest Q-value is returned.

    :param d: first state index into ``q_table``.
    :param c: second state index into ``q_table``.
    :param q_table: Q-values indexed as ``q_table[d][c]``.
    :return: index of the chosen action.
    """
    global epsilon
    state_actions = q_table[d][c][:]

    # random move or no data recorded for this state yet
    explore = (np.random.uniform() < epsilon) or (np.sum(state_actions) == 0)
    if not explore:
        return state_actions.argmax()

    action_chose = np.random.randint(n_actions)
    # decrease random moves over time to a minimum of 10%
    if epsilon > 0.1:
        epsilon *= 0.9
    return action_chose
def get_predicted_antecedents(self, antecedents, antecedent_scores):
    """
    Forms a list of predicted antecedent labels.

    Args:
        antecedents: 2D array indexed as ``antecedents[mention, column]``
        antecedent_scores: [num_mentions, max_ant + 1] scores; column 0 means
            "no antecedent", column ``c > 0`` refers to ``antecedents[:, c - 1]``

    Returns:
        a list with one entry per mention: -1 when column 0 scores highest,
        otherwise the selected entry of ``antecedents``
    """
    best_columns = np.argmax(antecedent_scores, axis=1) - 1
    return [
        -1 if column < 0 else antecedents[row, column]
        for row, column in enumerate(best_columns)
    ]
def multiclass_classifier(X_train, Y_train, X_val, Y_val, X_test, Y_test, nb_epoch=200, batch_size=10, seed=7):
    """Train a softmax network, print test diagnostics and return the test accuracy.

    The network is fitted with learning-rate reduction and early stopping on
    the validation loss.  A confusion matrix and a classification report for
    the test set are printed.

    NOTE(review): ``seed`` is accepted but never used in this function.

    :param X_train, Y_train: training samples and label rows (one column per class).
    :param X_val, Y_val: validation data.
    :param X_test, Y_test: test data.
    :param nb_epoch: maximum number of epochs.
    :param batch_size: mini-batch size.
    :param seed: unused.
    :return: second value returned by ``clf.test_on_batch`` on the test set.
    """
    clf = softmax_network(X_train.shape[1], Y_train.shape[1])
    callbacks = [
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.01),
        EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=5, verbose=0, mode='auto'),
    ]
    clf.fit(X_train, Y_train,
            epochs=nb_epoch,
            batch_size=batch_size,
            shuffle=True,
            validation_data=(X_val, Y_val),
            callbacks=callbacks)
    acc = clf.test_on_batch(X_test, Y_test)[1]

    # confusion matrix and precision-recall
    true = np.argmax(Y_test, axis=1)
    pred = np.argmax(clf.predict(X_test), axis=1)
    # Single-argument print calls are valid on both Python 2 and Python 3.
    print(confusion_matrix(true, pred))
    print(classification_report(true, pred))
    return acc
def adjust_prediction(self, probability, image):
    """Refine a foreground-probability volume with a dense CRF.

    The probability is turned into a two-class unary term, a Gaussian
    pairwise term is always added, and a bilateral (appearance) term is added
    when ``self.schan`` is truthy.  After ``self.iter`` inference steps the
    per-voxel argmax is returned.

    :param probability: foreground probabilities; its shape defines the result's shape.
    :param image: image passed to the bilateral term (channel axis 3).
    :return: float32 array shaped like ``probability`` holding the arg-max class.
    """
    volume_shape = probability.shape
    crf = dcrf.DenseCRF(np.prod(volume_shape), 2)

    # Class 0 is the complement of the given probability, class 1 the probability itself.
    two_class = np.stack((1 - probability, probability), axis=0)
    crf.setUnaryEnergy(unary_from_softmax(two_class))

    # per dimension scale factors
    sdims = [self.sdims] * 3
    crf.addPairwiseEnergy(create_pairwise_gaussian(sdims=sdims, shape=volume_shape), compat=2)

    if self.schan:
        # per channel scale factors
        schan = [self.schan] * 6
        appearance = create_pairwise_bilateral(sdims=sdims, schan=schan, img=image, chdim=3)
        crf.addPairwiseEnergy(appearance, compat=2)

    result = crf.inference(self.iter)
    labels = np.argmax(result, axis=0)
    return labels.reshape(volume_shape).astype(np.float32)
def evaluate_performance(ladder, valid_loader, e, agg_cost_scaled, agg_supervised_cost_scaled,
                         agg_unsupervised_cost_scaled, args):
    """Print the epoch's costs together with the validation accuracy.

    Every batch of ``valid_loader`` is run through
    ``ladder.forward_encoders_clean``; the argmax over axis 1 of the output
    is compared with the targets.

    :param ladder: model exposing ``forward_encoders_clean``.
    :param valid_loader: iterable of ``(data, target)`` batches.
    :param e: zero-based epoch index (printed as ``e + 1``).
    :param agg_cost_scaled: total cost to report.
    :param agg_supervised_cost_scaled: supervised cost to report.
    :param agg_unsupervised_cost_scaled: unsupervised cost to report.
    :param args: options object; only ``cuda`` is read.
    """
    correct = 0.
    total = 0.
    for data, target in valid_loader:
        if args.cuda:
            data = data.cuda()
        data, target = Variable(data), Variable(target)
        output = ladder.forward_encoders_clean(data)
        # TODO: Do away with the below hack for GPU tensors.
        if args.cuda:
            output = output.cpu()
            target = target.cpu()
        predicted = np.argmax(output.data.numpy(), axis=1)
        expected = target.data.numpy()
        correct += np.sum(expected == predicted)
        total += expected.shape[0]

    print("Epoch:", e + 1, "\t",
          "Total Cost:", "{:.4f}".format(agg_cost_scaled), "\t",
          "Supervised Cost:", "{:.4f}".format(agg_supervised_cost_scaled), "\t",
          "Unsupervised Cost:", "{:.4f}".format(agg_unsupervised_cost_scaled), "\t",
          "Validation Accuracy:", correct / total)
def extract_digits(self, image):
    """
    Extract digits from a binary image representing a sudoku.

    The image is analysed in four orientations (rotated by 90 degrees each
    time).  In every orientation each connected component is assigned to the
    nearest grid cell and classified; when two components land in the same
    cell the one with the higher ``prediction[0, 0]`` wins.  The orientation
    with the highest accumulated score is returned.

    :param image: binary image/sudoku
    :return: 9x9 object array holding the prediction stored for every cell
        (cells without a digit keep the integer 0)
    """
    prob = np.zeros(4, dtype=np.float32)
    digits = np.zeros((4, 9, 9), dtype=object)
    for i in range(4):
        labeled, features = label(image, structure=CROSS)
        objs = find_objects(labeled)
        for obj in objs:
            roi = image[obj]
            # center of bounding box (floor division keeps integer coordinates)
            cy = (obj[0].stop + obj[0].start) // 2
            cx = (obj[1].stop + obj[1].start) // 2
            dists = cdist([[cy, cx]], CENTROIDS, 'euclidean')
            pos = np.argmin(dists)
            # Grid coordinates must be integers to index `digits`.
            cy, cx = pos % 9, pos // 9
            # 28x28 image, center relative to sudoku
            prediction = self.classifier.classify(morph(roi))
            current = digits[i, cy, cx]
            # Empty cells still hold the integer 0 they were initialised with.
            # An explicit type/value check replaces the identity test "is 0".
            if isinstance(current, int) and current == 0:
                # Newly found digit
                digits[i, cy, cx] = prediction
                prob[i] += prediction[0, 0]
            elif prediction[0, 0] > current[0, 0]:
                # Overlapping! (noise), choose the most probable prediction
                prob[i] -= current[0, 0]
                digits[i, cy, cx] = prediction
                prob[i] += prediction[0, 0]
        image = np.rot90(image)
    logging.info(prob)
    return digits[np.argmax(prob)]
def predict(self, data):
    """Return the predicted class for every name in ``data``.

    Each name is parsed, wrapped into a batch of one and passed to the model;
    the flat argmax of the model output is mapped to its class.

    :param data: iterable of names.
    :return: list of classes, one per name, in input order.
    """
    def classify(name):
        batch = np.array([self.__parse_name(name)])
        scores = self.__model.predict([batch])
        return self.__indexes_classes[np.argmax(scores)]

    return [classify(name) for name in data]
def binarize_predictions(array, task='binary.classification'):
    ''' Turn predictions into decisions {0,1}.

    For multiclass problems with more than one column the class with the
    largest score in each row is selected (first one on ties); in every other
    case values are thresholded at 0.5.

    :param array: 2D array of scores.
    :param task: task name; only 'multiclass.classification' selects the argmax rule.
    :return: float array of zeros and ones with the shape of ``array``.
    '''
    bin_array = np.zeros(array.shape)

    use_threshold = (task != 'multiclass.classification') or (array.shape[1] == 1)
    if use_threshold:
        bin_array[array >= 0.5] = 1
        return bin_array

    for row_index in range(array.shape[0]):
        bin_array[row_index, np.argmax(array[row_index, :])] = 1
    return bin_array
def run_epoch_doc(docs, labels, tags, tm, pad_id, cf):
    """Print the classification accuracy of the topic model over all documents.

    Documents are processed in batches of ``cf.batch_size``; for every batch
    the argmax of ``tm.sup_probs`` is compared with the labels of the
    ``num_docs`` real documents reported by ``get_batch_doc``.

    Relies on the module-level ``sess`` and ``get_batch_doc``.

    :param docs: documents.
    :param labels: document labels.
    :param tags: document tags.
    :param tm: topic model exposing ``sup_probs``, ``doc``, ``label``,
        ``sup_mask`` and ``tag``.
    :param pad_id: padding id forwarded to ``get_batch_doc``.
    :param cf: config; ``batch_size``, ``doc_len`` and ``tag_len`` are read.
    """
    batches = int(math.ceil(float(len(docs))/cf.batch_size))
    accs = []
    # range()/print() replace the Python-2-only xrange and print statement.
    for b in range(batches):
        d, y, m, t, num_docs = get_batch_doc(docs, labels, tags, b, cf.doc_len, cf.tag_len, cf.batch_size, pad_id)
        prob = sess.run(tm.sup_probs, {tm.doc: d, tm.label: y, tm.sup_mask: m, tm.tag: t})
        pred = np.argmax(prob, axis=1)
        # Only the first num_docs entries of the batch are scored.
        accs.extend(pred[:num_docs] == y[:num_docs])

    print("\ntest classification accuracy = %.3f" % np.mean(accs))
def gen_sent_on_doc(docs, tags, idxvocab, vocabxid, start_symbol, end_symbol, cf):
    """Write, for every document, its topics, key words and generated sentences.

    The document text is read from ``args.input_doc`` (tabs become newlines)
    and the report is written to ``args.gen_sent_on_doc``.  For each document
    the function writes the representative topics and words, one greedy
    sentence (temperature 0) and ``gen_num`` sampled sentences for each
    temperature in ``gen_temps``.

    Relies on module-level ``args``, ``tm``, ``sess``, ``topn``,
    ``initializer``, ``LM``, ``get_batch_doc``, ``pad_symbol``, ``gen_temps``
    and ``gen_num``.

    :param docs: documents.
    :param tags: document tags.
    :param idxvocab: maps a word id to its word.
    :param vocabxid: maps a word to its id.
    :param start_symbol: word whose id starts every generated sentence.
    :param end_symbol: word whose id is passed as the end marker.
    :param cf: config; ``doc_len``, ``tag_len`` and ``lm_sent_len`` are read
        here and the object is forwarded to ``LM``.
    """
    topics, _ = tm.get_topics(sess, topn=topn)
    topics = [" ".join([idxvocab[w] for w in t]) for t in topics]

    # Both files are opened with context managers so they are always closed.
    with codecs.open(args.input_doc, "r", "utf-8") as doc_file:
        doc_text = [item.replace("\t", "\n") for item in doc_file.readlines()]

    with codecs.open(args.gen_sent_on_doc, "w", "utf-8") as output:
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mgen = LM(is_training=False, vocab_size=len(idxvocab), batch_size=1, num_steps=1,
                      config=cf, reuse_conv_variables=True)

        for d in range(len(docs)):
            output.write("\n" + "="*100 + "\n")
            output.write("Doc " + str(d) + ":\n")
            output.write(doc_text[d])

            doc, _, _, t, _ = get_batch_doc(docs, None, tags, d, cf.doc_len, cf.tag_len, 1, vocabxid[pad_symbol])
            best_topics, best_words = mgen.get_topics_on_doc(sess, doc, t, topn)

            output.write("\nRepresentative topics:\n")
            output.write("\n".join([("[%.3f] %s: %s" % (item[1], str(item[0]).zfill(3), topics[item[0]]))
                                    for item in best_topics]) + "\n")

            output.write("\nRepresentative words:\n")
            output.write("\n".join([("[%.3f] %s" % (item[1], idxvocab[item[0]])) for item in best_words]) + "\n")

            output.write("\nSentence generation (greedy; argmax):" + "\n")
            s = mgen.generate_on_doc(sess, doc, t, vocabxid[start_symbol], 0, cf.lm_sent_len+10,
                                     vocabxid[end_symbol])
            output.write("[0] " + " ".join([idxvocab[item] for item in s]) + "\n")

            for temp in gen_temps:
                output.write("\nSentence generation (random; temperature = " + str(temp) + "):\n")
                # range() instead of the Python-2-only xrange().
                for i in range(gen_num):
                    s = mgen.generate_on_doc(sess, doc, t, vocabxid[start_symbol], temp,
                                             cf.lm_sent_len+10, vocabxid[end_symbol])
                    output.write("[" + str(i) + "] " + " ".join([idxvocab[item] for item in s]) + "\n")

######
#main#
######

#load the vocabulary
def sample(self, probs, temperature):
    """Pick an index from ``probs``.

    With ``temperature == 0`` the argmax is returned.  Otherwise the
    log-probabilities are divided by the temperature, renormalised, and one
    index is drawn from the resulting distribution.

    :param probs: array of probabilities.
    :param temperature: 0 for greedy choice, otherwise the sampling temperature.
    :return: the selected index.
    """
    if temperature == 0:
        return np.argmax(probs)

    # convert to float64 for higher precision
    scaled = np.log(probs.astype(np.float64)) / temperature
    weights = np.exp(scaled) / math.fsum(np.exp(scaled))
    draw = np.random.multinomial(1, weights, 1)
    return np.argmax(draw)

#generate a sentence given conv_hidden
def write_predictions(self, inputs):
    """Write the predicted label of every input to ``<model_name_prefix>.predictions``.

    The model's argmax (axis 1) is shifted by one and looked up in the
    inverse of ``self.label_map``; one label is written per line.

    :param inputs: model inputs, passed to ``self.model.predict``.
    """
    rev_label_map = {j: i for (i, j) in self.label_map.items()}
    predictions = numpy.argmax(self.model.predict(inputs), axis=1)
    # "with" closes the file even if a lookup fails; file.write replaces the
    # Python-2-only "print >>file" statement.
    with open("%s.predictions" % self.model_name_prefix, "w") as test_output_file:
        for prediction in predictions:
            # The label map is looked up with the class index shifted by one.
            test_output_file.write(str(rev_label_map[prediction + 1]) + "\n")
def test(self, inputs, targets):
    """Evaluate the trained model and return the predicted labels.

    The second metric returned by ``self.model.evaluate`` is reported on
    stderr as the test accuracy.

    :param inputs: model inputs.
    :param targets: expected outputs, used only for the evaluation metrics.
    :return: list with the label predicted for every input.
    :raises RuntimeError: when no model has been trained.
    """
    if not self.model:
        # Call syntax is valid on Python 2 and 3, unlike ``raise X, "msg"``.
        raise RuntimeError("Model not trained!")
    metrics = self.model.evaluate(inputs, targets)
    # The first metric is loss.
    sys.stderr.write("Test accuracy: %.4f\n" % (metrics[1]))
    predictions = numpy.argmax(self.model.predict(inputs), axis=1)
    rev_label_map = {ind: label for label, ind in self.label_map.items()}
    predicted_labels = [rev_label_map[pred] for pred in predictions]
    return predicted_labels
def calculate_assignments(self, assignment_weights):
    """Return, for every row of ``assignment_weights``, the column with the largest weight."""
    return np.argmax(assignment_weights, axis=1)
def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10):
    """Compute cross-validated ROC points and per-fold accuracy for embedding pairs.

    Pairs are compared by squared Euclidean distance.  For every fold the
    threshold with the best training accuracy is selected and evaluated on
    the fold's test pairs; true/false positive rates are computed on the test
    pairs for every threshold and averaged over folds.

    :param thresholds: candidate distance thresholds.
    :param embeddings1: first embedding of every pair (2D).
    :param embeddings2: second embedding of every pair (same shape).
    :param actual_issame: ground truth per pair.
    :param nrof_folds: number of folds.
    :return: ``(tpr, fpr, accuracy)``.
    """
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    tprs = np.zeros((nrof_folds, nrof_thresholds))
    fprs = np.zeros((nrof_folds, nrof_thresholds))
    accuracy = np.zeros((nrof_folds))

    dist = np.sum(np.square(np.subtract(embeddings1, embeddings2)), 1)
    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        train_dist, train_same = dist[train_set], actual_issame[train_set]

        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))
        for position, threshold in enumerate(thresholds):
            acc_train[position] = calculate_accuracy(threshold, train_dist, train_same)[2]
        best_threshold_index = np.argmax(acc_train)

        for position, threshold in enumerate(thresholds):
            fold_tpr, fold_fpr, _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set])
            tprs[fold_idx, position] = fold_tpr
            fprs[fold_idx, position] = fold_fpr
        accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index],
                                                dist[test_set], actual_issame[test_set])[2]

    tpr = np.mean(tprs, 0)
    fpr = np.mean(fprs, 0)
    return tpr, fpr, accuracy
def length_histogram(fqin, name):
    '''
    Save a histogram of the read lengths as a PNG and return the edges of
    the bin containing the most reads.

    :param fqin: input passed to ``get_lengths``.
    :param name: path of the PNG file to write.
    :return: ``(left_edge, right_edge)`` of the fullest bin.
    '''
    logging.info("Creating length histogram to find bin with most reads.")
    lengths = get_lengths(fqin)

    plt.hist(lengths, bins='auto')
    plt.savefig(name, format='png', dpi=100)
    plt.close("all")

    counts, edges = np.histogram(lengths, bins='auto')
    fullest = np.argmax(counts)
    return (edges[fullest], edges[fullest + 1])
def test_estimate_tree(num_edges):
    """Check that ``estimate_tree`` returns a spanning tree, and the best one when enumerable.

    The result must have ``num_edges`` edges and touch every vertex.  When
    the vertex count is below ``len(TREE_GENERATORS)`` all spanning trees are
    enumerated and the result must be the one with the highest total logit.
    """
    set_random_seed(0)
    E = num_edges
    V = 1 + E
    grid = make_complete_graph(V)
    K = grid.shape[1]
    edge_logits = np.random.random([K]) - 0.5
    edges = estimate_tree(grid, edge_logits)

    # Check size.
    assert len(edges) == E
    for v in range(V):
        assert any(v in edge for edge in edges)

    # Check optimality.
    edges = tuple(edges)
    if V < len(TREE_GENERATORS):
        all_trees = get_spanning_trees(V)
        assert edges in all_trees
        all_trees = list(all_trees)
        logits = [
            sum(edge_logits[find_complete_edge(u, v)] for (u, v) in tree)
            for tree in all_trees
        ]
        expected = all_trees[np.argmax(logits)]
        assert edges == expected