The following code examples, extracted from open-source Python projects, illustrate how to use data_utils.vectorize_data().
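Two signatures of vectorize_data appear below: a four-argument form used for the single bAbI QA task, vectorize_data(data, word_idx, sentence_size, memory_size), and a six-argument form used in the candidate-ranking dialog setting, which additionally takes batch_size and n_cand. None of the excerpts includes the function body, so as orientation here is a minimal sketch of what the four-argument QA variant typically does. The padding scheme and return types are assumptions based on common MemN2N implementations, not the project's own data_utils code.

    import numpy as np

    def vectorize_data(data, word_idx, sentence_size, memory_size):
        # Sketch: turn (story, query, answer) triples into padded index arrays.
        S, Q, A = [], [], []
        for story, query, answer in data:
            # keep the most recent memory_size sentences, pad each to sentence_size
            ss = []
            for sentence in story[-memory_size:]:
                pad = max(0, sentence_size - len(sentence))
                ss.append([word_idx[w] for w in sentence] + [0] * pad)
            # pad the story up to memory_size sentences (index 0 is the nil word)
            ss += [[0] * sentence_size] * max(0, memory_size - len(ss))
            # pad the query to sentence_size
            q = [word_idx[w] for w in query] + [0] * max(0, sentence_size - len(query))
            # encode the answer as a one-hot (or multi-hot) vector over the vocabulary
            y = np.zeros(len(word_idx) + 1)
            for a in answer:
                y[word_idx[a]] = 1
            S.append(ss)
            Q.append(q)
            A.append(y)
        return np.array(S), np.array(Q), np.array(A)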
def test(self):
    ckpt = tf.train.get_checkpoint_state(self.model_dir)
    if ckpt and ckpt.model_checkpoint_path:
        self.saver.restore(self.sess, ckpt.model_checkpoint_path)
    else:
        print("...no checkpoint found...")
    if self.isInteractive:
        self.interactive()
    else:
        testS, testQ, testA = vectorize_data(
            self.testData, self.word_idx, self.sentence_size,
            self.batch_size, self.n_cand, self.memory_size)
        n_test = len(testS)
        print("Testing Size", n_test)
        test_preds = self.batch_predict(testS, testQ, n_test)
        test_acc = metrics.accuracy_score(test_preds, testA)
        print("Testing Accuracy:", test_acc)
def test(self):
    ckpt = tf.train.get_checkpoint_state(self.model_dir)
    if ckpt and ckpt.model_checkpoint_path:
        self.saver.restore(self.sess, ckpt.model_checkpoint_path)
    else:
        print("...no checkpoint found...")
    if self.isInteractive:
        self.interactive()
    else:
        testP, testS, testQ, testA = vectorize_data(
            self.testData, self.word_idx, self.sentence_size,
            self.batch_size, self.n_cand, self.memory_size)
        n_test = len(testS)
        print("Testing Size", n_test)
        test_preds = self.batch_predict(testP, testS, testQ, n_test)
        test_acc = metrics.accuracy_score(test_preds, testA)
        print("Testing Accuracy:", test_acc)
        # print(testA)
        # for pred in test_preds:
        #     print(pred, self.indx2candid[pred])
def test(self):
    ckpt = tf.train.get_checkpoint_state(self.model_dir)
    if ckpt and ckpt.model_checkpoint_path:
        self.saver.restore(self.sess, ckpt.model_checkpoint_path)
    else:
        print("...no checkpoint found...")
    if self.isInteractive:
        self.interactive()
    else:
        testS, testQ, testA = vectorize_data(
            self.testData, self.word_idx, self.sentence_size,
            self.batch_size, self.n_cand, self.memory_size)
        n_test = len(testS)
        print("Testing Size", n_test)
        test_preds = self.batch_predict(testS, testQ, n_test)
        test_acc = metrics.accuracy_score(test_preds, testA)
        print("Testing Accuracy:", test_acc)
        # print(testA)
        # for pred in test_preds:
        #     print(pred, self.indx2candid[pred])
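The test() and train() examples rely on a batch_predict helper that is not included in any of the excerpts. A plausible sketch, assuming the model exposes the predict(S, Q) method seen in the interactive() examples below, is to run prediction in batch_size chunks and concatenate the results; the profile-aware variant would take the extra testP argument and pass it through to the model in the same way.

    def batch_predict(self, S, Q, n):
        # Hypothetical helper: predict in fixed-size chunks to bound memory use.
        preds = []
        for start in range(0, n, self.batch_size):
            end = start + self.batch_size
            pred = self.model.predict(S[start:end], Q[start:end])
            preds += list(pred)
        return preds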
def load_data(self):
    # single babi task
    # TODO: refactor all this running elsewhere
    # task data
    train, test = load_task(data_dir, task_id)
    vocab = sorted(reduce(lambda x, y: x | y,
                          (set(list(chain.from_iterable(s)) + q + a)
                           for s, q, a in train + test)))
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
    self.memory_size = 50
    self.max_story_size = max(map(len, (s for s, _, _ in train + test)))
    self.mean_story_size = int(np.mean(map(len, (s for s, _, _ in train + test))))
    self.sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in train + test)))
    self.query_size = max(map(len, (q for _, q, _ in train + test)))
    self.memory_size = min(self.memory_size, self.max_story_size)
    self.vocab_size = len(word_idx) + 1  # +1 for nil word
    self.sentence_size = max(self.query_size, self.sentence_size)  # for the position
    print("Longest sentence length", self.sentence_size)
    print("Longest story length", self.max_story_size)
    print("Average story length", self.mean_story_size)
    # train/validation/test sets
    self.S, self.Q, self.A = vectorize_data(train, word_idx, self.sentence_size, self.memory_size)
    self.trainS, self.valS, self.trainQ, self.valQ, self.trainA, self.valA = cross_validation.train_test_split(
        self.S, self.Q, self.A, test_size=.1)  # TODO: randomstate
    self.testS, self.testQ, self.testA = vectorize_data(test, word_idx, self.sentence_size, self.memory_size)
    print(self.testS[0])
    print("Training set shape", self.trainS.shape)
    # params
    self.n_train = self.trainS.shape[0]
    self.n_test = self.testS.shape[0]
    self.n_val = self.valS.shape[0]
    print("Training Size", self.n_train)
    print("Validation Size", self.n_val)
    print("Testing Size", self.n_test)
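Assuming an implementation along the lines of the sketch near the top of this page, self.S has shape (n_examples, memory_size, sentence_size), self.Q has shape (n_examples, sentence_size), and self.A has shape (n_examples, vocab_size), which matches the self.trainS.shape[0] bookkeeping at the end of load_data. This QA-task example uses the four-argument signature, whereas the dialog-task examples on this page also pass batch_size and n_cand and appear to get back integer candidate labels (compare self.indx2candid[preds[0]] in interactive()). Note also that cross_validation.train_test_split is the old scikit-learn API; in current versions the function lives in sklearn.model_selection.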
def interactive(self):
    context = []
    u = None
    r = None
    nid = 1
    while True:
        line = raw_input('--> ').strip().lower()
        if line == 'exit':
            break
        if line == 'restart':
            context = []
            nid = 1
            print("clear memory")
            continue
        u = tokenize(line)
        data = [(context, u, -1)]
        s, q, a = vectorize_data(
            data, self.word_idx, self.sentence_size,
            self.batch_size, self.n_cand, self.memory_size)
        preds = self.model.predict(s, q)
        r = self.indx2candid[preds[0]]
        print(r)
        r = tokenize(r)
        u.append('$u')
        u.append('#' + str(nid))
        r.append('$r')
        r.append('#' + str(nid))
        context.append(u)
        context.append(r)
        nid += 1
def interactive(self):
    context = [['male', 'young', '$r', '#0']]
    # context = []
    u = None
    r = None
    nid = 1
    while True:
        line = input('--> ').strip().lower()
        if line == 'exit':
            break
        if line == 'restart':
            context = [['female', 'young', '$r', '#0']]
            # context = []
            nid = 1
            print("clear memory")
            continue
        u = tokenize(line)
        data = [(context, u, -1)]
        s, q, a = vectorize_data(
            data, self.word_idx, self.sentence_size,
            self.batch_size, self.n_cand, self.memory_size)
        preds = self.model.predict(s, q)
        r = self.indx2candid[preds[0]]
        print(r)
        r = tokenize(r)
        u.append('$u')
        u.append('#' + str(nid))
        r.append('$r')
        r.append('#' + str(nid))
        context.append(u)
        context.append(r)
        nid += 1
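In both interactive() variants, the current conversation is wrapped as a single example, data = [(context, u, -1)], and vectorized on the fly. The answer slot is presumably a placeholder (-1) because there is no gold response at inference time; the returned a is never used, and only s and q are fed to model.predict. The second variant additionally seeds the context with a profile sentence such as ['male', 'young', '$r', '#0'], so the memory is not empty on the first turn.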
def train(self):
    trainS, trainQ, trainA = vectorize_data(
        self.trainData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    valS, valQ, valA = vectorize_data(
        self.valData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    n_train = len(trainS)
    n_val = len(valS)
    print("Training Size", n_train)
    print("Validation Size", n_val)
    tf.set_random_seed(self.random_state)
    batches = zip(range(0, n_train - self.batch_size, self.batch_size),
                  range(self.batch_size, n_train, self.batch_size))
    batches = [(start, end) for start, end in batches]
    best_validation_accuracy = 0
    for t in range(1, self.epochs + 1):
        np.random.shuffle(batches)
        total_cost = 0.0
        for start, end in batches:
            s = trainS[start:end]
            q = trainQ[start:end]
            a = trainA[start:end]
            cost_t = self.model.batch_fit(s, q, a)
            total_cost += cost_t
        if t % self.evaluation_interval == 0:
            train_preds = self.batch_predict(trainS, trainQ, n_train)
            val_preds = self.batch_predict(valS, valQ, n_val)
            train_acc = metrics.accuracy_score(np.array(train_preds), trainA)
            val_acc = metrics.accuracy_score(val_preds, valA)
            print('-----------------------')
            print('Epoch', t)
            print('Total Cost:', total_cost)
            print('Training Accuracy:', train_acc)
            print('Validation Accuracy:', val_acc)
            print('-----------------------')
            # write summary
            train_acc_summary = tf.summary.scalar(
                'task_' + str(self.task_id) + '/' + 'train_acc',
                tf.constant((train_acc), dtype=tf.float32))
            val_acc_summary = tf.summary.scalar(
                'task_' + str(self.task_id) + '/' + 'val_acc',
                tf.constant((val_acc), dtype=tf.float32))
            merged_summary = tf.summary.merge(
                [train_acc_summary, val_acc_summary])
            summary_str = self.sess.run(merged_summary)
            self.summary_writer.add_summary(summary_str, t)
            self.summary_writer.flush()
            if val_acc > best_validation_accuracy:
                best_validation_accuracy = val_acc
                self.saver.save(self.sess, self.model_dir + 'model.ckpt',
                                global_step=t)
def train(self):
    trainP, trainS, trainQ, trainA = vectorize_data(
        self.trainData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    valP, valS, valQ, valA = vectorize_data(
        self.valData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    n_train = len(trainS)
    n_val = len(valS)
    print("Training Size", n_train)
    print("Validation Size", n_val)
    tf.set_random_seed(self.random_state)
    batches = zip(range(0, n_train - self.batch_size, self.batch_size),
                  range(self.batch_size, n_train, self.batch_size))
    batches = [(start, end) for start, end in batches]
    best_validation_accuracy = 0
    for t in range(1, self.epochs + 1):
        print('Epoch', t)
        np.random.shuffle(batches)
        total_cost = 0.0
        for start, end in batches:
            p = trainP[start:end]
            s = trainS[start:end]
            q = trainQ[start:end]
            a = trainA[start:end]
            cost_t = self.model.batch_fit(p, s, q, a)
            total_cost += cost_t
        if t % self.evaluation_interval == 0:
            train_preds = self.batch_predict(trainP, trainS, trainQ, n_train)
            val_preds = self.batch_predict(valP, valS, valQ, n_val)
            train_acc = metrics.accuracy_score(np.array(train_preds), trainA)
            val_acc = metrics.accuracy_score(val_preds, valA)
            print('-----------------------')
            print('Epoch', t)
            print('Total Cost:', total_cost)
            print('Training Accuracy:', train_acc)
            print('Validation Accuracy:', val_acc)
            print('-----------------------')
            # write summary
            # train_acc_summary = tf.scalar_summary('task_' + str(self.task_id) + '/' + 'train_acc', tf.constant((train_acc), dtype=tf.float32))
            # val_acc_summary = tf.scalar_summary('task_' + str(self.task_id) + '/' + 'val_acc', tf.constant((val_acc), dtype=tf.float32))
            # merged_summary = tf.merge_summary([train_acc_summary, val_acc_summary])
            train_acc_summary = tf.summary.scalar(
                'task_' + str(self.task_id) + '/' + 'train_acc',
                tf.constant((train_acc), dtype=tf.float32))
            val_acc_summary = tf.summary.scalar(
                'task_' + str(self.task_id) + '/' + 'val_acc',
                tf.constant((val_acc), dtype=tf.float32))
            merged_summary = tf.summary.merge(
                [train_acc_summary, val_acc_summary])
            summary_str = self.sess.run(merged_summary)
            self.summary_writer.add_summary(summary_str, t)
            self.summary_writer.flush()
            if val_acc > best_validation_accuracy:
                best_validation_accuracy = val_acc
                self.saver.save(self.sess, self.model_dir + 'model.ckpt',
                                global_step=t)
def train(self):
    trainS, trainQ, trainA = vectorize_data(
        self.trainData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    valS, valQ, valA = vectorize_data(
        self.valData, self.word_idx, self.sentence_size,
        self.batch_size, self.n_cand, self.memory_size)
    n_train = len(trainS)
    n_val = len(valS)
    print("Training Size", n_train)
    print("Validation Size", n_val)
    tf.set_random_seed(self.random_state)
    batches = zip(range(0, n_train - self.batch_size, self.batch_size),
                  range(self.batch_size, n_train, self.batch_size))
    batches = [(start, end) for start, end in batches]
    best_validation_accuracy = 0
    for t in range(1, self.epochs + 1):
        print('Epoch', t)
        np.random.shuffle(batches)
        total_cost = 0.0
        for start, end in batches:
            s = trainS[start:end]
            q = trainQ[start:end]
            a = trainA[start:end]
            cost_t = self.model.batch_fit(s, q, a)
            total_cost += cost_t
        if t % self.evaluation_interval == 0:
            train_preds = self.batch_predict(trainS, trainQ, n_train)
            val_preds = self.batch_predict(valS, valQ, n_val)
            train_acc = metrics.accuracy_score(np.array(train_preds), trainA)
            val_acc = metrics.accuracy_score(val_preds, valA)
            print('-----------------------')
            print('Epoch', t)
            print('Total Cost:', total_cost)
            print('Training Accuracy:', train_acc)
            print('Validation Accuracy:', val_acc)
            print('-----------------------')
            # write summary
            # train_acc_summary = tf.scalar_summary('task_' + str(self.task_id) + '/' + 'train_acc', tf.constant((train_acc), dtype=tf.float32))
            # val_acc_summary = tf.scalar_summary('task_' + str(self.task_id) + '/' + 'val_acc', tf.constant((val_acc), dtype=tf.float32))
            # merged_summary = tf.merge_summary([train_acc_summary, val_acc_summary])
            train_acc_summary = tf.summary.scalar(
                'task_' + str(self.task_id) + '/' + 'train_acc',
                tf.constant((train_acc), dtype=tf.float32))
            val_acc_summary = tf.summary.scalar(
                'task_' + str(self.task_id) + '/' + 'val_acc',
                tf.constant((val_acc), dtype=tf.float32))
            merged_summary = tf.summary.merge(
                [train_acc_summary, val_acc_summary])
            summary_str = self.sess.run(merged_summary)
            self.summary_writer.add_summary(summary_str, t)
            self.summary_writer.flush()
            if val_acc > best_validation_accuracy:
                best_validation_accuracy = val_acc
                self.saver.save(self.sess, self.model_dir + 'model.ckpt',
                                global_step=t)