def fit(self, X, y=None):
    """
    Train the wrapped CNN on (X, y), one mini-batch at a time.

    Batches come from ``batch_iter`` using ``self.FLAGS.batch_size`` and
    ``self.FLAGS.num_epochs``. Each batch runs one step of
    ``self.optimizer`` in ``self.sess`` with dropout set to
    ``self.FLAGS.dropout_keep_prob``.

    Args:
        X: iterable of training inputs, fed to ``self.cnn.input_x``.
        y: iterable of targets paired element-wise with ``X``, fed to
            ``self.cnn.input_y``. The ``None`` default is kept only so the
            signature stays unchanged; training needs real targets.

    Returns:
        self, so that calls can be chained.

    Raises:
        TypeError: if ``y`` is None.
    """
    if y is None:
        # zip(X, None) would raise a TypeError that does not mention ``y``;
        # fail early with a message that names the actual problem.
        raise TypeError("fit() requires target values: y must not be None")

    # Generate batches
    batches = batch_iter(
        list(zip(X, y)), self.FLAGS.batch_size, self.FLAGS.num_epochs)

    # Training loop. For each batch...
    for batch in batches:
        x_batch, y_batch = zip(*batch)
        feed_dict = {
            self.cnn.input_x: x_batch,
            self.cnn.input_y: y_batch,
            self.cnn.dropout_keep_prob: self.FLAGS.dropout_keep_prob,
        }
        # Loss and accuracy are still evaluated alongside the optimizer step,
        # but nothing here consumes them, so they are not bound to names.
        self.sess.run(
            [self.optimizer, self.cnn.loss, self.cnn.accuracy],
            feed_dict)

    return self
def batch_dev_step(x_batch, y_batch, writer=None):
    """
    Evaluates model on a dev set, one mini-batch at a time.

    For every batch the current global step, the dev summaries, the loss
    and the accuracy are fetched with dropout disabled (keep probability
    1.0) and printed together with a timestamp.

    Args:
        x_batch: dev inputs, fed to ``cnn.input_x``.
        y_batch: dev labels paired element-wise with ``x_batch``, fed to
            ``cnn.input_y``.
        writer: optional summary writer; when truthy, each batch's
            summaries are added to it under the fetched step.
    """
    # Pass a concrete list: on Python 3 zip() is a lazy, one-shot iterator.
    batches = data_helpers.batch_iter(
        list(zip(x_batch, y_batch)), FLAGS.batch_size, 1)
    for batch in batches:
        # Separate names so the function's own parameters are not rebound.
        x_chunk, y_chunk = zip(*batch)
        feed_dict = {
            cnn.input_x: x_chunk,
            cnn.input_y: y_chunk,
            cnn.dropout_keep_prob: 1.0,
        }
        step, summaries, loss, accuracy = sess.run(
            [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
            feed_dict)
        time_str = datetime.datetime.now().isoformat()
        print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
        if writer:
            writer.add_summary(summaries, step)
# Generate batches
def dev_step(x_dev, y_dev):
    """
    Evaluates model on a dev set and logs the mean loss and accuracy.

    The dev set is split into mini-batches of ``FLAGS.batch_size``. Loss
    and accuracy are averaged over the batches (each batch weighted
    equally) and logged once at the end.

    Args:
        x_dev: dev inputs, fed to ``rnn.input_x``.
        y_dev: dev labels paired element-wise with ``x_dev``, fed to
            ``rnn.input_y``.
    """
    batches = data_helpers.batch_iter(
        list(zip(x_dev, y_dev)), FLAGS.batch_size, 1)
    loss_sum = 0
    accuracy_sum = 0
    count = 0
    step = None
    summaries = None
    for batch in batches:
        x_batch, y_batch = zip(*batch)
        feed_dict = {
            rnn.input_x: x_batch,
            rnn.input_y: y_batch,
            rnn.dropout_keep_prob: 1.0,
            rnn.batch_size: len(x_batch),
            rnn.real_len: real_len(x_batch),
        }
        step, summaries, loss, accuracy = sess.run(
            [global_step, dev_summary_op, rnn.loss, rnn.accuracy],
            feed_dict)
        loss_sum += loss
        # Accumulate the accuracy itself; adding the loss here made the
        # reported accuracy equal to the mean loss.
        accuracy_sum += accuracy
        count += 1

    if count == 0:
        # Nothing was evaluated: avoid dividing by zero and writing a
        # summary that does not exist.
        logger.info("dev_step: no dev batches to evaluate")
        return

    loss = loss_sum / count
    accuracy = accuracy_sum / count
    time_str = datetime.datetime.now().isoformat()
    logger.info("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
    # Only the summaries of the last evaluated batch are written.
    dev_summary_writer.add_summary(summaries, step)
# Generate batches
def predict(self, x_raw):
    """
    Run the loaded graph over raw input strings and return its predictions.

    Each element of ``x_raw`` is stripped and split into its individual
    items with ``list()``, padded to ``sequence_length`` by
    ``data_helpers.pad_sentences``, and mapped to ids through
    ``vocabulary`` (unknown entries become 0). The ids are then fed to the
    graph in batches of ``FLAGS.batch_size`` with dropout disabled.

    Args:
        x_raw: iterable of objects exposing ``strip()`` (presumably
            strings - confirm against the caller).

    Returns:
        The per-batch outputs of the ``output/predictions`` tensor joined
        into one array, or an empty list when no batch was produced.
    """
    token_rows = [list(text.strip()) for text in x_raw]
    padded_rows, _ = data_helpers.pad_sentences(token_rows, sequence_length)
    x_test = np.array(
        [[vocabulary.get(token, 0) for token in row] for row in padded_rows])

    def first_output(op_name):
        # Look up a graph operation by name and take its first output tensor.
        return self.graph.get_operation_by_name(op_name).outputs[0]

    input_x = first_output("input_x")
    dropout_keep_prob = first_output("dropout_keep_prob")
    predictions = first_output("output/predictions")

    # One pass over the data, in order.
    batches = data_helpers.batch_iter(
        list(x_test), FLAGS.batch_size, 1, shuffle=False)

    all_predictions = []
    for x_test_batch in batches:
        feed = {input_x: x_test_batch, dropout_keep_prob: 1.0}
        batch_predictions = self.sess.run(predictions, feed)
        all_predictions = np.concatenate([all_predictions, batch_predictions])
    return all_predictions
#test predict
#========================================
def get_batches():
    """
    Return a batch iterator over the lexical-level features of ``df``.

    ``df`` is read from the enclosing scope. Batches are sized by
    ``FLAGS.batch_size`` and repeated for ``FLAGS.num_epochs`` epochs.
    """
    print("Loading train data...")
    features = lexical_level_features(df)
    return data_helpers.batch_iter(
        features, FLAGS.batch_size, FLAGS.num_epochs)
def get_batches_test(path="/home/sahil/ML-bucket/test.csv"):
    """
    Return a single-epoch, unshuffled batch iterator over the test data.

    Args:
        path: location of the test CSV handed to ``data_helpers.read_data``.
            Defaults to the previously hard-coded path, so existing
            zero-argument calls behave as before.

    Returns:
        The iterator produced by ``data_helpers.batch_iter`` over the
        lexical-level features of the loaded data, using
        ``FLAGS.batch_size``, one epoch and ``shuffle=False``.
    """
    print("Loading test data...")
    df = data_helpers.read_data(path)
    lexical_features = lexical_level_features(df)
    batch_iterator = data_helpers.batch_iter(
        lexical_features, FLAGS.batch_size, 1, shuffle=False)
    return batch_iterator
def fun():
    """Return a one-epoch batch iterator (batch size 64) over the result of ``hack()``."""
    data = hack()
    return data_helpers.batch_iter(data, 64, 1)
def linear_NN(X, y):
    """
    Train ``linear_nn.nn_linear`` on (X, y) and plot the training loss.

    A fresh graph is built and the loss is minimised with a Nesterov
    momentum optimizer (learning rate 0.001, momentum 0.9). Shuffled
    mini-batches of 64 are drawn for ``num_epochs`` epochs, where
    ``num_epochs`` is read from the enclosing scope. The loss of each step
    is printed, and the loss curve is shown with matplotlib once training
    has finished.

    Args:
        X: training inputs, fed to ``nn.input_x`` batch by batch.
        y: training targets paired element-wise with ``X``, fed to
            ``nn.input_y``.
    """
    graph = tf.Graph()
    with graph.as_default():
        nn = linear_nn.nn_linear(X, y)
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=0.001,
            momentum=0.9,
            use_nesterov=True,
        ).minimize(nn.loss, global_step=global_step)

        with tf.Session(graph=graph) as session:
            train_loss_history = []
            session.run(tf.global_variables_initializer())
            # Pass a concrete list: on Python 3 zip() is a lazy, one-shot
            # iterator.
            batches = data_helpers.batch_iter(
                list(zip(X, y)), batch_size=64, num_epochs=num_epochs, shuffle=True)
            for batch in batches:
                X_train, y_train = zip(*batch)
                feed_dict = {
                    nn.input_x: np.asarray(X_train),
                    nn.input_y: np.asarray(y_train),
                }
                # The predictions are still evaluated but not used here.
                _, step, loss, _ = session.run(
                    [optimizer, global_step, nn.loss, nn.predictions],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}".format(time_str, step, loss))
                train_loss_history.append(loss)

    # One x position per recorded loss. This matches the final global step
    # (the counter starts at 0 and advances once per batch) and also works
    # when no batch was run at all.
    x_axis = np.arange(len(train_loss_history))
    plt.plot(x_axis, train_loss_history, "b-", linewidth=2, label="train")
    plt.grid()
    plt.legend()
    plt.ylabel("loss")
    plt.show()
def dev_step(x_dev, y_dev, writer=None):
    '''
    Evaluates model on full dev set.
    --------------------------------
    The dev set is processed in minibatches of FLAGS.batch_size rather
    than in one piece. Per-batch accuracy and loss are handed to a
    StatisticsCollector, whose report is printed to the command line and,
    when a writer is given, added to it as a summary.
    '''
    collector = StatisticsCollector()
    pairs_with_labels = list(zip(x_dev, y_dev))
    for chunk in data_helpers.batch_iter(pairs_with_labels, FLAGS.batch_size, 1):
        # Empty batches carry nothing to evaluate.
        if len(chunk) == 0:
            continue
        song_pairs, labels = zip(*chunk)
        # Each input is a pair of keys into spect_dict.
        first_songs = tuple(spect_dict[pair[0]] for pair in song_pairs)
        second_songs = tuple(spect_dict[pair[1]] for pair in song_pairs)
        feed_dict = {
            cnn.input_song1: first_songs,
            cnn.input_song2: second_songs,
            cnn.input_y: labels,
            cnn.dropout_keep_prob: 1.0,
        }
        step, loss, accuracy = sess.run(
            [global_step, cnn.loss, cnn.accuracy],
            feed_dict)
        collector.collect(accuracy, loss)

    time_str = datetime.datetime.now().isoformat()
    batch_accuracy, batch_loss, summaries = collector.report()
    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, batch_loss, batch_accuracy))
    if writer:
        writer.add_summary(summaries, step)
# Generate training batches
def batch_dev_step(x_batch, y_batch, writer=None):
    """
    Evaluates model on a dev set and appends the averages to a log file.

    Every mini-batch is evaluated with dropout disabled and its step, loss
    and accuracy are printed. After the last batch, the mean accuracy and
    mean loss over all batches are appended as one line to ``file_name``
    (read from the enclosing scope) and printed.

    Args:
        x_batch: dev inputs, fed to ``cnn.input_x``.
        y_batch: dev labels paired element-wise with ``x_batch``, fed to
            ``cnn.input_y``.
        writer: optional summary writer; when truthy, each batch's
            summaries are added to it under the fetched step.
    """
    # Pass a concrete list: on Python 3 zip() is a lazy, one-shot iterator.
    batches = data_helpers.batch_iter(
        list(zip(x_batch, y_batch)), FLAGS.batch_size, 1)
    t_acc = 0.0
    t_loss = 0.0
    t = 0
    step1 = 0
    for batch in batches:
        # Separate names so the function's own parameters are not rebound.
        x_chunk, y_chunk = zip(*batch)
        feed_dict = {
            cnn.input_x: x_chunk,
            cnn.input_y: y_chunk,
            cnn.dropout_keep_prob: 1.0,
        }
        step, summaries, loss, accuracy = sess.run(
            [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
            feed_dict)
        time_str = datetime.datetime.now().isoformat()
        print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
        t_acc += accuracy
        t_loss += loss
        t += 1
        step1 = step
        if writer:
            writer.add_summary(summaries, step)

    if t == 0:
        # No batch was evaluated, so there is no average to report.
        print("batch_dev_step: no dev batches to evaluate")
        return

    # The context manager guarantees the log file is flushed and closed.
    with open(file_name, "a+") as f_r:
        f_r.write(str(step1) + " step " + " accuracy " + str(t_acc / t) + " loss " + str(t_loss / t) + "\n")
    # A single pre-formatted string prints the same text on Python 2 and 3.
    print("total  {} loss {}".format(t_acc / t, t_loss / t))
# Generate batches
def dev_step(x_dev, pos_dev, neg_dev):
    """
    Evaluates model on a dev set and logs the mean loss and accuracy.

    The dev triples are split into mini-batches of ``FLAGS.batch_size``.
    Loss and accuracy are averaged over the batches (each batch weighted
    equally) and logged once at the end.

    Args:
        x_dev: inputs fed to ``rnn.input_x``.
        pos_dev: inputs fed to ``rnn.input_xpos``, paired element-wise with
            ``x_dev``.
        neg_dev: inputs fed to ``rnn.input_xneg``, paired element-wise with
            ``x_dev``.
    """
    batches = data_helpers.batch_iter(
        list(zip(x_dev, pos_dev, neg_dev)), FLAGS.batch_size, 1)
    loss_sum = 0
    accuracy_sum = 0
    count = 0
    step = None
    summaries = None
    for batch in batches:
        x_batch, pos_batch, neg_batch = zip(*batch)
        feed_dict = {
            rnn.input_x: x_batch,
            rnn.input_xpos: pos_batch,
            rnn.input_xneg: neg_batch,
            rnn.real_len_x: real_len(x_batch),
            rnn.real_len_xpos: real_len(pos_batch),
            rnn.real_len_xneg: real_len(neg_batch),
            rnn.dropout_keep_prob: 1.0,
            rnn.batch_size: len(x_batch),
        }
        step, summaries, loss, accuracy = sess.run(
            [global_step, dev_summary_op, rnn.loss, rnn.accuracy],
            feed_dict)
        loss_sum += loss
        # Accumulate the accuracy itself; adding the loss here made the
        # reported accuracy equal to the mean loss.
        accuracy_sum += accuracy
        count += 1

    if count == 0:
        # Nothing was evaluated: avoid dividing by zero and writing a
        # summary that does not exist.
        logger.info("dev_step: no dev batches to evaluate")
        return

    loss = loss_sum / count
    accuracy = accuracy_sum / count
    time_str = datetime.datetime.now().isoformat()
    logger.info("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
    # Only the summaries of the last evaluated batch are written.
    dev_summary_writer.add_summary(summaries, step)
# Generate batches
def getSentimentCNN(fileToLoad, modelDir):
    """
    Score a text file with the latest checkpoint of a saved model.

    The data is loaded from ``./data/<fileToLoad>.txt`` and the most recent
    checkpoint under ``./rnn_runs/<modelDir>/checkpoints/`` is restored.
    The ``output/scores`` tensor is evaluated in batches of 64 with dropout
    disabled. The mean binary and mean smooth sentiment are printed.

    Args:
        fileToLoad: base name (no extension) of the file under ``./data/``.
        modelDir: run directory name under ``./rnn_runs/``.

    Returns:
        A tuple ``(all_predictions, all_scores)`` of 1-D float arrays:
        the per-example argmax over the scores, and the per-example
        difference ``score[:, 1] - score[:, 0]``.
    """
    # NOTE(review): a CNN helper reading from "./rnn_runs/" looks unusual;
    # kept as-is - confirm the directory is intended.
    checkpoint_dir = "./rnn_runs/" + modelDir + "/checkpoints/"
    batch_size = 64
    x_test, y_test, vocabulary, vocabulary_inv, trainS = data_helpers.load_data_for_books(
        "./data/" + fileToLoad + ".txt")
    y_test = np.argmax(y_test, axis=1)
    print("Vocabulary size: {:d}".format(len(vocabulary)))
    print("Test set size {:d}".format(len(y_test)))

    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
    prediction_chunks = []
    score_chunks = []
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False)
        # Using the session as a context manager installs it as the default
        # session and closes it on exit, releasing its resources.
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensor we want to evaluate; predictions are derived from it.
            scores = graph.get_operation_by_name("output/scores").outputs[0]

            # Generate batches for one epoch
            batches = data_helpers.batch_iter(x_test, batch_size, 1, shuffle=False)

            for x_test_batch in batches:
                batch_scores = sess.run(
                    scores, {input_x: x_test_batch, dropout_keep_prob: 1.0})
                prediction_chunks.append(np.argmax(batch_scores, axis=1))
                score_chunks.append(batch_scores[:, 1] - batch_scores[:, 0])

    # Join once instead of re-copying the growing array per batch. The
    # leading empty list keeps the float64 result dtype that concatenating
    # onto an initial [] produced, and makes the no-batch case well defined.
    all_predictions = np.concatenate([[]] + prediction_chunks)
    all_scores = np.concatenate([[]] + score_chunks)

    total = len(all_predictions)
    if total:
        mbs = float(len(all_predictions[all_predictions == 1])) / total
        mss = np.mean(all_scores)
    else:
        # No examples: report NaN rather than dividing by zero.
        mbs = float("nan")
        mss = float("nan")
    # Single pre-formatted strings print the same text on Python 2 and 3.
    print("Mean Binary Sentiment {}".format(mbs))
    print("Mean Smooth Sentiment {}".format(mss))
    return all_predictions, all_scores
def getSentimentRNN(fileToLoad, modelDir):
    """
    Score a text file with the latest checkpoint of a saved RNN model.

    The data is loaded from ``./data/<fileToLoad>.txt`` and flipped
    left-to-right with ``np.fliplr``. The most recent checkpoint under
    ``./rnn_runs/<modelDir>/checkpoints/`` is restored. The ``prediction``
    tensor is evaluated in batches of 64 with a zero initial state and
    keep probability 1.0. The mean binary and mean smooth sentiment are
    printed.

    Args:
        fileToLoad: base name (no extension) of the file under ``./data/``.
        modelDir: run directory name under ``./rnn_runs/``.

    Returns:
        A tuple ``(all_predictions, all_scores)`` of 1-D float arrays:
        the per-example argmax over the model output, and the per-example
        difference ``output[:, 1] - output[:, 0]``.
    """
    checkpoint_dir = "./rnn_runs/" + modelDir + "/checkpoints/"
    batch_size = 64
    n_hidden = 256
    x_test, y_test, vocabulary, vocabulary_inv, trainS = data_helpers.load_data_for_books(
        "./data/" + fileToLoad + ".txt")
    y_test = np.argmax(y_test, axis=1)
    print("Vocabulary size: {:d}".format(len(vocabulary)))
    print("Test set size {:d}".format(len(y_test)))
    x_test = np.fliplr(x_test)

    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
    prediction_chunks = []
    score_chunks = []
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False)
        # Using the session as a context manager installs it as the default
        # session and closes it on exit, releasing its resources.
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            print("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("x_input").outputs[0]
            predictions = graph.get_operation_by_name("prediction").outputs[0]
            istate = graph.get_operation_by_name('initial_state').outputs[0]
            keep_prob = graph.get_operation_by_name('keep_prob').outputs[0]

            # Generate batches for one epoch
            batches = data_helpers.batch_iter(x_test, batch_size, 1, shuffle=False)

            for x_test_batch in batches:
                feed = {
                    input_x: x_test_batch,
                    # The initial state is zeroed for every batch and sized
                    # to the actual batch length.
                    istate: np.zeros((len(x_test_batch), 2 * n_hidden)),
                    keep_prob: 1.0,
                }
                batch_predictions = sess.run(predictions, feed)
                prediction_chunks.append(np.argmax(batch_predictions, axis=1))
                score_chunks.append(
                    batch_predictions[:, 1] - batch_predictions[:, 0])

    # Join once instead of re-copying the growing array per batch. The
    # leading empty list keeps the float64 result dtype that concatenating
    # onto an initial [] produced, and makes the no-batch case well defined.
    all_predictions = np.concatenate([[]] + prediction_chunks)
    all_scores = np.concatenate([[]] + score_chunks)

    total = len(all_predictions)
    if total:
        mbs = float(len(all_predictions[all_predictions == 1])) / total
        mss = np.mean(all_scores)
    else:
        # No examples: report NaN rather than dividing by zero.
        mbs = float("nan")
        mss = float("nan")
    # Single pre-formatted strings print the same text on Python 2 and 3.
    print("Mean Binary Sentiment {}".format(mbs))
    print("Mean Smooth Sentiment {}".format(mss))
    return all_predictions, all_scores
def text_cnn_load_model_and_eval(x_test, checkpoint_file, allow_soft_placement, log_device_placement, embeddings, batch_size=50):
    """
    Restore a saved text-CNN checkpoint and predict on ``x_test``.

    Args:
        x_test: inputs fed batch by batch to the ``input_x`` placeholder.
        checkpoint_file: checkpoint path prefix; ``<prefix>.meta`` is
            imported and the variables are restored from the prefix.
        allow_soft_placement: forwarded to ``tf.ConfigProto``.
        log_device_placement: forwarded to ``tf.ConfigProto``.
        embeddings: 2-D array (its ``shape[0]`` and ``shape[1]`` are read)
            fed to the ``embedding/Placeholder`` tensor on every batch.
        batch_size: number of examples per evaluation batch. Defaults to
            50, the value that used to be hard-coded.

    Returns:
        A 1-D float array with the outputs of ``output/predictions`` for
        all batches, in input order. It is an empty array when there are
        no batches.
    """
    prediction_chunks = []
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=allow_soft_placement,
            log_device_placement=log_device_placement)
        # Using the session as a context manager installs it as the default
        # session and closes it on exit, releasing its resources.
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]

            # Generate batches for one epoch
            batches = data_helpers.batch_iter(x_test, batch_size, 1, shuffle=False)

            # Load embeddings placeholder
            embedding_size = embeddings.shape[1]
            embeddings_number = embeddings.shape[0]
            # Parenthesised so the same line is valid on Python 2 and 3.
            print('embedding_size:%s, embeddings_number:%s' % (embedding_size, embeddings_number))
            embeddings_placeholder = graph.get_operation_by_name("embedding/Placeholder").outputs[0]

            for x_test_batch in batches:
                feed = {
                    input_x: x_test_batch,
                    dropout_keep_prob: 1.0,
                    embeddings_placeholder: embeddings,
                }
                prediction_chunks.append(sess.run(predictions, feed))

    # Join once instead of re-copying the growing array per batch. The
    # leading empty list keeps the float64 result dtype that concatenating
    # onto an initial [] produced.
    return np.concatenate([[]] + prediction_chunks)
def text_cnn_load_model_and_eval_v2(x_test_s1, x_test_s2, checkpoint_file, allow_soft_placement, log_device_placement, embeddings, batch_size=50):
    """
    Restore a saved two-input text-CNN checkpoint and predict on input pairs.

    Args:
        x_test_s1: first inputs, fed to the ``input_x_s1`` placeholder.
        x_test_s2: second inputs, paired element-wise with ``x_test_s1``
            and fed to the ``input_x_s2`` placeholder.
        checkpoint_file: checkpoint path prefix; ``<prefix>.meta`` is
            imported and the variables are restored from the prefix.
        allow_soft_placement: forwarded to ``tf.ConfigProto``.
        log_device_placement: forwarded to ``tf.ConfigProto``.
        embeddings: 2-D array (its ``shape[0]`` and ``shape[1]`` are read)
            fed to the ``embedding/Placeholder`` tensor on every batch.
        batch_size: number of pairs per evaluation batch. Defaults to 50,
            the value that used to be hard-coded.

    Returns:
        A 1-D float array with the outputs of ``output/predictions`` for
        all batches, in input order. It is an empty array when there are
        no batches.
    """
    prediction_chunks = []
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=allow_soft_placement,
            log_device_placement=log_device_placement)
        # Using the session as a context manager installs it as the default
        # session and closes it on exit, releasing its resources.
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x_s1 = graph.get_operation_by_name("input_x_s1").outputs[0]
            input_x_s2 = graph.get_operation_by_name("input_x_s2").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]

            # Generate batches for one epoch
            batches = data_helpers.batch_iter(
                list(zip(x_test_s1, x_test_s2)), batch_size, 1, shuffle=False)

            # Load embeddings placeholder
            embedding_size = embeddings.shape[1]
            embeddings_number = embeddings.shape[0]
            # Parenthesised so the same line is valid on Python 2 and 3.
            print('embedding_size:%s, embeddings_number:%s' % (embedding_size, embeddings_number))
            embeddings_placeholder = graph.get_operation_by_name("embedding/Placeholder").outputs[0]

            for batch in batches:
                x_test_batch_s1, x_test_batch_s2 = zip(*batch)
                feed = {
                    input_x_s1: x_test_batch_s1,
                    input_x_s2: x_test_batch_s2,
                    dropout_keep_prob: 1.0,
                    embeddings_placeholder: embeddings,
                }
                prediction_chunks.append(sess.run(predictions, feed))

    # Join once instead of re-copying the growing array per batch. The
    # leading empty list keeps the float64 result dtype that concatenating
    # onto an initial [] produced.
    return np.concatenate([[]] + prediction_chunks)