Python data_helpers 模块，batch_iter() 实例源码

我们从Python开源项目中，提取了以下17个代码示例，用于说明如何使用data_helpers.batch_iter()。

项目：TextAsGraphClassification 作者：NightmareNyx | 项目源码 | 文件源码

def fit(self, X, y=None):
        """
        This should fit classifier. All the "work" should be done here.
        Note: assert is not a good choice here and you should rather
        use try/except blog with exceptions. This is just for short syntax.
        """

        # Generate batches
        batches = batch_iter(
            list(zip(X, y)), self.FLAGS.batch_size, self.FLAGS.num_epochs)

        # Training loop. For each batch...
        for batch in batches:
            x_batch, y_batch = zip(*batch)
            feed_dict = {
                self.cnn.input_x: x_batch,
                self.cnn.input_y: y_batch,
                self.cnn.dropout_keep_prob: self.FLAGS.dropout_keep_prob
            }
            _, loss, accuracy = self.sess.run(
                [self.optimizer, self.cnn.loss, self.cnn.accuracy],
                feed_dict)
            # print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))

项目：nlp_code 作者：bohaohan | 项目源码 | 文件源码

def batch_dev_step(x_batch, y_batch, writer=None):
            """
            Evaluates model on a dev set
            """
            batches = data_helpers.batch_iter(
            zip(x_batch, y_batch), FLAGS.batch_size, 1)
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                feed_dict = {
                  cnn.input_x: x_batch,
                  cnn.input_y: y_batch,
                  cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

        # Generate batches

项目：nlp_code 作者：bohaohan | 项目源码 | 文件源码

def batch_dev_step(x_batch, y_batch, writer=None):
            """
            Evaluates model on a dev set
            """
            batches = data_helpers.batch_iter(
            zip(x_batch, y_batch), FLAGS.batch_size, 1)
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                feed_dict = {
                  cnn.input_x: x_batch,
                  cnn.input_y: y_batch,
                  cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

        # Generate batches

项目：question-classification-cnn-rnn-attention 作者：sefira | 项目源码 | 文件源码

def dev_step(x_dev, y_dev):
            """
            Evaluates model on a dev set
            """
            batches = data_helpers.batch_iter(
                list(zip(x_dev, y_dev)), FLAGS.batch_size, 1)
            loss_sum = 0
            accuracy_sum = 0
            count = 0
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                feed_dict = {
                  rnn.input_x: x_batch,
                  rnn.input_y: y_batch,
                  rnn.dropout_keep_prob: 1.0,
                  rnn.batch_size: len(x_batch),
                  rnn.real_len: real_len(x_batch)
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, rnn.loss, rnn.accuracy],
                    feed_dict)
                loss_sum = loss_sum + loss
                accuracy_sum = accuracy_sum + loss
                count = count + 1
            loss = loss_sum / count
            accuracy = accuracy_sum / count
            time_str = datetime.datetime.now().isoformat()
            logger.info("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            dev_summary_writer.add_summary(summaries, step)

        # Generate batches

项目：cnn-text-classification-tf-Chinese-py3 作者：MakDon | 项目源码 | 文件源码

def predict(self, x_raw):
        x_raw = list(x_raw)
        x_raw = [s.strip() for s in x_raw]
        x_raw = [list(s) for s in x_raw]
        x_pad,_ = data_helpers.pad_sentences(x_raw,sequence_length)
        x_test = np.array([[vocabulary.get(word,0) for word in sentence] for sentence in x_pad])

        # Get the placeholders from the graph by name
        input_x = self.graph.get_operation_by_name("input_x").outputs[0]
        # input_y = graph.get_operation_by_name("input_y").outputs[0]
        dropout_keep_prob = self.graph.get_operation_by_name("dropout_keep_prob").outputs[0]

        # Tensors we want to evaluate
        predictions = self.graph.get_operation_by_name("output/predictions").outputs[0]

        # Generate batches for one epoch
        batches = data_helpers.batch_iter(list(x_test), FLAGS.batch_size, 1, shuffle=False)

        # Collect the predictions here
        all_predictions = []

        for x_test_batch in batches:
            batch_predictions = self.sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0})
            all_predictions = np.concatenate([all_predictions, batch_predictions])

        return all_predictions

#test predict
#========================================

项目：Relation_Extraction 作者：wadhwasahil | 项目源码 | 文件源码

def get_batches():
    print("Loading train data...")
    lexical_features = lexical_level_features(df)
    batch_iterator = data_helpers.batch_iter(lexical_features, FLAGS.batch_size, FLAGS.num_epochs)
    return batch_iterator

项目：Relation_Extraction 作者：wadhwasahil | 项目源码 | 文件源码

def get_batches_test():
    print("Loading test data...")
    df = data_helpers.read_data("/home/sahil/ML-bucket/test.csv")
    lexical_features = lexical_level_features(df)
    batch_iterator = data_helpers.batch_iter(lexical_features, FLAGS.batch_size, 1, shuffle=False)
    return batch_iterator

项目：Relation_Extraction 作者：wadhwasahil | 项目源码 | 文件源码

def fun():
    r = hack()
    s = data_helpers.batch_iter(r, 64, 1)
    return s

项目：Facial_KeyPoints_Detection 作者：wadhwasahil | 项目源码 | 文件源码

def linear_NN(X, y):
    graph = tf.Graph()
    with graph.as_default():
        nn = linear_nn.nn_linear(X, y)
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=0.001,
            momentum=0.9,
            use_nesterov=True,
        ).minimize(nn.loss, global_step=global_step)
        with tf.Session(graph=graph) as session:
            train_loss_history = []
            session.run(tf.global_variables_initializer())
            batches = data_helpers.batch_iter(zip(X, y), batch_size=64, num_epochs=num_epochs, shuffle=True)
            for batch in batches:
                X_train, y_train = zip(*batch)
                feed_dict = {nn.input_x: np.asarray(X_train), nn.input_y: np.asarray(y_train)}
                _, step, loss, predictions = session.run([optimizer, global_step, nn.loss, nn.predictions], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}".format(time_str, step, loss))
                train_loss_history.append(loss)
                # if step % 10 == 0:
                #     pass
            x_axis = np.arange(step)
            plt.plot(x_axis, train_loss_history, "b-", linewidth=2, label="train")
            plt.grid()
            plt.legend()
            plt.ylabel("loss")
            plt.show()

项目：coversongs-dual-convnet 作者：markostam | 项目源码 | 文件源码

def dev_step(x_dev, y_dev, writer=None):
            '''
            Evaluates model on full dev set.
            --------------------------------
            Since full dev set likely won't fit into memory, this function
            splits the dev set into minibatches and returns the average 
            of loss and accuracy to cmd line and to summary writer
            '''
            dev_stats = StatisticsCollector()

            dev_batches = data_helpers.batch_iter(list(zip(x_dev, y_dev)), 
                                      FLAGS.batch_size, 1)
            for dev_batch in dev_batches:
                if len(dev_batch) > 0:
                    x_dev_batch, y_dev_batch = zip(*dev_batch)
                    feed_dict = {
                      cnn.input_song1: tuple(spect_dict[i[0]] for i in x_dev_batch),
                      cnn.input_song2: tuple(spect_dict[i[1]] for i in x_dev_batch),
                      cnn.input_y: y_dev_batch,
                      cnn.dropout_keep_prob: 1.0
                    }

                    step, loss, accuracy = sess.run(
                        [global_step, cnn.loss, cnn.accuracy],
                        feed_dict)
                    dev_stats.collect(accuracy, loss)

            time_str = datetime.datetime.now().isoformat()
            batch_accuracy, batch_loss, summaries = dev_stats.report()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, batch_loss, batch_accuracy))
            if writer:
                writer.add_summary(summaries, step)

        # Generate training batches

项目：nlp_code 作者：bohaohan | 项目源码 | 文件源码

def batch_dev_step(x_batch, y_batch, writer=None):
            """
            Evaluates model on a dev set
            """
            batches = data_helpers.batch_iter(
            zip(x_batch, y_batch), FLAGS.batch_size, 1)
            t_acc = 0.0
            t_acc = float(t_acc)
            t_loss = 0.0
            t_loss = float(t_loss)
            t = 0
            f_r = open(file_name, "a+")
            step1 = 0
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                feed_dict = {
                  cnn.input_x: x_batch,
                  cnn.input_y: y_batch,
                  cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                t_acc += accuracy
                t_loss += loss
                t += 1
                step1 = step
                if writer:
                    writer.add_summary(summaries, step)
            f_r.write(str(step1) + " step " + " accuracy " + str(t_acc/t) + " loss " + str(t_loss/t) + "\n")
            print "total ", t_acc/t, "loss", t_loss/t

        # Generate batches

项目：nlp_code 作者：bohaohan | 项目源码 | 文件源码

def batch_dev_step(x_batch, y_batch, writer=None):
            """
            Evaluates model on a dev set
            """
            batches = data_helpers.batch_iter(
            zip(x_batch, y_batch), FLAGS.batch_size, 1)
            t_acc = 0.0
            t_acc = float(t_acc)
            t_loss = 0.0
            t_loss = float(t_loss)
            t = 0
            f_r = open(file_name, "a+")
            step1 = 0
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                feed_dict = {
                  cnn.input_x: x_batch,
                  cnn.input_y: y_batch,
                  cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                t_acc += accuracy
                t_loss += loss
                t += 1
                step1 = step
                if writer:
                    writer.add_summary(summaries, step)
            f_r.write(str(step1) + " step " + " accuracy " + str(t_acc/t) + " loss " + str(t_loss/t) + "\n")
            print "total ", t_acc/t, "loss", t_loss/t

        # Generate batches

项目：bing_query_title_match 作者：sefira | 项目源码 | 文件源码

def dev_step(x_dev, pos_dev, neg_dev):
            """
            Evaluates model on a dev set
            """
            batches = data_helpers.batch_iter(
                list(zip(x_dev, pos_dev, neg_dev)), FLAGS.batch_size, 1)
            loss_sum = 0
            accuracy_sum = 0
            count = 0
            for batch in batches:
                x_batch, pos_batch, neg_batch = zip(*batch)
                feed_dict = {
                  rnn.input_x: x_batch,
                  rnn.input_xpos: pos_batch,
                  rnn.input_xneg: neg_batch,
                  rnn.real_len_x: real_len(x_batch),
                  rnn.real_len_xpos: real_len(pos_batch),
                  rnn.real_len_xneg: real_len(neg_batch),
                  rnn.dropout_keep_prob: 1.0,
                  rnn.batch_size: len(x_batch),
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, rnn.loss, rnn.accuracy],
                    feed_dict)
                loss_sum = loss_sum + loss
                accuracy_sum = accuracy_sum + loss
                count = count + 1
            loss = loss_sum / count
            accuracy = accuracy_sum / count
            time_str = datetime.datetime.now().isoformat()
            logger.info("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            dev_summary_writer.add_summary(summaries, step)

        # Generate batches

项目：DNN-Sentiment 作者：awjuliani | 项目源码 | 文件源码

def getSentimentCNN(fileToLoad, modelDir):
    checkpoint_dir = "./rnn_runs/"+modelDir+"/checkpoints/"
    batch_size = 64
    x_test, y_test, vocabulary, vocabulary_inv,trainS = data_helpers.load_data_for_books("./data/"+fileToLoad+".txt")
    y_test = np.argmax(y_test, axis=1)
    print("Vocabulary size: {:d}".format(len(vocabulary)))
    print("Test set size {:d}".format(len(y_test)))

    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
          allow_soft_placement=True,
          log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]
            scores = graph.get_operation_by_name("output/scores").outputs[0]
            # Generate batches for one epoch
            batches = data_helpers.batch_iter(x_test, batch_size, 1, shuffle=False)

            # Collect the predictions here
            all_predictions = []
            all_scores = []
            for x_test_batch in batches:
                batch_scores = sess.run(scores, {input_x: x_test_batch, dropout_keep_prob: 1.0})
                batch_predictions = np.argmax(batch_scores,axis=1)
                #batch_predictions = sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0})
                all_predictions = np.concatenate([all_predictions, batch_predictions])
                all_scores = np.concatenate([all_scores,batch_scores[:,1] - batch_scores[:,0]])

    mbs = float(len(all_predictions[all_predictions == 1]))/len(all_predictions)
    mss = np.mean(all_scores)
    print "Mean Binary Sentiment",mbs
    print "Mean Smooth Sentiment",mss
    return all_predictions,all_scores

项目：DNN-Sentiment 作者：awjuliani | 项目源码 | 文件源码

def getSentimentRNN(fileToLoad,modelDir):
    checkpoint_dir = "./rnn_runs/"+modelDir+"/checkpoints/"
    batch_size = 64
    n_hidden = 256

    x_test, y_test, vocabulary, vocabulary_inv,trainS = data_helpers.load_data_for_books("./data/"+fileToLoad+".txt")
    y_test = np.argmax(y_test, axis=1)
    print("Vocabulary size: {:d}".format(len(vocabulary)))
    print("Test set size {:d}".format(len(y_test)))
    x_test = np.fliplr(x_test)

    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
          allow_soft_placement=True,
          log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            print("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("x_input").outputs[0]
            predictions = graph.get_operation_by_name("prediction").outputs[0]
            istate = graph.get_operation_by_name('initial_state').outputs[0]
            keep_prob = graph.get_operation_by_name('keep_prob').outputs[0]
            # Generate batches for one epoch
            batches = data_helpers.batch_iter(x_test, batch_size, 1, shuffle=False)

            # Collect the predictions here
            all_predictions = []
            all_scores = []
            for x_test_batch in batches:
                batch_predictions = sess.run(predictions, {input_x: x_test_batch, istate: np.zeros((len(x_test_batch), 2*n_hidden)), keep_prob: 1.0})
                binaryPred = np.argmax(batch_predictions,axis=1)
                all_predictions = np.concatenate([all_predictions, binaryPred])
                all_scores = np.concatenate([all_scores, batch_predictions[:,1] - batch_predictions[:,0]])

        mbs = float(len(all_predictions[all_predictions == 1]))/len(all_predictions)
        mss = np.mean(all_scores)
        print "Mean Binary Sentiment",mbs
        print "Mean Smooth Sentiment",mss
        return all_predictions,all_scores

项目：conll16st-hd-sdp 作者：tbmihailov | 项目源码 | 文件源码

def text_cnn_load_model_and_eval(x_test,
                                 checkpoint_file,
                                 allow_soft_placement,
                                 log_device_placement,
                                 embeddings):

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=allow_soft_placement,
            log_device_placement=log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]

            # Generate batches for one epoch
            batch_size = 50
            batches = data_helpers.batch_iter(x_test, batch_size, 1, shuffle=False)

            # Collect the predictions here
            all_predictions = []

            # Load embeddings placeholder
            embedding_size = embeddings.shape[1]
            embeddings_number = embeddings.shape[0]
            print 'embedding_size:%s, embeddings_number:%s' % (embedding_size, embeddings_number)
            # with tf.name_scope("embedding"):
            #     embeddings_placeholder = tf.placeholder(tf.float32, shape=[embeddings_number, embedding_size])
            embeddings_placeholder = graph.get_operation_by_name("embedding/Placeholder").outputs[0]

            for x_test_batch in batches:
                batch_predictions = sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0,
                                                           embeddings_placeholder: embeddings})
                all_predictions = np.concatenate([all_predictions, batch_predictions])

    return all_predictions

项目：conll16st-hd-sdp 作者：tbmihailov | 项目源码 | 文件源码

def text_cnn_load_model_and_eval_v2(x_test_s1,
                                    x_test_s2,
                  checkpoint_file,
                  allow_soft_placement,
                  log_device_placement,
                  embeddings):
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=allow_soft_placement,
            log_device_placement=log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x_s1 = graph.get_operation_by_name("input_x_s1").outputs[0]
            input_x_s2 = graph.get_operation_by_name("input_x_s2").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]

            # Generate batches for one epoch
            batch_size = 50
            batches = data_helpers.batch_iter(list(zip(x_test_s1, x_test_s2)), batch_size, 1, shuffle=False)

            # Collect the predictions here
            all_predictions = []

            # Load embeddings placeholder
            embedding_size = embeddings.shape[1]
            embeddings_number = embeddings.shape[0]
            print 'embedding_size:%s, embeddings_number:%s' % (embedding_size, embeddings_number)
            # with tf.name_scope("embedding"):
            #     embeddings_placeholder = tf.placeholder(tf.float32, shape=[embeddings_number, embedding_size])
            embeddings_placeholder = graph.get_operation_by_name("embedding/Placeholder").outputs[0]

            for batch in batches:
                x_test_batch_s1, x_test_batch_s2 = zip(*batch)
                batch_predictions = sess.run(predictions, {input_x_s1: x_test_batch_s1,
                                                           input_x_s2: x_test_batch_s2,
                                                           dropout_keep_prob: 1.0,
                                                           embeddings_placeholder: embeddings})
                all_predictions = np.concatenate([all_predictions, batch_predictions])

    return all_predictions