The following code examples, extracted from open-source Python projects, illustrate how to use tensorflow.PaddingFIFOQueue().
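Before the project examples, here is a minimal sketch of the core behavior (illustrative only, not drawn from any of the projects below): shapes passed to the queue may contain None dimensions, and dequeue_many zero-pads every component to the largest size present in the dequeued batch.

import tensorflow as tf  # TF 1.x graph-mode API

# A queue with one component: an int32 vector of unknown length.
queue = tf.PaddingFIFOQueue(capacity=10, dtypes=[tf.int32], shapes=[[None]])
seq = tf.placeholder(tf.int32, [None])
enqueue_op = queue.enqueue([seq])
batch = queue.dequeue_many(2)  # zero-pads to the longest sequence in the batch

with tf.Session() as sess:
    sess.run(enqueue_op, feed_dict={seq: [1, 2]})
    sess.run(enqueue_op, feed_dict={seq: [3, 4, 5]})
    print(sess.run(batch))  # [[1 2 0] [3 4 5]]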
def __init__(self, files_list, thread_count, batch_size, numcep, numcontext, next_index=lambda x: x + 1):
    self._coord = None
    self._numcep = numcep
    self._x = tf.placeholder(tf.float32, [None, numcep + (2 * numcep * numcontext)])
    self._x_length = tf.placeholder(tf.int32, [])
    self._y = tf.placeholder(tf.int32, [None,])
    self._y_length = tf.placeholder(tf.int32, [])
    self.example_queue = tf.PaddingFIFOQueue(shapes=[[None, numcep + (2 * numcep * numcontext)], [], [None,], []],
                                             dtypes=[tf.float32, tf.int32, tf.int32, tf.int32],
                                             capacity=2 * self._get_device_count() * batch_size)
    self._enqueue_op = self.example_queue.enqueue([self._x, self._x_length, self._y, self._y_length])
    self._close_op = self.example_queue.close(cancel_pending_enqueues=True)
    self.batch_size = batch_size
    self._numcontext = numcontext
    self._thread_count = thread_count
    self._files_list = self._create_files_list(files_list)
    self._next_index = next_index
def get_input_queues(path, word2idx, batch_size=32, num_threads=8):
    input_ph = tf.placeholder(tf.int32, shape=[None])  # [T]
    queue = tf.PaddingFIFOQueue(shapes=[[None, ]], dtypes=[tf.int32], capacity=5000)
    # TODO: enqueue_many would be faster; it would require batching and padding at the numpy level
    enqueue_op = queue.enqueue([input_ph])

    def enqueue_data(sess):
        # for epoch in range(epoch_size):
        while True:
            for idx, line in enumerate(read_data(path)):
                v = vectorize(line, word2idx)
                sess.run(enqueue_op, feed_dict={input_ph: v})

    # dequeue_batch = queue.dequeue_many(batch_size)
    dequeue_op = queue.dequeue()
    dequeue_batch = tf.train.batch([dequeue_op], batch_size=batch_size, num_threads=num_threads,
                                   capacity=1000, dynamic_pad=True, name="batch_and_pad")
    # TODO: get corpus_size here
    return enqueue_data, dequeue_batch
def prefetch(self, tensor_dict, capacity):
    """Creates a prefetch queue for tensors.

    Creates a FIFO queue to asynchronously enqueue tensor_dicts and returns a
    dequeue op that evaluates to a tensor_dict. This function is useful in
    prefetching preprocessed tensors so that the data is readily available for
    consumers.

    Args:
        tensor_dict: a dictionary of tensors to prefetch.
        capacity: the size of the prefetch queue.

    Returns:
        a FIFO prefetcher queue
    """
    names = list(tensor_dict.keys())
    dtypes = [t.dtype for t in tensor_dict.values()]
    shapes = [t.get_shape() for t in tensor_dict.values()]
    prefetch_queue = tf.PaddingFIFOQueue(capacity, dtypes=dtypes,
                                         shapes=shapes,
                                         names=names,
                                         name='prefetch_queue')
    enqueue_op = prefetch_queue.enqueue(tensor_dict)
    tf.train.queue_runner.add_queue_runner(
        tf.train.queue_runner.QueueRunner(prefetch_queue, [enqueue_op]))
    return prefetch_queue
def __init__(self,
             audio_dir,
             coord,
             sample_rate,
             sample_size=None,
             silence_threshold=None,
             queue_size=32):
    self.audio_dir = audio_dir
    self.sample_rate = sample_rate
    self.coord = coord
    self.sample_size = sample_size
    self.silence_threshold = silence_threshold
    self.threads = []
    self.sample_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
    self.queue = tf.PaddingFIFOQueue(queue_size,
                                     ['float32'],
                                     shapes=[(None, 1)])
    self.enqueue = self.queue.enqueue([self.sample_placeholder])

    # TODO Find a better way to check this.
    # Checking inside the AudioReader's thread makes it hard to terminate
    # the execution of the script, so we do it in the constructor for now.
    files = find_files(audio_dir)
    if not files:
        raise ValueError("No audio files found in '{}'.".format(audio_dir))
def _make_batch_queue(input, capacity, num_threads=1):
    queue = tf.PaddingFIFOQueue(capacity=capacity,
                                dtypes=[s.dtype for s in input],
                                shapes=[s.get_shape() for s in input])
    tf.summary.scalar("fraction_of_%d_full" % capacity,
                      tf.cast(queue.size(), tf.float32) * (1. / capacity))
    enqueue_ops = [queue.enqueue(input)] * num_threads
    queue_runner.add_queue_runner(queue_runner.QueueRunner(queue, enqueue_ops))
    return queue

# This class is responsible for setting up and running experiments
# Also provides helper functions related to experiments (e.g. get accuracy)
def preprocess(data):
    # PaddingFIFOQueue pads to the max size seen in the data (instead of the
    # minibatch); by chopping off the padded ends here, we limit redundant
    # computation in the output layer.
    sequence_length = tf.reduce_sum(tf.cast(tf.not_equal(data, 0), dtype=tf.int32), axis=1)
    maximum_sequence_length = tf.reduce_max(sequence_length)
    data = data[:, :maximum_sequence_length]

    source = data[:, :-1]
    target = data[:, 1:]
    sequence_length -= 1
    return source, target, sequence_length
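For concreteness, a toy sketch of what the trimming above does (the tensor values here are invented for illustration):

import tensorflow as tf

# Two sequences of true lengths 2 and 4, zero-padded to a global width of 6.
data = tf.constant([[7, 8, 0, 0, 0, 0],
                    [4, 5, 6, 9, 0, 0]], dtype=tf.int32)
source, target, sequence_length = preprocess(data)

with tf.Session() as sess:
    src, tgt, lens = sess.run([source, target, sequence_length])
    # The batch is first trimmed to width 4 (the longest real sequence),
    # then split into inputs and next-step targets:
    print(src)   # [[7 8 0] [4 5 6]]
    print(tgt)   # [[8 0 0] [5 6 9]]
    print(lens)  # [1 3] -- original lengths minus one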
def _build_dev_pipeline(tfrecords_file_path, feature_columns, batch_size=None, nb_instances=None):
    """
    Build the dev pipeline
    :param tfrecords_file_path: dev TFRecords file path
    :return: queue runner list, queues, symbolic link to mini-batch
    """
    with tf.device('/cpu:0'):
        # Creating a list with tfrecords
        tfrecords_list = [tfrecords_file_path]

        # Will contain queue runners for thread creation
        queue_runner_list = list()

        # Filename queue, contains only one filename (dev TFRecords file)
        filename_queue = tf.train.string_input_producer(tfrecords_list)

        # Decode one example
        tensor_list = read_and_decode(filename_queue, feature_columns)

        dtypes = [tf.string, tf.int32, tf.int32, tf.int32, tf.int32, tf.int32]
        shapes = [[], [], [None], [None, None], [None], [None]]

        for _ in feature_columns:
            dtypes.append(tf.int32)
            shapes.append([None])

        # Main queue
        padding_queue = tf.PaddingFIFOQueue(nb_instances, dtypes=dtypes, shapes=shapes)

        # Enqueue and dequeue Ops + queue runner creation
        enqueue_op = padding_queue.enqueue(tensor_list)
        batch = padding_queue.dequeue_many(batch_size)
        queue_runner_list.append(tf.train.QueueRunner(padding_queue, [enqueue_op] * 1))

        return queue_runner_list, [filename_queue, padding_queue], batch
def _build_test_pipeline(tfrecords_file_path, feature_columns, batch_size=None, nb_instances=None):
    """
    Build the test pipeline
    :param tfrecords_file_path: test TFRecords file path
    :return: queue runner list, queues, symbolic link to mini-batch
    """
    with tf.device('/cpu:0'):
        # Creating a list with tfrecords
        tfrecords_list = [tfrecords_file_path]

        # Will contain queue runners for thread creation
        queue_runner_list = list()

        # Filename queue, contains only one filename (test TFRecords file)
        filename_queue = tf.train.string_input_producer(tfrecords_list)

        # Decode one example
        tensor_list = read_and_decode_test(filename_queue, feature_columns)

        dtypes = [tf.string, tf.int32, tf.int32, tf.int32, tf.int32]
        shapes = [[], [], [None], [None, None], [None]]

        for _ in feature_columns:
            dtypes.append(tf.int32)
            shapes.append([None])

        # Main queue
        padding_queue = tf.PaddingFIFOQueue(nb_instances, dtypes=dtypes, shapes=shapes)

        # Enqueue and dequeue Ops + queue runner creation
        enqueue_op = padding_queue.enqueue(tensor_list)
        batch = padding_queue.dequeue_many(batch_size)
        queue_runner_list.append(tf.train.QueueRunner(padding_queue, [enqueue_op] * 1))

        return queue_runner_list, [filename_queue, padding_queue], batch
def __init__(self, text_dir, coord, sample_size=None, queue_size=256):
    self.text_dir = text_dir
    self.coord = coord
    self.sample_size = sample_size
    self.threads = []
    self.sample_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
    self.queue = tf.PaddingFIFOQueue(queue_size, ['float32'], shapes=[(None, 1)])
    self.enqueue = self.queue.enqueue([self.sample_placeholder])
def requeue(*tensors, capacity=FLAGS.num_threads_per_queue, num_threads=FLAGS.num_threads_per_queue):
    queue = tf.PaddingFIFOQueue(capacity, dtypes(*tensors), static_shapes(*tensors))
    add_queue_runner(queue, [queue.enqueue(tensors)] * num_threads)
    return queue
def get_queue(nodes,
              queue_type='fifo',
              batch_size=256,
              capacity=None,
              min_after_dequeue=None,
              shape_flag=True,
              seed=0):
    """A generic queue for reading data.

    Built on top of https://indico.io/blog/tensorflow-data-input-part2-extensions/
    """
    if capacity is None:
        capacity = 2 * batch_size
    if min_after_dequeue is None:
        min_after_dequeue = capacity // 2
    names = []
    dtypes = []
    shapes = []
    for name in nodes.keys():
        names.append(name)
        dtypes.append(nodes[name].dtype)
        if shape_flag:
            shapes.append(nodes[name].get_shape()[1:])
        else:
            shapes.append(nodes[name].get_shape())
    if batch_size == 1:
        shapes = None

    if queue_type == 'random':
        queue = tf.RandomShuffleQueue(capacity=capacity,
                                      min_after_dequeue=min_after_dequeue,
                                      dtypes=dtypes,
                                      shapes=shapes,
                                      names=names,
                                      seed=seed)
    elif queue_type == 'fifo':
        queue = tf.FIFOQueue(capacity=capacity,
                             dtypes=dtypes,
                             shapes=shapes,
                             names=names)
    elif queue_type == 'padding_fifo':
        queue = tf.PaddingFIFOQueue(capacity=capacity,
                                    dtypes=dtypes,
                                    shapes=shapes,
                                    names=names)
    elif queue_type == 'priority':
        queue = tf.PriorityQueue(capacity=capacity,
                                 types=dtypes,
                                 shapes=shapes,
                                 names=names)
    else:
        raise Exception('Queue type %s not recognized' % queue_type)

    return queue
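A hedged usage sketch for the function above (the nodes dict here is invented for illustration): get_queue expects a dict mapping component names to tensors; because the queue is built with names, its enqueue and dequeue methods work with dicts keyed by those same names.

import tensorflow as tf

nodes = {
    'images': tf.placeholder(tf.float32, [None, 32, 32, 3]),
    'labels': tf.placeholder(tf.int64, [None]),
}
queue = get_queue(nodes, queue_type='padding_fifo', batch_size=64)
enqueue_op = queue.enqueue_many(nodes)  # dict keyed by the same names
batch = queue.dequeue_many(64)          # a dict of batched tensors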
def get_padded_batch(file_list, batch_size, input_size, output_size,
                     num_enqueuing_threads=4, num_epochs=1, shuffle=True):
    """Reads batches of SequenceExamples from TFRecords and pads them.

    Can deal with variable length SequenceExamples by padding each batch to the
    length of the longest sequence with zeros.

    Args:
        file_list: A list of paths to TFRecord files containing SequenceExamples.
        batch_size: The number of SequenceExamples to include in each batch.
        input_size: The size of each input vector. The returned batch of inputs
            will have a shape [batch_size, num_steps, input_size].
        num_enqueuing_threads: The number of threads to use for enqueuing
            SequenceExamples.

    Returns:
        inputs: A tensor of shape [batch_size, num_steps, input_size] of float32s.
        labels: A tensor of shape [batch_size, num_steps] of float32s.
        lengths: A tensor of shape [batch_size] of int32s. The lengths of each
            SequenceExample before padding.
    """
    file_queue = tf.train.string_input_producer(
        file_list, num_epochs=num_epochs, shuffle=shuffle)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(file_queue)

    sequence_features = {
        'inputs': tf.FixedLenSequenceFeature(shape=[input_size], dtype=tf.float32),
        'inputs_cmvn': tf.FixedLenSequenceFeature(shape=[input_size], dtype=tf.float32),
        'labels1': tf.FixedLenSequenceFeature(shape=[output_size], dtype=tf.float32),
        'labels2': tf.FixedLenSequenceFeature(shape=[output_size], dtype=tf.float32)}

    _, sequence = tf.parse_single_sequence_example(
        serialized_example, sequence_features=sequence_features)

    length = tf.shape(sequence['inputs'])[0]
    capacity = 1000 + (num_enqueuing_threads + 1) * batch_size
    queue = tf.PaddingFIFOQueue(
        capacity=capacity,
        dtypes=[tf.float32, tf.float32, tf.float32, tf.float32, tf.int32],
        shapes=[(None, input_size), (None, input_size),
                (None, output_size), (None, output_size), ()])

    enqueue_ops = [queue.enqueue([sequence['inputs'],
                                  sequence['inputs_cmvn'],
                                  sequence['labels1'],
                                  sequence['labels2'],
                                  length])] * num_enqueuing_threads
    tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops))
    return queue.dequeue_many(batch_size)
def _build_train_pipeline(tfrecords_file_path, feature_columns, buckets=None, batch_size=None, nb_instances=None):
    """
    Build the train pipeline. Sequences are grouped into buckets for faster training.
    :param tfrecords_file_path: train TFRecords file path
    :param buckets: train buckets
    :param batch_size: mini-batch size
    :return: queue runner list, queues, symbolic link to mini-batch
    """
    with tf.device('/cpu:0'):
        # Creating a list with tfrecords
        tfrecords_list = [tfrecords_file_path]

        # Will contain queue runners for thread creation
        queue_runner_list = list()

        # Filename queue, contains only one filename (train TFRecords file)
        filename_queue = tf.train.string_input_producer(tfrecords_list)

        # Decode one example
        tensor_list = read_and_decode(filename_queue, feature_columns)

        dtypes = [tf.string, tf.int32, tf.int32, tf.int32, tf.int32, tf.int32]
        for _ in feature_columns:
            dtypes.append(tf.int32)

        # Random shuffle queue, allows for randomization of training instances
        # (maximum size: 50% of nb. instances)
        shuffle_queue = tf.RandomShuffleQueue(nb_instances, nb_instances // 2, dtypes=dtypes)

        # Enqueue and dequeue Ops + queue runner creation
        enqueue_op_shuffle_queue = shuffle_queue.enqueue(tensor_list)
        inputs = shuffle_queue.dequeue()
        queue_runner_list.append(tf.train.QueueRunner(shuffle_queue, [enqueue_op_shuffle_queue] * 4))

        shapes = [[], [], [None], [None, None], [None], [None]]
        for _ in feature_columns:
            shapes.append([None])

        if buckets:
            # Bucketing according to bucket boundaries passed as arguments
            length, batch = tf.contrib.training.bucket_by_sequence_length(inputs[1], inputs, batch_size,
                                                                          sorted(buckets),
                                                                          num_threads=4,
                                                                          capacity=32,
                                                                          shapes=shapes,
                                                                          dynamic_pad=True)
        else:
            padding_queue = tf.PaddingFIFOQueue(nb_instances, dtypes=dtypes, shapes=shapes)
            enqueue_op_padding_queue = padding_queue.enqueue(inputs)
            batch = padding_queue.dequeue_many(batch_size)
            queue_runner_list.append(tf.train.QueueRunner(padding_queue, [enqueue_op_padding_queue] * 4))

        return queue_runner_list, [filename_queue, shuffle_queue], batch
def get_padded_batch(file_list, batch_size, input_size, output_size,
                     num_enqueuing_threads=4, num_epochs=1, shuffle=True):
    """Reads batches of SequenceExamples from TFRecords and pads them.

    Can deal with variable length SequenceExamples by padding each batch to the
    length of the longest sequence with zeros.

    Args:
        file_list: A list of paths to TFRecord files containing SequenceExamples.
        batch_size: The number of SequenceExamples to include in each batch.
        input_size: The size of each input vector. The returned batch of inputs
            will have a shape [batch_size, num_steps, input_size].
        num_enqueuing_threads: The number of threads to use for enqueuing
            SequenceExamples.

    Returns:
        inputs: A tensor of shape [batch_size, num_steps, input_size] of float32s.
        labels: A tensor of shape [batch_size, num_steps] of float32s.
        lengths: A tensor of shape [batch_size] of int32s. The lengths of each
            SequenceExample before padding.
    """
    file_queue = tf.train.string_input_producer(
        file_list, num_epochs=num_epochs, shuffle=shuffle)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(file_queue)

    sequence_features = {
        'inputs': tf.FixedLenSequenceFeature(shape=[input_size], dtype=tf.float32),
        'labels': tf.FixedLenSequenceFeature(shape=[output_size], dtype=tf.float32),
        'genders': tf.FixedLenSequenceFeature(shape=[2], dtype=tf.float32)}

    _, sequence = tf.parse_single_sequence_example(
        serialized_example, sequence_features=sequence_features)

    length = tf.shape(sequence['inputs'])[0]
    capacity = 1000 + (num_enqueuing_threads + 1) * batch_size
    queue = tf.PaddingFIFOQueue(
        capacity=capacity,
        dtypes=[tf.float32, tf.float32, tf.float32, tf.int32],
        shapes=[(None, input_size), (None, output_size), (1, 2), ()])

    enqueue_ops = [queue.enqueue([sequence['inputs'],
                                  sequence['labels'],
                                  sequence['genders'],
                                  length])] * num_enqueuing_threads
    tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops))
    return queue.dequeue_many(batch_size)
def get_padded_batch_v2(file_list, batch_size, input_size, output_size,
                        num_enqueuing_threads=4, num_epochs=1, shuffle=True):
    """Reads batches of SequenceExamples from TFRecords and pads them.

    Can deal with variable length SequenceExamples by padding each batch to the
    length of the longest sequence with zeros.

    Args:
        file_list: A list of paths to TFRecord files containing SequenceExamples.
        batch_size: The number of SequenceExamples to include in each batch.
        input_size: The size of each input vector. The returned batch of inputs
            will have a shape [batch_size, num_steps, input_size].
        num_enqueuing_threads: The number of threads to use for enqueuing
            SequenceExamples.

    Returns:
        inputs: A tensor of shape [batch_size, num_steps, input_size] of float32s.
        labels: A tensor of shape [batch_size, num_steps] of float32s.
        lengths: A tensor of shape [batch_size] of int32s. The lengths of each
            SequenceExample before padding.
    """
    file_queue = tf.train.string_input_producer(
        file_list, num_epochs=num_epochs, shuffle=shuffle)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(file_queue)

    sequence_features = {
        'inputs': tf.FixedLenSequenceFeature(shape=[input_size], dtype=tf.float32),
        'inputs_cmvn': tf.FixedLenSequenceFeature(shape=[input_size], dtype=tf.float32),
        'labels1': tf.FixedLenSequenceFeature(shape=[output_size], dtype=tf.float32),
        'labels2': tf.FixedLenSequenceFeature(shape=[output_size], dtype=tf.float32),
    }

    _, sequence = tf.parse_single_sequence_example(
        serialized_example, sequence_features=sequence_features)

    length = tf.shape(sequence['inputs'])[0]
    capacity = 1000 + (num_enqueuing_threads + 1) * batch_size
    queue = tf.PaddingFIFOQueue(
        capacity=capacity,
        dtypes=[tf.float32, tf.float32, tf.float32, tf.float32, tf.int32],
        shapes=[(None, input_size), (None, input_size),
                (None, output_size), (None, output_size), ()])

    enqueue_ops = [queue.enqueue([sequence['inputs'],
                                  sequence['inputs_cmvn'],
                                  sequence['labels1'],
                                  sequence['labels2'],
                                  length])] * num_enqueuing_threads
    tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops))
    return queue.dequeue_many(batch_size)
def __init__(self, config, reverse=True, capacity=2048, threads=1):
    batch_size, bucket_count = config.batch_size, len(config.buckets)
    with tf.variable_scope('BucketingQueue'):
        # bucket_id, encoder_inputs, decoder_inputs, target_weights
        self.queue = tf.PaddingFIFOQueue(capacity=capacity,
                                         dtypes=[tf.int32, tf.int32, tf.int32, tf.int32,
                                                 tf.float32, tf.int32, tf.int32, tf.int32],
                                         shapes=[(), (None,), (None,), (None,),
                                                 (None,), (None,), (None,), (None,)],
                                         name="BucketingFIFOQueue")
        self.bucket_id = tf.placeholder(tf.int32, [], name='BucketId')
        self.encoder_inputs = tf.placeholder(tf.int32, [None, ], name='EncoderInputs')
        self.decoder_inputs = tf.placeholder(tf.int32, [None, ], name='DecoderInputs')
        self.weights = tf.placeholder(tf.int32, [None, ], 'DecoderWeights')
        self.targets = tf.placeholder(tf.float32, [None, ], 'DecoderTargets')
        self.decoder_length = tf.placeholder(tf.int32, [None, ], name='DecoderLength')
        self.encoder_authors = tf.placeholder(tf.int32, [None, ], name='AuthorEncoderInputs')
        self.decoder_authors = tf.placeholder(tf.int32, [None, ], name='AuthorDecoderInputs')
        self._batch_size = batch_size
        self._enqueue_op = self.queue.enqueue([self.bucket_id, self.encoder_inputs,
                                               self.decoder_inputs, self.weights,
                                               self.targets, self.decoder_length,
                                               self.encoder_authors, self.decoder_authors])
        bucket_id, encoder, decoder, weights, targets, dec_len, enc_authors, dec_authors = \
            self.queue.dequeue()
        self.bucket = tf.contrib.training.bucket([encoder, decoder, weights, targets,
                                                  dec_len, enc_authors, dec_authors],
                                                 bucket_id, batch_size, bucket_count,
                                                 capacity=capacity, num_threads=threads,
                                                 dynamic_pad=True, name="BucketingOp")
        self._close_op = self.queue.close()
    self._MAX_AUTHOR = config.max_author_len
    self._config = config
    self._buckets = config.buckets
    self.PAD_ID = config.PAD_ID
    self.GO_ID = config.GO_ID
    self._reverse = reverse
    self.END_ID = config.END_ID
def __init__(self, config):
    self.config = config
    self.embedding_info = [(emb["size"], emb["lowercase"]) for emb in config["embeddings"]]
    self.embedding_size = sum(size for size, _ in self.embedding_info)
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.embedding_dicts = [util.load_embedding_dict(emb["path"], emb["size"], emb["format"])
                            for emb in config["embeddings"]]
    self.max_mention_width = config["max_mention_width"]
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    self.eval_data = None  # Load eval data lazily.

    input_props = []
    input_props.append((tf.float32, [None, None, self.embedding_size]))  # Text embeddings.
    input_props.append((tf.int32, [None, None, None]))  # Character indices.
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.int32, [None]))  # Speaker IDs.
    input_props.append((tf.int32, []))  # Genre.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # Gold starts.
    input_props.append((tf.int32, [None]))  # Gold ends.
    input_props.append((tf.int32, [None]))  # Cluster ids.

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.reset_global_step = tf.assign(self.global_step, 0)
    learning_rate = tf.train.exponential_decay(self.config["learning_rate"], self.global_step,
                                               self.config["decay_frequency"],
                                               self.config["decay_rate"], staircase=True)
    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    gradients, _ = tf.clip_by_global_norm(gradients, self.config["max_gradient_norm"])
    optimizers = {"adam": tf.train.AdamOptimizer, "sgd": tf.train.GradientDescentOptimizer}
    optimizer = optimizers[self.config["optimizer"]](learning_rate)
    self.train_op = optimizer.apply_gradients(zip(gradients, trainable_params),
                                              global_step=self.global_step)
def __init__(self,
             audio_dir,
             coord,
             sample_rate,
             gc_enabled,
             receptive_field,
             sample_size=None,
             silence_threshold=None,
             queue_size=32):
    self.audio_dir = audio_dir
    self.sample_rate = sample_rate
    self.coord = coord
    self.sample_size = sample_size
    self.receptive_field = receptive_field
    self.silence_threshold = silence_threshold
    self.gc_enabled = gc_enabled
    self.threads = []
    self.sample_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
    self.queue = tf.PaddingFIFOQueue(queue_size,
                                     ['float32'],
                                     shapes=[(None, 1)])
    self.enqueue = self.queue.enqueue([self.sample_placeholder])

    if self.gc_enabled:
        self.id_placeholder = tf.placeholder(dtype=tf.int32, shape=())
        self.gc_queue = tf.PaddingFIFOQueue(queue_size, ['int32'],
                                            shapes=[()])
        self.gc_enqueue = self.gc_queue.enqueue([self.id_placeholder])

    # TODO Find a better way to check this.
    # Checking inside the AudioReader's thread makes it hard to terminate
    # the execution of the script, so we do it in the constructor for now.
    files = find_files(audio_dir)
    if not files:
        raise ValueError("No audio files found in '{}'.".format(audio_dir))
    if self.gc_enabled and not_all_have_id(files):
        raise ValueError("Global conditioning is enabled, but file names "
                         "do not conform to pattern having id.")

    # Determine the number of mutually-exclusive categories we will
    # accommodate in our embedding table.
    if self.gc_enabled:
        _, self.gc_category_cardinality = get_category_cardinality(files)
        # Add one to the largest index to get the number of categories,
        # since tf.nn.embedding_lookup expects zero-indexing. This
        # means one or more at the bottom correspond to unused entries
        # in the embedding lookup table. But that's a small waste of memory
        # to keep the code simpler, and preserves correspondence between
        # the id one specifies when generating and the ids in the
        # file names.
        self.gc_category_cardinality += 1
        print("Detected --gc_cardinality={}".format(
            self.gc_category_cardinality))
    else:
        self.gc_category_cardinality = None
def __init__(self,
             audio_dir,
             audio_test_dir,
             coord,
             sample_rate,
             gc_enabled,
             sample_size=None,
             silence_threshold=None,
             queue_size=32,
             seq_len=256):
    self.audio_dir = audio_dir
    self.sample_rate = sample_rate
    self.coord = coord
    self.sample_size = sample_size
    self.silence_threshold = silence_threshold
    self.gc_enabled = gc_enabled
    self.threads = []
    self.test_files = find_files(audio_test_dir)
    self.sample_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
    self.angle_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
    self.sample_test_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
    self.angle_test_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
    self.queue = tf.PaddingFIFOQueue(queue_size,
                                     ['float32', 'float32', 'float32', 'float32'],
                                     shapes=[(None, None), (None, None),
                                             (None, None), (None, None)])
    self.enqueue = self.queue.enqueue([self.sample_placeholder,
                                       self.angle_placeholder,
                                       self.sample_test_placeholder,
                                       self.angle_test_placeholder])

    if self.gc_enabled:
        self.id_placeholder = tf.placeholder(dtype=tf.int32, shape=())
        self.gc_queue = tf.PaddingFIFOQueue(queue_size, ['int32'],
                                            shapes=[()])
        self.gc_enqueue = self.gc_queue.enqueue([self.id_placeholder])

    files = find_files(audio_dir)
    print(audio_dir)
    if not files:
        raise ValueError("No audio files found in '{}'.".format(audio_dir))
    if self.gc_enabled and not_all_have_id(files):
        raise ValueError("Global conditioning is enabled, but file names "
                         "do not conform to pattern having id.")

    # Determine the number of mutually-exclusive categories we will
    # accommodate in our embedding table.
    if self.gc_enabled:
        _, self.gc_category_cardinality = get_category_cardinality(files)
        # Add one to the largest index to get the number of categories,
        # since tf.nn.embedding_lookup expects zero-indexing. This
        # means one or more at the bottom correspond to unused entries
        # in the embedding lookup table. But that's a small waste of memory
        # to keep the code simpler, and preserves correspondence between
        # the id one specifies when generating and the ids in the
        # file names.
        self.gc_category_cardinality += 1
        print("Detected --gc_cardinality={}".format(
            self.gc_category_cardinality))
    else:
        self.gc_category_cardinality = None
def prefetch(tensor_dict, capacity):
    """Creates a prefetch queue for tensors.

    Creates a FIFO queue to asynchronously enqueue tensor_dicts and returns a
    dequeue op that evaluates to a tensor_dict. This function is useful in
    prefetching preprocessed tensors so that the data is readily available for
    consumers.

    Example input pipeline when you don't need batching:
    ----------------------------------------------------
    key, string_tensor = slim.parallel_reader.parallel_read(...)
    tensor_dict = decoder.decode(string_tensor)
    tensor_dict = preprocessor.preprocess(tensor_dict, ...)
    prefetch_queue = prefetcher.prefetch(tensor_dict, capacity=20)
    tensor_dict = prefetch_queue.dequeue()
    outputs = Model(tensor_dict)
    ...
    ----------------------------------------------------

    For input pipelines with batching, refer to core/batcher.py

    Args:
        tensor_dict: a dictionary of tensors to prefetch.
        capacity: the size of the prefetch queue.

    Returns:
        a FIFO prefetcher queue
    """
    names = list(tensor_dict.keys())
    dtypes = [t.dtype for t in tensor_dict.values()]
    shapes = [t.get_shape() for t in tensor_dict.values()]
    prefetch_queue = tf.PaddingFIFOQueue(capacity, dtypes=dtypes,
                                         shapes=shapes,
                                         names=names,
                                         name='prefetch_queue')
    enqueue_op = prefetch_queue.enqueue(tensor_dict)
    tf.train.queue_runner.add_queue_runner(tf.train.queue_runner.QueueRunner(
        prefetch_queue, [enqueue_op]))
    tf.summary.scalar('queue/%s/fraction_of_%d_full' % (prefetch_queue.name,
                                                        capacity),
                      tf.to_float(prefetch_queue.size()) * (1. / capacity))
    return prefetch_queue