我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用tensorflow.TextLineReader()。
def _abspath_no_label_load_file(path, epochs=None, shuffle=True, seed=0):
    """Build ops that load one unlabeled image per line of a list file.

    Each line read from ``path`` is used verbatim as the path of the image
    file to load; nothing is prepended to it.

    Args:
        path: Text file listing one image path per line.
        epochs: Forwarded as ``num_epochs`` to the filename queue.
        shuffle: Forwarded to the filename queue.
        seed: Forwarded to the filename queue.

    Returns:
        Tuple ``(image, imgshape, image_path)``: the decoded image with its
        static shape set to ``[None, None, 3]``, the first two entries of its
        dynamic shape, and the line that was read.
    """
    list_queue = tf.train.string_input_producer(
        [path], num_epochs=epochs, shuffle=shuffle, seed=seed)
    _, image_path = tf.TextLineReader().read(list_queue)

    # The whole line is the image path, so no CSV decoding is needed.
    raw_bytes = tf.read_file(image_path)
    image = decode_image(raw_bytes, channels=3)
    image.set_shape([None, None, 3])
    return image, tf.shape(image)[:2], image_path
def _read_image_and_box(self, bboxes_csv):
    """Read one CSV row from the queue and load the image and box it names.

    Args:
        bboxes_csv: Queue handed to the line reader. Rows have the form
            ``file,y_min,x_min,y_max,x_max,label``, e.g.
            ``2008_000033,0.1831831831831832,0.208,0.7717717717717718,0.952,0``.

    Returns:
        image, [y_min, x_min, y_max, x_max, label]
    """
    # skip_header_lines is a line count: pass the integer 1, not the
    # boolean True.
    reader = tf.TextLineReader(skip_header_lines=1)
    _, row = reader.read(bboxes_csv)

    # One string column (the file stem) followed by five float columns.
    record_defaults = [[""], [0.], [0.], [0.], [0.], [0.]]
    filename, y_min, x_min, y_max, x_max, label = tf.decode_csv(
        row, record_defaults)

    image_path = os.path.join(self._data_dir, 'VOCdevkit', 'VOC2012',
                              'JPEGImages') + "/" + filename + ".jpg"

    # image is normalized in [-1,1]
    image = read_image_jpg(image_path)
    return image, tf.stack([y_min, x_min, y_max, x_max, label])
def read_csv(batch_size, file_name):
    """Build a shuffled batch of integer columns parsed from a text file.

    Each line of ``file_name`` is split on single spaces into
    ``FLAGS.max_sentence_len * 2`` columns, each defaulting to the integer 0.

    Args:
        batch_size: Number of rows per batch; also sizes the shuffle queue.
        file_name: Path of the text file to read.

    Returns:
        The result of ``tf.train.shuffle_batch`` over the decoded columns.
    """
    filename_queue = tf.train.string_input_producer([file_name])
    _, line = tf.TextLineReader(skip_header_lines=0).read(filename_queue)

    # The defaults fix both the number of columns and their integer dtype.
    column_defaults = [[0] for _ in range(FLAGS.max_sentence_len * 2)]
    columns = tf.decode_csv(
        line, field_delim=' ', record_defaults=column_defaults)

    # Batching is what actually pulls rows through the reader.
    return tf.train.shuffle_batch(
        columns,
        batch_size=batch_size,
        capacity=batch_size * 50,
        min_after_dequeue=batch_size)
def _read_image_and_box(self, bboxes_csv):
    """Read one CSV row from the queue and load the image and box it names.

    Args:
        bboxes_csv: Queue handed to the line reader. Rows have the form
            ``file,y_min,x_min,y_max,x_max,label``, e.g.
            ``2008_000033,0.1831831831831832,0.208,0.7717717717717718,0.952,0``.

    Returns:
        image, box
    """
    # skip_header_lines is a line count: pass the integer 1, not the
    # boolean True.
    reader = tf.TextLineReader(skip_header_lines=1)
    _, row = reader.read(bboxes_csv)

    # One string column (the file stem) followed by five float columns.
    record_defaults = [[""], [0.], [0.], [0.], [0.], [0.]]
    filename, y_min, x_min, y_max, x_max, label = tf.decode_csv(
        row, record_defaults)

    image_path = os.path.join(self._data_dir, 'VOCdevkit', 'VOC2012',
                              'JPEGImages') + "/" + filename + ".jpg"

    # image is normalized in [-1,1], convert to #_image_depth depth
    image = read_image_jpg(image_path, depth=self._image_depth)
    return image, tf.stack([y_min, x_min, y_max, x_max, label])
def inputs(lists, image_shape, batch_size):
    """Build a shuffled batch of randomly cropped, whitened images.

    Args:
        lists: File names given to the (shuffled) filename queue; each line
            of those files is parsed by ``read_my_file_format``.
        image_shape: Target image shape; entries 0 and 1 are used as height
            and width.
        batch_size: Number of examples per batch.

    Returns:
        Tuple ``(image_batch, label_batch)`` from ``tf.train.shuffle_batch``.
    """
    filename_queue = tf.train.string_input_producer(lists, shuffle=True)
    _, line = tf.TextLineReader().read(filename_queue)
    image, label = read_my_file_format(line)

    # Resize 3 pixels larger than the target in each dimension so the random
    # crop below has some room to move.
    enlarged_size = [image_shape[0] + 3, image_shape[1] + 3]
    image = tf.image.resize_images(image, enlarged_size)
    image = tf.random_crop(image, image_shape)
    label = tf.cast(label, tf.float32)
    image.set_shape(image_shape)
    whitened = tf.image.per_image_whitening(image)

    min_after_dequeue = 1000
    capacity = min_after_dequeue + (2 + 1) * batch_size
    image_batch, label_batch = tf.train.shuffle_batch(
        [whitened, label],
        batch_size=batch_size,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue)
    return image_batch, label_batch
def inputs_for_test(lists, image_shape, batch_size):
    """Build an unshuffled-batch pipeline of resized, whitened images.

    Unlike a training pipeline, no random crop is applied and examples are
    batched with ``tf.train.batch``.

    Args:
        lists: File names given to the filename queue; each line of those
            files is parsed by ``read_my_file_format``.
        image_shape: Target image shape; entries 0 and 1 are used as height
            and width.
        batch_size: Number of examples per batch.

    Returns:
        Tuple ``(image_batch, label_batch)`` from ``tf.train.batch``.
    """
    filename_queue = tf.train.string_input_producer(lists, shuffle=True)
    reader = tf.TextLineReader()
    _, value = reader.read(filename_queue)
    image, label = read_my_file_format(value)

    image = tf.image.resize_images(image, [image_shape[0], image_shape[1]])
    label = tf.cast(label, tf.float32)
    image.set_shape(image_shape)
    float_image = tf.image.per_image_whitening(image)

    # tf.train.batch takes no shuffle-queue sizing, so the previously
    # computed (and unused) min_after_dequeue/capacity locals were dropped.
    image_batch, label_batch = tf.train.batch(
        [float_image, label], batch_size=batch_size)
    return image_batch, label_batch
def test_read_text_lines(self):
    """read_batch_examples yields each line of one file once, in order."""
    gfile.Glob = self._orig_glob
    filename = self._create_temp_file("ABC\nDEF\nGHK\n")

    batch_size = 1
    queue_capacity = 5
    name = "my_batch"

    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
        inputs = tf.contrib.learn.io.read_batch_examples(
            filename, batch_size, reader=tf.TextLineReader,
            randomize_input=False, num_epochs=1,
            queue_capacity=queue_capacity, name=name)
        session.run(tf.initialize_local_variables())

        coord = tf.train.Coordinator()
        # Keep the thread handles so they can be joined before the session
        # is torn down instead of being left running.
        threads = tf.train.start_queue_runners(session, coord=coord)

        self.assertAllEqual(session.run(inputs), [b"ABC"])
        self.assertAllEqual(session.run(inputs), [b"DEF"])
        self.assertAllEqual(session.run(inputs), [b"GHK"])
        # num_epochs=1: after the last line the queue is exhausted.
        with self.assertRaises(errors.OutOfRangeError):
            session.run(inputs)

        coord.request_stop()
        coord.join(threads)
def test_read_text_lines_multifile(self):
    """read_batch_examples reads lines across several files, in order."""
    gfile.Glob = self._orig_glob
    filenames = self._create_sorted_temp_files(["ABC\n", "DEF\nGHK\n"])

    batch_size = 1
    queue_capacity = 5
    name = "my_batch"

    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
        inputs = tf.contrib.learn.io.read_batch_examples(
            filenames, batch_size, reader=tf.TextLineReader,
            randomize_input=False, num_epochs=1,
            queue_capacity=queue_capacity, name=name)
        session.run(tf.initialize_local_variables())

        coord = tf.train.Coordinator()
        # Keep the thread handles so they can be joined before the session
        # is torn down instead of being left running.
        threads = tf.train.start_queue_runners(session, coord=coord)

        self.assertAllEqual(session.run(inputs), [b"ABC"])
        self.assertAllEqual(session.run(inputs), [b"DEF"])
        self.assertAllEqual(session.run(inputs), [b"GHK"])
        # num_epochs=1: after the last line the queue is exhausted.
        with self.assertRaises(errors.OutOfRangeError):
            session.run(inputs)

        coord.request_stop()
        coord.join(threads)
def test_batch_text_lines(self):
    """read_batch_examples groups lines into batches, last one partial."""
    gfile.Glob = self._orig_glob
    filename = self._create_temp_file("A\nB\nC\nD\nE\n")

    batch_size = 3
    queue_capacity = 10
    name = "my_batch"

    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
        inputs = tf.contrib.learn.io.read_batch_examples(
            [filename], batch_size, reader=tf.TextLineReader,
            randomize_input=False, num_epochs=1,
            queue_capacity=queue_capacity, read_batch_size=10, name=name)
        session.run(tf.initialize_local_variables())

        coord = tf.train.Coordinator()
        # Keep the thread handles so they can be joined before the session
        # is torn down instead of being left running.
        threads = tf.train.start_queue_runners(session, coord=coord)

        # Five lines with batch_size=3: one full batch, then the remainder.
        self.assertAllEqual(session.run(inputs), [b"A", b"B", b"C"])
        self.assertAllEqual(session.run(inputs), [b"D", b"E"])
        with self.assertRaises(errors.OutOfRangeError):
            session.run(inputs)

        coord.request_stop()
        coord.join(threads)
def _voc_seg_load_file(path, epochs=None, shuffle=True, seed=0):
    """Build ops that load an image and its segmentation map per list line.

    Each line of ``path`` holds two space-separated paths (image, then
    segmentation); both are appended to the ``VOC_DIR`` environment variable.

    Args:
        path: Text file listing ``image_path seg_path`` pairs.
        epochs: Forwarded as ``num_epochs`` to the filename queue.
        shuffle: Forwarded to the filename queue.
        seed: Forwarded to the filename queue.

    Returns:
        Tuple ``(image, seg, imgshape, imgname)``: the decoded image, the
        single-channel PNG segmentation cast to int32, the first two entries
        of the image's dynamic shape, and the image path as listed.

    Raises:
        KeyError: If the ``VOC_DIR`` environment variable is not set.
    """
    voc_root = os.environ['VOC_DIR']

    list_queue = tf.train.string_input_producer(
        [path], num_epochs=epochs, shuffle=shuffle, seed=seed)
    _, line = tf.TextLineReader().read(list_queue)
    image_path, seg_path = tf.decode_csv(
        line, record_defaults=[[''], ['']], field_delim=' ')

    # Listed paths are appended directly to the dataset root.
    image_abspath = voc_root + image_path
    seg_abspath = voc_root + seg_path

    image = decode_image(tf.read_file(image_abspath), channels=3)
    image.set_shape([None, None, 3])
    imgshape = tf.shape(image)[:2]

    seg_png = tf.image.decode_png(tf.read_file(seg_abspath), channels=1)
    seg = tf.cast(seg_png, tf.int32)
    return image, seg, imgshape, image_path
def _imagenet_load_file(path, epochs=None, shuffle=True, seed=0,
                        subset='train', prepare_path=True):
    """Build ops that load one labeled image per line of the list file(s).

    Each line holds ``image_path label`` separated by a space.

    Args:
        path: A list file, or a list of list files.
        epochs: Forwarded as ``num_epochs`` to the filename queue.
        shuffle: Forwarded to the filename queue.
        seed: Forwarded to the filename queue.
        subset: Path component placed between ``/images/`` and the listed
            image path when ``prepare_path`` is true.
        prepare_path: If true, build the path as
            ``$IMAGENET_DIR + '/images/' + subset + image_path``; otherwise
            use the listed path as-is.

    Returns:
        Tuple ``(image, label, imgshape, image_path)``: the decoded image,
        the label parsed as int32, the first two entries of the image's
        dynamic shape, and the image path as listed.
    """
    imagenet_root = os.environ.get('IMAGENET_DIR', '')
    list_files = path if isinstance(path, list) else [path]

    list_queue = tf.train.string_input_producer(
        list_files, num_epochs=epochs, shuffle=shuffle, seed=seed)
    _, line = tf.TextLineReader().read(list_queue)
    image_path, label_str = tf.decode_csv(
        line, record_defaults=[[''], ['']], field_delim=' ')

    if prepare_path:
        image_abspath = imagenet_root + '/images/' + subset + image_path
    else:
        image_abspath = image_path

    image = decode_image(tf.read_file(image_abspath), channels=3)
    image.set_shape([None, None, 3])
    imgshape = tf.shape(image)[:2]

    label = tf.string_to_number(label_str, out_type=tf.int32)
    return image, label, imgshape, image_path
def _relpath_no_label_load_file(path, root_path, epochs=None, shuffle=True,
                                seed=0):
    """Build ops that load one unlabeled image per line, relative to a root.

    Each line of ``path`` is joined to ``root_path`` with a ``/`` to form the
    image file path.

    Args:
        path: Text file listing one relative image path per line.
        root_path: Directory prefix for every listed path.
        epochs: Forwarded as ``num_epochs`` to the filename queue.
        shuffle: Forwarded to the filename queue.
        seed: Forwarded to the filename queue.

    Returns:
        Tuple ``(image, imgshape, image_path)``: the decoded image, the first
        two entries of its dynamic shape, and the relative path as listed.
    """
    list_queue = tf.train.string_input_producer(
        [path], num_epochs=epochs, shuffle=shuffle, seed=seed)
    _, image_path = tf.TextLineReader().read(list_queue)

    # The whole line is the relative path, so no CSV decoding is needed.
    raw_bytes = tf.read_file(root_path + '/' + image_path)
    image = decode_image(raw_bytes, channels=3)
    image.set_shape([None, None, 3])
    return image, tf.shape(image)[:2], image_path
def read_instances(self, count, shuffle, epochs):
    """Reads the data represented by this DataSource using a TensorFlow reader.

    Arguments:
      count: The maximum number of lines requested from the reader per read.
      shuffle: Whether the filename queue shuffles the matched files.
      epochs: The number of epochs or passes over the data to perform;
        0 (or None) means unlimited.
    Returns:
      A tensor containing instances that are read.
    """
    # The filename queue treats None as "unlimited", so map 0 onto None.
    num_epochs = epochs or None

    matched_files = tf.train.match_filenames_once(self._path, name='files')
    file_queue = tf.train.string_input_producer(
        matched_files, num_epochs=num_epochs, shuffle=shuffle, name='queue')

    line_reader = tf.TextLineReader(name='reader')
    _, instances = line_reader.read_up_to(file_queue, count, name='read')
    return instances
def acquire_data_ops(filename_queue, processing_method, record_defaults=None):
    """Read one CSV line from the queue and hand its first column on.

    Args:
        filename_queue: Queue of files for the line reader.
        processing_method: Callable invoked as
            ``processing_method(first_column, 3)``.
        record_defaults: CSV column defaults; a single string column is
            assumed when omitted.

    Returns:
        Tuple of the ``processing_method`` result and a constant ``True``
        boolean tensor.
    """
    with tf.name_scope("acquire_data"):
        defaults = [[""]] if record_defaults is None else record_defaults

        _, line = tf.TextLineReader().read(filename_queue)
        columns = tf.decode_csv(line, record_defaults=defaults)

        # The 3 is because this is used for training, which works on triplets.
        processed = processing_method(columns[0], 3)
        return processed, tf.constant(True, dtype=tf.bool)
def read_data(filename_queue, bucket):
    """Read one CSV line and split it into encoder and decoder inputs.

    :param filename_queue: file queue
    :param bucket: (encoder_length, decoder_length)
    :return: an object whose ``encoder`` attribute packs the first
        ``encoder_length`` columns and whose ``decoder`` attribute packs the
        remaining columns
    """
    class DataRecord(object):
        pass

    encoder_length = bucket[0]
    total_columns = bucket[0] + bucket[1]

    record = DataRecord()
    _, line = tf.TextLineReader().read(filename_queue)

    # Every column is an integer defaulting to 1.
    column_defaults = [[1] for _ in range(total_columns)]
    columns = tf.decode_csv(line, record_defaults=column_defaults)

    record.encoder = tf.pack(columns[:encoder_length])
    record.decoder = tf.pack(columns[encoder_length:])
    return record
def batch_generator(filenames):
    """Build a shuffled batch of (features, label) rows from CSV files.

    ``filenames`` is the list of files to read from; the first line of each
    file is skipped as a header.

    Returns:
        Tuple ``(data_batch, label_batch)`` from ``tf.train.shuffle_batch``.
    """
    file_queue = tf.train.string_input_producer(filenames)
    _, line = tf.TextLineReader(skip_header_lines=1).read(file_queue)

    # N_FEATURES float columns, except column 4 which is a string, followed
    # by one trailing integer column for the label.
    column_defaults = [[1.0] for _ in range(N_FEATURES)]
    column_defaults[4] = ['']
    column_defaults.append([1])
    columns = tf.decode_csv(line, record_defaults=column_defaults)

    # Encode the string column as 1.0 when it equals 'Present', else 0.0.
    is_present = tf.equal(columns[4], tf.constant('Present'))
    columns[4] = tf.cond(
        is_present, lambda: tf.constant(1.0), lambda: tf.constant(0.0))

    features = tf.stack(columns[:N_FEATURES])
    label = columns[-1]

    batches = tf.train.shuffle_batch(
        [features, label],
        batch_size=BATCH_SIZE,
        capacity=20 * BATCH_SIZE,
        min_after_dequeue=10 * BATCH_SIZE)
    data_batch, label_batch = batches
    return data_batch, label_batch
def data_generator(data_dir):
    """Yield ``(data, label)`` array pairs parsed from lines of a text file.

    Every line is expected to be two JSON documents separated by ``|``; each
    half is decoded as UTF-8, parsed with ``json.loads`` and wrapped in a
    NumPy array.

    Args:
        data_dir: Path handed to the filename queue as the file to read.

    Yields:
        Tuple ``(data, label)`` of NumPy arrays for each line read.
    """
    reader = tf.TextLineReader()
    queue = tf.train.string_input_producer([data_dir])
    _, value = reader.read(queue)

    coord = tf.train.Coordinator()
    sess = tf.Session()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while True:
            v = sess.run(value)
            [data, label] = v.split(b"|")
            data = np.array(json.loads(data.decode("utf-8")))
            label = np.array(json.loads(label.decode("utf-8")))
            yield (data, label)
    finally:
        # The loop above never exits normally, so cleanup placed after it
        # could never run. Doing it in ``finally`` releases the threads and
        # the session when the generator is closed or an error propagates.
        coord.request_stop()
        coord.join(threads)
        sess.close()
def input_fn(batch_size, file_name):
    """Create the feature dict and label tensor for cross-validation.

    Every CSV column is first read as a string; continuous columns are then
    converted to float32, categorical columns go through ``dense_to_sparse``
    and the label column is converted to int32.

    :param batch_size: number of rows per batch
    :param file_name: file(s) passed to ``read_batch_examples``
    :return: tuple ``(feature_cols, label)``
    """
    string_defaults = [tf.constant([''], dtype=tf.string)] * len(COLUMNS)

    examples_op = tf.contrib.learn.read_batch_examples(
        file_name,
        batch_size=batch_size,
        reader=tf.TextLineReader,
        num_threads=5,
        num_epochs=1,
        randomize_input=False,
        parse_fn=lambda x: tf.decode_csv(x, string_defaults, field_delim=","))

    # Slice one column tensor per header name.
    examples_dict = {
        header: examples_op[:, i] for i, header in enumerate(COLUMNS)}

    feature_cols = {
        k: tf.string_to_number(examples_dict[k], out_type=tf.float32)
        for k in CONTINUOUS_COLUMNS}
    feature_cols.update(
        {k: dense_to_sparse(examples_dict[k]) for k in CATEGORICAL_COLUMNS})

    label = tf.string_to_number(examples_dict[LABEL_COLUMN], out_type=tf.int32)
    return feature_cols, label
def smiles_labels_batch_queue(eval_params):
    """Build a shuffled batch queue of (smiles, label) pairs.

    Args:
        eval_params: Mapping read for the keys ``substances_fname``,
            ``substances_field_delim``, ``batch_size``, ``queue_capacity``,
            ``queue_min_after_dequeue``, ``queue_num_threads`` and
            ``queue_seed``.

    Returns:
        Tuple ``(smiles_batch, labels_batch)`` from
        ``tf.train.shuffle_batch``.
    """
    fname_queue = tf.train.string_input_producer(
        [eval_params['substances_fname']],
        num_epochs=None,
        shuffle=True,
        name="substances_fname_queue")

    line_reader = tf.TextLineReader(
        skip_header_lines=1,
        name="substance_file_reader")
    _, record = line_reader.read(queue=fname_queue)

    # Columns: substance id (string), smiles (string), label (float).
    # The id column is decoded but not batched.
    _, smiles, label = tf.decode_csv(
        records=record,
        record_defaults=[[""], [""], [1.0]],
        field_delim=eval_params['substances_field_delim'])

    smiles_batch, labels_batch = tf.train.shuffle_batch(
        tensors=[smiles, label],
        batch_size=eval_params['batch_size'],
        capacity=eval_params['queue_capacity'],
        min_after_dequeue=eval_params['queue_min_after_dequeue'],
        num_threads=eval_params['queue_num_threads'],
        seed=eval_params['queue_seed'])
    return smiles_batch, labels_batch
def smiles_triple_batch_queue(eval_params):
    """Begin building a queue-based reader of substance triples.

    Reads lines from ``eval_params['substances_fname']`` (skipping one header
    line) and decodes each into seven string columns split on
    ``eval_params['substances_field_delim']``.

    NOTE(review): as shown here the function ends right after the CSV decode
    and has no return statement; the rest of the definition appears to be
    missing -- confirm against the original source.
    """
    fname_queue = tf.train.string_input_producer(
        [eval_params['substances_fname']],
        num_epochs=None,
        shuffle=True,
        name="substances_fname_queue")
    reader = tf.TextLineReader(
        skip_header_lines=1,
        name="substance_file_reader")
    _, record = reader.read(queue=fname_queue)
    # entries = [
    #     target_id,
    #     substance_id, smiles,
    #     substance_plus_id, smiles_plus,
    #     substance_minus_id, smiles_minus]
    entries = tf.decode_csv(
        records=record,
        record_defaults=[[""], [""], [""], [""], [""], [""], [""]],
        field_delim=eval_params['substances_field_delim'])
def read_pascifar(pascifar_path, queue):
    """Read and parse one ``file,label`` row from the queue.

    Args:
        pascifar_path: a constant string tensor representing the path of the
            PASCIFAR dataset
        queue: A queue of strings in the format: file, label

    Returns:
        image_path: a tf.string tensor; the dataset path joined to the listed
            file with a ``/``
        label: a int64 tensor with the label
    """
    # One header line is skipped before records are read.
    line_reader = tf.TextLineReader(skip_header_lines=1)
    _, row = line_reader.read(queue)

    # Two columns are decoded: file (string) and label (int).
    column_defaults = [[""], [0]]
    relative_path, raw_label = tf.decode_csv(
        row, column_defaults, field_delim=",")

    image_path = pascifar_path + tf.constant("/") + relative_path
    label = tf.cast(raw_label, tf.int64)
    return image_path, label
def make_data_provider(self, **kwargs):
    """Create a parallel data provider over source and optional target text.

    The source dataset is always built from ``self.params["source_files"]``.
    A target dataset is built only when ``self.params["target_files"]`` is
    non-empty; otherwise ``None`` is passed as the second dataset.

    Args:
        **kwargs: Extra keyword arguments forwarded to
            ``ParallelDataProvider``.

    Returns:
        A ``parallel_data_provider.ParallelDataProvider`` instance.
    """
    params = self.params

    # Source side: tokens get only an end-of-sequence marker appended.
    source_decoder = split_tokens_decoder.SplitTokensDecoder(
        tokens_feature_name="source_tokens",
        length_feature_name="source_len",
        append_token="SEQUENCE_END",
        delimiter=params["source_delimiter"])
    source_dataset = tf.contrib.slim.dataset.Dataset(
        data_sources=params["source_files"],
        reader=tf.TextLineReader,
        decoder=source_decoder,
        num_samples=None,
        items_to_descriptions={})

    target_dataset = None
    if len(params["target_files"]) > 0:
        # Target side: tokens are wrapped in start and end markers.
        target_decoder = split_tokens_decoder.SplitTokensDecoder(
            tokens_feature_name="target_tokens",
            length_feature_name="target_len",
            prepend_token="SEQUENCE_START",
            append_token="SEQUENCE_END",
            delimiter=params["target_delimiter"])
        target_dataset = tf.contrib.slim.dataset.Dataset(
            data_sources=params["target_files"],
            reader=tf.TextLineReader,
            decoder=target_decoder,
            num_samples=None,
            items_to_descriptions={})

    return parallel_data_provider.ParallelDataProvider(
        dataset1=source_dataset,
        dataset2=target_dataset,
        shuffle=params["shuffle"],
        num_epochs=params["num_epochs"],
        **kwargs)
def read_bbbc006(all_files_queue):
    """Reads and parses examples from BBBC006 data files.

    Recommendation: if you want N-way read parallelism, call this function
    N times. This will give you N independent Readers reading different
    files & positions within those files, which will give better mixing of
    examples.

    Args:
        all_files_queue: A queue of strings with the names of CSV files whose
            rows hold three paths: image, contour and segment.

    Returns:
        An object representing a single example, with the following fields:
            label: contour and segment tensors concatenated along axis 2.
            uint8image: the tensor decoded from the image file.
    """
    class BBBC006Record(object):
        pass

    record = BBBC006Record()

    # Each CSV row names the three files that make up one example.
    _, csv_row = tf.TextLineReader().read(all_files_queue)
    image_path, contour_path, segment_path = tf.decode_csv(
        csv_row, record_defaults=[[""], [""], [""]])

    record.uint8image = read_from_queue(tf.read_file(image_path))
    contour = read_from_queue(tf.read_file(contour_path))
    segment = read_from_queue(tf.read_file(segment_path))

    record.label = tf.concat([contour, segment], 2)
    return record
def data_loader(csv_filename: str, params: Params, batch_size: int=128, data_augmentation: bool=False,
                num_epochs: int=None, image_summaries: bool=False):
    """Return an ``input_fn`` that batches images and labels listed in CSV.

    Args:
        csv_filename: One CSV file name, or a list of CSV file names. Rows
            hold ``path`` and ``label`` separated by ``params.csv_delimiter``.
        params: Provides ``csv_delimiter`` and ``input_shape``.
        batch_size: Number of examples per batch.
        data_augmentation: Forwarded to ``image_reading``.
        num_epochs: Forwarded to the filename queue.
        image_summaries: If true, add image and text summaries of the batch.

    Returns:
        A zero-argument function returning ``(prepared_batch, labels)``,
        where ``prepared_batch`` is a dict with keys ``images``,
        ``images_widths``, ``filenames`` and ``labels``.
    """

    def input_fn():
        # Accept either a single CSV file or a list of them.
        csv_files = csv_filename if isinstance(csv_filename, list) else [csv_filename]
        filename_queue = tf.train.string_input_producer(
            csv_files, num_epochs=num_epochs, name='filename_queue')

        # skip_header_lines=0: every line of the file is treated as data.
        line_reader = tf.TextLineReader(name='CSV_Reader', skip_header_lines=0)
        _, line = line_reader.read(filename_queue, name='file_reading_op')

        path, label = tf.decode_csv(
            line,
            record_defaults=[['None'], ['None']],
            field_delim=params.csv_delimiter,
            name='csv_reading_op')

        image, img_width = image_reading(
            path,
            resized_size=params.input_shape,
            data_augmentation=data_augmentation,
            padding=True)

        prepared_batch = tf.train.shuffle_batch(
            {'images': image,
             'images_widths': img_width,
             'filenames': path,
             'labels': label},
            batch_size=batch_size,
            min_after_dequeue=500,
            num_threads=15,
            capacity=4000,
            allow_smaller_final_batch=False,
            name='prepared_batch_queue')

        if image_summaries:
            tf.summary.image('input/image', prepared_batch.get('images'), max_outputs=1)
            tf.summary.text('input/labels', prepared_batch.get('labels')[:10])
            tf.summary.text('input/widths', tf.as_string(prepared_batch.get('images_widths')))

        return prepared_batch, prepared_batch.get('labels')

    return input_fn
def __init__(self, config, batch_size, one_hot=False):
    """Build a shuffled batch queue of fixed-width lines from chargan.txt.

    Each line is padded with spaces, cut to 64 characters, split with an
    empty delimiter and mapped to vocabulary indices (0 for entries not in
    the vocabulary). The indices are either one-hot encoded or shifted and
    scaled by half the vocabulary size.

    Args:
        config: Not used by this method.
        batch_size: Number of examples per batch.
        one_hot: If true, emit one-hot encoded characters instead of scaled
            index values.
    """
    self.lookup = None

    line_reader = tf.TextLineReader()
    file_queue = tf.train.string_input_producer(["chargan.txt"])
    _, x = line_reader.read(file_queue)

    vocabulary = self.get_vocabulary()
    table = tf.contrib.lookup.string_to_index_table_from_tensor(
        mapping=vocabulary, default_value=0)

    # Pad first so that the substring below always has 64 characters.
    x = tf.string_join([x, tf.constant(" " * 64)])
    x = tf.substr(x, [0], [64])
    x = tf.string_split(x, delimiter='')
    x = tf.sparse_tensor_to_dense(x, default_value=' ')
    x = tf.reshape(x, [64])
    x = table.lookup(x)

    self.one_hot = one_hot
    if one_hot:
        x = tf.one_hot(x, len(vocabulary))
        x = tf.cast(x, dtype=tf.float32)
        rows = int(x.get_shape()[0])
        cols = int(x.get_shape()[1])
        x = tf.reshape(x, [1, rows, cols, 1])
    else:
        x = tf.cast(x, dtype=tf.float32)
        # Shift and scale the indices by half the vocabulary size.
        x -= len(vocabulary)/2.0
        x /= len(vocabulary)/2.0
        x = tf.reshape(x, [1, 1, 64, 1])

    # enqueue_many=True: the leading dimension of size 1 is treated as the
    # example dimension.
    x = tf.train.shuffle_batch(
        [x],
        batch_size=batch_size,
        num_threads=8,
        capacity=5000,
        min_after_dequeue=500,
        enqueue_many=True)

    self.x = x
    self.table = table
def read_my_file_format(self, filename_queue):
    """Read one CSV row naming an image PNG and a depth PNG, and decode both.

    Args:
        filename_queue: Queue of CSV files; each row holds an image file
            name and a depth file name.

    Returns:
        Tuple ``(image_decoded, depth_decoded)``, both decoded as
        single-channel PNGs with static shape ``[512, 512, 1]``.
    """
    _, record_string = tf.TextLineReader().read(filename_queue)

    # "a" is only a placeholder default that marks both columns as strings.
    image_file_name, depth_file_name = tf.decode_csv(
        record_string, [["a"], ["a"]])

    image_png_data = tf.read_file(image_file_name)
    depth_png_data = tf.read_file(depth_file_name)

    # channels=1 means the PNG is read as gray-scale.
    image_decoded = tf.image.decode_png(image_png_data, channels=1)
    image_decoded.set_shape([512, 512, 1])

    depth_decoded = tf.image.decode_png(depth_png_data, channels=1)
    depth_decoded.set_shape([512, 512, 1])

    return image_decoded, depth_decoded
def make_data_provider(self, **kwargs):
    """Create a parallel data provider over source and optional target text.

    The source dataset is always built from ``self.params["source_files"]``.
    A target dataset is built only when ``self.params["target_files"]`` is
    non-empty; otherwise ``None`` is passed as the second dataset.

    Args:
        **kwargs: Extra keyword arguments forwarded to
            ``ParallelDataProvider``.

    Returns:
        A ``parallel_data_provider.ParallelDataProvider`` instance.
    """
    params = self.params

    # Source side: tokens get only an end-of-sequence marker appended.
    source_decoder = split_tokens_decoder.SplitTokensDecoder(
        tokens_feature_name="source_tokens",
        length_feature_name="source_len",
        append_token="SEQUENCE_END",
        delimiter=params["source_delimiter"])
    source_dataset = tf.contrib.slim.dataset.Dataset(
        data_sources=params["source_files"],
        reader=tf.TextLineReader,
        decoder=source_decoder,
        num_samples=None,
        items_to_descriptions={})

    target_dataset = None
    if len(params["target_files"]) > 0:
        # NOTE(review): the token prepended to targets is "SEQUENCE_END",
        # the same as the appended one -- confirm this is intended rather
        # than a start-of-sequence marker.
        target_decoder = split_tokens_decoder.SplitTokensDecoder(
            tokens_feature_name="target_tokens",
            length_feature_name="target_len",
            prepend_token="SEQUENCE_END",
            append_token="SEQUENCE_END",
            delimiter=params["target_delimiter"])
        target_dataset = tf.contrib.slim.dataset.Dataset(
            data_sources=params["target_files"],
            reader=tf.TextLineReader,
            decoder=target_decoder,
            num_samples=None,
            items_to_descriptions={})

    return parallel_data_provider.ParallelDataProvider(
        dataset1=source_dataset,
        dataset2=target_dataset,
        shuffle=params["shuffle"],
        num_epochs=params["num_epochs"],
        **kwargs)
def batch_generator(filenames):
    """Build a shuffled batch of (features, label) rows from TSV files.

    ``filenames`` is the list of files to read from. The first line of each
    file is skipped; every remaining line is split on tabs into
    ``N_FEATURES`` string columns.

    Returns:
        Tuple ``(data_batch, label_batch)``: the first ``N_FEATURES - 1``
        columns stacked into one tensor, and the last column.
    """
    filename_queue = tf.train.string_input_producer(filenames)
    reader = tf.TextLineReader(skip_header_lines=1)  # skip the header line
    _, value = reader.read(filename_queue)

    # All columns are read as strings.
    record_defaults = [[''] for _ in range(N_FEATURES)]
    content = tf.decode_csv(
        value, record_defaults=record_defaults, field_delim='\t')

    # Everything but the last column is a feature; the last is the label.
    features = tf.stack(content[:N_FEATURES - 1])
    label = content[-1]

    # Minimum number of elements left in the queue after a dequeue, used to
    # keep the samples sufficiently mixed.
    min_after_dequeue = 10 * BATCH_SIZE
    # Maximum number of elements in the queue.
    capacity = 20 * BATCH_SIZE

    # tf.train.batch has no min_after_dequeue parameter, so passing it there
    # raises a TypeError. Shuffling is the stated intent, so use
    # shuffle_batch, which accepts every argument given here.
    data_batch, label_batch = tf.train.shuffle_batch(
        [features, label],
        batch_size=BATCH_SIZE,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue,
        allow_smaller_final_batch=True)
    return data_batch, label_batch
def inputs_without_crop(lists, image_shape, batch_size):
    """Build a batch pipeline of resized, whitened images without cropping.

    Args:
        lists: File names given to the filename queue; each line of those
            files is parsed by ``read_my_file_format``.
        image_shape: Target image shape; entries 0 and 1 are used as height
            and width.
        batch_size: Number of examples per batch.

    Returns:
        Tuple ``(image_batch, label_batch)`` from ``tf.train.batch``.
    """
    filename_queue = tf.train.string_input_producer(lists, shuffle=True)
    reader = tf.TextLineReader()
    _, value = reader.read(filename_queue)
    image, label = read_my_file_format(value)

    image = tf.image.resize_images(image, [image_shape[0], image_shape[1]])
    label = tf.cast(label, tf.float32)
    image.set_shape(image_shape)
    float_image = tf.image.per_image_whitening(image)

    # The batch queue uses a fixed capacity, so the previously computed (and
    # unused) min_after_dequeue/capacity locals were dropped.
    image_batch, label_batch = tf.train.batch(
        [float_image, label], batch_size=batch_size, capacity=128)
    return image_batch, label_batch
def read_audio_csv(filename_queue):
    """Read one CSV row and split it into a feature matrix and labels.

    A row holds ``WIDE * FEATURE_DIM`` feature values followed by ``OUT_DIM``
    label values, all decoded as floats.

    Args:
        filename_queue: Queue of CSV files for the line reader.

    Returns:
        Tuple ``(features, labels)``: the features reshaped to
        ``[WIDE, FEATURE_DIM]`` and the list of remaining column tensors.
    """
    num_feature_values = WIDE * FEATURE_DIM

    _, line = tf.TextLineReader().read(filename_queue)
    float_defaults = [[0.] for _ in range(WIDE * FEATURE_DIM + OUT_DIM)]
    columns = tf.decode_csv(line, record_defaults=float_defaults)

    features = tf.reshape(columns[:num_feature_values], [WIDE, FEATURE_DIM])
    labels = columns[num_feature_values:]
    return features, labels
def test_keyed_read_text_lines(self):
    """read_keyed_batch_examples yields ``file:line`` keys with each line."""
    gfile.Glob = self._orig_glob
    filename = self._create_temp_file("ABC\nDEF\nGHK\n")

    batch_size = 1
    queue_capacity = 5
    name = "my_batch"

    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
        keys, inputs = tf.contrib.learn.io.read_keyed_batch_examples(
            filename, batch_size, reader=tf.TextLineReader,
            randomize_input=False, num_epochs=1,
            queue_capacity=queue_capacity, name=name)
        session.run(tf.initialize_local_variables())

        coord = tf.train.Coordinator()
        # Keep the thread handles so they can be joined before the session
        # is torn down instead of being left running.
        threads = tf.train.start_queue_runners(session, coord=coord)

        # Keys are the file name followed by ":<1-based line number>".
        self.assertAllEqual(session.run([keys, inputs]),
                            [[filename.encode("utf-8") + b":1"], [b"ABC"]])
        self.assertAllEqual(session.run([keys, inputs]),
                            [[filename.encode("utf-8") + b":2"], [b"DEF"]])
        self.assertAllEqual(session.run([keys, inputs]),
                            [[filename.encode("utf-8") + b":3"], [b"GHK"]])
        with self.assertRaises(errors.OutOfRangeError):
            session.run(inputs)

        coord.request_stop()
        coord.join(threads)
def test_keyed_parse_json(self):
    """read_keyed_batch_examples applies a JSON parse_fn to each line."""
    gfile.Glob = self._orig_glob
    filename = self._create_temp_file(
        '{"features": {"feature": {"age": {"int64_list": {"value": [0]}}}}}\n'
        '{"features": {"feature": {"age": {"int64_list": {"value": [1]}}}}}\n'
        '{"features": {"feature": {"age": {"int64_list": {"value": [2]}}}}}\n'
    )

    batch_size = 1
    queue_capacity = 5
    name = "my_batch"

    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
        dtypes = {"age": tf.FixedLenFeature([1], tf.int64)}
        parse_fn = lambda example: tf.parse_single_example(  # pylint: disable=g-long-lambda
            tf.decode_json_example(example), dtypes)
        keys, inputs = tf.contrib.learn.io.read_keyed_batch_examples(
            filename, batch_size, reader=tf.TextLineReader,
            randomize_input=False, num_epochs=1,
            queue_capacity=queue_capacity, parse_fn=parse_fn, name=name)
        session.run(tf.initialize_local_variables())

        coord = tf.train.Coordinator()
        # Keep the thread handles so they can be joined before the session
        # is torn down instead of being left running.
        threads = tf.train.start_queue_runners(session, coord=coord)

        # Keys are the file name followed by ":<1-based line number>".
        key, age = session.run([keys, inputs["age"]])
        self.assertAllEqual(age, [[0]])
        self.assertAllEqual(key, [filename.encode("utf-8") + b":1"])
        key, age = session.run([keys, inputs["age"]])
        self.assertAllEqual(age, [[1]])
        self.assertAllEqual(key, [filename.encode("utf-8") + b":2"])
        key, age = session.run([keys, inputs["age"]])
        self.assertAllEqual(age, [[2]])
        self.assertAllEqual(key, [filename.encode("utf-8") + b":3"])
        with self.assertRaises(errors.OutOfRangeError):
            session.run(inputs)

        coord.request_stop()
        coord.join(threads)
def test_read_keyed_batch_features_mutual_exclusive_args(self):
    """Setting both num_queue_runners and num_enqueue_threads must fail."""
    filename = self._create_temp_file("abcde")
    features = {"sequence": tf.FixedLenFeature([], tf.string)}

    with self.assertRaisesRegexp(ValueError, "can not both be set"):
        tf.contrib.learn.read_keyed_batch_features(
            filename,
            1,
            features,
            tf.TextLineReader,
            randomize_input=False,
            num_queue_runners=2,
            num_enqueue_threads=2)
def test_read_text_lines_multifile(self):
    """Lines are read across files in order and the expected ops exist."""
    gfile.Glob = self._orig_glob
    filenames = self._create_sorted_temp_files(["ABC\n", "DEF\nGHK\n"])

    batch_size = 1
    queue_capacity = 5
    name = "my_batch"

    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
        inputs = tf.contrib.learn.io.read_batch_examples(
            filenames,
            batch_size,
            reader=tf.TextLineReader,
            randomize_input=False,
            num_epochs=1,
            queue_capacity=queue_capacity,
            name=name)
        self.assertAllEqual((None,), inputs.get_shape().as_list())
        session.run(tf.local_variables_initializer())

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(session, coord=coord)

        # Check the names and types of the ops created under `name`.
        self.assertEqual("%s:1" % name, inputs.name)
        file_name_queue_name = "%s/file_name_queue" % name
        file_names_name = "%s/input" % file_name_queue_name
        example_queue_name = "%s/fifo_queue" % name
        expected_ops = {
            file_names_name: "Const",
            file_name_queue_name: "FIFOQueue",
            "%s/read/TextLineReader" % name: "TextLineReader",
            example_queue_name: "FIFOQueue",
            name: "QueueDequeueUpTo"
        }
        test_util.assert_ops_in_graph(expected_ops, g)

        for expected_line in (b"ABC", b"DEF", b"GHK"):
            self.assertAllEqual(session.run(inputs), [expected_line])
        # num_epochs=1: after the last line the queue is exhausted.
        with self.assertRaises(errors.OutOfRangeError):
            session.run(inputs)

        coord.request_stop()
        coord.join(threads)
def test_batch_text_lines(self):
    """Five lines with batch_size=3 give one full and one partial batch."""
    gfile.Glob = self._orig_glob
    filename = self._create_temp_file("A\nB\nC\nD\nE\n")

    batch_size = 3
    queue_capacity = 10
    name = "my_batch"

    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
        inputs = tf.contrib.learn.io.read_batch_examples(
            [filename],
            batch_size,
            reader=tf.TextLineReader,
            randomize_input=False,
            num_epochs=1,
            queue_capacity=queue_capacity,
            read_batch_size=10,
            name=name)
        self.assertAllEqual((None,), inputs.get_shape().as_list())
        session.run(tf.local_variables_initializer())

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(session, coord=coord)

        for expected_batch in ([b"A", b"B", b"C"], [b"D", b"E"]):
            self.assertAllEqual(session.run(inputs), expected_batch)
        # num_epochs=1: after the last batch the queue is exhausted.
        with self.assertRaises(errors.OutOfRangeError):
            session.run(inputs)

        coord.request_stop()
        coord.join(threads)
def read_single_line_example(filename):
    """Build ops that read one line at a time from a single file.

    Args:
        filename: Path of the text file; it is queued for exactly one epoch.

    Returns:
        The pair produced by ``TextLineReader.read`` on that queue.
    """
    single_file_queue = tf.train.string_input_producer(
        [filename], num_epochs=1)
    line, value = tf.TextLineReader().read(single_file_queue)
    return line, value
def _load_samples(csv_name, image_type):
    """Build ops that decode a pair of images named by each CSV row.

    Args:
        csv_name: CSV file whose rows hold two image file names.
        image_type: Either ``'.jpg'`` or ``'.png'``; selects the decoder.

    Returns:
        Tuple ``(image_decoded_A, image_decoded_B)`` decoded with
        ``model.IMG_CHANNELS`` channels.

    Raises:
        ValueError: If ``image_type`` is neither ``'.jpg'`` nor ``'.png'``.
    """
    # Fail early with a clear message; previously an unsupported type fell
    # through both branches and crashed on the unbound return values.
    if image_type not in ('.jpg', '.png'):
        raise ValueError(
            "Unsupported image_type %r; expected '.jpg' or '.png'"
            % (image_type,))

    filename_queue = tf.train.string_input_producer([csv_name])

    reader = tf.TextLineReader()
    _, csv_filename = reader.read(filename_queue)

    # Empty string defaults: both columns are strings.
    record_defaults = [tf.constant([], dtype=tf.string),
                       tf.constant([], dtype=tf.string)]
    filename_i, filename_j = tf.decode_csv(
        csv_filename, record_defaults=record_defaults)

    file_contents_i = tf.read_file(filename_i)
    file_contents_j = tf.read_file(filename_j)

    if image_type == '.jpg':
        image_decoded_A = tf.image.decode_jpeg(
            file_contents_i, channels=model.IMG_CHANNELS)
        image_decoded_B = tf.image.decode_jpeg(
            file_contents_j, channels=model.IMG_CHANNELS)
    else:
        image_decoded_A = tf.image.decode_png(
            file_contents_i, channels=model.IMG_CHANNELS, dtype=tf.uint8)
        image_decoded_B = tf.image.decode_png(
            file_contents_j, channels=model.IMG_CHANNELS, dtype=tf.uint8)

    return image_decoded_A, image_decoded_B
def test_inputs(self, csv, batch_size, verbose=False):
    """Build an unshuffled evaluation batch of images listed in a CSV file.

    Args:
        csv: CSV file whose rows hold an image path and an integer label.
        batch_size: Number of examples per batch.
        verbose: If true, print the static shape of the decoded image.

    Returns:
        The result of ``self._generate_image_and_label_batch`` called with
        ``shuffle=False``.
    """
    print("input csv file path: %s, batch size: %d" % (csv, batch_size))

    filename_queue = tf.train.string_input_producer([csv], shuffle=False)
    reader = tf.TextLineReader()
    _, serialized_example = reader.read(filename_queue)
    filename, label = tf.decode_csv(serialized_example, [["path"], [0]])

    label = tf.cast(label, tf.int32)
    jpg = tf.read_file(filename)
    image = tf.image.decode_jpeg(jpg, channels=3)
    image = tf.cast(image, tf.float32)

    if verbose:
        # Single-argument print calls behave the same on Python 2 and 3,
        # unlike the print statements used before.
        print("original image shape:")
        print(image.get_shape())

    # Resize to the scale size, then crop or pad to the input size.
    dist = tf.image.resize_images(image, (FLAGS.scale_h, FLAGS.scale_w))
    dist = tf.image.resize_image_with_crop_or_pad(
        dist, FLAGS.input_h, FLAGS.input_w)

    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(FLAGS.num_examples_per_epoch_for_train *
                             min_fraction_of_examples_in_queue)
    print(
        'filling queue with %d train images before starting to train. This will take a few minutes.' % min_queue_examples)

    return self._generate_image_and_label_batch(
        dist, label, min_queue_examples, batch_size, shuffle=False)
def csv_inputs(self, csv, batch_size, distorted=False, verbose=False):
    """Build a training batch of images listed in a CSV file.

    Args:
        csv: CSV file whose rows hold an image path and an integer label.
        batch_size: Number of examples per batch.
        distorted: If true, resize to the scale size, crop or pad to the
            input size and randomly flip left/right; otherwise resize
            directly to the input size.
        verbose: If true, print the static shapes of the decoded and the
            processed image.

    Returns:
        The result of ``self._generate_image_and_label_batch``.
    """
    print("input csv file path: %s, batch size: %d" % (csv, batch_size))

    filename_queue = tf.train.string_input_producer([csv], shuffle=True)
    reader = tf.TextLineReader()
    _, serialized_example = reader.read(filename_queue)
    filename, label = tf.decode_csv(serialized_example, [["path"], [0]])

    label = tf.cast(label, tf.int32)
    jpg = tf.read_file(filename)
    image = tf.image.decode_jpeg(jpg, channels=3)
    image = tf.cast(image, tf.float32)

    if verbose:
        # Single-argument print calls behave the same on Python 2 and 3,
        # unlike the print statements used before.
        print("original image shape:")
        print(image.get_shape())

    if distorted:
        # Resize to the scale size, then crop or pad to the input size.
        dist = tf.image.resize_images(image, (FLAGS.scale_h, FLAGS.scale_w))
        dist = tf.image.resize_image_with_crop_or_pad(
            dist, FLAGS.input_h, FLAGS.input_w)
        # Random horizontal flip.
        dist = tf.image.random_flip_left_right(dist)
    else:
        # Resize straight to the input size. The size is passed as one
        # (height, width) pair, matching the call in the branch above;
        # passing height and width as separate positionals would put the
        # width in a different parameter slot.
        dist = tf.image.resize_images(image, (FLAGS.input_h, FLAGS.input_w))

    if verbose:
        print("dist image shape:")
        print(dist.get_shape())

    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(FLAGS.num_examples_per_epoch_for_train *
                             min_fraction_of_examples_in_queue)
    print('filling queue with %d train images before starting to train. This will take a few minutes.' % min_queue_examples)

    return self._generate_image_and_label_batch(
        dist, label, min_queue_examples, batch_size)
def train_image(dataset, batch_size=None):
    """Build batched RGB / depth / mask tensors for training.

    The CSV named by ``dataset.file_name()`` is read through a shuffled
    filename queue; every row holds the path of an RGB PNG and the path of
    a depth PNG. The RGB image is decoded with 3 channels and cast to
    float32. The depth image is decoded with 1 channel, cast to float32 and
    divided by 255. Both are resized (to ``IMAGE_HEIGHT x IMAGE_WIDTH`` and
    ``TARGET_HEIGHT x TARGET_WIDTH`` respectively), and ``tf.sign`` of the
    resized depth is returned as a third tensor.

    Args:
        dataset: Object exposing ``file_name()``, which returns the CSV path.
        batch_size: Number of examples per batch. Required, although the
            signature keeps its historical ``None`` default.

    Returns:
        A tuple ``(images, depths, invalid_depths)`` of batched tensors.

    Raises:
        ValueError: If ``batch_size`` is not given.
    """
    # This is a plain function, so there is no ``self`` to read a batch size
    # from; the ``batch_size`` argument is the only source. Check it before
    # any graph ops are created.
    if batch_size is None:
        raise ValueError('batch_size must be provided')

    filename_queue = tf.train.string_input_producer([dataset.file_name()], shuffle=True)
    reader = tf.TextLineReader()
    _, serialized_example = reader.read(filename_queue)
    rgb_filename, depth_filename = tf.decode_csv(serialized_example, [["path"], ["meters"]])

    # input
    rgb_png = tf.read_file(rgb_filename)
    image = tf.image.decode_png(rgb_png, channels=3)
    image = tf.cast(image, tf.float32)

    # target
    depth_png = tf.read_file(depth_filename)
    depth = tf.image.decode_png(depth_png, channels=1)
    depth = tf.cast(depth, tf.float32)
    depth = tf.div(depth, [255.0])

    # resize
    image = tf.image.resize_images(image, (IMAGE_HEIGHT, IMAGE_WIDTH))
    depth = tf.image.resize_images(depth, (TARGET_HEIGHT, TARGET_WIDTH))
    # 0 where the resized depth is 0, 1 where it is positive.
    invalid_depth = tf.sign(depth)

    # generate batch
    images, depths, invalid_depths = tf.train.batch(
        [image, depth, invalid_depth],
        batch_size=batch_size,
        num_threads=4,
        capacity=50 + 3 * batch_size,
    )
    return images, depths, invalid_depths
def train_batch_inputs(dataset_csv_file_path, batch_size):
    """Build batched image / annotation / mask tensors for training.

    Every row of the CSV holds the path of an input PNG and the path of an
    annotation PNG. The input is decoded with 3 channels and cast to
    float32; the annotation is decoded as a 1-channel uint16 image and cast
    to int16. Both are resized (to ``IMAGE_HEIGHT x IMAGE_WIDTH`` and
    ``TARGET_HEIGHT x TARGET_WIDTH`` respectively), and ``tf.sign`` of the
    resized annotation is returned as a third tensor.

    Args:
        dataset_csv_file_path: Path of the CSV file listing the examples.
        batch_size: Number of examples per batch.

    Returns:
        A tuple ``(images, depths, invalid_depths)`` of batched tensors.

    Raises:
        ValueError: If ``dataset_csv_file_path`` is not an existing file.
    """
    # Validate the path before touching the graph, so a bad path does not
    # consume the 'batch_processing' name scope.
    if not os.path.isfile(dataset_csv_file_path):
        raise ValueError('No data files found for this dataset')

    with tf.name_scope('batch_processing'):
        filename_queue = tf.train.string_input_producer([dataset_csv_file_path], shuffle=True)
        reader = tf.TextLineReader()
        _, serialized_example = reader.read(filename_queue)
        filename, depth_filename = tf.decode_csv(serialized_example, [["path"], ["annotation"]])

        # input
        png = tf.read_file(filename)
        image = tf.image.decode_png(png, channels=3)
        image = tf.cast(image, tf.float32)

        # target
        depth_png = tf.read_file(depth_filename)
        depth = tf.image.decode_png(depth_png, dtype=tf.uint16, channels=1)
        # NOTE(review): values above 32767 do not fit in int16 -- confirm the
        # annotation PNGs stay below that.
        depth = tf.cast(depth, dtype=tf.int16)

        # resize
        image = tf.image.resize_images(image, (IMAGE_HEIGHT, IMAGE_WIDTH))
        depth = tf.image.resize_images(depth, (TARGET_HEIGHT, TARGET_WIDTH))
        invalid_depth = tf.sign(depth)

        # generate batch
        images, depths, invalid_depths = tf.train.batch(
            [image, depth, invalid_depth],
            batch_size=batch_size,
            num_threads=4,
            capacity=50 + 3 * batch_size,
        )
        return images, depths, invalid_depths
def eval_batch_inputs(dataset_csv_file_path, batch_size):
    """Build batched image / annotation / mask tensors for evaluation.

    Every row of the CSV holds the path of an input PNG and the path of an
    annotation PNG. The input is decoded with 3 channels and cast to
    float32; the annotation is decoded as a 1-channel uint16 image and cast
    to int16. Both are resized (to ``IMAGE_HEIGHT x IMAGE_WIDTH`` and
    ``TARGET_HEIGHT x TARGET_WIDTH`` respectively), and ``tf.sign`` of the
    resized annotation is returned as a third tensor.

    Args:
        dataset_csv_file_path: Path of the CSV file listing the examples.
        batch_size: Number of examples per batch.

    Returns:
        A tuple ``(images, depths, invalid_depths)`` of batched tensors.

    Raises:
        ValueError: If ``dataset_csv_file_path`` is not an existing file.
    """
    # Validate the path before touching the graph, so a bad path does not
    # consume the 'eval_batch_processing' name scope.
    if not os.path.isfile(dataset_csv_file_path):
        raise ValueError('No data files found for this dataset')

    with tf.name_scope('eval_batch_processing'):
        # NOTE(review): the filename queue is shuffled even for evaluation --
        # confirm this is intended.
        filename_queue = tf.train.string_input_producer([dataset_csv_file_path], shuffle=True)
        reader = tf.TextLineReader()
        _, serialized_example = reader.read(filename_queue)
        filename, depth_filename = tf.decode_csv(serialized_example, [["path"], ["annotation"]])

        # input
        png = tf.read_file(filename)
        image = tf.image.decode_png(png, channels=3)
        image = tf.cast(image, tf.float32)

        # target
        depth_png = tf.read_file(depth_filename)
        depth = tf.image.decode_png(depth_png, dtype=tf.uint16, channels=1)
        # NOTE(review): values above 32767 do not fit in int16 -- confirm the
        # annotation PNGs stay below that.
        depth = tf.cast(depth, dtype=tf.int16)

        # resize
        image = tf.image.resize_images(image, (IMAGE_HEIGHT, IMAGE_WIDTH))
        depth = tf.image.resize_images(depth, (TARGET_HEIGHT, TARGET_WIDTH))
        invalid_depth = tf.sign(depth)

        # generate batch
        images, depths, invalid_depths = tf.train.batch(
            [image, depth, invalid_depth],
            batch_size=batch_size,
            num_threads=4,
            capacity=50 + 3 * batch_size,
        )
        return images, depths, invalid_depths
def csv_inputs(self, csv_file_path):
    """Create shuffled training batches from a three-column CSV.

    Each CSV row supplies three paths: an RGB PNG, a depth PNG and a third
    ("meters") file that is parsed but not used here. The RGB image is
    decoded with 3 channels as float32; the depth image is decoded with
    1 channel, converted to float32 and divided by 255. Both are resized to
    the module-level target sizes, and ``tf.sign`` of the resized depth is
    batched alongside them.

    Args:
        csv_file_path: Path of the CSV file listing the examples.

    Returns:
        A tuple ``(images, depths, invalid_depths)`` of tensors batched with
        ``self.batch_size`` examples each.
    """
    path_queue = tf.train.string_input_producer([csv_file_path], shuffle=True)
    _, csv_row = tf.TextLineReader().read(path_queue)
    rgb_path, depth_path, _meters_path = tf.decode_csv(
        csv_row, [["path"], ["annotation"], ["meters"]])

    # Network input: 3-channel PNG as float32.
    rgb = tf.cast(
        tf.image.decode_png(tf.read_file(rgb_path), channels=3),
        tf.float32)

    # Regression target: 1-channel PNG as float32, divided by 255.
    target = tf.image.decode_png(tf.read_file(depth_path), channels=1)
    target = tf.div(tf.cast(target, tf.float32), [255.0])

    # Bring both tensors to their fixed sizes.
    rgb = tf.image.resize_images(rgb, (IMAGE_HEIGHT, IMAGE_WIDTH))
    target = tf.image.resize_images(target, (TARGET_HEIGHT, TARGET_WIDTH))
    # 0 where the resized depth is 0, 1 where it is positive.
    mask = tf.sign(target)

    # Assemble batches.
    per_batch = self.batch_size
    images, depths, invalid_depths = tf.train.batch(
        [rgb, target, mask],
        batch_size=per_batch,
        num_threads=4,
        capacity=50 + 3 * per_batch,
    )
    return images, depths, invalid_depths
def csv_inputs_test(self, csv_file_path):
    """Create test batches, in CSV order, from a three-column CSV.

    Each CSV row supplies three paths: an RGB PNG, a depth PNG and a third
    ("meters") file that is parsed but not used here. The filename queue is
    not shuffled. The RGB image is decoded with 3 channels as float32; the
    depth image is decoded with 1 channel, converted to float32 and divided
    by 255. Both are resized to the module-level target sizes, and
    ``tf.sign`` of the resized depth is batched alongside them together
    with the two source paths.

    Args:
        csv_file_path: Path of the CSV file listing the examples.

    Returns:
        A tuple ``(images, depths, invalid_depths, filenames,
        depth_filenames)`` of tensors batched with ``self.batch_size``
        examples each.
    """
    path_queue = tf.train.string_input_producer([csv_file_path], shuffle=False)
    _, csv_row = tf.TextLineReader().read(path_queue)
    rgb_path, depth_path, _meters_path = tf.decode_csv(
        csv_row, [["path"], ["annotation"], ["meters"]])

    # Network input: 3-channel PNG as float32.
    rgb = tf.cast(
        tf.image.decode_png(tf.read_file(rgb_path), channels=3),
        tf.float32)

    # Regression target: 1-channel PNG as float32, divided by 255.
    target = tf.image.decode_png(tf.read_file(depth_path), channels=1)
    target = tf.div(tf.cast(target, tf.float32), [255.0])

    # Bring both tensors to their fixed sizes.
    rgb = tf.image.resize_images(rgb, (IMAGE_HEIGHT, IMAGE_WIDTH))
    target = tf.image.resize_images(target, (TARGET_HEIGHT, TARGET_WIDTH))
    # 0 where the resized depth is 0, 1 where it is positive.
    mask = tf.sign(target)

    # Assemble batches; the source paths travel with each example.
    per_batch = self.batch_size
    images, depths, invalid_depths, filenames, depth_filenames = tf.train.batch(
        [rgb, target, mask, rgb_path, depth_path],
        batch_size=per_batch,
        num_threads=4,
        capacity=50 + 3 * per_batch,
    )
    return images, depths, invalid_depths, filenames, depth_filenames