我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用tensorflow.uint8()。
def parse_example(serialized_example):
    """Decode one serialized example into image, ground truth and name.

    Args:
        serialized_example: scalar string tensor holding a serialized example
            with the keys ``shape``, ``img_raw``, ``gt_raw`` and
            ``example_name``.

    Returns:
        Tuple ``(image, ground_truth, example_name)``. ``image`` is the
        float32-decoded ``img_raw`` reshaped to ``shape`` with a leading axis
        of size one; ``ground_truth`` is the uint8-decoded ``gt_raw`` reshaped
        to ``shape[:-1]`` and cast to float32; ``example_name`` is the raw
        string feature.
    """
    # No defaults are given, so every key is required.
    feature_spec = {
        'shape': tf.FixedLenFeature([], tf.string),
        'img_raw': tf.FixedLenFeature([], tf.string),
        'gt_raw': tf.FixedLenFeature([], tf.string),
        'example_name': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    # NOTE(review): the original indentation was lost; the three scopes are
    # laid out as siblings here -- confirm against the upstream file.
    with tf.variable_scope('decoder'):
        shape = tf.decode_raw(parsed['shape'], tf.int32)
        image = tf.decode_raw(parsed['img_raw'], tf.float32)
        ground_truth = tf.decode_raw(parsed['gt_raw'], tf.uint8)
        example_name = parsed['example_name']

    with tf.variable_scope('image'):
        # Reshape and prepend an axis of size one (would be the batch axis).
        image = tf.expand_dims(tf.reshape(image, shape), 0)

    with tf.variable_scope('ground_truth'):
        # The ground truth drops the last entry of `shape`.
        ground_truth = tf.cast(
            tf.reshape(ground_truth, shape[:-1]), tf.float32)

    return image, ground_truth, example_name
def preprocess(self, image_buffer, bbox, batch_position):
    """Preprocess ``image_buffer`` as a function of its batch position.

    In training mode the buffer is handed to ``train_image``; otherwise it is
    JPEG-decoded and passed through ``eval_image``.

    Args:
        image_buffer: encoded image data.
        bbox: bounding box forwarded to ``train_image`` (unused in eval mode).
        batch_position: forwarded unchanged to the helper that is used.

    Returns:
        The image produced by ``train_image`` or ``eval_image``.
    """
    if not self.train:
        decoded = tf.image.decode_jpeg(
            image_buffer, channels=3, dct_method='INTEGER_FAST')
        image = eval_image(
            decoded, self.height, self.width, batch_position,
            self.resize_method,
            summary_verbosity=self.summary_verbosity)
    else:
        image = train_image(
            image_buffer, self.height, self.width, bbox, batch_position,
            self.resize_method, self.distortions, None,
            summary_verbosity=self.summary_verbosity,
            distort_color_in_yiq=self.distort_color_in_yiq,
            fuse_decode_and_crop=self.fuse_decode_and_crop)
    # Original author's note: image is now float32 [height,width,3] with
    # range [0, 255]. A cast to tf.uint8 was tried here as a testing hack
    # and is intentionally not applied.
    return image
def make_png_thumbnail(x, n):
    '''Tile ``n*n`` images into one ``n`` by ``n`` grid and PNG-encode it.

    Input:
        `x`: Tensor, value range=[-1, 1), shape=[n*n, h, w, c]
        `n`: sqrt of the number of images

    Return:
        `tf.string` (bytes) of the PNG.
        (write these binary directly into a file)
    '''
    with tf.name_scope('MakeThumbnail'):
        _, height, width, channels = x.get_shape().as_list()
        # Split the batch axis into grid rows/columns, then interleave the
        # grid rows with the pixel rows so a reshape yields one big image.
        grid = tf.reshape(x, [n, n, height, width, channels])
        grid = tf.transpose(grid, [0, 2, 1, 3, 4])
        canvas = tf.reshape(grid, [n * height, n * width, channels])
        # Map [-1, 1) onto [0, 1) before the uint8 conversion.
        canvas = canvas / 2. + .5
        pixels = tf.image.convert_image_dtype(canvas, tf.uint8, saturate=True)
        png_bytes = tf.image.encode_png(pixels)
    return png_bytes
def make_png_jet_thumbnail(x, n):
    '''Tile ``n*n`` images into a grid, apply ``gray2jet`` and PNG-encode it.

    Input:
        `x`: Tensor, value range=[-1, 1), shape=[n*n, h, w, c]
        `n`: sqrt of the number of images

    Return:
        `tf.string` (bytes) of the PNG.
        (write these binary directly into a file)
    '''
    with tf.name_scope('MakeThumbnail'):
        _, height, width, channels = x.get_shape().as_list()
        # Split the batch axis into grid rows/columns, then interleave the
        # grid rows with the pixel rows so a reshape yields one big image.
        grid = tf.reshape(x, [n, n, height, width, channels])
        grid = tf.transpose(grid, [0, 2, 1, 3, 4])
        canvas = tf.reshape(grid, [n * height, n * width, channels])
        # Map [-1, 1) onto [0, 1) before colouring.
        canvas = canvas / 2. + .5
        coloured = gray2jet(canvas)
        pixels = tf.image.convert_image_dtype(
            coloured, tf.uint8, saturate=True)
        png_bytes = tf.image.encode_png(pixels)
    return png_bytes
def get_label_queue(self, batch_size):
    """Build a shuffled batch queue over the label columns.

    Args:
        batch_size: number of label rows per dequeued batch.

    Returns:
        The result of ``tf.train.shuffle_batch`` on a dict that maps every
        entry of ``self.label_names`` to its slice of the label row.
    """
    tf_labels = tf.convert_to_tensor(self.attr.values, dtype=tf.uint8)  # 0,1

    # NOTE(review): the original indentation was lost; only the slice
    # producer is placed inside the name scope here -- confirm upstream.
    with tf.name_scope('label_queue'):
        uint_label = tf.train.slice_input_producer([tf_labels])[0]
    label = tf.to_float(uint_label)

    # All labels, not just those in causal_model
    per_label = tf.split(label, len(self.label_names))
    dict_data = dict(zip(self.label_names, per_label))

    # Keep at least one preprocessing thread.
    num_preprocess_threads = max(self.num_worker - 3, 1)

    return tf.train.shuffle_batch(
        dict_data,
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=self.min_queue_examples + 3 * batch_size,
        min_after_dequeue=self.min_queue_examples,
    )
def write_tfrecord(self, img_list, label_list, record_path):
    """Write a single TFRecord file from parallel image/label lists.

    Nothing is written when ``record_path`` already exists.

    Args:
        img_list: image file names, opened with ``Image.open``.
        label_list: labels, paired with ``img_list`` by position.
        record_path: destination path of the TFRecord file.
    """
    if os.path.exists(record_path):
        print("%s exists!" % record_path)
        return
    self._check_list()
    print("write %s" % record_path)
    self._write_info()

    writer = tf.python_io.TFRecordWriter(record_path)
    # Close the writer even if loading or serializing an image fails, so
    # the file handle is not leaked.
    try:
        pairs = zip(img_list, label_list)
        for count, (imgname, label) in enumerate(pairs, 1):
            size = (self.flags.width, self.flags.height)
            img = Image.open(imgname).resize(size)
            data = np.array(img).astype(np.uint8)
            img, data = self._check_color(img, data)
            example = self._get_example(data, label)
            writer.write(example.SerializeToString())
            if count % LOG_EVERY == 0:
                print("%d images written to tfrecord" % count)
    finally:
        writer.close()
    print("writing %s done" % record_path)
def get_shapes_and_dtypes(data):
    """Derive a shape and a TensorFlow dtype per key from the first sample.

    Args:
        data: mapping from key to an indexable collection of samples; only
            element ``[0]`` of each collection is inspected.

    Returns:
        Tuple ``(shapes, dtypes)`` of dicts keyed like ``data``.

    Raises:
        TypeError: if the first sample is not a ``str``, ``np.ndarray`` or
            ``np.bool_``.
    """
    shapes, dtypes = {}, {}
    for key in data.keys():
        sample = data[key][0]
        if isinstance(sample, str):
            shape, dtype = [], tf.string
        elif isinstance(sample, np.ndarray):
            # Arrays are always reported as uint8, whatever their own dtype.
            shape, dtype = sample.shape, tf.uint8
        elif isinstance(sample, np.bool_):
            # NOTE(review): booleans map to tf.string, not tf.bool -- confirm
            # this is intended.
            shape, dtype = [], tf.string
        else:
            raise TypeError('Unknown data type', type(sample))
        shapes[key] = shape
        dtypes[key] = dtype
    return shapes, dtypes
def __init__(self):
    """Create the session, placeholders and ops for video/frame coding."""
    # One session is shared by all image coding calls.
    self._sess = tf.Session()

    # Video decoding: path placeholder fed into `decode_video`.
    video_path = tf.placeholder(dtype=tf.string)
    self._video_path = video_path
    self._decode_video = decode_video(video_path)

    # Placeholder for a float32 batch of 3-channel frames to be resized.
    self._resize_video_data = tf.placeholder(
        dtype=tf.float32, shape=[None, None, None, 3])

    # Encoding inputs: a 3-channel uint8 frame and a 1-channel uint8 mask.
    raw_frame = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
    raw_mask = tf.placeholder(dtype=tf.uint8, shape=[None, None, 1])
    self._raw_frame = raw_frame
    self._raw_mask = raw_mask

    # Frames are JPEG-encoded at quality 100, masks are PNG-encoded.
    self._encode_frame = tf.image.encode_jpeg(raw_frame, quality=100)
    self._encode_mask = tf.image.encode_png(raw_mask)
def extract_images(filename):
    """Extract the images into a 4D uint8 numpy array [index, y, x, depth].

    Args:
        filename: path of a gzip-compressed MNIST image file.

    Returns:
        uint8 array of shape ``[num_images, rows, cols, 1]``.

    Raises:
        ValueError: if the file does not start with magic number 2051.
    """
    print('Extracting', filename)
    with tf.gfile.Open(filename, 'rb') as f:
        with gzip.GzipFile(fileobj=f) as bytestream:
            magic = _read32(bytestream)
            if magic != 2051:
                raise ValueError(
                    'Invalid magic number %d in MNIST image file: %s' %
                    (magic, filename))
            # The header continues with image count, rows and columns.
            num_images = _read32(bytestream)
            rows = _read32(bytestream)
            cols = _read32(bytestream)
            # One byte per pixel.
            buf = bytestream.read(rows * cols * num_images)
    pixels = numpy.frombuffer(buf, dtype=numpy.uint8)
    return pixels.reshape(num_images, rows, cols, 1)
def parse_mnist_tfrec(tfrecord, features_shape):
    """Parse one MNIST record into float features and one-hot targets.

    Args:
        tfrecord: serialized example with string features ``features`` and
            ``targets``.
        features_shape: shape the decoded feature bytes are reshaped to.

    Returns:
        Tuple ``(features, targets)``, both float32; ``targets`` is a
        depth-10 one-hot encoding of the scalar target byte.
    """
    spec = {
        'features': tf.FixedLenFeature([], tf.string),
        'targets': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(tfrecord, features=spec)

    raw_features = tf.decode_raw(parsed['features'], tf.uint8)
    features = tf.cast(tf.reshape(raw_features, features_shape), tf.float32)

    raw_targets = tf.decode_raw(parsed['targets'], tf.uint8)
    # The reshape to [] makes the target a scalar.
    scalar_target = tf.reshape(raw_targets, [])
    one_hot = tf.one_hot(
        indices=scalar_target, depth=10, on_value=1, off_value=0)
    targets = tf.cast(one_hot, tf.float32)

    return features, targets
def parse_mnist_tfrec(tfrecord, name, features_shape, scalar_targs=False):
    """Parse one MNIST record into float32 features and targets.

    Args:
        tfrecord: serialized example with string features ``features`` and
            ``targets``.
        name: prefix; the parse op is named ``name + '_data'``.
        features_shape: shape the decoded feature bytes are reshaped to.
        scalar_targs: when true, the target bytes are reshaped to a scalar
            and one-hot encoded with depth 10.

    Returns:
        Tuple ``(features, targets)``, both cast to float32.
    """
    spec = {
        'features': tf.FixedLenFeature([], tf.string),
        'targets': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(
        tfrecord, features=spec, name=name + '_data')

    with tf.variable_scope('features'):
        raw_features = tf.decode_raw(parsed['features'], tf.uint8)
        features = tf.cast(
            tf.reshape(raw_features, features_shape), tf.float32)

    with tf.variable_scope('targets'):
        targets = tf.decode_raw(parsed['targets'], tf.uint8)
        # NOTE(review): the original indentation was lost; the one-hot step
        # is assumed to belong to the `scalar_targs` branch -- confirm.
        if scalar_targs:
            targets = tf.reshape(targets, [])
            targets = tf.one_hot(
                indices=targets, depth=10, on_value=1, off_value=0)
        targets = tf.cast(targets, tf.float32)

    return features, targets
def read_and_decode(filename_queue, batch_size):
    """Read ``hr_image`` records and return a shuffled, augmented batch.

    Args:
        filename_queue: queue of TFRecord file names.
        batch_size: number of images per batch.

    Returns:
        The batch tensor produced by ``tf.train.shuffle_batch``.
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    feature_spec = features()
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    hr_image = tf.decode_raw(parsed['hr_image'], tf.uint8)
    # The stored height/width are cast and printed, but the reshape below
    # uses a fixed 128x128x3 shape instead.
    height = tf.cast(parsed['height'], tf.int32)
    width = tf.cast(parsed['width'], tf.int32)
    print(height)

    image_shape = tf.stack([128, 128, 3])
    hr_image = tf.reshape(hr_image, image_shape)

    # Augmentation: random horizontal flip and random contrast.
    hr_image = tf.image.random_flip_left_right(hr_image)
    hr_image = tf.image.random_contrast(hr_image, 0.5, 1.3)

    return tf.train.shuffle_batch(
        [hr_image],
        batch_size=batch_size,
        capacity=30,
        num_threads=2,
        min_after_dequeue=10)
def _convert_string_dtype(dtype):
    """Map a dtype name to the corresponding TensorFlow dtype.

    Args:
        dtype: one of ``'float16'``, ``'float32'``, ``'float64'``,
            ``'int16'``, ``'int32'``, ``'int64'``, ``'uint8'``, ``'uint16'``.

    Returns:
        The matching ``tf`` dtype object.

    Raises:
        ValueError: if ``dtype`` is not one of the supported names.
    """
    if dtype == 'float16':
        return tf.float16
    if dtype == 'float32':
        return tf.float32
    if dtype == 'float64':
        return tf.float64
    if dtype == 'int16':
        return tf.int16
    if dtype == 'int32':
        return tf.int32
    if dtype == 'int64':
        return tf.int64
    if dtype == 'uint8':
        # Previously returned tf.int8, which is signed and therefore the
        # wrong dtype for 'uint8'.
        return tf.uint8
    if dtype == 'uint16':
        return tf.uint16
    raise ValueError('Unsupported dtype:', dtype)
def process_state(self, state):
    """Cast a state tensor to float32 and divide it by ``config.high``.

    State placeholders are tf.uint8 for fast transfer to GPU; the rest of
    the graph needs float32.

    Args:
        state: node of tf graph of shape = (batch_size, height, width,
            nchannels) of type tf.uint8.

    Returns:
        The float32 state divided by ``self.config.high`` (for example,
        values between 0 and 255 become values between 0 and 1 when
        ``high`` is 255).
    """
    as_float = tf.cast(state, tf.float32)
    return as_float / self.config.high
def save(tensor, name="noise.png"):
    """
    Save an image Tensor to a file.

    The tensor is converted to uint8 (saturating) and encoded according to
    the file extension. ``.eval()`` is called on the encoded tensor.

    :param Tensor tensor: Image tensor
    :param str name: Filename, ending with .png or .jpg
    :return: None
    :raises ValueError: if ``name`` ends with neither .png nor .jpg
    """
    tensor = tf.image.convert_image_dtype(tensor, tf.uint8, saturate=True)

    # Pick the encoder from the file extension.
    if name.endswith(".png"):
        encode = tf.image.encode_png
    elif name.endswith(".jpg"):
        encode = tf.image.encode_jpeg
    else:
        raise ValueError("Filename should end with .png or .jpg")

    data = encode(tensor).eval()

    with open(name, "wb") as fh:
        fh.write(data)
def jpeg_decimate(tensor, shape, iterations=25):
    """
    Degrade an image by repeatedly JPEG-encoding and decoding it.

    Each round draws a random quality (5-50) and random x/y densities
    (50-500) for the encoder.

    :param Tensor tensor: Image tensor
    :param shape: Not used by this function
    :param int iterations: Number of encode/decode rounds
    :return: Tensor
    """
    jpegged = tensor

    for _ in range(iterations):
        as_uint8 = tf.image.convert_image_dtype(jpegged, tf.uint8)

        # Argument order fixes the order of the random draws.
        encoded = tf.image.encode_jpeg(
            as_uint8,
            quality=random.randint(5, 50),
            x_density=random.randint(50, 500),
            y_density=random.randint(50, 500))

        decoded = tf.image.decode_jpeg(encoded)
        jpegged = tf.image.convert_image_dtype(
            decoded, tf.float32, saturate=True)

    return jpegged
def _maybe_download_and_extract(self):
    """Load the MNIST dataset and write any missing TFRecord files.

    For each of the train, validation and test splits, a
    ``<split>.tfrecords`` file is created in ``self._data_dir`` unless it
    already exists.
    """
    data_sets = mnist.read_data_sets(
        self._data_dir,
        dtype=tf.uint8,
        reshape=False,
        validation_size=self._num_examples_per_epoch_for_eval)

    # Convert to Examples and write the result to TFRecords.
    for split in ('train', 'validation', 'test'):
        record_path = os.path.join(self._data_dir, split + '.tfrecords')
        if tf.gfile.Exists(record_path):
            continue
        convert_to_tfrecords(getattr(data_sets, split), split, self._data_dir)
def read_and_decode(filename_queue):
    """Read one record and return its image and label as float32 tensors.

    Args:
        filename_queue: queue of TFRecord file names.

    Returns:
        Tuple ``(image, label)``: the image reshaped to
        ``[IMAGE_HEIGHT, IMAGE_WIDTH, 1]`` and the label reshaped to
        ``[CHARS_NUM, CLASSES_NUM]``, both float32.
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    spec = {
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label_raw': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized_example, features=spec)

    # The image bytes are decoded as int16, then scaled by 1/255 and
    # shifted by -0.5.
    pixels = tf.decode_raw(parsed['image_raw'], tf.int16)
    pixels.set_shape([IMAGE_HEIGHT * IMAGE_WIDTH])
    pixels = tf.cast(pixels, tf.float32) * (1. / 255) - 0.5
    reshape_image = tf.reshape(pixels, [IMAGE_HEIGHT, IMAGE_WIDTH, 1])

    label_bytes = tf.decode_raw(parsed['label_raw'], tf.uint8)
    label_bytes.set_shape([CHARS_NUM * CLASSES_NUM])
    reshape_label = tf.reshape(label_bytes, [CHARS_NUM, CLASSES_NUM])

    image_out = tf.cast(reshape_image, tf.float32)
    label_out = tf.cast(reshape_label, tf.float32)
    return image_out, label_out
def read_and_decode_embedding(filename_queue):
    """Read one record and return its int32 sequence and int32 label.

    Args:
        filename_queue: queue of TFRecord file names.

    Returns:
        Tuple ``(s_decode, label)``: the sequence bytes decoded as int32
        with static shape ``[FLAGS.embed_length]``, and the label cast to
        int32.
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    # Defaults are not specified since both keys are required.
    spec = {
        'label': tf.FixedLenFeature([], tf.int64),
        'sequence_raw': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized_example, features=spec)

    # Decode the raw sequence bytes and pin the static length.
    s_decode = tf.decode_raw(parsed['sequence_raw'], tf.int32)
    s_decode.set_shape([FLAGS.embed_length])

    # The label is parsed as a scalar int64 and narrowed to int32.
    label = tf.cast(parsed['label'], tf.int32)

    return s_decode, label
def _extract_images(filename, num_images):
    """Extract the images into a numpy array.

    Args:
        filename: The path to an MNIST images file.
        num_images: The number of images in the file.

    Returns:
        A numpy array of shape [number_of_images, height, width, channels].
    """
    print('Extracting images from: ', filename)
    with gzip.open(filename) as bytestream:
        # Discard the first 16 bytes (presumably the file header).
        bytestream.read(16)
        payload = bytestream.read(
            _IMAGE_SIZE * _IMAGE_SIZE * num_images * _NUM_CHANNELS)
    pixels = np.frombuffer(payload, dtype=np.uint8)
    return pixels.reshape(
        num_images, _IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS)
def _extract_labels(filename, num_labels):
    """Extract the labels into a vector of int64 label IDs.

    Args:
        filename: The path to an MNIST labels file.
        num_labels: The number of labels in the file.

    Returns:
        A numpy array of shape [number_of_labels]
    """
    print('Extracting labels from: ', filename)
    with gzip.open(filename) as bytestream:
        # Discard the first 8 bytes (presumably the file header).
        bytestream.read(8)
        # Each label occupies one byte.
        payload = bytestream.read(1 * num_labels)
    as_bytes = np.frombuffer(payload, dtype=np.uint8)
    return as_bytes.astype(np.int64)
def ternary_encoder(input_data):
    """Encode the signs of a tensor, packing four of them into one byte.

    Args:
        input_data: tensor whose element signs are encoded.

    Returns:
        A uint8 tensor in which each element combines four 2-bit codes
        (0, 1, 2 for sign -1, 0, 1).
    """
    signs = tf.sign(input_data)  # -1, 0, 1
    codes = tf.add(signs, 1)  # shift -1,0,1 to 0,1,2 (2'b00,2'b01,2'b10)
    flat = tf.reshape(codes, [-1])

    # Pad so the length is a multiple of four. Note that a full group of
    # four is appended when the size is already divisible by 4, and the
    # padding values come from tf.range (0, 1, 2, ...).
    pad_size = 4 - tf.mod(tf.size(flat), 4)
    pad = tf.range(0.0, pad_size)
    padded = tf.concat([flat, pad], 0)

    # Four equal chunks; the size is assumed to be divisible by 4.
    part0, part1, part2, part3 = tf.split(padded, 4)

    # Encode 4 codes into 1 byte: weights 1, 4, 16 and 64.
    low = tf.add(part0, part1 * 4)
    high = tf.add(part2 * 16, part3 * 64)
    packed = tf.add(low, high)

    return tf.cast(packed, tf.uint8)
def decode_raw(image_buffer, orig_height, orig_width, scope=None):
    """Decode a RAW string into one 3-D float image Tensor.

    Args:
        image_buffer: scalar string Tensor.
        orig_height: height of the original image; concatenated with
            ``orig_width`` and ``[3]`` to form the target shape.
        orig_width: width of the original image.
        scope: Optional scope for op_scope.

    Returns:
        3-D float Tensor with values ranging from [0, 1).
    """
    with tf.op_scope([image_buffer], scope, 'decode_raw'):
        # Decode the string as raw RGB bytes.
        rgb_bytes = tf.decode_raw(image_buffer, tf.uint8)
        target_shape = tf.concat([orig_height, orig_width, [3]], 0)
        rgb = tf.reshape(rgb_bytes, target_shape)

        # After this point, all image pixels reside in [0,1).
        # The various adjust_* ops all require this range for dtype float.
        return tf.image.convert_image_dtype(rgb, dtype=tf.float32)
def tf_random_aspect_resize(image, label, low_val=1.0, upper_val=1.5):
    """Randomly stretch either the height or the width of image and label.

    A random factor between ``low_val`` and ``upper_val`` is applied to
    exactly one side, chosen at random; the other side keeps its size.

    Args:
        image: image tensor; axes 0 and 1 are taken as height and width.
        label: label tensor resized to the same new size.
        low_val: lower bound of the scale factor.
        upper_val: upper bound of the scale factor.

    Returns:
        Tuple ``(resized_image, resized_label)``, both cast to uint8. The
        image is resized bilinearly, the label with nearest neighbour.
    """
    dims = tf.shape(image)
    height = dims[0]
    width = dims[1]

    # One draw picks the side, a second one the scale factor (1~1.5 with
    # the default bounds).
    which_side = tf.to_float(tf.random_uniform([1]))[0]
    span = upper_val - low_val
    multi_val = tf.to_float(tf.random_uniform([1]))[0] * span + low_val

    # which_side > 0.5 keeps the height and scales the width; otherwise
    # the height is scaled and the width kept.
    new_height = tf.cond(which_side > 0.5,
                         lambda: tf.to_float(height),
                         lambda: tf.to_float(height) * multi_val)
    new_width = tf.cond(which_side <= 0.5,
                        lambda: tf.to_float(width),
                        lambda: tf.to_float(width) * multi_val)
    new_height = tf.to_int32(new_height)
    new_width = tf.to_int32(new_width)
    new_size = [new_height, new_width]

    # The resize ops expect a leading batch axis.
    image_4d = tf.expand_dims(image, 0)
    label_4d = tf.expand_dims(label, 0)

    resized_image = tf.cast(
        tf.image.resize_bilinear(image_4d, new_size, align_corners=False),
        tf.uint8)
    resized_label = tf.cast(
        tf.image.resize_nearest_neighbor(
            label_4d, new_size, align_corners=False),
        tf.uint8)

    resized_image = tf.squeeze(resized_image, 0)
    resized_label = tf.squeeze(resized_label, 0)
    return resized_image, resized_label
def tf_aspect_preserving_resize(image, label, smallest_side):
    """Resize image and label to the size from ``_smallest_size_at_least``.

    Args:
        image: image tensor; axes 0 and 1 are taken as height and width.
        label: label tensor resized to the same new size.
        smallest_side: lower bound applied to both new dimensions.

    Returns:
        Tuple ``(resized_image, resized_label)``, both cast to uint8. The
        image is resized bilinearly, the label with nearest neighbour.
    """
    smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)

    dims = tf.shape(image)
    height = dims[0]
    width = dims[1]

    new_height, new_width = _smallest_size_at_least(
        height, width, smallest_side)
    # Clamp both dimensions so neither falls below `smallest_side`.
    new_height = tf.maximum(new_height, smallest_side)
    new_width = tf.maximum(new_width, smallest_side)
    new_size = [new_height, new_width]

    # The resize ops expect a leading batch axis.
    image_4d = tf.expand_dims(image, 0)
    label_4d = tf.expand_dims(label, 0)

    resized_image = tf.image.resize_bilinear(
        image_4d, new_size, align_corners=False)
    resized_image = tf.squeeze(tf.cast(resized_image, tf.uint8), 0)

    resized_label = tf.image.resize_nearest_neighbor(
        label_4d, new_size, align_corners=False)
    resized_label = tf.squeeze(tf.cast(resized_label, tf.uint8), 0)

    return resized_image, resized_label
def _read_pngs_from(path):
    """Read every PNG in a directory, ordered by numeric file name.

    File names (without extension) must be integers; they define the order
    of the returned images.

    Args:
        path: path to the directory.

    Returns:
        A numpy array holding all images of the directory as uint8 arrays,
        sorted by the integer in their file name. An empty directory gives
        an empty array.

    Raises:
        ValueError: if a file name before its first dot is not an integer.
    """
    indexed = []
    png_files_path = glob.glob(os.path.join(path, '*.[pP][nN][gG]'))
    for filename in png_files_path:
        pixels = np.asarray(Image.open(filename), np.uint8)
        # os.path.basename handles the platform's path separator; splitting
        # on '/' by hand broke on Windows-style paths.
        image_name = os.path.basename(filename).split('.')[0]
        indexed.append([int(image_name), pixels])

    indexed.sort(key=lambda entry: entry[0])

    # Keep uint8 throughout; the pixel arrays are already of that dtype.
    images_only = np.array([entry[1] for entry in indexed])
    return images_only
def __init__(self):
    """Create the session plus PNG/JPEG decode and encode ops."""
    # One session is shared by all image coding calls.
    self._sess = tf.Session()

    # PNG decoding: string placeholder -> 3-channel image.
    png_data = tf.placeholder(dtype=tf.string)
    self._png_data = png_data
    self._decode_png = tf.image.decode_png(png_data, channels=3)

    # JPEG decoding: string placeholder -> 3-channel image.
    jpeg_data = tf.placeholder(dtype=tf.string)
    self._jpeg_data = jpeg_data
    self._decode_jpeg = tf.image.decode_jpeg(jpeg_data, channels=3)

    # Encoding: one uint8 image placeholder feeds both encoders.
    image = tf.placeholder(dtype=tf.uint8)
    self._image = image
    self._encoded_png = tf.image.encode_png(image)
    self._encoded_jpeg = tf.image.encode_jpeg(image)
def decode_from_tfrecords(filename, num_epoch=None):
    """Read one record from a TFRecord file and decode image and label.

    Args:
        filename: path of the TFRecord file.
        num_epoch: passed as ``num_epochs`` to the file name queue.

    Returns:
        Tuple ``(image, label)``: the uint8 image reshaped to the stored
        ``[height, width, nchannel]`` and the label cast to int32.
    """
    filename_queue = tf.train.string_input_producer(
        [filename], num_epochs=num_epoch)
    reader = tf.TFRecordReader()
    _, serialized = reader.read(filename_queue)

    spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'nchannel': tf.FixedLenFeature([], tf.int64),
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    example = tf.parse_single_example(serialized, features=spec)

    label = tf.cast(example['label'], tf.int32)
    flat_image = tf.decode_raw(example['image'], tf.uint8)

    # NOTE(review): tf.pack is the pre-1.0 name of tf.stack; kept as-is
    # because the TensorFlow version this file targets is not visible here.
    image_shape = tf.pack([
        tf.cast(example['height'], tf.int32),
        tf.cast(example['width'], tf.int32),
        tf.cast(example['nchannel'], tf.int32),
    ])
    image = tf.reshape(flat_image, image_shape)

    return image, label
def __init__(self):
    """Create the session plus PNG->JPEG, decode and resize/re-encode ops."""
    # One session is shared by all image coding calls.
    self._sess = tf.Session()

    # PNG -> JPEG conversion at quality 100.
    self._png_data = tf.placeholder(dtype=tf.string)
    decoded_png = tf.image.decode_png(self._png_data, channels=3)
    self._png_to_jpeg = tf.image.encode_jpeg(
        decoded_png, format='rgb', quality=100)

    # RGB JPEG decoding.
    self._decode_jpeg_data = tf.placeholder(dtype=tf.string)
    self._decode_jpeg = tf.image.decode_jpeg(
        self._decode_jpeg_data, channels=3)

    # Resize: add a batch axis, resize bilinearly to the size from FLAGS,
    # squeeze again and go back to uint8.
    batched = tf.expand_dims(self._decode_jpeg, 0)
    resized = tf.image.resize_bilinear(
        batched, [FLAGS.new_height, FLAGS.new_width])
    # Squeeze without an explicit axis, as in the original.
    squeezed = tf.squeeze(resized)
    self._resize = tf.cast(squeezed, tf.uint8)

    # Re-encode the resized image with the quality from FLAGS.
    self._new_jpeg = tf.image.encode_jpeg(
        self._resize,
        format='rgb',
        quality=FLAGS.jpeg_q,
        progressive=False,
        optimize_size=True,
        chroma_downsampling=True)
def preprocess(self, inputs):
    """Perform preprocess.

    Each image is passed through the function returned by
    ``self.get_preprocess_fn()`` inside a fresh graph, and the stacked
    result is evaluated in a session on that graph.

    Args:
        inputs: raw input to the model; a 3-D image or a 4-D batch of
            images (a 3-D input gets a leading axis added).

    Returns:
        preprocessed input data.
    """
    preprocess_fn = self.get_preprocess_fn()
    assert inputs.ndim in (3, 4), "invalid image format for preprocessing"
    if inputs.ndim == 3:
        # Promote a single image to a batch of one.
        inputs = np.expand_dims(inputs, axis=0)

    target_height = self.net_params.input_img_height
    target_width = self.net_params.input_img_width

    with tf.Graph().as_default() as cur_g:
        input_tensor = tf.convert_to_tensor(inputs, dtype=tf.uint8)
        per_image = [
            preprocess_fn(single, target_height, target_width)
            for single in tf.unstack(input_tensor)
        ]
        new_inputs = tf.stack(per_image)

    with tf.Session(graph=cur_g) as sess:
        return sess.run(new_inputs)
def read_and_decode(filename, batch_size):
    """Read records from a TFRecord file and batch images with labels.

    Args:
        filename: path of the TFRecord file.
        batch_size: number of examples per batch.

    Returns:
        Tuple ``(image_batch, label_batch)``; images are float32 of shape
        ``[512, 144, 3]`` per example, labels are reshaped to
        ``[batch_size]``.
    """
    # Build a file name queue from the single input file.
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    # Parse the label and the raw image bytes out of the example.
    spec = {
        'label': tf.FixedLenFeature([], tf.int64),
        'img_raw': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized_example, features=spec)

    img = tf.decode_raw(parsed['img_raw'], tf.uint8)
    print('xxxx: ', img.get_shape())
    img = tf.reshape(img, [512, 144, 3])
    # Scale by 1/255 and shift by -0.5.
    img = tf.cast(img, tf.float32) * (1. / 255) - 0.5
    label = tf.cast(parsed['label'], tf.int32)

    image_batch, label_batch = tf.train.batch(
        [img, label],
        batch_size=batch_size,
        num_threads=64,
        capacity=2000)
    return image_batch, tf.reshape(label_batch, [batch_size])
def read_and_decode(filename, img_size=128, depth=1):
    """Read one record from a ``.tfrecords`` file as image and one-hot label.

    Args:
        filename: path of the TFRecord file; must end with ``.tfrecords``.
        img_size: height and width the image bytes are reshaped to.
        depth: number of channels the image bytes are reshaped to.

    Returns:
        Tuple ``(img, label_onehot)``. ``img`` is float32, scaled by 1/255
        and shifted by -0.5; ``label_onehot`` has depth ``n_classes``.
        For a file name without the ``.tfrecords`` suffix a message is
        printed and ``([], [])`` is returned.
    """
    if not filename.endswith('.tfrecords'):
        # Call form of print: same output on Python 2, valid on Python 3
        # (the print statement is a SyntaxError there).
        print("Invalid file \"{:s}\"".format(filename))
        return [], []

    data_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(data_queue)

    features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'img_raw': tf.FixedLenFeature([], tf.string),
        })

    img = tf.decode_raw(features['img_raw'], tf.uint8)
    img = tf.reshape(img, [img_size, img_size, depth])
    # Normalize the image
    img = tf.cast(img, tf.float32) * (1. / 255) - 0.5

    label = tf.cast(features['label'], tf.int32)
    label_onehot = tf.stack(tf.one_hot(label, n_classes))
    return img, label_onehot
def read_and_decode(filename, img_size=128, depth=1):
    """Read one record from a ``.tfrecords`` file as image and one-hot label.

    Args:
        filename: path of the TFRecord file; must end with ``.tfrecords``.
        img_size: height and width the image bytes are reshaped to.
        depth: number of channels the image bytes are reshaped to.

    Returns:
        Tuple ``(img, label_onehot)``. ``img`` is float32, scaled by 1/255
        and shifted by -0.5; ``label_onehot`` has depth ``n_classes``.
        For a file name without the ``.tfrecords`` suffix a message is
        printed and ``([], [])`` is returned.
    """
    if not filename.endswith('.tfrecords'):
        # Call form of print: same output on Python 2, valid on Python 3
        # (the print statement is a SyntaxError there).
        print("Invalid file \"{:s}\"".format(filename))
        return [], []

    data_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(data_queue)

    features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'img_raw': tf.FixedLenFeature([], tf.string),
        })

    img = tf.decode_raw(features['img_raw'], tf.uint8)
    img = tf.reshape(img, [img_size, img_size, depth])
    # Normalize the image
    img = tf.cast(img, tf.float32) * (1. / 255) - 0.5

    label = tf.cast(features['label'], tf.int32)
    label_onehot = tf.stack(tf.one_hot(label, n_classes))
    return img, label_onehot
def __init__(self):
    """Build the ops that turn two raw frames into a stacked state.

    The current and previous frame variables are looked up in the 'input'
    scope, merged with an element-wise maximum, converted to grayscale,
    resized to ``self.dims`` and divided by 255. The result is placed in
    front of three slices of the state variable and assigned back to it.
    """
    self.history = StateProcessorSetting.history_length
    self.dims = StateProcessorSetting.observation_dims

    # Get current and previous frame; both are set by the environment.
    with tf.variable_scope('input', reuse=True):
        self.cur_frame = tf.get_variable('cur_frame', dtype=tf.uint8)
        self.prev_frame = tf.get_variable('prev_frame', dtype=tf.uint8)

    # NOTE(review): the original indentation was lost; everything up to
    # `self.observe` is placed in the 'input' scope and the state variable
    # outside of it -- confirm, since this affects variable names.
    with tf.variable_scope('input'):
        brightest = tf.maximum(tf.to_float(self.cur_frame),
                               tf.to_float(self.prev_frame))
        gray = tf.expand_dims(tf.image.rgb_to_grayscale(brightest), 0)
        resized = tf.image.resize_bilinear(
            gray, self.dims, align_corners=None, name='observation')
        self.observe = tf.div(tf.squeeze(resized), 255.0)

    self.state = tf.get_variable(
        name='state',
        shape=[self.dims[0], self.dims[1], self.history],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.0),
        trainable=False)

    self.to_stack = tf.expand_dims(self.observe, 2)
    # tf.split / tf.concat are called with the axis as first argument, as
    # in the original. The fourth slice of the state is dropped; the
    # four-way unpacking only works when `self.history` is 4.
    self.f3, self.f2, self.f1, _ = tf.split(2, self.history, self.state)
    self.concat = tf.concat(
        2, [self.to_stack, self.f3, self.f2, self.f1], name='concat')
    self.updateState = self.state.assign(self.concat)
def get_data(img_folder, label_folder, train_fraction, img_size,
             train_timesteps=4, test_timesteps=4, batch_size=1,
             sample_objects=False, n_threads=3, in_memory=False,
             which_seqs=None, truncated_threshold=2., occluded_threshold=3.,
             depth_folder=None, storage_dtype=tf.uint8, mirror=False,
             reverse=False, bbox_scale=.5):
    """Parse KITTI tracking data and build train and test stores.

    The parsed sequences are split by ``train_fraction``. ``mirror`` and
    ``reverse`` are applied to the train store only, and the test store
    gets ``(n_threads // 2) + 1`` threads.

    Returns:
        Tuple ``(train_store, train_minibatch, test_store, test_minibatch)``
        where the minibatches come from each store's ``get_minibatch()``.
    """
    kitti = KittiTrackingParser(
        img_folder, label_folder,
        presence=True, id=False, cls=False,
        truncated_threshold=truncated_threshold,
        occluded_threshold=occluded_threshold)
    train, test = split_sequence_dict(kitti.data_dict, train_fraction)

    def make_store(name, d, timesteps, n_threads, mirror=False,
                   reverse=False):
        # Settings shared by both stores come from the enclosing call.
        return KittiStore(
            d, timesteps, img_size, batch_size,
            sample_objects=sample_objects,
            which_seqs=which_seqs,
            n_threads=n_threads,
            in_memory=in_memory,
            depth_folder=depth_folder,
            storage_dtype=storage_dtype,
            mirror=mirror,
            reverse=reverse,
            bbox_scale=bbox_scale,
            name=name)

    train_store = make_store(
        'train', train, train_timesteps, n_threads, mirror, reverse)
    test_store = make_store(
        'test', test, test_timesteps, (n_threads // 2) + 1)

    train_batch = train_store.get_minibatch()
    test_batch = test_store.get_minibatch()
    return train_store, train_batch, test_store, test_batch
def read_and_augment_data(image_list, label_list, image_size, batch_size,
                          max_nrof_epochs, random_crop, random_flip,
                          random_rotate, nrof_preprocess_threads,
                          shuffle=True):
    """Build a batched input pipeline with optional augmentation.

    Args:
        image_list: image file names.
        label_list: integer labels, paired with ``image_list``.
        image_size: side length of the square output images.
        batch_size: number of examples per batch.
        max_nrof_epochs: epoch limit of the input queue.
        random_crop: crop randomly instead of a crop-or-pad resize.
        random_flip: apply a random horizontal flip.
        random_rotate: apply ``random_rotate_image`` through ``tf.py_func``.
        nrof_preprocess_threads: number of parallel preprocessing branches.
        shuffle: whether the input queue is shuffled.

    Returns:
        Tuple ``(image_batch, label_batch)`` from ``tf.train.batch_join``.
    """
    images = ops.convert_to_tensor(image_list, dtype=tf.string)
    labels = ops.convert_to_tensor(label_list, dtype=tf.int32)

    # Makes an input queue
    input_queue = tf.train.slice_input_producer(
        [images, labels], num_epochs=max_nrof_epochs, shuffle=shuffle)

    output_shape = (image_size, image_size, 3)
    images_and_labels = []
    # One preprocessing branch per thread; batch_join merges them.
    for _ in range(nrof_preprocess_threads):
        image, label = read_images_from_disk(input_queue)

        if random_rotate:
            image = tf.py_func(random_rotate_image, [image], tf.uint8)

        if random_crop:
            image = tf.random_crop(image, [image_size, image_size, 3])
        else:
            image = tf.image.resize_image_with_crop_or_pad(
                image, image_size, image_size)

        if random_flip:
            image = tf.image.random_flip_left_right(image)

        # pylint: disable=no-member
        image.set_shape(output_shape)
        image = tf.image.per_image_standardization(image)
        images_and_labels.append([image, label])

    image_batch, label_batch = tf.train.batch_join(
        images_and_labels,
        batch_size=batch_size,
        capacity=4 * nrof_preprocess_threads * batch_size,
        allow_smaller_final_batch=True)

    return image_batch, label_batch
def to_rgb(img):
    """Replicate a 2-D image into three identical uint8 channels.

    Args:
        img: 2-D array.

    Returns:
        uint8 array of shape ``img.shape + (3,)`` whose three channels all
        hold ``img``.
    """
    rows, cols = img.shape
    rgb = np.empty((rows, cols, 3), dtype=np.uint8)
    for channel in range(3):
        rgb[:, :, channel] = img
    return rgb
def get_video_matrix(self, features, feature_size, max_frames,
                     max_quantized_value, min_quantized_value):
    """Decode frame features from an input string and dequantize them.

    Args:
        features: raw feature values
        feature_size: length of each frame feature vector
        max_frames: number of frames (rows) in the output feature_matrix
        max_quantized_value: the maximum of the quantized value.
        min_quantized_value: the minimum of the quantized value.

    Returns:
        feature_matrix: matrix of all frame-features
        num_frames: number of frames in the sequence
    """
    # Bytes -> float32, one row of `feature_size` values per frame.
    raw_bytes = tf.decode_raw(features, tf.uint8)
    decoded_features = tf.reshape(
        tf.cast(raw_bytes, tf.float32), [-1, feature_size])

    # The reported frame count is capped at `max_frames`.
    frames_available = tf.shape(decoded_features)[0]
    num_frames = tf.minimum(frames_available, max_frames)

    dequantized = utils.Dequantize(
        decoded_features, max_quantized_value, min_quantized_value)
    feature_matrix = resize_axis(dequantized, 0, max_frames)
    return feature_matrix, num_frames