How to read and display an image from a .rec file - python-3.x

I am using im2rec.py tool to first generate lst files and then to generate .rec and .idx files as following:
BASE_DIR = './'
IMAGES_DIR = os.path.join(BASE_DIR,'IMAGES')
DATASET_DIR = os.path.join(BASE_DIR,'Dataset')
TRAIN_RATIO = 0.8
TEST_DATA_RATIO = 0.1
Dataset_lst_file = os.path.join(DATASET_DIR,"dataset")
!python $BASE_DIR/tools/im2rec.py --list --recursive --test-ratio=$TEST_DATA_RATIO --train-ratio=$TRAIN_RATIO $Dataset_lst_file $IMAGES_DIR
!python $BASE_DIR/tools/im2rec.py --resize 224 --center-crop --num-thread 4 $Dataset_lst_file $IMAGES_DIR
I am successfully generating .lst, .rec and .idx files. However, my doubt is how can I read a specific image from the .rec file and plot it. For instance, to know if the images were recorded ok or just to explore my dataset.
------------Update----------
I was able to plot as following:
#https://mxnet.apache.org/versions/1.5.0/tutorials/basic/data.html
data_iter = mx.image.ImageIter(batch_size=4, data_shape=(3, 224, 224),
path_imgrec=Dataset_lst_file+'_train.rec',
path_imgidx=Dataset_lst_file+'_train.idx')
data_iter.reset()
for j in range(4):
batch = data_iter.next()
data = batch.data[0]
#print(batch)
label = batch.label[0].asnumpy()
for i in range(4):
ax = plt.subplot(1,4,i+1)
plt.imshow(data[i].asnumpy().astype(np.uint8).transpose((1,2,0)))
ax.set_title('class: ' + str(label[i]))
plt.axis('off')
plt.show()

This tutorial includes an example of image visualization from a .rec file: https://gluon-cv.mxnet.io/build/examples_detection/finetune_detection.html
dataset = gcv.data.RecordFileDetection('pikachu_train.rec')
classes = ['pikachu'] # only one foreground class here
image, label = dataset[0]
print('label:', label)
# display image and label
ax = viz.plot_bbox(image, bboxes=label[:, :4], labels=label[:, 4:5], class_names=classes)
plt.show()

For completeness to previous answer. This will display images to screen using plot_bbox or render images to a folder.
Usage :
dumpRecordFileDetection('./data/val.rec', False, True, classes, ctx)
def dumpRecordFileDetection(record_filename, display_ui, output_to_directory, classes, ctx):
"""Dump RecordFileDetection to screen or a directory"""
if isinstance(ctx, mx.Context):
ctx = [ctx]
dataset = gcv.data.RecordFileDetection(record_filename)
print('images:', len(dataset))
image, label = dataset[0]
bboxes=label[:, :4]
labels=label[:, 4:5]
print(image.shape, label.shape)
print('labeldata:', label)
print('bboxes:', bboxes)
print('labels:', labels)
image_dump_dir = os.path.join("./dump")
if not os.path.exists(image_dump_dir):
os.makedirs(image_dump_dir)
for i, batch in enumerate(dataset):
size = len(batch)
image, label = batch
print(image.shape, label.shape)
bboxes = label[:, :4]
labels = label[:, 4:5].astype(np.uint8)
if output_to_directory:
file_path = os.path.join("./dump", "{0}_.png".format(i))
# Format (c x H x W)
img = image.asnumpy().astype(np.uint8)
for box, lbl, in zip(bboxes, labels):
cv2.rectangle(img,(box[0], box[1]),(box[2], box[3]),(0, 0, 255), 2)
txt = "{0}".format(classes[lbl[0]])
cv2.putText(img,txt,(box[0], box[1]), cv2.FONT_HERSHEY_PLAIN,1,(0,255,0),1,cv2.LINE_AA, False)
cv2.imwrite(file_path, img)
if display_ui:
ax = viz.plot_bbox(image, bboxes=bboxes, labels=labels, class_names=classes)
plt.show()

Related

Why are albumentations Augmentations (Yolo / YoloV5) altering Bounding Boxes if no augmentations are being placed?

I was using the Albumentations library in order to perform some data augmentations on an object detection dataset that I intended to train a YoloV5 model on.
I have to perform the augmentations seperately and save the images locally to disk, but when I do I noticed that some of the output bounding boxes returned aren't generating properly.
I have my augmentations set up in a seperate aug.py file, shown below (augmentations purposefully removed in debugging attempts, see below) -
import albumentations as A
import cv2
PROB = 0.5
bbp = A.BboxParams(format="yolo")
horizontal_flip_transform = A.Compose([
], bbox_params = bbp)
vertical_flip_transform = A.Compose([
], bbp)
pixel_dropout_transform = A.Compose([
], bbox_params = bbp)
random_rotate = A.Compose([
], bbox_params = bbp )
#NOTE: THIS METHOD IMPLIES THAT THE IMAGE WIDTHS MUST BE AT LEAST 50 PIXELS
#Remove this aug to remove this constraint
random_crop = A.Compose([
], bbox_params = bbp)
augs = [horizontal_flip_transform, vertical_flip_transform, pixel_dropout_transform, random_rotate, random_crop]
def get_augmentations():
return augs
And the relevant parts of my implementation for performing the augmentations and saving them to disk is below:
def run_augments_on_image(img_name, bboxes, max_images_to_generate = 500):
ret = []
img = np.array(Image.open(img_name), dtype=np.uint8)
transforms = get_augmentations()
for i in range(min(len(transforms), max_images_to_generate)):
transformed = transforms[i](image=img, bboxes = bboxes)
ret.append((transformed["image"] , transformed["bboxes"]))
return ret
def run_and_save_augments_on_image_sets(batch_img_names, bboxes_urls, max_images_to_generate, dataset_dir, trainval):
num_images = 0
for i in range(len(batch_img_names)):
bboxes = []
with open(os.path.join(dataset_dir, trainval, 'labels', bboxes_urls[i]), 'r') as f:
for row in f:
x = row.strip().split(' ')
x.append(row[0])
x.pop(0)
x[0] = float(x[0])
x[1] = float(x[1])
x[2] = float(x[2])
x[3] = float(x[3])
bboxes.append(x)
trans = run_augments_on_image(os.path.join(dataset_dir, trainval, 'images', batch_img_names[i]), bboxes)
img_index = len(os.listdir(os.path.join(dataset_dir, 'train' , 'images'))) + len(os.listdir(os.path.join(dataset_dir, 'valid', 'images'))) + 1
for j in range(len(trans)):
img_trans, bboxes_trans = trans[j]
p = Image.fromarray(img_trans).save(os.path.join(dataset_dir, trainval, 'images' , f'image-{img_index}.{batch_img_names[j].split(".")[-1]}'))
with open(os.path.join(dataset_dir, trainval, 'labels', f'image-{img_index}.txt'), 'w') as f:
for boxs in bboxes_trans:
print(f'{boxs[-1]} {boxs[0]} {boxs[1]} {boxs[2]} {boxs[3]}', file=f)
num_images += 1
img_index += 1
if num_images >= max_images_to_generate:
break
if num_images >= max_images_to_generate:
break
For testing purposes (some of the bounding boxes were off), I removed all the actual augmentations, expecting the input image label (one augmented image example shown below) to be equal to augmented label since there were no augmentations. But, as you can see, the two labels are different.
img-original.txt
0 0.5662285714285714 0.2740066225165563 0.5297714285714286 0.4837913907284769
img-augmented.txt
0 0.51488 0.47173333333333334 0.6405099999999999 0.6527333333333334
(The labels above are in normalized xywh YOLO format)
Why is albumentations altering the labels? None of the augmentations in augs.py contain anything.

How to apply a function to convert the paths to arrays using cv2 in tensorflow data pipeline?

Any help will be highly appreciated
I'm trying to load two lists containing image paths and their corresponding labels. Something like this:
p0 = ['a','b',....] #paths to images .tif format
p1 = [1,2,3,......] #paths to images .tif format
labels = [0,1,1,...] #corresponding labels w.r.t both the lists
I used a tf.data in the following way:
def TFData(p_0, p_1, batch_size, labels=None, is_train=True):
dset = tf.data.Dataset.from_tensor_slices((p_0,p_1))
if labels is not None:
label = tf.data.Dataset.from_tensor_slices(labels)
AUTO = tf.data.experimental.AUTOTUNE
final_dset = tf.data.Dataset.zip((dset, label))
final_dset = final_dset.batch(batch_size, drop_remainder=is_train).prefetch(AUTO)
return final_dset
This returns:
<PrefetchDataset shapes: (((64,), (64,)), (64,)), types: ((tf.string, tf.string), tf.int32)>
My question is how to apply a function to convert the paths to arrays using cv2 as the images are .tif files? such that the result will be:
<PrefetchDataset shapes: (((64,256,256,3), (64,256,256,3)), (64,)), types: ((tf.float64, tf.float64), tf.int32)>
I'm using a dataset.map. However it's throwing error:
def to_array(p_0):
im_1 = cv2.imread(p_0,1)
#im = tfio.experimental.image.decode_tiff(paths)
im_1 = cv2.resize(im_1,(img_w,img_h)) #img_w=img_h=256
im_1 = np.asarray(im_1, dtype=np.float64)
im_1 /= 255
return im_1
def parse_fn(p_0):
[p_0,] = tf.py_function(to_array, [p_0], [tf.float64])
return p_0
def TFData(p_0, p_1, batch_size, labels=None, is_train=True):
dset_1 = tf.data.Dataset.from_tensor_slices(p_0)
dset_1 = dset_1.map(parse_fn)
dset_2 = tf.data.Dataset.from_tensor_slices(p_1)
dset_2 = dset_2.map(parse_fn)
if labels is not None:
label = tf.data.Dataset.from_tensor_slices(labels)
AUTO = tf.data.experimental.AUTOTUNE
final_dset = tf.data.Dataset.zip((dset_1, dset_2, label))
final_dset = final_dset.batch(batch_size, drop_remainder=is_train).prefetch(AUTO)
return final_dset
print(train_data) #where train_data is defined as TFData()
<PrefetchDataset shapes: ((<unknown>, <unknown>), (64,)), types: ((tf.float64, tf.float64), tf.int32)>
This throws an error:
for (t,p),l in train_data.as_numpy_iterator():
print(t)
print(p)
print(l)
print(type(t))
break
SystemError: <built-in function imread> returned NULL without setting an error
[[{{node EagerPyFunc}}]] [Op:IteratorGetNext]
Any help will be highly appreciated
I think your problem is in cv2.imread.
Have you checked outside the functions to see if it is reading and plotting the data accordingly?
Please, try with -1 instead:
im_1 = cv2.imread(p_0,-1)

How can I compare 2 sets of images with OpenCV

I am using OpenCV to compare 2 images.
After a couple of days, I was able to modify it to compare a image to a list of images.
How can I compare a list of images with another list?
Ex: we have 2 folders Images1 and Images2. Images1 = te1.jpg, te2.jpg, te3.jpg; Images2 = te1.jpg, te2.jpg, te3.jpg.
I want to compare te1.jpg from Images1 with te1.jpg from Images2, te2.jpg from Images1 with te2.jpg from Images2 and te3.jpg from Images1 with te3.jpg from Images2.
Can I add both folders and make it loop thru them in order to get the correspondent image in Images2 for every image in Images1?
He is my code until now:
import cv2
import numpy as np
import glob
original = cv2.imread("te.jpg")
#Load all the images
all_images_to_compare = []
titles = []
for f in glob.iglob("images2/*"):
image = cv2.imread(f)
titles.append(f)
all_images_to_compare.append(image)
for image_to_compare, title in zip(all_images_to_compare, titles):
# 1) Check if 2 images are equals
if original.shape == image_to_compare.shape:
print("The images have the same size and channels")
difference = cv2.subtract(original, image_to_compare)
b, g, r = cv2.split(difference)
#image1 = original.shape
#image2 = duplicate.shape
cv2.imshow("difference", difference)
#cv2.imshow("b", b)
#cv2.imshow("g", g)
#cv2.imshow("r", r)
#print(image1)
#print(image2)
print(cv2.countNonZero(b))
if cv2.countNonZero(b) == 0 and cv2.countNonZero(g) == 0 and cv2.countNonZero(r) ==0:
print("Similarity: 100% (equal size and channels)")
# 2) Check for similarities between the 2 images
sift = cv2.xfeatures2d.SIFT_create()
kp_1, desc_1 = sift.detectAndCompute(original, None)
kp_2, desc_2 = sift.detectAndCompute(image_to_compare, None)
#print("Keypoints 1ST Image: " + str(len(kp_1)))
#print("Keypoints 2ND Image: " + str(len(kp_2)))
index_params = dict(algorithm=0, trees=5)
search_params = dict()
flann = cv2.FlannBasedMatcher(index_params, search_params)
matches = flann.knnMatch(desc_1, desc_2, k=2)
good_points = []
ratio = 0.9 # mai putin de 1
for m, n in matches:
if m.distance < ratio*n.distance:
good_points.append(m)
# Define how similar they are
number_keypoints = 0
if len(kp_1) <= len(kp_2):
number_keypoints = len(kp_1)
else:
number_keypoints = len(kp_2)
print("Keypoints 1ST Image: " + str(len(kp_1)))
print("Keypoints 2ND Image: " + str(len(kp_2)))
print("Title:" +title)
percentage_similarity = len(good_points) / number_keypoints * 100
print("Similarity: " + str(int(percentage_similarity)) + "%\n")
I think you just need a nested for loop?
So for the folders "Images1" and "Images2" - I would to it this way:
import os
import cv2
# load all image names into a list
ls_imgs1_names = os.listdir(Images1)
ls_imgs2_names = os.listdir(Images2)
# construct image paths and save in list
ls_imgs1_path = [os.path.join(Images1, img) for img in ls_imgs1_names]
ls_imgs2_path = [os.path.join(Images2, img) for img in ls_imgs2_names]
# list comprehensin to load imgs in lists
ls_imgs1 = [cv2.imread(img) for img in ls_imgs1_path]
ls_imgs2 = [cv2.imread(img) for img in ls_imgs2_path]
for original in ls_imgs1:
for image_to_compare in ls_imgs2:
# compare orignal to image_to_compare
# here just insert your code where you compare two images
Depending on your memory or rather the amount of images in your folder I would either load all imgs directly into a list as I did above or you load the imgs in the for loops, so that you loop over the ls_imgs1_path and ls_imgs2_path

How to print the detected classes after performing object detection on an image?

I am following the object_detection_tutorial.ipynb tutorial.
Here is the code ( I only put parts which are needed, the rest of the code is the same as the notebook):
my_results = [] # I added this, a list to hold the detected classes
PATH_TO_LABELS = 'D:\\TensorFlow\\models\\research\\object_detection\\data\\oid_v4_label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)
PATH_TO_TEST_IMAGES_DIR = pathlib.Path('C:\\Users\\Bhavin\\Desktop\\objects')
TEST_IMAGE_PATHS = sorted(list(PATH_TO_TEST_IMAGES_DIR.glob("*.jpg")))
TEST_IMAGE_PATHS
model = load_model()
def run_inference_for_single_image(model, image):
image = np.asarray(image)
# The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
input_tensor = tf.convert_to_tensor(image)
# The model expects a batch of images, so add an axis with `tf.newaxis`.
input_tensor = input_tensor[tf.newaxis,...]
# Run inference
output_dict = model(input_tensor)
# All outputs are batches tensors.
# Convert to numpy arrays, and take index [0] to remove the batch dimension.
# We're only interested in the first num_detections.
num_detections = int(output_dict.pop('num_detections'))
output_dict = {key:value[0, :num_detections].numpy()
for key,value in output_dict.items()}
output_dict['num_detections'] = num_detections
# detection_classes should be ints.
output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)
# Handle models with masks:
if 'detection_masks' in output_dict:
# Reframe the the bbox mask to the image size.
detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
output_dict['detection_masks'], output_dict['detection_boxes'],
image.shape[0], image.shape[1])
detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
tf.uint8)
output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy()
return output_dict
def show_inference(model, image_path):
# the array based representation of the image will be used later in order to prepare the
# result image with boxes and labels on it.
image_np = np.array(Image.open(image_path))
# Actual detection.
output_dict = run_inference_for_single_image(model, image_np)
# Visualization of the results of a detection.
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
output_dict['detection_boxes'],
output_dict['detection_classes'],
output_dict['detection_scores'],
category_index,
instance_masks=output_dict.get('detection_masks_reframed', None),
use_normalized_coordinates=True,
line_thickness=8)
name = "Image" + str(i) + ".jpg"
img = Image.fromarray(image_np)
plt.imsave(name,image_np)
my_results.append(output_dict['detection_classes']) # I added this
print(my_results) # I added this
#img.show()
i = 1
for image_path in TEST_IMAGE_PATHS:
show_inference(model, image_path)
i += 1
I checked some related stack overflow questions and the answer had something to do with category index. But the code and examples used are very different from the tutorial I am following.
The line : my_results.append(output_dict['detection_classes'])
Gives me output: [array([55], dtype=int64)]
How do I extract the classes of the detected objects?
First import six
Add get_classes_name_and_scores method, before def show_inference(model, image_path):
get_classes_name_and_scores method returns {'name': 'person', 'score': '91%'}
def get_classes_name_and_scores(
boxes,
classes,
scores,
category_index,
max_boxes_to_draw=20,
min_score_thresh=.9): # returns bigger than 90% precision
display_str = {}
if not max_boxes_to_draw:
max_boxes_to_draw = boxes.shape[0]
for i in range(min(max_boxes_to_draw, boxes.shape[0])):
if scores is None or scores[i] > min_score_thresh:
if classes[i] in six.viewkeys(category_index):
display_str['name'] = category_index[classes[i]]['name']
display_str['score'] = '{}%'.format(int(100 * scores[i]))
return display_str
Then add after vis_util.visualize_boxes_and_labels_on_image_array
print(get_classes_name_and_scores(
output_dict['detection_boxes'],
output_dict['detection_classes'],
output_dict['detection_scores'],
category_index))

Split dataset based on file names in pytorch Dataset

Is there a way to divide the dataset into training and testing based on the filenames. I have a folder containing two folders: input and output. Input folder has the images and output are the labels for that image. The file names in the input folder are something like input01_train.png and input01_test.png like shown below.
Dataset
/ \
Input Output
| |
input01_train.png output01_train.png
. .
. .
input01_test.png output01_test.png
The code I have only divides the dataset into inputs and labels not test and train.
class CancerDataset(Dataset):
def __init__(self, dataset_folder):#,label_folder):
self.dataset_folder = torchvision.datasets.ImageFolder(dataset_folder ,transform = transforms.Compose([transforms.Resize(512),transforms.ToTensor()]))
self.label_folder = torchvision.datasets.ImageFolder(dataset_folder ,transform = transforms.Compose([transforms.Resize(512),transforms.ToTensor()]))
def __getitem__(self,index):
img = self.dataset_folder[index]
label = self.label_folder[index]
return img,label
def __len__(self):
return len(self.dataset_folder)
trainset = CancerDataset(dataset_folder = '/content/drive/My Drive/cancer_data/')
trainsetloader = DataLoader(trainset,batch_size = 1, shuffle = True,num_workers = 0,pin_memory = True)
I would like to be able to divide the train and test set by their names if that is possible .
You could load the images yourself in __getitem__, selecting only those that contain '_train.png' or '_test.png'.
class CancerDataset(Dataset):
def __init__(self, datafolder, datatype='train', transform = transforms.Compose([transforms.Resize(512),transforms.ToTensor()]):
self.datafolder = datafolder
self.image_files_list = [s for s in os.listdir(datafolder) if
'_%s.png' % datatype in s]
# Same for the labels files
self.label_files_list = ...
self.transform = transform
def __len__(self):
return len(self.image_files_list)
def __getitem__(self, idx):
img_name = os.path.join(self.datafolder,
self.image_files_list[idx])
image = Image.open(img_name)
image = self.transform(image)
# Same for the labels files
label = .... # Load in etc
label = self.transform(label)
return image, label
Now you could make two datasets (trainset and testset).
trainset = CancerDataset(dataset_folder = '/content/drive/My Drive/cancer_data/', datatype='train')
testset = CancerDataset(dataset_folder = '/content/drive/My Drive/cancer_data/', datatype='test')

Resources