Split dataset based on file names in pytorch Dataset - python-3.x

Is there a way to divide the dataset into training and testing sets based on the filenames? I have a folder containing two folders: input and output. The input folder has the images, and the output folder has the labels for those images. The file names in the input folder are something like input01_train.png and input01_test.png, as shown below.
Dataset
├── Input
│   ├── input01_train.png
│   ├── ...
│   └── input01_test.png
└── Output
    ├── output01_train.png
    ├── ...
    └── output01_test.png
The code I have only divides the dataset into inputs and labels, not train and test:
class CancerDataset(Dataset):
    def __init__(self, dataset_folder):  #, label_folder):
        self.dataset_folder = torchvision.datasets.ImageFolder(dataset_folder, transform=transforms.Compose([transforms.Resize(512), transforms.ToTensor()]))
        self.label_folder = torchvision.datasets.ImageFolder(dataset_folder, transform=transforms.Compose([transforms.Resize(512), transforms.ToTensor()]))

    def __getitem__(self, index):
        img = self.dataset_folder[index]
        label = self.label_folder[index]
        return img, label

    def __len__(self):
        return len(self.dataset_folder)

trainset = CancerDataset(dataset_folder='/content/drive/My Drive/cancer_data/')
trainsetloader = DataLoader(trainset, batch_size=1, shuffle=True, num_workers=0, pin_memory=True)
I would like to be able to divide the train and test sets by their file names, if that is possible.

You could load the images yourself in __getitem__, selecting only those that contain '_train.png' or '_test.png'.
class CancerDataset(Dataset):
    def __init__(self, datafolder, datatype='train',
                 transform=transforms.Compose([transforms.Resize(512), transforms.ToTensor()])):
        self.datafolder = datafolder
        self.image_files_list = [s for s in os.listdir(datafolder)
                                 if '_%s.png' % datatype in s]
        # Same for the label files
        self.label_files_list = ...
        self.transform = transform

    def __len__(self):
        return len(self.image_files_list)

    def __getitem__(self, idx):
        img_name = os.path.join(self.datafolder, self.image_files_list[idx])
        image = Image.open(img_name)
        image = self.transform(image)
        # Same for the label files
        label = ...  # load the matching label image here
        label = self.transform(label)
        return image, label
Now you could make two datasets (trainset and testset).
trainset = CancerDataset(datafolder='/content/drive/My Drive/cancer_data/', datatype='train')
testset = CancerDataset(datafolder='/content/drive/My Drive/cancer_data/', datatype='test')
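With the two datasets in place, a minimal sketch of wiring them into loaders, reusing the loader settings from the question as placeholders:

from torch.utils.data import DataLoader

trainloader = DataLoader(trainset, batch_size=1, shuffle=True, num_workers=0, pin_memory=True)
testloader = DataLoader(testset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True)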

Related

Why are albumentations Augmentations (Yolo / YoloV5) altering Bounding Boxes if no augmentations are being placed?

I was using the Albumentations library to perform some data augmentations on an object detection dataset that I intended to train a YoloV5 model on.
I have to perform the augmentations separately and save the images locally to disk, but when I do, I notice that some of the returned bounding boxes aren't generated properly.
I have my augmentations set up in a separate aug.py file, shown below (augmentations purposefully removed in debugging attempts, see below):
import albumentations as A
import cv2

PROB = 0.5

bbp = A.BboxParams(format="yolo")

horizontal_flip_transform = A.Compose([
], bbox_params=bbp)

vertical_flip_transform = A.Compose([
], bbp)

pixel_dropout_transform = A.Compose([
], bbox_params=bbp)

random_rotate = A.Compose([
], bbox_params=bbp)

# NOTE: THIS METHOD IMPLIES THAT THE IMAGE WIDTHS MUST BE AT LEAST 50 PIXELS
# Remove this aug to remove this constraint
random_crop = A.Compose([
], bbox_params=bbp)

augs = [horizontal_flip_transform, vertical_flip_transform, pixel_dropout_transform, random_rotate, random_crop]

def get_augmentations():
    return augs
And the relevant parts of my implementation for performing the augmentations and saving them to disk are below:
def run_augments_on_image(img_name, bboxes, max_images_to_generate=500):
    ret = []
    img = np.array(Image.open(img_name), dtype=np.uint8)
    transforms = get_augmentations()
    for i in range(min(len(transforms), max_images_to_generate)):
        transformed = transforms[i](image=img, bboxes=bboxes)
        ret.append((transformed["image"], transformed["bboxes"]))
    return ret

def run_and_save_augments_on_image_sets(batch_img_names, bboxes_urls, max_images_to_generate, dataset_dir, trainval):
    num_images = 0
    for i in range(len(batch_img_names)):
        bboxes = []
        with open(os.path.join(dataset_dir, trainval, 'labels', bboxes_urls[i]), 'r') as f:
            for row in f:
                x = row.strip().split(' ')
                x.append(row[0])
                x.pop(0)
                x[0] = float(x[0])
                x[1] = float(x[1])
                x[2] = float(x[2])
                x[3] = float(x[3])
                bboxes.append(x)
        trans = run_augments_on_image(os.path.join(dataset_dir, trainval, 'images', batch_img_names[i]), bboxes)
        img_index = len(os.listdir(os.path.join(dataset_dir, 'train', 'images'))) + len(os.listdir(os.path.join(dataset_dir, 'valid', 'images'))) + 1
        for j in range(len(trans)):
            img_trans, bboxes_trans = trans[j]
            p = Image.fromarray(img_trans).save(os.path.join(dataset_dir, trainval, 'images', f'image-{img_index}.{batch_img_names[j].split(".")[-1]}'))
            with open(os.path.join(dataset_dir, trainval, 'labels', f'image-{img_index}.txt'), 'w') as f:
                for boxs in bboxes_trans:
                    print(f'{boxs[-1]} {boxs[0]} {boxs[1]} {boxs[2]} {boxs[3]}', file=f)
            num_images += 1
            img_index += 1
            if num_images >= max_images_to_generate:
                break
        if num_images >= max_images_to_generate:
            break
For testing purposes (some of the bounding boxes were off), I removed all the actual augmentations, expecting the input image's label (one augmented example is shown below) to equal the augmented label, since no augmentations were applied. But, as you can see, the two labels are different.
img-original.txt
0 0.5662285714285714 0.2740066225165563 0.5297714285714286 0.4837913907284769
img-augmented.txt
0 0.51488 0.47173333333333334 0.6405099999999999 0.6527333333333334
(The labels above are in normalized xywh YOLO format)
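For reference, a small sketch of how those normalized xywh values map back to pixel corner coordinates (the image size arguments here are hypothetical):

def yolo_to_corners(cx, cy, w, h, img_w, img_h):
    # YOLO stores the box center (cx, cy) and size (w, h) as fractions
    # of the image width and height
    x_min = (cx - w / 2) * img_w
    y_min = (cy - h / 2) * img_h
    x_max = (cx + w / 2) * img_w
    y_max = (cy + h / 2) * img_h
    return x_min, y_min, x_max, y_max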
Why is albumentations altering the labels? None of the augmentations in aug.py contain anything.

Working with large multiple datasets where each dataset contains multiple values - Pytorch

I'm training a neural network and have over 15 GB of data inside a folder. The folder has multiple pickle files, and each file contains two lists that each hold multiple values.
This looks like the following:
dataset_folder/
    file.pickle
    file_2.pickle
    ...
    file_n.pickle
Each file_*.pickle contains variable-length lists (list x and list y).
How do I load all the data to train the model without running into memory issues?
To implement a custom Dataset class that the PyTorch loader can work with, we need to implement three methods:
__init__
__getitem__
__len__
Let's go through how to implement each one of them separately.
__init__
def __init__(self):
    # Original data has the following format:
    """
    dict_object =
    {
        "x":[],
        "y":[]
    }
    """
    DIRECTORY = "data/raw"
    self.directory = DIRECTORY
    self.dataset_file_name = os.listdir(DIRECTORY)
    self.dataset_file_name_index = 0
    self.dataset_length = 0
    self.prefix_sum_idx = list()
    # Loop over each file and accumulate the length of the overall dataset
    # (you might need to check that file_name is actually a file)
    for file_name in self.dataset_file_name:
        with open(f'{DIRECTORY}/{file_name}', "rb") as openfile:
            dict_object = pickle.load(openfile)
            # x and y are paired samples, so each file contributes len(x)
            # entries (this assumes len(x) == len(y))
            curr_page_sum = len(dict_object["x"])
            self.prefix_sum_idx.append(curr_page_sum)
            self.dataset_length += curr_page_sum
    # Prefix sum, so we know which file each global index falls into.
    for i in range(1, len(self.prefix_sum_idx)):
        self.prefix_sum_idx[i] = self.prefix_sum_idx[i] + self.prefix_sum_idx[i - 1]
    assert self.prefix_sum_idx[-1] == self.dataset_length
    self.x = []
    self.y = []
As you can see above, the main idea is to use a prefix sum to treat the whole dataset as one sequence: whenever we need to access a specific index later, we simply look into prefix_sum_idx to see which file that index falls in.
Say we need to access index 150. Thanks to the prefix sum, we now know that index 150 lives in the second .pickle file. We still need a fast mechanism to locate that index relative to prefix_sum_idx; this is explained in __getitem__.
__getitem__
def read_pickle_file(self, idx):
    # Load the idx-th file and cache its two lists
    file_name = self.dataset_file_name[idx]
    with open(f'{self.directory}/{file_name}', "rb") as openfile:
        dict_object = pickle.load(openfile)
    self.x = dict_object['x']
    self.y = dict_object['y']

def __getitem__(self, idx):
    # Similar to C++ std::upper_bound - O(log n)
    temp = bisect.bisect_right(self.prefix_sum_idx, idx)
    self.read_pickle_file(temp)
    # Subtract the cumulative count of all previous files to get the
    # position inside the current file
    local_idx = idx - (self.prefix_sum_idx[temp - 1] if temp > 0 else 0)
    return self.x[local_idx], self.y[local_idx]
Check the bisect_right() docs for details on how it works; simply put, it returns the rightmost position in a sorted list at which the given element can be inserted while keeping the list sorted. In our approach, we're interested in only one question: "which file should I access in order to get the appropriate data?". More importantly, it answers that in O(log n).
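As a quick illustration of that lookup (the file sizes below are made up, not from the question):

import bisect

# Three hypothetical files holding 100, 80, and 120 samples
prefix_sum_idx = [100, 180, 300]

for idx in (0, 99, 100, 150, 299):
    file_idx = bisect.bisect_right(prefix_sum_idx, idx)
    local_idx = idx - (prefix_sum_idx[file_idx - 1] if file_idx > 0 else 0)
    print(f'global index {idx} -> file {file_idx}, local index {local_idx}')
# e.g. global index 150 -> file 1 (the second file), local index 50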
__len__
def __len__(self):
    return self.dataset_length
To get the length of our dataset, we loop through each file in the folder and accumulate the results, as shown in __init__.
The full code sample goes like this:
import bisect
import os
import pickle

import torch
from torch.utils.data import Dataset, DataLoader

class dataset(Dataset):
    def __init__(self):
        # Original data has the following format:
        """
        dict_object =
        {
            "x":[],
            "y":[]
        }
        """
        DIRECTORY = "data/raw"
        self.directory = DIRECTORY
        self.dataset_file_name = os.listdir(DIRECTORY)
        self.dataset_file_name_index = 0
        self.dataset_length = 0
        self.prefix_sum_idx = list()
        # Loop over each file and accumulate the length of the overall dataset
        # (you might need to check that file_name is actually a file)
        for file_name in self.dataset_file_name:
            with open(f'{DIRECTORY}/{file_name}', "rb") as openfile:
                dict_object = pickle.load(openfile)
                # x and y are paired samples, so each file contributes len(x)
                # entries (this assumes len(x) == len(y))
                curr_page_sum = len(dict_object["x"])
                self.prefix_sum_idx.append(curr_page_sum)
                self.dataset_length += curr_page_sum
        # Prefix sum, so we know which file each global index falls into.
        for i in range(1, len(self.prefix_sum_idx)):
            self.prefix_sum_idx[i] = self.prefix_sum_idx[i] + self.prefix_sum_idx[i - 1]
        assert self.prefix_sum_idx[-1] == self.dataset_length
        self.x = []
        self.y = []

    def read_pickle_file(self, idx):
        # Load the idx-th file and cache its two lists
        file_name = self.dataset_file_name[idx]
        with open(f'{self.directory}/{file_name}', "rb") as openfile:
            dict_object = pickle.load(openfile)
        self.x = dict_object['x']
        self.y = dict_object['y']

    def __getitem__(self, idx):
        # Similar to C++ std::upper_bound - O(log n)
        temp = bisect.bisect_right(self.prefix_sum_idx, idx)
        self.read_pickle_file(temp)
        # Subtract the cumulative count of all previous files
        local_idx = idx - (self.prefix_sum_idx[temp - 1] if temp > 0 else 0)
        return self.x[local_idx], self.y[local_idx]

    def __len__(self):
        return self.dataset_length

large_dataset = dataset()
train_size = int(0.8 * len(large_dataset))
validation_size = len(large_dataset) - train_size
train_dataset, validation_dataset = torch.utils.data.random_split(large_dataset, [train_size, validation_size])
validation_loader = DataLoader(validation_dataset, batch_size=64, num_workers=4, shuffle=False)
train_loader = DataLoader(train_dataset, batch_size=64, num_workers=4, shuffle=False)
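A quick sanity check of the split; what the loaders yield depends on the element types stored in your pickles, so this sketch just grabs one sample and one batch:

# Fetch one sample directly from the dataset, then one batch via the loader
x0, y0 = large_dataset[0]
print(type(x0), type(y0))

xb, yb = next(iter(train_loader))
print(len(xb), len(yb))  # each should have batch_size entries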

How to apply a function to convert the paths to arrays using cv2 in tensorflow data pipeline?

Any help will be highly appreciated.
I'm trying to load two lists containing image paths and their corresponding labels. Something like this:
p0 = ['a','b',....] #paths to images .tif format
p1 = [1,2,3,......] #paths to images .tif format
labels = [0,1,1,...] #corresponding labels w.r.t both the lists
I used tf.data in the following way:
def TFData(p_0, p_1, batch_size, labels=None, is_train=True):
    dset = tf.data.Dataset.from_tensor_slices((p_0, p_1))
    if labels is not None:
        label = tf.data.Dataset.from_tensor_slices(labels)
    AUTO = tf.data.experimental.AUTOTUNE
    final_dset = tf.data.Dataset.zip((dset, label))
    final_dset = final_dset.batch(batch_size, drop_remainder=is_train).prefetch(AUTO)
    return final_dset
This returns:
<PrefetchDataset shapes: (((64,), (64,)), (64,)), types: ((tf.string, tf.string), tf.int32)>
My question is: how do I apply a function that converts the paths to arrays using cv2, given that the images are .tif files, such that the result will be:
<PrefetchDataset shapes: (((64,256,256,3), (64,256,256,3)), (64,)), types: ((tf.float64, tf.float64), tf.int32)>
I'm using dataset.map. However, it's throwing an error:
def to_array(p_0):
    im_1 = cv2.imread(p_0, 1)
    # im = tfio.experimental.image.decode_tiff(paths)
    im_1 = cv2.resize(im_1, (img_w, img_h))  # img_w = img_h = 256
    im_1 = np.asarray(im_1, dtype=np.float64)
    im_1 /= 255
    return im_1

def parse_fn(p_0):
    [p_0,] = tf.py_function(to_array, [p_0], [tf.float64])
    return p_0

def TFData(p_0, p_1, batch_size, labels=None, is_train=True):
    dset_1 = tf.data.Dataset.from_tensor_slices(p_0)
    dset_1 = dset_1.map(parse_fn)
    dset_2 = tf.data.Dataset.from_tensor_slices(p_1)
    dset_2 = dset_2.map(parse_fn)
    if labels is not None:
        label = tf.data.Dataset.from_tensor_slices(labels)
    AUTO = tf.data.experimental.AUTOTUNE
    final_dset = tf.data.Dataset.zip((dset_1, dset_2, label))
    final_dset = final_dset.batch(batch_size, drop_remainder=is_train).prefetch(AUTO)
    return final_dset

print(train_data)  # where train_data is defined as TFData()
<PrefetchDataset shapes: ((<unknown>, <unknown>), (64,)), types: ((tf.float64, tf.float64), tf.int32)>
This throws an error:
for (t, p), l in train_data.as_numpy_iterator():
    print(t)
    print(p)
    print(l)
    print(type(t))
    break
SystemError: <built-in function imread> returned NULL without setting an error
[[{{node EagerPyFunc}}]] [Op:IteratorGetNext]
I think your problem is in cv2.imread.
Have you checked, outside of the functions, whether it reads and plots the data correctly?
Please try with -1 instead:
im_1 = cv2.imread(p_0,-1)
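As the answer suggests, it is worth checking cv2.imread outside the tf.data pipeline first; note that imread returns None rather than raising when it cannot read a file. A minimal check, with a placeholder path:

import cv2

# Placeholder path; substitute one entry from p_0
img = cv2.imread('path/to/sample.tif', -1)  # -1 keeps the file's native depth and channels
if img is None:
    print('cv2.imread could not read the file (bad path or unsupported codec)')
else:
    print(img.shape, img.dtype)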

How to read and display an image from a .rec file

I am using the im2rec.py tool to first generate .lst files and then to generate the .rec and .idx files, as follows:
BASE_DIR = './'
IMAGES_DIR = os.path.join(BASE_DIR,'IMAGES')
DATASET_DIR = os.path.join(BASE_DIR,'Dataset')
TRAIN_RATIO = 0.8
TEST_DATA_RATIO = 0.1
Dataset_lst_file = os.path.join(DATASET_DIR,"dataset")
!python $BASE_DIR/tools/im2rec.py --list --recursive --test-ratio=$TEST_DATA_RATIO --train-ratio=$TRAIN_RATIO $Dataset_lst_file $IMAGES_DIR
!python $BASE_DIR/tools/im2rec.py --resize 224 --center-crop --num-thread 4 $Dataset_lst_file $IMAGES_DIR
I am successfully generating the .lst, .rec and .idx files. However, my question is: how can I read a specific image from the .rec file and plot it? For instance, to check whether the images were recorded correctly, or just to explore my dataset.
------------Update----------
I was able to plot them as follows:
# https://mxnet.apache.org/versions/1.5.0/tutorials/basic/data.html
data_iter = mx.image.ImageIter(batch_size=4, data_shape=(3, 224, 224),
                               path_imgrec=Dataset_lst_file + '_train.rec',
                               path_imgidx=Dataset_lst_file + '_train.idx')
data_iter.reset()
for j in range(4):
    batch = data_iter.next()
    data = batch.data[0]
    # print(batch)
    label = batch.label[0].asnumpy()
    for i in range(4):
        ax = plt.subplot(1, 4, i + 1)
        plt.imshow(data[i].asnumpy().astype(np.uint8).transpose((1, 2, 0)))
        ax.set_title('class: ' + str(label[i]))
        plt.axis('off')
    plt.show()
This tutorial includes an example of image visualization from a .rec file: https://gluon-cv.mxnet.io/build/examples_detection/finetune_detection.html
dataset = gcv.data.RecordFileDetection('pikachu_train.rec')
classes = ['pikachu'] # only one foreground class here
image, label = dataset[0]
print('label:', label)
# display image and label
ax = viz.plot_bbox(image, bboxes=label[:, :4], labels=label[:, 4:5], class_names=classes)
plt.show()
To complement the previous answer: this will display images on screen using plot_bbox or render them to a folder.
Usage:
dumpRecordFileDetection('./data/val.rec', False, True, classes, ctx)
def dumpRecordFileDetection(record_filename, display_ui, output_to_directory, classes, ctx):
    """Dump RecordFileDetection to screen or a directory"""
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    dataset = gcv.data.RecordFileDetection(record_filename)
    print('images:', len(dataset))
    image, label = dataset[0]
    bboxes = label[:, :4]
    labels = label[:, 4:5]
    print(image.shape, label.shape)
    print('labeldata:', label)
    print('bboxes:', bboxes)
    print('labels:', labels)
    image_dump_dir = os.path.join("./dump")
    if not os.path.exists(image_dump_dir):
        os.makedirs(image_dump_dir)
    for i, batch in enumerate(dataset):
        size = len(batch)
        image, label = batch
        print(image.shape, label.shape)
        bboxes = label[:, :4]
        labels = label[:, 4:5].astype(np.uint8)
        if output_to_directory:
            file_path = os.path.join("./dump", "{0}_.png".format(i))
            # Format (C x H x W)
            img = image.asnumpy().astype(np.uint8)
            for box, lbl in zip(bboxes, labels):
                cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), 2)
                txt = "{0}".format(classes[lbl[0]])
                cv2.putText(img, txt, (box[0], box[1]), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0), 1, cv2.LINE_AA, False)
            cv2.imwrite(file_path, img)
        if display_ui:
            ax = viz.plot_bbox(image, bboxes=bboxes, labels=labels, class_names=classes)
            plt.show()

How to print the detected classes after performing object detection on an image?

I am following the object_detection_tutorial.ipynb tutorial.
Here is the code (I only included the parts that are needed; the rest is the same as the notebook):
my_results = []  # I added this, a list to hold the detected classes

PATH_TO_LABELS = 'D:\\TensorFlow\\models\\research\\object_detection\\data\\oid_v4_label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

PATH_TO_TEST_IMAGES_DIR = pathlib.Path('C:\\Users\\Bhavin\\Desktop\\objects')
TEST_IMAGE_PATHS = sorted(list(PATH_TO_TEST_IMAGES_DIR.glob("*.jpg")))
TEST_IMAGE_PATHS

model = load_model()

def run_inference_for_single_image(model, image):
    image = np.asarray(image)
    # The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
    input_tensor = tf.convert_to_tensor(image)
    # The model expects a batch of images, so add an axis with `tf.newaxis`.
    input_tensor = input_tensor[tf.newaxis, ...]
    # Run inference
    output_dict = model(input_tensor)
    # All outputs are batch tensors.
    # Convert to numpy arrays, and take index [0] to remove the batch dimension.
    # We're only interested in the first num_detections.
    num_detections = int(output_dict.pop('num_detections'))
    output_dict = {key: value[0, :num_detections].numpy()
                   for key, value in output_dict.items()}
    output_dict['num_detections'] = num_detections
    # detection_classes should be ints.
    output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)
    # Handle models with masks:
    if 'detection_masks' in output_dict:
        # Reframe the bbox mask to the image size.
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            output_dict['detection_masks'], output_dict['detection_boxes'],
            image.shape[0], image.shape[1])
        detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
                                           tf.uint8)
        output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy()
    return output_dict

def show_inference(model, image_path):
    # the array based representation of the image will be used later in order
    # to prepare the result image with boxes and labels on it.
    image_np = np.array(Image.open(image_path))
    # Actual detection.
    output_dict = run_inference_for_single_image(model, image_np)
    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks_reframed', None),
        use_normalized_coordinates=True,
        line_thickness=8)
    name = "Image" + str(i) + ".jpg"
    img = Image.fromarray(image_np)
    plt.imsave(name, image_np)
    my_results.append(output_dict['detection_classes'])  # I added this
    print(my_results)  # I added this
    # img.show()

i = 1
for image_path in TEST_IMAGE_PATHS:
    show_inference(model, image_path)
    i += 1
I checked some related Stack Overflow questions, and the answers had something to do with category_index, but the code and examples used are very different from the tutorial I am following.
The line my_results.append(output_dict['detection_classes']) gives me the output: [array([55], dtype=int64)]
How do I extract the classes of the detected objects?
First, import six.
Then add the get_classes_name_and_scores method below, before def show_inference(model, image_path):.
get_classes_name_and_scores returns something like {'name': 'person', 'score': '91%'}.
def get_classes_name_and_scores(
        boxes,
        classes,
        scores,
        category_index,
        max_boxes_to_draw=20,
        min_score_thresh=.9):  # return only detections above 90% confidence
    display_str = {}
    if not max_boxes_to_draw:
        max_boxes_to_draw = boxes.shape[0]
    for i in range(min(max_boxes_to_draw, boxes.shape[0])):
        if scores is None or scores[i] > min_score_thresh:
            if classes[i] in six.viewkeys(category_index):
                display_str['name'] = category_index[classes[i]]['name']
                display_str['score'] = '{}%'.format(int(100 * scores[i]))
    return display_str
Then, after the vis_util.visualize_boxes_and_labels_on_image_array call, add:
print(get_classes_name_and_scores(
    output_dict['detection_boxes'],
    output_dict['detection_classes'],
    output_dict['detection_scores'],
    category_index))
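For the raw output shown in the question, [array([55], dtype=int64)], the class ids can also be mapped to names directly through category_index; a small sketch using the my_results list collected in the question:

for detection_classes in my_results:
    names = [category_index[int(c)]['name'] for c in detection_classes]
    print(names)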
