__getitem__ for custom DataLoader is not working - PyTorch

I am new to deep learning and PyTorch. I have a dataset of 6000 images, with all four classes in a single folder, and a .csv file that maps each image name to its corresponding one-hot encoded representation (ground-truth label). I want to bind each image name to its label so I can feed it into the neural network, and I found that a custom Dataset can be used for this. I tried the following code snippet in PyCharm:
import os
import pandas as pd
import torch
from skimage import io
from torch.utils.data import Dataset

class DFUDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.DFU = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.DFU)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        img_name = os.path.join(self.root_dir, self.DFU.iloc[idx, 0])  # image name in column 0
        image = io.imread(img_name)
        # the one-hot label occupies the remaining columns of the row
        img_label = self.DFU.iloc[idx, 1:].values
        sample = {'image': image, 'img_label': img_label}
        return sample

DFU_dataset = DFUDataset(
    csv_file='C:/Users/aleems2/Desktop/dfu/DFUC2021_trainset_210427/DFUC2021_train/Labelled_data_ground_truth.csv',
    root_dir="C:/Users/aleems2/Desktop/dfu/DFUC2021_trainset_210427/DFUC2021_train/Labelled_test_images")
However, when I try to debug the code, a blue circle appears right beside __getitem__, and the debugger never steps into this function. It just runs __init__ and then comes out of class DFUDataset(Dataset). I do not know what to override __getitem__ with.
I have spent hours on this, but could not figure it out.
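For what it's worth, __getitem__ is never executed when the dataset object is constructed; it only runs when the dataset is indexed, either directly or by a DataLoader. A minimal sketch of how it gets triggered, assuming the dataset above and that all images share one shape so default batching works:

from torch.utils.data import DataLoader

# Indexing the dataset is what invokes __getitem__ - set a breakpoint
# inside it and step over this line to see the debugger enter it.
sample = DFU_dataset[0]
print(sample['image'].shape, sample['img_label'])

# A DataLoader also calls __getitem__, once per index, when iterated.
loader = DataLoader(DFU_dataset, batch_size=4, shuffle=True)
for batch in loader:
    images, labels = batch['image'], batch['img_label']
    break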

Related

Problem with extracting labels from my dataset

I have an image dataset with 35 classes; all the images are in one folder, and part of each image's name is its label. An example image name looks like this:
D34_Samsung_GalaxyS3Mini-images-flat-D01_I_flat_0001.jpg
And the label becomes D01 here.
In the Dataset class definition, __getitem__ should return the image's label as the target, right? If we go by index, 34 should be returned for this example.
I have a code to define the dataset:
import re
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

class MyDataset(Dataset):
    def __init__(self, imgs, transform=None):
        self.imgs = imgs
        self.transform = transform or transforms.ToTensor()
        self.class_to_idx = {}

    def __getitem__(self, index):
        image_path = self.imgs[index]
        target = image_path.split('_')[0]
        target = re.findall(r'D\d+.+', target)
        image = Image.open(image_path)
        if self.transform is not None:
            image = self.transform(image)
        if target[0] in self.class_to_idx:
            target = [self.class_to_idx[target[0]]]
        else:
            self.class_to_idx[target[0]] = len(self.class_to_idx)
            target = [self.class_to_idx[target[0]]]
        return image, target

    def __len__(self):
        return len(self.imgs)
But when I tested it, I realized it does not extract the labels correctly: even though there are 35 classes, the target is always a number between 0 and 15, i.e. within the batch size (batch size = 16), and an image may get a different label each time the code is executed! (Presumably this happens because class_to_idx is filled lazily inside __getitem__, so the indices depend on the order in which images happen to be visited.)
[screenshot: output of the above code]
So I changed the Dataset code: I removed a few lines and obtained the label directly from the image name instead of using class_to_idx:
class MyDataset(Dataset):
    def __init__(self, imgs, transform=None):
        self.imgs = imgs
        self.transform = transform or transforms.ToTensor()
        self.class_to_idx = {}

    def __getitem__(self, index):
        image_path = self.imgs[index]
        target = image_path.split('_')[0]
        target = target.split('D')[1]
        target = int(target)
        image = Image.open(image_path)
        if self.transform is not None:
            image = self.transform(image)
        return image, target

    def __len__(self):
        return len(self.imgs)
When I tested this version, the numbers were no longer between 0 and 15; they were the real image labels:
[screenshot: output of the changed code]
My problem is that when I train the model with the first code, my CNN model trains correctly and does not give an error.
But by the second code (my edition), even though the output was correct in the test, the model cannot train and errors:
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input> in <module>
      1 criterion, optimizer, scheduler = lossAndOptim(model=model)
      2
----> 3 losses_val, losses_trn, accs_val, accs_trn = train_model(model,
      4     train_dl, valid_dl,
      5     criterion, optimizer,

4 frames
/usr/local/lib/python3.8/dist-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
   3024     if size_average is not None or reduce is not None:
   3025         reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3026     return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
   3027
   3028

ValueError: Expected input batch_size (16) to match target batch_size (0).
Whatever I search for, the answers I find relate to the model. But there is no problem with the model: it trains without error with the first code. I edited the source code and googled, but the answers were not related to my problem. Thank you for your advice.
I changed the code as below, and my problem was solved: the target needed to be mapped through the class_to_idx dictionary and returned wrapped in a list.
class MyDataset(Dataset):
    def __init__(self, imgs, transform=None):
        self.imgs = imgs
        self.transform = transform or transforms.ToTensor()
        self.class_to_idx = {}

    def __getitem__(self, index):
        image_path = self.imgs[index]
        target = image_path.split('-')[0]
        label = target.split('_')[0]
        label = label.split('D')[1]
        name = target
        image = Image.open(image_path)
        if self.transform is not None:
            image = self.transform(image)
        if target in self.class_to_idx:
            target = [self.class_to_idx[target]]
        else:
            self.class_to_idx[target] = (int(label) - 1)
            target = [self.class_to_idx[target]]
        return image, target

    def __len__(self):
        return len(self.imgs)
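An alternative worth noting: the label logic can be made fully deterministic by building the mapping once in __init__, so it no longer depends on visit order or on DataLoader workers each holding their own dataset copy. A minimal sketch under the same filename convention; the regex and the zero-based indexing are assumptions:

import re
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

class MyDataset(Dataset):
    def __init__(self, imgs, transform=None):
        self.imgs = imgs
        self.transform = transform or transforms.ToTensor()
        # Build the label mapping up front from every filename, so each
        # image always receives the same zero-based class index.
        labels = sorted({re.search(r'D(\d+)', p).group(1) for p in imgs})
        self.class_to_idx = {lab: i for i, lab in enumerate(labels)}

    def __getitem__(self, index):
        image_path = self.imgs[index]
        label = re.search(r'D(\d+)', image_path).group(1)
        target = self.class_to_idx[label]  # plain int; collates to shape [batch]
        image = self.transform(Image.open(image_path))
        return image, target

    def __len__(self):
        return len(self.imgs)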

Generating text/csv file for image path and mask path for semantic segmentation

I have a huge set of images (60k) and masks (60k) that need to be loaded into a PyTorch DataLoader for semantic segmentation.
Directory Structure:
- Segmentation
    - images
        - color_left_trajectory_3000_00001.jpg
        - color_left_trajectory_3000_00002.jpg
        - ...
    - masks
        - color_segmentation_3000_00001.jpg
        - color_segmentation_3000_00002.jpg
        - ...
I want to know the most efficient way to load these into a DataLoader in PyTorch. I was thinking of generating a csv file with the paths to the images and masks. How would I go about generating it? Any other suggestions are appreciated!
I recommend making a custom subclass of the Dataset class. In the __init__ function, the paths to the images and masks are generated and saved. This is an example:
import os
import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import functional as F

class CustomData(Dataset):
    def __init__(self, data_dir='Segmentation', data_transform=None, split='train'):
        self.imgs = []
        self.labels = []
        self.transform = data_transform
        self.data_dir = data_dir
        # self.imgs_dir = os.path.join(data_dir, split, 'images')
        # self.labels_dir = os.path.join(data_dir, split, 'masks')
        self.imgs_dir = os.path.join(data_dir, 'images')
        self.labels_dir = os.path.join(data_dir, 'masks')
        for img_name in os.listdir(self.imgs_dir):
            img_path = os.path.join(self.imgs_dir, img_name)
            # map color_left_trajectory_3000_00001.jpg -> color_segmentation_3000_00001.jpg
            label_name = "color_segmentation_" + "_".join(img_name.split('.')[0].split('_')[-2:]) + '.jpg'
            label_path = os.path.join(self.labels_dir, label_name)
            self.imgs.append(img_path)
            self.labels.append(label_path)

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        img = Image.open(self.imgs[idx])
        label = Image.open(self.labels[idx])
        if self.transform is not None:
            img, label = self.transform(img, label)
        return img, label

class ToTensor:
    def __call__(self, image, target=None):
        image = F.to_tensor(image)
        if target is not None:
            target = torch.as_tensor(np.array(target), dtype=torch.int64)
        return image, target

if __name__ == '__main__':
    data = CustomData(data_transform=ToTensor())  # instantiate the transform
    dataloader = DataLoader(data, batch_size=10)
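If a csv file is still preferred, as originally asked, a rough sketch of generating it with pandas might look like this; the column names and the filename-matching rule are assumptions based on the directory listing above:

import os
import pandas as pd

root = 'Segmentation'
rows = []
for img_name in sorted(os.listdir(os.path.join(root, 'images'))):
    # derive the mask name from the trailing "<trajectory>_<frame>" part of the image name
    suffix = "_".join(img_name.split('.')[0].split('_')[-2:])
    mask_name = "color_segmentation_{}.jpg".format(suffix)
    rows.append({
        'image_path': os.path.join(root, 'images', img_name),
        'mask_path': os.path.join(root, 'masks', mask_name),
    })

pd.DataFrame(rows).to_csv('segmentation_pairs.csv', index=False)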

WeightedRandomSampler for a custom image DataLoader in PyTorch

I am trying to address class imbalance by using WeightedRandomSampler with a custom data loader, but I can't find the best way to implement it. The images are in a folder and the labels are in a csv file. The data loader code without the weighted random sampler is given below.
class CassavaDataset(Dataset):
    def __init__(self, df, data_root, transforms=None, output_label=True):
        super().__init__()
        self.df = df.reset_index(drop=True).copy()  # data
        self.transforms = transforms
        self.data_root = data_root
        self.output_label = output_label

    def __len__(self):
        return self.df.shape[0]  # or len(self.df)

    def __getitem__(self, index: int):
        # get labels
        if self.output_label:
            target = self.df.iloc[index]['label']
        path = "{}/{}".format(self.data_root, self.df.iloc[index]['image_id'])
        img = get_img(path)
        if self.transforms:
            img = self.transforms(image=img)['image']
        # do label smoothing
        if self.output_label == True:
            return img, target
        else:
            return img
What is the best way to compute the weight of each class and feed it to the sampler before augmentation? Thanks in advance!
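One common approach, sketched below under the assumption that the csv has a label column as in the code above and that the data_root path is a placeholder: weight each sample by the inverse frequency of its class and hand the per-sample weights to WeightedRandomSampler (note that shuffle must be left off when a sampler is supplied).

import torch
from torch.utils.data import DataLoader, WeightedRandomSampler

# df is the same dataframe passed to CassavaDataset
class_counts = df['label'].value_counts().sort_index()
class_weights = 1.0 / torch.tensor(class_counts.values, dtype=torch.float)

# one weight per sample, looked up from that sample's class
sample_weights = class_weights[torch.tensor(df['label'].values)]

sampler = WeightedRandomSampler(weights=sample_weights,
                                num_samples=len(sample_weights),
                                replacement=True)

train_loader = DataLoader(CassavaDataset(df, data_root='train_images'),
                          batch_size=16, sampler=sampler)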

PyTorch: GPU not fully utilized while training custom model on custom dataset

I'm trying to train my own model, but for some reason my GPU is not fully utilized. Is there any way to solve this?
Here is the snippet of my Dataset.
I have already tried pin_memory=True, but it is still not working.
Environment: torch 1.8 + CUDA 11.1 + RTX 3090
import os
from PIL import Image
from torch.utils.data import Dataset

class ImageDataset(Dataset):
    def __init__(self, data_path, transform=None, image_size=512):
        self.train_data_path = data_path['train']
        self.train_label_path = data_path['label']
        self.train_labels = os.listdir(self.train_label_path)
        self.train_data = os.listdir(self.train_data_path)
        self.transform = transform
        self.image_size = to_2tuple(image_size)  # helper from the original snippet

    def __len__(self):
        return len(self.train_data)

    def __getitem__(self, indx):
        if indx >= len(self.train_data):
            raise Exception("Index should be less than {}".format(len(self.train_data)))
        image = Image.open(os.path.join(self.train_data_path, self.train_data[indx]))
        final_label = Image.open(os.path.join(self.train_label_path, self.train_labels[indx]))
        image = self.transform(image)
        final_label = self.transform(final_label)
        return image, final_label
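Low GPU utilization with a Dataset like this is often an input-pipeline bottleneck: every item does disk I/O and PIL decoding on the main process while the GPU waits. A typical mitigation, sketched here with the dataset above and assumed data_path/transform names, is to parallelize loading with DataLoader workers and overlap the host-to-device copies:

from torch.utils.data import DataLoader

dataset = ImageDataset(data_path, transform=transform)

# num_workers spawns subprocesses so image decoding overlaps GPU compute;
# pin_memory plus non_blocking copies overlap host-to-device transfers.
loader = DataLoader(dataset, batch_size=16, shuffle=True,
                    num_workers=8, pin_memory=True,
                    persistent_workers=True, prefetch_factor=4)

for image, label in loader:
    image = image.cuda(non_blocking=True)
    label = label.cuda(non_blocking=True)
    # ... forward / backward ...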

Connecting slider to Graphics View in PyQt

I'm trying to display image data read from a binary file (I already have code for retrieving this data from a file and storing it as an image for use with QImage()). What I would like to do is connect a slider to a Graphics View widget so that moving the slider steps through the frames and displays the image for that frame (these are echograms ranging from 1 to 500 frames in length). I'm very new to PyQt and was curious how one might even begin doing this?
from PyQt4.QtCore import *
from PyQt4.QtGui import *
import numpy as np

class FileHeader(object):
    fileheader_fields = ("filetype","fileversion","numframes","framerate","resolution","numbeams","samplerate","samplesperchannel","receivergain","windowstart","winlengthsindex","reverse","serialnumber","date","idstring","ID1","ID2","ID3","ID4","framestart","frameend","timelapse","recordInterval","radioseconds","frameinterval","userassigned")
    fileheader_formats = ('S3','B','i4','i4','i4','i4','f','i4','i4','i4','i4','i4','i4','S32','S256','i4','i4','i4','i4','i4','i4','i4','i4','i4','i4','S136')

    def __init__(self, filename, parent=None):
        a = QApplication([])
        filename = str(QFileDialog.getOpenFileName(None, "open file", "C:/vprice/DIDSON/DIDSON Data", "*.ddf"))
        self.infile = open(filename, 'rb')
        dtype = dict(names=self.fileheader_fields, formats=self.fileheader_formats)
        self.fileheader = np.fromfile(self.infile, dtype=dtype, count=1)
        self.fileheader_length = self.infile.tell()
        for field in self.fileheader_fields:
            setattr(self, field, self.fileheader[field])

    def get_frame_first(self):
        frame = Frame(self.infile)
        print self.fileheader
        self.infile.seek(self.fileheader_length)
        print frame.frameheader
        print frame.data

    def __iter__(self):
        self.infile.seek(self.fileheader_length)
        for _ in range(self.numframes):
            yield Frame(self.infile)

    #def close(self):
    #    self.infile.close()

    def display(self):
        print self.fileheader

class Frame(object):
    frameheader_fields = ("framenumber","frametime","version","status","year","month","day","hour","minute","second","hsecond","transmit","windowstart","index","threshold","intensity","receivergain","degc1","degc2","humidity","focus","battery","status1","status2","velocity","depth","altitude","pitch","pitchrate","roll","rollrate","heading","headingrate","sonarpan","sonartilt","sonarroll","latitude","longitude","sonarposition","configflags","userassigned")
    frameheader_formats = ("i4","2i4","S4","i4","i4","i4","i4","i4","i4","i4","i4","i4","i4","i4","i4","i4","i4","i4","i4","i4","i4","i4","S16","S16","f","f","f","f","f","f","f","f","f","f","f","f","f8","f8","f","i4","S60")
    data_format = "uint8"

    def __init__(self, infile):
        dtype = dict(names=self.frameheader_fields, formats=self.frameheader_formats)
        self.frameheader = np.fromfile(infile, dtype=dtype, count=1)
        for field in self.frameheader_fields:
            setattr(self, field, self.frameheader[field])
        ncols, nrows = 96, 512
        self.data = np.fromfile(infile, self.data_format, count=ncols*nrows)
        self.data = self.data.reshape((nrows, ncols))

class QEchogram():
    def __init__(self):
        self.__colorTable = []
        self.colorTable = None
        self.threshold = [50, 255]
        self.painter = None
        self.image = None

    def echogram(self):
        fileheader = FileHeader(self)
        frame = Frame(fileheader.infile)
        echoData = frame.data
        #fileName = fileName
        self.size = [echoData.shape[0], echoData.shape[1]]
        # define the size of the data (and resulting image)
        #size = [96, 512]
        # create a color table for our image
        # first define the colors as RGB triplets
        colorTable = [(255,255,255),
                      (159,159,159),
                      (95,95,95),
                      (0,0,255),
                      (0,0,127),
                      (0,191,0),
                      (0,127,0),
                      (255,255,0),
                      (255,127,0),
                      (255,0,191),
                      (255,0,0),
                      (166,83,60),
                      (120,60,40),
                      (200,200,200)]
        # then create a color table for Qt - this encodes the color table
        # into a list of 32bit integers (4 bytes) where each byte is the
        # red, green, blue and alpha 8 bit values. In this case we don't
        # set alpha so it defaults to 255 (opaque)
        ctLength = len(colorTable)
        self.__ctLength = ctLength
        __colorTable = []
        for c in colorTable:
            __colorTable.append(QColor(c[0], c[1], c[2]).rgb())
        echoData = np.round((echoData - self.threshold[0]) * (float(self.__ctLength) / (self.threshold[1] - self.threshold[0])))
        echoData[echoData < 0] = 0
        echoData[echoData > self.__ctLength - 1] = self.__ctLength - 1
        echoData = echoData.astype(np.uint8)
        self.data = echoData
        # create an image from our numpy data
        image = QImage(echoData.data, echoData.shape[1], echoData.shape[0], echoData.shape[1],
                       QImage.Format_Indexed8)
        image.setColorTable(__colorTable)
        # convert to ARGB
        image = image.convertToFormat(QImage.Format_ARGB32)
        # save the image to file
        image.save(fileName)
        self.image = QImage(self.size[0], self.size[1], QImage.Format_ARGB32)
        self.painter = QPainter(self.image)
        self.painter.drawImage(QRect(0.0, 0.0, self.size[0], self.size[1]), image)

    def getImage(self):
        self.painter.end()
        return self.image

    def getPixmap(self):
        self.painter.end()
        return QPixmap.fromImage(self.image)

if __name__ == "__main__":
    data = QEchogram()
    fileName = "horizontal.png"
    data.echogram()
    dataH = data.data
    print "Horizontal data", dataH
I could give you a more specific answer if you showed what you have tried so far, but for now I will just make assumptions and give you an example.
First, create a QSlider and set its minimum/maximum to the range of images you have available. When you slide it, the sliderMoved signal will fire and tell you what the new value is.
Next, you can create a list containing all of your QPixmap images ahead of time. If these images are huge and you are concerned about memory, you might have to create them on demand using your already coded approach. But we will assume you can put them in a list for now, to make the example easier.
Then you create your QGraphics setup, using a single QGraphicsPixmapItem. This item can have its pixmap replaced on demand.
Putting it all together, you get something like this:
from PyQt4 import QtCore, QtGui

class Widget(QtGui.QWidget):
    def __init__(self, parent=None):
        super(Widget, self).__init__(parent)
        self.resize(640, 480)
        self.layout = QtGui.QVBoxLayout(self)
        self.scene = QtGui.QGraphicsScene(self)
        self.view = QtGui.QGraphicsView(self.scene)
        self.layout.addWidget(self.view)
        self.image = QtGui.QGraphicsPixmapItem()
        self.scene.addItem(self.image)
        self.view.centerOn(self.image)
        self._images = [
            QtGui.QPixmap('Smiley.png'),
            QtGui.QPixmap('Smiley2.png')
        ]
        self.slider = QtGui.QSlider(self)
        self.slider.setOrientation(QtCore.Qt.Horizontal)
        self.slider.setMinimum(0)
        # max is the last index of the image list
        self.slider.setMaximum(len(self._images) - 1)
        self.layout.addWidget(self.slider)
        # set it to the first image, if you want.
        self.sliderMoved(0)
        self.slider.sliderMoved.connect(self.sliderMoved)

    def sliderMoved(self, val):
        print "Slider moved to:", val
        try:
            self.image.setPixmap(self._images[val])
        except IndexError:
            print "Error: No image at index", val

if __name__ == "__main__":
    app = QtGui.QApplication([])
    w = Widget()
    w.show()
    w.raise_()
    app.exec_()
You can see that we set the range of the slider to match your image list; you can change this range at any time if the contents of the list change. When sliderMoved fires, the value is used as an index into the image list to set the pixmap.
I also added a check to our sliderMoved() slot in case your slider range gets out of sync with your image list: if you slide to an index that doesn't exist, it fails gracefully and leaves the existing image.
A lot of the work you are doing (converting image data to QImage, displaying frames with a slider) might be better handled by a library written for this purpose. There are a couple of libraries I can think of that work with PyQt and provide everything you need:
guiqwt
pyqtgraph
(disclaimer: shameless plug)
If you can collect all of the image data into a single 3D numpy array, the code for displaying this in pyqtgraph looks like:
import pyqtgraph as pg
pg.image(imageData)
This would give you a zoomable image display with frame slider and color lookup table controls.
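For completeness, a rough sketch of what that could look like with the Frame class from the question; stacking every frame into one 3D array is the assumption here, and the event-loop call may differ by pyqtgraph version:

import numpy as np
import pyqtgraph as pg
from pyqtgraph.Qt import QtGui

# iterate the FileHeader defined above and stack every 512x96 frame
# into a (numframes, 512, 96) array
fileheader = FileHeader('ignored.ddf')  # the filename is re-chosen via the dialog
imageData = np.array([frame.data for frame in fileheader])

pg.image(imageData)  # image view with a built-in frame slider
QtGui.QApplication.instance().exec_()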
