I am new to PyTorch and working on a GAN model. I want to load my image dataset. This is how it is done using Keras:
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
def load_images(path, size=(128,128)):
    """Load every file in a directory as a resized image array.

    Args:
        path: Directory containing the images; every entry is assumed to be
            an image. A trailing path separator is optional.
        size: Value passed to ``load_img`` as ``target_size``.

    Returns:
        A NumPy array built from the list of per-image arrays (an empty
        array when the directory has no entries).
    """
    # Imported locally: the snippet's header only imports the Keras helpers,
    # so ``listdir``/``asarray`` were never in scope.
    import os
    import numpy as np

    data_list = []
    # enumerate filenames in directory, assume all are images
    for filename in os.listdir(path):
        # load and resize the image; join the parts instead of using `+` so a
        # path without a trailing separator still points inside the directory
        pixels = load_img(os.path.join(path, filename), target_size=size)
        # convert to numpy array and store
        data_list.append(img_to_array(pixels))
    return np.asarray(data_list)
# dataset root; the trailing slash matters because the sub-folder names below
# are appended with plain string concatenation
path = 'mypath/'
# load dataset A (the images under <path>A/)
dataA = load_images(path + 'A/')
# load the images under <path>B/
# NOTE(review): the name `dataAB` does not match the 'B/' folder it reads —
# confirm whether `dataB` was intended
dataAB = load_images(path + 'B/')
I want to know how to do the same in PyTorch.
Any help is appreciated. Thanks
import torchvision, torch
from torchvision import datasets, models, transforms
def load_training(root_path, dir, batch_size, kwargs):
    """Build a shuffled training ``DataLoader`` over an ``ImageFolder``.

    Args:
        root_path: Prefix of the dataset location.
        dir: Suffix appended to ``root_path`` (plain string concatenation)
            to form the ``ImageFolder`` root.
        batch_size: Number of samples per batch.
        kwargs: Dict of extra keyword arguments forwarded to ``DataLoader``.

    Returns:
        A ``torch.utils.data.DataLoader`` that shuffles the data and drops
        the last incomplete batch.
    """
    # Resize, take a random 224 crop, randomly flip, then convert to a tensor.
    augmentations = [
        transforms.Resize([256, 256]),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
    image_folder = datasets.ImageFolder(
        root=root_path + dir,
        transform=transforms.Compose(augmentations),
    )
    return torch.utils.data.DataLoader(
        image_folder,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True,
        **kwargs,
    )
I hope it'll work ...
Related
Please help me out, I am very confused. The dataset contains three folders: train, nuisances, and test. The train folder contains 10 classes such as aeroplane, train, etc. The nuisances folder contains one sub-folder per nuisance type (described as 5, although six are listed here): nuisances = ['shape', 'pose', 'texture', 'context', 'weather', 'occlusion']. Each nuisance sub-folder contains the same 10 classes as the training dataset.
Dataset Structure
- dataset
- train folder
- images
- aeroplane
- train
- etc
- labels.csv
- nuisances folder
-context
- Images
- labels.csv
-occlusion
- Images
- labels.csv
- test folder
- images
- labels.csv
When testing, the test script generates a single .csv file that covers all classes, with one row per image (img_name, preds).
Code
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from torchvision import transforms
from PIL import Image
import torch
import torch.nn as nn
from glob import glob
from pathlib import PurePath
import numpy as np
import timm
import torchvision
import time
# Paths of the iid test images.
# NOTE(review): PoseData runs the same glob itself and this module-level list
# is not referenced by the code shown — confirm it is still needed.
img_list = glob('/media/cvpr/CM_22/ROBINv1.1-cls-pose/iid_test/Images/*.jpg')
# Class names; the prediction code looks a predicted class index up in this
# list, so the order must match the model's output order.
name_list = [
'aeroplane',
'bicycle',
'boat',
'bus',
'car',
'chair',
'diningtable',
'motorbike',
'sofa',
'train'
]
# conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 -c pytorch
class PoseData(Dataset):
    """Dataset yielding ``(transformed_image, file_name)`` for each ``*.jpg``.

    The data folder should look like
        - datafolder
            - Images
            - labels.csv
    Only the ``Images`` directory is read here; ``labels.csv`` is not used
    by this class.
    """

    # Directory scanned when no ``image_dir`` is passed (the location that
    # was previously hard-coded).
    DEFAULT_IMAGE_DIR = '/media/cvpr/CM_22/ROBINv1.1-cls-pose/iid_test/Images'

    def __init__(self, transforms, image_dir=None) -> None:
        """Collect and sort the image paths.

        Args:
            transforms: Callable applied to every loaded PIL image.
            image_dir: Directory holding the ``*.jpg`` files. Defaults to
                ``DEFAULT_IMAGE_DIR``; pass another ``Images`` directory
                (e.g. one per nuisance folder) to evaluate a different split.
        """
        super().__init__()
        if image_dir is None:
            image_dir = self.DEFAULT_IMAGE_DIR
        pattern = str(PurePath(image_dir) / '*.jpg')
        # Sort by file name without its 4-character extension so the sample
        # order is deterministic regardless of the order glob returns.
        self.img_list = sorted(glob(pattern), key=lambda x: PurePath(x).parts[-1][:-4])
        self.trs = transforms

    def __len__(self):
        """Return the number of images found."""
        return len(self.img_list)

    def __getitem__(self, index):
        """Return ``(transformed image, file name)`` for the given index."""
        image_path = self.img_list[index]
        image_name = PurePath(image_path).parts[-1]
        # Force three channels so grayscale/RGBA/CMYK files do not break a
        # 3-channel transform pipeline; the context manager closes the file
        # once the pixel data has been copied by convert().
        with Image.open(image_path) as raw:
            image = raw.convert('RGB')
        image = self.trs(image)
        return image, image_name
if __name__ == "__main__":
    # Inference script: predict a class name for every test image and write
    # the (file name, predicted class) pairs to a CSV file.

    # Per-channel statistics (the commonly used ImageNet mean/std values).
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    tfs = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ])

    model = timm.models.resnet152(pretrained=True, num_classes=10)
    #model = torch.nn.DataParallel(model)
    # NOTE(review): strict=False ignores missing/unexpected keys without
    # raising, so a checkpoint whose key names do not match would leave the
    # pretrained weights in place — confirm the checkpoint really loads.
    model.load_state_dict(torch.load('resnet152_best.pth.tar')['state_dict'], strict=False)
    model = model.cuda()
    # Evaluation mode only needs to be set once, before the loop.
    model.eval()

    dataset = PoseData(tfs)
    loader = DataLoader(dataset, batch_size=128, shuffle=False, drop_last=False, num_workers=4)

    image_dir = []
    preds = []
    with torch.no_grad():
        for image, pth in loader:
            # Extend flat lists instead of nesting per-batch lists that have
            # to be flattened with sum(..., []) / np.concatenate afterwards.
            image_dir.extend(pth)
            logits = model(image.cuda())
            class_ids = torch.argmax(logits[:, :10], dim=1)
            preds.extend(name_list[class_id] for class_id in class_ids.tolist())
    # Total number of predictions made.
    print(len(preds))

    image_dir = np.array(image_dir)
    preds = np.array(preds)
    csv = {'imgs': image_dir, 'pred': preds}
    csv = pd.DataFrame(csv)
    print(csv)
    csv.to_csv('evaluation/cls_ref/res/iid.csv', index=False)
The iid.csv is completely fine, but I also need a separate .csv file for each nuisance in nuisances = ['shape', 'pose', 'texture', 'context', 'weather', 'occlusion'] so that I can evaluate the accuracy on each nuisance type. I have no idea how to build the logic for this task.
I am running predictions on images. I have listed all the class names, and the test folder contains 20 images. Please give me a hint as to why I am getting this error. How can we check the output indices of the model?
Code
import numpy as np
import sys, random
import torch
from torchvision import models, transforms
from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt
import glob
# Paths for image directory and model
IMDIR = './test'
MODEL = 'checkpoint/resnet18/Monday_31_May_2021_21h_25m_05s/resnet18-1000-regular.pth'

# Class labels for prediction
class_names = ['BC', 'BK', 'CC', 'CL', 'CM', 'DF', 'DG', 'DS', 'HL', 'IF', 'JD', 'JS', 'LD', 'LP', 'LS', 'PO', 'RI',
               'SD', 'SG', 'TO']

# Enable gpu mode, if cuda available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the model for testing.
# The default resnet18 head does not have len(class_names) outputs, so the
# predicted index can fall outside class_names (the reported IndexError);
# resize the final layer to one output per class.
model = models.resnet18()
model.fc = torch.nn.Linear(model.fc.in_features, len(class_names))
# Load the trained weights. The previous torch.save(model.state_dict, MODEL)
# did the opposite: it overwrote the checkpoint file and left the model
# randomly initialised.
# NOTE(review): assumes MODEL holds a state_dict whose keys match a
# torchvision resnet18 with this head — confirm against the training script.
model.load_state_dict(torch.load(MODEL, map_location=device))
model = model.to(device)
model.eval()

# Configure plots
fig = plt.figure(figsize=(9, 9))
rows, cols = 3, 3

# Retrieve up to rows * cols random images from the directory (fewer when the
# directory holds fewer files, instead of failing inside random.sample)
files = sorted(p for p in Path(IMDIR).resolve().glob('*.*') if p.is_file())
images = random.sample(files, min(rows * cols, len(files)))
print(images)

# Preprocessing transformations
preprocess = transforms.Compose([
    transforms.Resize((256, 256)),
    # transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(0.5306, 0.1348)
])

# Perform prediction and plot results
with torch.no_grad():
    for num, img in enumerate(images):
        img = Image.open(img).convert('RGB')
        # Add the batch dimension and run on the same device as the model.
        inputs = preprocess(img).unsqueeze(0).to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        print(preds)
        # preds is a 1-element tensor; convert it to a plain int before
        # using it as a list index.
        label = class_names[preds.item()]
        plt.subplot(rows, cols, num + 1)
        plt.title("Pred: " + label)
        plt.axis('off')
        plt.imshow(img)

# Display the assembled grid (nothing is shown without this in a script).
plt.show()
'''
Sample run: python test.py test
'''
Traceback
Traceback (most recent call last):
File "/media/khawar/HDD_Khawar/CVPR/pytorch-cifar100/test_box.py", line 57, in <module>
label = class_names[preds]
IndexError: list index out of range
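Your error stems from the fact that you never modify the final linear (fully connected) layer of your resnet model: models.resnet18() still has its default number of outputs, which is larger than your 20 class names, so the predicted index can fall outside the list.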
I suggest adding this code:
# What you have: resnet18 with its default classification head
model = models.resnet18()
# What you need: replace the final fully connected layer so that it has one
# output per entry in class_names.
# NOTE: this uses `nn`, so `import torch.nn as nn` is required — the imports
# in the question do not include it.
model.fc = nn.Sequential(
nn.Linear(model.fc.in_features, len(class_names)))
This replaces the last linear layer with one that outputs the correct number of nodes (one per class).
Sarthak
Here is an autoencoder I created from Pytorch tutorials :
import datetime as dt

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.utils.data as data_utils
import torchvision
from pylab import plt
from torch.autograd import Variable

# Number of passes over the (tiny) dataset.
epochs = 1000

# matplotlib 3.6 renamed the bundled 'seaborn' style to 'seaborn-v0_8' and
# later releases dropped the old name, so use whichever one is available.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

cuda = torch.cuda.is_available()
FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

# Three samples with three features each.
features = torch.tensor(np.array([ [1,2,3],[1,2,3],[100,200,500] ]))
print(features)

# NOTE: despite its name, `batch` is the width of the encoded representation
# (the encoder maps 3 -> batch, the decoder batch -> 3); the loader's actual
# batch size is 2.
batch = 10
data_loader = torch.utils.data.DataLoader(features, batch_size=2, shuffle=False)

encoder = nn.Sequential(nn.Linear(3,batch), nn.Sigmoid())
decoder = nn.Sequential(nn.Linear(batch,3), nn.Sigmoid())
autoencoder = nn.Sequential(encoder, decoder)
if cuda:
    # The inputs are cast to a CUDA tensor type below, so the parameters must
    # live on the GPU as well (this also moves encoder and decoder, which are
    # sub-modules of autoencoder).
    autoencoder = autoencoder.cuda()

optimizer = torch.optim.Adam(params=autoencoder.parameters(), lr=0.001)

# Train the autoencoder to reconstruct its input.
for i in range(epochs):
    for j, images in enumerate(data_loader):
        # Plain tensors are used directly; the Variable wrapper has been a
        # deprecated no-op since PyTorch 0.4.
        images = images.type(FloatTensor)
        optimizer.zero_grad()
        reconstructions = autoencoder(images)
        # Distance (2-norm by default) between input and reconstruction.
        loss = torch.dist(images, reconstructions)
        loss.backward()
        optimizer.step()

# Encode every batch with the trained encoder.
encoded_images = []
for j, images in enumerate(data_loader):
    images = images.view(images.size(0), -1)
    images = images.type(FloatTensor)
    encoded_images.append(encoder(images))
I can see that the encoded images do have a newly created dimension of 10. To understand the matrix operations going on under the hood, I'm attempting to print the matrix dimensions of the encoder and decoder, but shape is not available on nn.Sequential.
How to print the matrix dimensions of nn.Sequential ?
A nn.Sequential is not a "layer", but rather a "container". It can store several layers and manage their execution (and some other functionalities).
In your case, each nn.Sequential holds both the linear layer and the non-linear nn.Sigmoid activation. To get the shape of the weights of the first layer in a nn.Sequential you can simply do:
# Index 0 selects the nn.Linear inside the container; nn.Linear stores its
# weight as (out_features, in_features)
encoder[0].weight.shape
I'm trying to visualize the training plot of my model, and I get this error:
Traceback (most recent call last):
File "train.py", line 120, in <module>
plt.plot(H.history['acc'])
KeyError: 'acc'
& here's the Full Code :
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")
# import the necessary packages
from pyimagesearch.resnet import ResNet
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.utils import np_utils
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import cv2
import os
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
# --dataset is mandatory: the directory that is scanned for images
ap.add_argument("-d", "--dataset", required=True,
help="path to input dataset")
# --augment is an int flag: augmentation is enabled only for values > 0,
# so the default of -1 leaves it off
ap.add_argument("-a", "--augment", type=int, default=-1,
help="whether or not 'on the fly' data augmentation should be used")
# --plot is the file the loss/accuracy figure is saved to
ap.add_argument("-p", "--plot", type=str, default="plot.png",
help="path to output loss/accuracy plot")
# vars() turns the parsed Namespace into a dict, hence the args["..."] lookups
args = vars(ap.parse_args())
# initialize the initial learning rate, batch size, and number of
# epochs to train for
INIT_LR = 1e-1
BS = 8
EPOCHS = 50
# grab the list of images in our dataset directory, then initialize
# the list of data (i.e., images) and class labels
print("[INFO] loading images...")
imagePaths = list(paths.list_images(args["dataset"]))
data = []
labels = []

# loop over the image paths
for imagePath in imagePaths:
    # extract the class label from the name of the directory that contains
    # the image, then load the image
    label = imagePath.split(os.path.sep)[-2]
    image = cv2.imread(imagePath)
    # cv2.imread returns None (rather than raising) for files it cannot
    # decode; skip those instead of crashing inside cv2.resize
    if image is None:
        print("[WARN] skipping unreadable image: {}".format(imagePath))
        continue
    # resize to a fixed 64x64 pixels, ignoring aspect ratio
    image = cv2.resize(image, (64, 64))

    # update the data and labels lists, respectively
    data.append(image)
    labels.append(label)

# convert the data into a NumPy array, then preprocess it by scaling
# all pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.0

# encode the labels (which are currently strings) as integers and then
# one-hot encode them; the number of classes is taken from the encoder so
# the one-hot width and the network head below cannot drift apart
le = LabelEncoder()
labels = le.fit_transform(labels)
numClasses = len(le.classes_)
labels = np_utils.to_categorical(labels, numClasses)

# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data, labels,
    test_size=0.25, random_state=42)

# initialize our data augmenter as an "empty" image data generator
aug = ImageDataGenerator()

# check to see if we are applying "on the fly" data augmentation, and
# if so, re-instantiate the object
if args["augment"] > 0:
    print("[INFO] performing 'on the fly' data augmentation")
    aug = ImageDataGenerator(
        rotation_range=20,
        zoom_range=0.15,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.15,
        horizontal_flip=True,
        fill_mode="nearest")

# initialize the optimizer and model
print("[INFO] compiling model...")
opt = SGD(lr=INIT_LR, momentum=0.9, decay=INIT_LR / EPOCHS)
# NOTE(review): the 4th argument is presumably the number of output classes
# (it was hard-coded to 2 while the labels were one-hot encoded with 3
# columns) — confirm against pyimagesearch.resnet.ResNet.build
model = ResNet.build(64, 64, 3, numClasses, (2, 3, 4),
    (32, 64, 128, 256), reg=0.0001)
model.compile(loss="categorical_crossentropy", optimizer=opt,
    metrics=["accuracy"])

# train the network
print("[INFO] training network for {} epochs...".format(EPOCHS))
H = model.fit_generator(
    aug.flow(trainX, trainY, batch_size=BS),
    validation_data=(testX, testY),
    steps_per_epoch=len(trainX) // BS,
    epochs=EPOCHS)
# evaluate the network
print("[INFO] evaluating network...")
predictions = model.predict(testX, batch_size=BS)
print(classification_report(testY.argmax(axis=1),
    predictions.argmax(axis=1), target_names=le.classes_))

# Depending on the Keras version the accuracy metric is recorded under
# "acc" or "accuracy" (and "val_acc" / "val_accuracy"); use whichever key is
# present instead of failing with KeyError: 'acc'.
acc_key = "accuracy" if "accuracy" in H.history else "acc"

# plot the training loss and accuracy; the x axis follows the number of
# epochs actually recorded in the history
N = np.arange(0, len(H.history["loss"]))
plt.style.use("ggplot")
plt.figure()
plt.plot(N, H.history["loss"], label="train_loss")
plt.plot(N, H.history['val_loss'], label="val_loss")
plt.plot(N, H.history[acc_key], label="accuracy")
plt.plot(N, H.history["val_" + acc_key], label="val_acc")
plt.title("Training Loss and Accuracy on Dataset")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
plt.savefig(args["plot"])
This code is originally from the PyImageSearch blog post on data augmentation, and I decided to give it a try. The original "pipeline" was meant for binary classification; since I was interested in a multi-class "task", I made some changes.
But the plotting part of the code seems correct to me. I checked multiple posts here on Stack Overflow and the Keras documentation and found nothing; I do not understand why it does not work!
any suggestion would be much appreciated .
thank you.
Have you tried H.history['accuracy']? Since you compiled using 'accuracy' it will probably have the same string.
Now you can always inspect what you've got:
# List the name of every metric recorded during training, one per line.
for metric_name in H.history:
    print(metric_name)
You will see what is logged there
I'm using Keras and ResNet-101 for my training and want to export my model to TensorFlow Serving with the method exporter.export_inference_graph, but it gives me this error:
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value conv2_block2_1_bn/moving_variance/local_step_1
Is there any specific reason that you want to Save the Model using export_saved_model?
If your Goal is to Save the pretrained model, resnet and perform inference using Tensorflow Serving, you can do it using the code mentioned below:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras import Model
# ImageNet-pretrained ResNet50 without its classification head, for 224x224 RGB input
my_resnet = ResNet50(weights='imagenet', include_top=False, input_shape=(224,224,3))
# Add a global average pooling layer on top of the backbone output
x = my_resnet.output
x = GlobalAveragePooling2D()(x)
# Add an output layer: 5-way softmax
my_resnet_output = Dense(5, activation='softmax')(x)
# Combine backbone and new head into a single model
my_resnet_model = Model(inputs=my_resnet.input, outputs=my_resnet_output)
# Save the model under the path 'my_flowers'.
# NOTE(review): TensorFlow Serving normally expects a numeric version
# sub-directory (e.g. 'my_flowers/1') — confirm the layout the server loads.
my_resnet_model.save('my_flowers')
The last line of code saves the model in the TensorFlow SavedModel (.pb) format.
Now we need to write the code for the client file, and then we can perform inference using TensorFlow Serving.
import grpc
import requests
import tensorflow as tf
import cv2
import os
import numpy as np
def main(class_indices=None):
    """Send one image to a TensorFlow Serving REST endpoint and print the result.

    Reads 'daisy.jpg', resizes and rescales it, posts it to the predict
    endpoint and prints the predicted class.

    Args:
        class_indices: Optional mapping of class name -> output index of the
            served model. When given, the name(s) mapped to the predicted
            index are printed; when omitted, the predicted index itself is
            printed (the mapping was previously an undefined name, `dicti`).

    Raises:
        FileNotFoundError: If 'daisy.jpg' cannot be read.
        requests.HTTPError: If the server answers with an error status.
    """
    import json

    # Side length the image is resized to; matches the 224x224x3 shape the
    # request is reshaped to below (IMG_SIZE was previously undefined).
    IMG_SIZE = 224

    img_array = cv2.imread('daisy.jpg')
    # cv2.imread returns None instead of raising when the file is unreadable.
    if img_array is None:
        raise FileNotFoundError("could not read image 'daisy.jpg'")
    new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
    # Scale pixel values to [0, 1].
    new_array = new_array / 255

    data = json.dumps(
        {"signature_name": "serving_default", "instances": new_array.reshape(-1, IMG_SIZE, IMG_SIZE, 3).tolist()})
    print('Data: {} ... {}'.format(data[:50], data[len(data) - 52:]))

    headers = {"content-type": "application/json"}
    json_response = requests.post('http://35.226.32.128/v1/models/test0221/versions/1:predict', data=data, headers=headers)
    # Fail loudly on HTTP errors instead of with a KeyError on 'predictions'.
    json_response.raise_for_status()
    predictions = json.loads(json_response.text)['predictions']
    # Index of the highest score for the single image that was sent.
    predicted_index = int(np.argmax(predictions[0]))

    if class_indices is None:
        print(predicted_index)
        return
    for flower, label in class_indices.items():
        if label == predicted_index:
            print(flower)


if __name__ == '__main__':
    main()