How to determine the optimal number of "Steps" and "Batch Size" for test dataset in Keras ImageDataGenerator? - python-3.x

I have trained an image similarity network. The network is designed to distinguish between similar/dissimilar pairs of images, where a pair consists of a camera image and its corresponding sketch image.
The test dataset contains 4 image directories (camera_positive, sketch_positive, camera_negative, sketch_negative).
I am facing a problem while evaluating the performance of the network on the test dataset.
As the test dataset is too large to fit into memory, I decided to use Keras ImageDataGenerator.
I implemented the following code. Each directory contains 20 images (for small demonstration).
Therefore, in total 80 images and 40 predictions.
Since ImageDataGenerator offers the option to save the generated images, I used the save_to_dir parameter, as can be seen in the following code, to verify that it works correctly.
Each directory contains 20 images; therefore, I expected that after running the predictions it would save the same 20 images to each of the specified directories.
After running the code, it generates 31 images in each folder instead of 20!
I played around with different step sizes, but none gives accurate results.
What is wrong with this code? Please suggest!
import os
import numpy as np
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator

batch_size = 1
image_size = 224
class_mode = None

"""
c_pos/neg: camera positive/neg image
s_pos/neg: sketch positive/neg image
"""
c_pos = r"testing\c_pos"
c_neg = r"testing\c_neg"
s_pos = r"testing\s_pos"
s_neg = r"testing\s_neg"

datagen_constructor = ImageDataGenerator()

def initialize_generator(generator, c_pos, s_pos, c_neg, s_neg):
    camera_pos = generator.flow_from_directory(
        c_pos,
        target_size=(image_size, image_size),
        color_mode="rgb",
        batch_size=batch_size,
        class_mode=class_mode,
        shuffle=False,
        seed=7,
        save_to_dir='results/c_pos',
        save_format='jpeg',
        save_prefix='CPOS'
    )
    sketch_pos = generator.flow_from_directory(
        s_pos,
        target_size=(image_size, image_size),
        color_mode="rgb",
        batch_size=batch_size,
        class_mode=class_mode,
        shuffle=False,
        seed=7,
        save_to_dir='results/s_pos',
        save_format='jpeg',
        save_prefix='SPOS'
    )
    camera_neg = generator.flow_from_directory(
        c_neg,
        target_size=(image_size, image_size),
        color_mode="rgb",
        batch_size=batch_size,
        class_mode=class_mode,
        shuffle=False,
        seed=7,
        save_to_dir='results/c_neg',
        save_format='jpeg',
        save_prefix='CNEG'
    )
    sketch_neg = generator.flow_from_directory(
        s_neg,
        target_size=(image_size, image_size),
        color_mode="rgb",
        batch_size=batch_size,
        class_mode=class_mode,
        shuffle=False,
        seed=7,
        save_to_dir='results/s_neg',
        save_format='jpeg',
        save_prefix='SNEG'
    )
    while True:
        camerapos = np.expand_dims(camera_pos.next(), axis=0)
        sketchpos = np.expand_dims(sketch_pos.next(), axis=0)
        cameraneg = np.expand_dims(camera_neg.next(), axis=0)
        sketchneg = np.expand_dims(sketch_neg.next(), axis=0)
        camera = np.concatenate((camerapos[0], cameraneg[0]))
        sketch = np.concatenate((sketchpos[0], sketchneg[0]))
        camera = np.asarray(list(camera), dtype=np.float32)
        sketch = np.asarray(list(sketch), dtype=np.float32)
        yield [camera, sketch]

test_datagen = initialize_generator(datagen_constructor, c_pos, s_pos, c_neg, s_neg)

# Load pre-trained model
model = load_model("model.h")

# Evaluating network performance on test dataset
predict = model.predict_generator(test_datagen, steps=20)

You could manually iterate through each folder and make a prediction like this:
import os
import cv2
import numpy as np
from keras.models import load_model

model = load_model("model.h")
image_paths = [entry.path for entry in os.scandir(path_to_my_folder)]
for image_path in image_paths:
    image = cv2.imread(image_path)
    image = cv2.resize(image, (224, 224))  # resize to the model's expected input size
    image_to_predict = np.expand_dims(image, axis=0)  # add the batch dimension; Keras only predicts on batches, here a batch of size 1
    prediction = model.predict(image_to_predict)
Then, you could compare each prediction with the ground truth label you know it belongs to.
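As a side note on the original question: with the generator approach, steps should be ceil(num_samples / batch_size), so that every sample is consumed exactly once. The 31 saved images are most likely a prefetching artifact: predict_generator fills a background queue (max_queue_size defaults to 10), so the generators advance further, and save_to_dir writes more batches, than the 20 steps actually predicted on. A minimal sketch of the steps computation, reusing the model and test_datagen from the question:
import math

num_samples = 20   # images per directory in this small demonstration
batch_size = 1

steps = math.ceil(num_samples / batch_size)   # 20 here: consume each sample exactly once

predict = model.predict_generator(test_datagen, steps=steps)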

Related

Predicting single image using Tensorflow not being accurate

I'm trying to build a CNN model to classify an image, but whenever training is done and I try to feed it a single image (even one from the training dataset), it always misclassifies it.
Please take a look at the code I wrote below.
Thank you in advance.
First, I declared an Image Data Generator for both my training and testing sets:
train_datagen = ImageDataGenerator(rescale=1./255, rotation_range=20,
                                   horizontal_flip=True, validation_split=0.3)
test_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.3)
Then, I used the flow_from_directory() function to load the images:
train_generator = train_datagen.flow_from_directory(
    data_dir,
    shuffle=False,
    subset='training',
    target_size=(224, 224),
    class_mode='categorical'
)
test_generator = test_datagen.flow_from_directory(
    data_dir,
    shuffle=False,
    subset='validation',
    target_size=(224, 224),
    class_mode='categorical'
)
I then loaded a pretrained model and added a few layers to build my model:
pretrained_model = VGG16(weights="imagenet", include_top=False,
                         input_tensor=input_shape)
pretrained_model.trainable = False

model = tf.keras.Sequential([
    pretrained_model,
    Flatten(name="flatten"),
    Dense(3, activation="softmax")
])
I then trained the model :
INIT_LR = 3e-4
EPOCHS = 15

opt = Adam(lr=INIT_LR)
model.compile(loss="categorical_crossentropy", optimizer='Adam', metrics=["accuracy"])

H = model.fit(
    train_generator,
    validation_data=test_generator,
    epochs=EPOCHS,
    verbose=1)
Then came the part to predict a single image:
I chose an image that was part of the training set, and I even overfitted the model to make sure the predictions should be correct, but it gave me wrong results for every image I fed to the model.
I tried the following ways:
image = image.load_img(url,target_size = (224, 224))
img = tf.keras.preprocessing.image.img_to_array(image)
img = np.array([img])
img = img.astype('float32') / 255.
img = tf.keras.applications.vgg16.preprocess_input(img)
This didn't work
image = cv2.imread(url)
image = cv2.normalize(image, None,beta=255, dtype=cv2.CV_32F)
image = cv2.resize(image, (224, 224))
image = np.expand_dims(image, axis=0)
This didn't work either; I also tried many other ways to predict a single image, but none worked.
Finally, the only way that worked was to create an ImageDataGenerator and flow_from_directory for this single image, but I believe that's not how it should be done.
The line img = tf.keras.applications.vgg16.preprocess_input(img) expects the original pixel values to be in the range 0 to 255; for VGG16 it converts RGB to BGR and subtracts the ImageNet channel means. In the previous line of code
img = img.astype('float32') / 255.
you already rescaled the pixels, so the input no longer matches what preprocess_input expects; remove that line. Now to predict a single image you need to expand the dimensions with
img = np.expand_dims(img, axis=0)
In your second code effort be aware the CV2 reads in images as BGR. If your model was trained on RGB images then your predictions will be wrong. Use the code below to convert the image to RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
As a side note, if your model was trained on images scaled to the range -1 to +1, you can use the function below as the preprocessing instead. Note that this is not equivalent to VGG16's own preprocess_input; whichever preprocessing you pick at prediction time must match what was used during training.
def scalar(img):
    return img / 127.5 - 1
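To keep training and single-image prediction consistent, here is a minimal sketch (the generator arguments simply mirror the question's setup) of wiring such a function into the pipeline via ImageDataGenerator's preprocessing_function parameter:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Apply the same scaling during training and at prediction time.
train_datagen = ImageDataGenerator(preprocessing_function=scalar,
                                   rotation_range=20,
                                   horizontal_flip=True,
                                   validation_split=0.3)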
This answer could be one starting point:
Resnet50 produces different prediction when image loading and resizing is done with OpenCV
These are possible differences (short gist):
RGB vs BGR (OpenCV loads BGR)
The interpolation method used (INTER_LINEAR vs INTER_NEAREST).
img_to_array() transforms the data type into float32 rather than uint8 which is obtained by default when loading with OpenCV.
tf.keras.applications.vgg16.preprocess_input(img): this preprocessing function can differ from the manual preprocessing you wrote above. Note also that if you did not preprocess the training images in this particular way (with preprocess_input()), poor results on the test set are to be expected, since the two preprocessings differ.
Hope these observations shed some light.
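To make these differences concrete, here is a minimal sketch (the file path and target size are placeholders) of loading with OpenCV so that it lines up with Keras's load_img/img_to_array pipeline:
import cv2
import numpy as np

image = cv2.imread("some_image.jpg")                 # OpenCV loads BGR, dtype uint8
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)       # convert to the RGB order Keras uses
image = cv2.resize(image, (224, 224),
                   interpolation=cv2.INTER_NEAREST)  # Keras load_img defaults to nearest interpolation
image = image.astype("float32")                      # img_to_array() returns float32
image = np.expand_dims(image, axis=0)                # add the batch dimension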

Low Validation Score on Pretrained Alexnet from Pytorch models for ImageNet 2012 dataset

I am using pre-trained AlexNet network to validate some prior work.
The code is as follows:
import os
import torch
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms

model = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=True)
model.eval()

batchsize = 50000
workers = 1
dataset_path = 'data/imagenet_2012/'

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
val_data = datasets.ImageFolder(
    root=os.path.join(dataset_path, 'val'),
    transform=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ]))
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batchsize, num_workers=workers)

batch = next(iter(val_loader))
images, labels = batch
with torch.no_grad():
    output = model(images)
    for i in output:
        out_soft = torch.nn.functional.softmax(i, dim=0)
        print(int(torch.argmax(out_soft)))
When I execute this and compare with ILSVRC2012_validation_ground_truth.txt, I get a top-1 accuracy of only 5%.
What am I doing wrong here?
Thank you.
So, PyTorch/Caffe have their own "ground truth" files, which can be obtained from here:
https://gist.github.com/ksimonyan/fd8800eeb36e276cd6f9#note
I manually tested the images in the validation folder of the ImageNet dataset against the val.txt file in the tar file provided at the link above to verify the order.
Update:
New validation accuracy based on the ground truth in the zip file at the link:
Top_1 = 56.522%
Top_5 = 79.066%
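For reference, a minimal sketch of that evaluation (assuming ground_truth is a tensor of the corrected label indices, aligned with the loader's file order, and a smaller batch size than in the question):
import torch

correct_top1, correct_top5, total = 0, 0, 0
with torch.no_grad():
    for images, _ in val_loader:                     # folder-derived labels are ignored here
        targets = ground_truth[total:total + images.size(0)]
        output = model(images)
        _, top5 = output.topk(5, dim=1)              # indices of the 5 highest logits
        correct_top1 += (top5[:, 0] == targets).sum().item()
        correct_top5 += (top5 == targets.unsqueeze(1)).any(dim=1).sum().item()
        total += images.size(0)

print("Top_1 = %.3f%%  Top_5 = %.3f%%" % (100.0 * correct_top1 / total,
                                          100.0 * correct_top5 / total))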

batching huge data in tensorflow

I am trying to perform binary classification using the code/tutorial from
https://github.com/eisenjulian/nlp_estimator_tutorial/blob/master/nlp_estimators.py
print("Loading data...")
(x_train_variable, y_train), (x_test_variable, y_test) = imdb.load_data(num_words=vocab_size)
print(len(y_train), "train sequences")
print(len(y_test), "test sequences")
print("Pad sequences (samples x time)")
x_train = sequence.pad_sequences(x_train_variable,
maxlen=sentence_size,
padding='post',
value=0)
x_test = sequence.pad_sequences(x_test_variable,
maxlen=sentence_size,
padding='post',
value=0)
print("x_train shape:", x_train.shape)
print("x_test shape:", x_test.shape)
def train_input_fn():
dataset = tf.data.Dataset.from_tensor_slices((x_train, x_len_train, y_train))
dataset = dataset.shuffle(buffer_size=len(x_train_variable))
dataset = dataset.batch(100)
dataset = dataset.map(parser)
dataset = dataset.repeat()
iterator = dataset.make_one_shot_iterator()
return iterator.get_next()
def eval_input_fn():
dataset = tf.data.Dataset.from_tensor_slices((x_test, x_len_test, y_test))
dataset = dataset.batch(100)
dataset = dataset.map(parser)
iterator = dataset.make_one_shot_iterator()
return iterator.get_next()
def cnn_model_fn(features, labels, mode, params):
input_layer = tf.contrib.layers.embed_sequence(
features['x'], vocab_size, embedding_size,
initializer=params['embedding_initializer'])
training = mode == tf.estimator.ModeKeys.TRAIN
dropout_emb = tf.layers.dropout(inputs=input_layer,
rate=0.2,
training=training)
conv = tf.layers.conv1d(
inputs=dropout_emb,
filters=32,
kernel_size=3,
padding="same",
activation=tf.nn.relu)
# Global Max Pooling
pool = tf.reduce_max(input_tensor=conv, axis=1)
hidden = tf.layers.dense(inputs=pool, units=250, activation=tf.nn.relu)
dropout_hidden = tf.layers.dropout(inputs=hidden,
rate=0.2,
training=training)
logits = tf.layers.dense(inputs=dropout_hidden, units=1)
# This will be None when predicting
if labels is not None:
labels = tf.reshape(labels, [-1, 1])
optimizer = tf.train.AdamOptimizer()
def _train_op_fn(loss):
return optimizer.minimize(
loss=loss,
global_step=tf.train.get_global_step())
return head.create_estimator_spec(
features=features,
labels=labels,
mode=mode,
logits=logits,
train_op_fn=_train_op_fn)
cnn_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,
model_dir=os.path.join(model_dir, 'cnn'),
params=params)
train_and_evaluate(cnn_classifier)
The example here loads data from IMDB movie reviews. I have my own dataset in the form of text, which is approximately 2 GB. Now, in this example the line
(x_train_variable, y_train), (x_test_variable, y_test) = imdb.load_data(num_words=vocab_size) tries to load the whole dataset into memory. If I try to do the same, I run out of memory. How can I restructure this logic to read data in batches from my disk?
You want to change the dataset = tf.data.Dataset.from_tensor_slices((x_train, x_len_train, y_train)) line. There are lots of ways of creating a dataset - from_tensor_slices is the easiest, but it won't work on its own if you can't load the entire dataset into memory.
The best way depends on how you have the data stored, or how you want to store and manipulate it. The simplest option in my opinion, with very little downside (unless you are running on multiple GPUs), is to have the original dataset just yield indices into the data and to write a normal numpy function that loads the ith example (a hypothetical loader is sketched after the code below).
dataset = tf.data.Dataset.from_tensor_slices(tf.range(epoch_size))

def tf_map_fn(i):
    def np_map_fn(i):
        return load_ith_example(i)

    inp1, inp2 = tf.py_func(np_map_fn, (i,), Tout=(tf.float32, tf.float32), stateful=False)
    # other preprocessing/data augmentation goes here.

    # unbatched sizes
    inp1.set_shape(shape1)
    inp2.set_shape(shape2)
    return inp1, inp2

dataset = dataset.repeat().shuffle(epoch_size).map(tf_map_fn, 8)
dataset = dataset.batch(batch_size)
dataset = dataset.prefetch(1)  # start loading data as GPU trains on previous batch
inp1, inp2 = dataset.make_one_shot_iterator().get_next()
Here I assume your outputs are float32 tensors (Tout=...). The set_shape calls aren't strictly necessary, but if you know the shapes, they enable better error checking.
So long as your preprocessing doesn't take longer than your network to run, this should run just as fast as any other method on a single GPU machine.
The other obvious way is to convert your data to tfrecords, but that'll take up more space on disk and is more of a pain to manage if you ask me.
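For completeness, load_ith_example is a function you would write yourself; a hypothetical version (the one-.npy-file-per-example layout is purely an assumption for illustration) could look like:
import numpy as np

def load_ith_example(i):
    # Hypothetical on-disk layout: one pre-padded example per .npy file.
    x = np.load("data/x_%06d.npy" % i).astype(np.float32)
    y = np.load("data/y_%06d.npy" % i).astype(np.float32)
    return x, y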

Using Model file of convolutional neural network

I have trained my model and saved it using model.save.
How can I use the model file to predict images?
I used this article How to predict input image using trained model in Keras? and used this code:
# Modify 'test1.jpg' and 'test2.jpg' to the images you want to predict on
from keras.models import load_model
from keras.preprocessing import image
import numpy as np
# dimensions of our images
img_width, img_height = 320, 240
# load the model we saved
model = load_model('model1.h5')
model.compile(loss='binary_crossentropy',
optimizer='rmsprop',
metrics=['accuracy'])
# predicting images
img = image.load_img('yes.jpeg', target_size=(img_width, img_height))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
images = np.vstack([x])
classes = model.predict_classes(images, batch_size=10)
print(classes)
# predicting multiple images at once
##img = image.load_img('yes.jpeg', target_size=(img_width, img_height))
##y = image.img_to_array(img)
##y = np.expand_dims(y, axis=0)
# pass the list of multiple images np.vstack()
##images = np.vstack([x, y])
##classes = model.predict_classes(images, batch_size=10)
# print the classes, the images belong to
print(classes)
print(classes[0])
print(classes[0][0])
but the result is
[[1]]
[[1]]
[1]
1
How can I convert this into class indices?
Do not recompile your model unless you want to train it again; simply load your model and then predict.
Compiling is only needed for training; it is not required for inference.
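A minimal sketch following the question's own setup (file names and dimensions taken from the question):
from keras.models import load_model
from keras.preprocessing import image
import numpy as np

img_width, img_height = 320, 240

model = load_model('model1.h5')   # no compile step needed for inference

img = image.load_img('yes.jpeg', target_size=(img_width, img_height))
x = np.expand_dims(image.img_to_array(img), axis=0)
classes = model.predict_classes(x)
print(classes[0][0])              # 0 or 1 for this binary classifier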

How do I load custom image based datasets into Pytorch for use with a CNN?

I have searched for hours on the internet to find a good solution to my issue. Here is some relevant background information to help you answer my question.
This is my first ever deep learning project and I have no idea what I am doing. I know the theory but not the practical elements.
The data that I am using can be found on kaggle at this link:
(https://www.kaggle.com/alxmamaev/flowers-recognition)
I am aiming to classify flowers based on the images provided in the dataset using a CNN.
Here is some sample code I have tried to use to load the data so far. This is my best attempt, but as I mentioned, I am clueless, and the PyTorch docs didn't offer much help that I could understand at my level.
(https://pastebin.com/fNLVW1UW)
# Loads the images for use with the CNN.
def load_images(image_size=32, batch_size=64, root="../images"):
    transform = transforms.Compose([
        transforms.Resize(32),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    train_set = datasets.ImageFolder(root=root, train=True, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2)
    return train_loader

# Defining variables for use with the CNN.
classes = ('daisy', 'dandelion', 'rose', 'sunflower', 'tulip')
train_loader_data = load_images()

# Training samples.
n_training_samples = 3394
train_sampler = SubsetRandomSampler(np.arange(n_training_samples, dtype=np.int64))

# Validation samples.
n_val_samples = 424
val_sampler = SubsetRandomSampler(np.arange(n_training_samples, n_training_samples + n_val_samples, dtype=np.int64))

# Test samples.
n_test_samples = 424
test_sampler = SubsetRandomSampler(np.arange(n_test_samples, dtype=np.int64))
Here are my direct questions that I require answers to:
How do I fix my code to load the dataset in an 80/10/10 split for training/test/validation?
How do I create the required labels/classes for these images, which are already divided by folders in /images?
Looking at the data from Kaggle and your code, there are problems in your data loading.
The data should be in a different folder per class label for PyTorch ImageFolder to load it correctly. In your case, since all the training data is in the same folder, PyTorch is loading it as one train set. You can correct this by using a folder structure like - train/daisy, train/dandelion, test/daisy, test/dandelion and then passing the train and the test folder to the train and test ImageFolder respectively. Just change the folder structure and you should be good. Take a look at the official documentation of torchvision.datasets.Imagefolder which has a similar example.
As you said, these images are already divided by folders in /images. PyTorch's ImageFolder assumes that images are organized in the following way. But this folder structure is only correct if you are using all the images for the train set:
```
/images/daisy/100080576_f52e8ee070_n.jpg
/images/daisy/10140303196_b88d3d6cec.jpg
.
.
.
/images/dandelion/10043234166_e6dd915111_n.jpg
/images/dandelion/10200780773_c6051a7d71_n.jpg
```
where 'daisy', 'dandelion' etc. are class labels.
The correct folder structure, if you want to split the dataset into train and test set in your case (note that I know you want to split the dataset into train, validation, and test set, but that doesn't matter, as this is just an example to get the idea across):
```
/images/train/daisy/100080576_f52e8ee070_n.jpg
/images/train/daisy/10140303196_b88d3d6cec.jpg
.
.
/images/train/dandelion/10043234166_e6dd915111_n.jpg
/images/train/dandelion/10200780773_c6051a7d71_n.jpg
.
.
/images/test/daisy/300080576_f52e8ee070_n.jpg
/images/test/daisy/95140303196_b88d3d6cec.jpg
.
.
/images/test/dandelion/32143234166_e6dd915111_n.jpg
/images/test/dandelion/65200780773_c6051a7d71_n.jpg
```
Then, you can refer to the following full code example on how to write a dataloader:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.utils.data as data
import torchvision
from torchvision import transforms

EPOCHS = 2
BATCH_SIZE = 10
LEARNING_RATE = 0.003
TRAIN_DATA_PATH = "./images/train/"
TEST_DATA_PATH = "./images/test/"
TRANSFORM_IMG = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(256),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
    ])

train_data = torchvision.datasets.ImageFolder(root=TRAIN_DATA_PATH, transform=TRANSFORM_IMG)
train_data_loader = data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
test_data = torchvision.datasets.ImageFolder(root=TEST_DATA_PATH, transform=TRANSFORM_IMG)
test_data_loader = data.DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

class CNN(nn.Module):
    # omitted...
    pass

if __name__ == '__main__':
    print("Number of train samples: ", len(train_data))
    print("Number of test samples: ", len(test_data))
    print("Detected Classes are: ", train_data.class_to_idx)  # classes are detected by folder structure

    model = CNN()
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    loss_func = nn.CrossEntropyLoss()

    # Training and Testing
    for epoch in range(EPOCHS):
        for step, (x, y) in enumerate(train_data_loader):
            b_x = Variable(x)   # batch x (image)
            b_y = Variable(y)   # batch y (target)
            output = model(b_x)[0]
            loss = loss_func(output, b_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % 50 == 0:
                test_x, test_y = next(iter(test_data_loader))  # fetch one batch of test data
                test_output, last_layer = model(test_x)
                pred_y = torch.max(test_output, 1)[1].data.squeeze()
                accuracy = float((pred_y == test_y).sum()) / float(test_y.size(0))
                print('Epoch: ', epoch, '| train loss: %.4f' % loss.item(), '| test accuracy: %.2f' % accuracy)
There now exists an easy package for the splitting, called 'split-folders'. See here.
E.g.
import splitfolders
splitfolders.ratio(image_path, output="output", seed=43, ratio=(.8,.1,.1))
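Alternatively, if you would rather not reorganize the files on disk at all, here is a minimal sketch (not from the answers above; it uses torch.utils.data.random_split, with the transform mirroring the question's setup) of splitting a single ImageFolder in memory:
import torch
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

full_set = datasets.ImageFolder(root="../images", transform=transform)

n_total = len(full_set)
n_val = n_test = n_total // 10                 # 10% validation, 10% test
n_train = n_total - n_val - n_test             # remaining ~80% for training
train_set, val_set, test_set = torch.utils.data.random_split(
    full_set, [n_train, n_val, n_test])

train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)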
