Use a bounding box model with a webcam - PyTorch

I have a model trained using the following code:
https://jovian.ml/aakanksha-ns/road-signs-bounding-box-prediction/v/2?utm_source=embed
Then I saved the model using the following code:
torch.save(model.state_dict(), 'checkpoint.pth')
How can I use the saved model with a webcam? I created the code below, but it only works for a plain classification model, not for the combined bounding box + classification model.
from PIL import Image
import time
#Load the saved model
# Create a neural net class
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch.nn
import torchvision
import PIL
import cv2
from torchvision import models
class BB_model(nn.Module):
    def __init__(self):
        super(BB_model, self).__init__()
        resnet = models.resnet34(pretrained=True)
        layers = list(resnet.children())[:8]
        self.features1 = nn.Sequential(*layers[:6])
        self.features2 = nn.Sequential(*layers[6:])
        self.classifier = nn.Sequential(nn.BatchNorm1d(512), nn.Linear(512, 4))
        self.bb = nn.Sequential(nn.BatchNorm1d(512), nn.Linear(512, 4))

    def forward(self, x):
        x = self.features1(x)
        x = self.features2(x)
        x = F.relu(x)
        x = nn.AdaptiveAvgPool2d((1, 1))(x)
        x = x.view(x.shape[0], -1)
        return self.classifier(x), self.bb(x)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") ##Assigning the Device which will do the calculation
model = BB_model()
model.load_state_dict(torch.load("checkpoint_bb.pth"))
model = model.to(device)
model.eval()
video = cv2.VideoCapture(0)
# used to record the time when we processed last frame
prev_frame_time = 0
# used to record the time at which we processed current frame
new_frame_time = 0
encoder = {0:"Standing"}
# Let's preprocess the inputted frame
data_transforms = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=(128, 128)),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])
def argmax(prediction):
    prediction = prediction.cpu()
    prediction = prediction.detach().numpy()
    top_1 = np.argmax(prediction, axis=1)
    score = np.amax(prediction)
    score = '{:6f}'.format(score)
    prediction = top_1[0]
    result = encoder[prediction]
    return result, score
def preprocess(image):
    image = PIL.Image.fromarray(image)  # Webcam frames are numpy arrays,
                                        # so transform back to a PIL image
    print(image)
    image = data_transforms(image)
    image = image.float()
    # image = Variable(image, requires_autograd=True)
    image = image.unsqueeze(0)  # The ResNet backbone only accepts 4-D tensors,
                                # so we add a batch dimension
    return image
while True:
    _, frame = video.read()
    image = frame[100:450, 150:570]
    image_data = preprocess(image)
    print(image_data)
    prediction = model(image_data)
    result, score = argmax(prediction)
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(frame,
                f"{result}",
                (50, 50),
                font, 1,
                (0, 0, 255),
                2,
                cv2.LINE_4)
    # time when we finish processing this frame
    new_frame_time = time.time()
    fps = 1 / (new_frame_time - prev_frame_time)
    prev_frame_time = new_frame_time
    cv2.putText(frame,
                f"FPS: {round(fps, 1)}",
                (50, 80),
                font, 1,
                (255, 255, 0),
                2,
                cv2.LINE_4)
    cv2.imshow("Capturing", frame)
    key = cv2.waitKey(1)
    if key == ord('q'):
        break
video.release()
cv2.destroyAllWindows()
When I run the above code, I get the following error:
prediction = prediction.cpu()
AttributeError: 'tuple' object has no attribute 'cpu'
Since my code was built for standard classification and not for bounding boxes, the argmax function does not work, and I need help changing it. Thank you
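A minimal sketch of how the loop could be adapted: BB_model.forward returns a tuple (class_logits, bounding_box), so it has to be unpacked before calling argmax. The box ordering and the 128x128 coordinate scale below are assumptions to verify against the training notebook, not something stated in the question:

with torch.no_grad():
    class_logits, bb_pred = model(image_data.to(device))   # BB_model returns a tuple

result, score = argmax(class_logits)        # classification head only
box = bb_pred[0].cpu().numpy()              # predicted box for this single frame

# Rescale from the 128x128 network input back to the cropped region of the frame
h_scale = image.shape[0] / 128.0
w_scale = image.shape[1] / 128.0
y1, x1, y2, x2 = box                        # assumed ordering; verify against the training data
cv2.rectangle(image,
              (int(x1 * w_scale), int(y1 * h_scale)),
              (int(x2 * w_scale), int(y2 * h_scale)),
              (0, 255, 0), 2)

Since image is a slice (view) of frame, drawing on it shows up directly in the cv2.imshow window.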

Related

make multiple parallel predictions on tensorflow model

I want to make multiple predictions.
I have trained a segmentation model (images and masks). You can find the model here.
The images have dimensions (32, 32, 3). The masks (32, 32).
What I do when I want to run inference is:
Load the images array (tiles) with shape (62500, 32, 32, 3). You can find it here.
Create a TensorFlow dataset from this array,
and then predict on each image, like:
masks = []
for k, element in enumerate(the_image_array):
    the_img = np.asarray(np.expand_dims(element, 0))[-1, -1, :, :]
    pred = model.predict(the_img[np.newaxis, :, :, :])[0]
    mask = tf.where(pred > 0.5, 255, 0)
    masks.append(mask)
Now, I want to do these predictions in parallel.
So, I tried:
import tensorflow as tf
import numpy as np
import os
from tensorflow.keras.models import load_model
from itertools import chain
from tensorflow.keras import backend as K
import multiprocessing
from multiprocessing import Pool

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
multiprocessing.set_start_method('spawn', force=True)

model = load_model('./model.h5',
                   custom_objects={"K": K})

def resize_and_rescale(image):
    image = tf.image.resize(image,
                            (32, 32),
                            preserve_aspect_ratio=True)
    image /= 255.0
    return image

def prepare(ds):
    ds = ds.map(resize_and_rescale)
    return ds

def _apply_df(data):
    img = np.asarray(np.expand_dims(data, 0))[-1, -1, :, :]
    print(img.shape)
    pred = model.predict(img[np.newaxis, :, :, :], verbose=2)[0]
    # pred = model.predict(data)[0]
    mask = tf.where(pred[:, :, -1] > 0.5, 255, 0)
    return mask

def apply_by_multiprocessing(data, workers):
    pool = Pool(processes=workers)
    # result = pool.map(_apply_df, np.array_split(list(data.as_numpy_iterator()), workers))
    result = pool.map(_apply_df, data.batch(np.ceil(len(data) / workers)))
    pool.close()
    return list(result)

def after_prepare(data):
    tens_data = tf.data.Dataset.from_tensor_slices(data)
    tens_data = prepare(tens_data)
    return tens_data

def main():
    tiles = np.load('tiles.npy')
    print(len(tiles))
    print(tiles[0].shape)
    prep = after_prepare(tiles)
    print(len(prep))
    masks = apply_by_multiprocessing(prep, workers=4)
    masks_flatten = list(chain.from_iterable(masks))
    print(len(masks_flatten), masks_flatten[0].shape)
    return masks_flatten

if __name__ == "__main__":
    masks_flatten = main()
len(masks_flatten) is 128 and the shape of each element is (32,).
I would expect len=62500, with every element (mask) of shape (32, 32).
--- UPDATE ---
So, I want something like this:
def _apply_df(data):
    results = []
    for el in data:
        pred = model.predict(el[np.newaxis, :, :, :], verbose=2)[0]
        mask = tf.where(pred[:, :, -1] > 0.5, 255, 0)
        results.append(mask)
    return results
but without using the loop. Doing it in parallel.
Your approach is not incorrect, but even inside a single worker, it's better to let the TensorFlow/NumPy vectorization do its job instead of writing an explicit for loop:
def _apply_df(data):
    pred = model.predict(data)
    mask = tf.where(pred.squeeze(axis=-1) > 0.5, 255, 0)
    return mask
This is the complete code:
import tensorflow as tf
import numpy as np
import os
from tensorflow.keras.models import load_model
from itertools import chain
from tensorflow.keras import backend as K
import multiprocessing
from multiprocessing import Pool

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
multiprocessing.set_start_method("spawn", force=True)

model = load_model("./model.h5", custom_objects={"K": K})

def resize_and_rescale(image):
    image = tf.image.resize(image, (32, 32), preserve_aspect_ratio=True)
    image /= 255.0
    return image

def prepare(ds):
    ds = ds.map(resize_and_rescale)
    return ds

def _apply_df(data):
    pred = model.predict(data)
    mask = tf.where(pred.squeeze(axis=-1) > 0.5, 255, 0)
    return mask

def apply_by_multiprocessing(data, workers):
    pool = Pool(processes=workers)
    # result = pool.map(_apply_df, np.array_split(list(data.as_numpy_iterator()), workers))
    result = pool.map(_apply_df, data.batch(np.ceil(len(data) / workers)))
    pool.close()
    return list(result)

def after_prepare(data):
    tens_data = tf.data.Dataset.from_tensor_slices(data)
    tens_data = prepare(tens_data)
    return tens_data

def main():
    tiles = np.load("tiles.npy")
    prep = after_prepare(tiles)
    masks = apply_by_multiprocessing(prep, workers=4)
    masks_flatten = list(chain.from_iterable(masks))
    print(len(masks_flatten), masks_flatten[0].shape)  # 62500 (32, 32)
    return masks_flatten

if __name__ == "__main__":
    masks_flatten = main()
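One small caveat, as an assumption about the tf.data API rather than something the answer verified: np.ceil returns a float, and depending on the TensorFlow version Dataset.batch may insist on an integer batch size, so casting is the safer call:

result = pool.map(_apply_df, data.batch(int(np.ceil(len(data) / workers))))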

Mnist model performing very badly on custom data

I have used the prebuilt and pretrained resnet50 model from PyTorch on the MNIST dataset:
from torch import nn
from torchvision.models import ResNet50_Weights, resnet50
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.model = resnet50(weights=ResNet50_Weights.DEFAULT)
        self.model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        num_ftrs = self.model.fc.in_features
        self.model.fc = nn.Linear(num_ftrs, 10)

    def forward(self, x):
        return self.model(x)
It performs very well: after training for 10 epochs it achieved an incredible 99.895% accuracy on the 50,000 training images.
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in train_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print('Accuracy of the network on the {} train images: {} %'.format(50000, 100 * correct / total))
[out]: Accuracy of the network on the 50000 train images: 99.895 %
I have used pygame to easily create my own numbers to input to the model. I start off with a very basic program that just places circles while the left mouse button is held, then I save the generated image in PNG format.
if event.type == pg.MOUSEMOTION:
    if drawing:
        mouse_position = pg.mouse.get_pos()
        pg.draw.circle(screen, color, mouse_position, w)
elif event.type == pg.MOUSEBUTTONUP:
    mouse_position = (0, 0)
    drawing = False
    last_pos = None
elif event.type == pg.MOUSEBUTTONDOWN:
    drawing = True
I convert the image to grayscale and scale it down to 28x28 and into a tensor using PIL and torch.PILToTensor().
image = Image.open("image.png").convert("L").resize((28, 28), Image.Resampling.LANCZOS)

transform = Compose([
    PILToTensor(),
    Lambda(lambda image: image.view(-1, 1, 28, 28))
])

img_tensor = transform(image).to(torch.float)
Then I feed this image to the network. I get no errors; the model just predicts really badly. For example, when I gave it this image of a 2, this code output:
with torch.no_grad():
    outputs = model(img_tensor)
    print(outputs)
    _, predicted = torch.max(outputs.data, 1)
    print(predicted)
[out]: tensor([[ 20.6237,   0.4952, -15.5033,   8.5165,   1.0938,   2.8278,   2.0153,
                 3.2825,  -6.2655,  -0.6992]])
tensor([0])
The confidence is output as a list with a score for each class 0, 1, 2, 3..., so as you can see the score for "2" is actually negative. Does anyone know why this could be and how I could solve it?
Thank you very much
I have solved this. The problem was that when I converted the image to a tensor it had values from 0-255 instead of 0-1; that's why the model was behaving so unpredictably.
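For reference, a minimal sketch of that fix against the transform above: rescale the PILToTensor output to [0, 1] (equivalently, ToTensor performs both the conversion and the scaling):

from torchvision.transforms import Compose, PILToTensor, Lambda

transform = Compose([
    PILToTensor(),                                        # uint8 tensor, values in [0, 255]
    Lambda(lambda image: image.view(-1, 1, 28, 28)),
    Lambda(lambda image: image.to(torch.float) / 255.0)   # rescale to [0, 1]
])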

ValueError: Output tensors of a Functional model must be the output of a TensorFlow `Layer` when using custom callback to plot conv layer feature maps

I'm trying to implement a custom callback to get the feature maps of each Conv2D layer in the network plotted in TensorBoard.
When I run the code in Example 1 I get the following error:
<ipython-input-44-b691dabedd05> in on_epoch_end(self, epoch, logs)
28
29 # 3) Build partial model
---> 30 partial_model = keras.Model(
31 inputs=self.model.model.input,
32 outputs=output_layers
ValueError: Output tensors of a Functional model must be the output of a TensorFlow `Layer` (thus holding past layer metadata). Found: <keras.engine.base_layer.Layer object at 0x000002773C631CA0>
which seems as if it can't build the partial network, which is strange, because it succeeds when run separately from the main thread.
Here is an example that illustrates the issue:
Example 1
import os
import io
import datetime as dt
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import cifar10
import matplotlib.pyplot as plt
'''
You can adjust the verbosity of the logs which are being printed by TensorFlow
by changing the value of TF_CPP_MIN_LOG_LEVEL:
0 = all messages are logged (default behavior)
1 = INFO messages are not printed
2 = INFO and WARNING messages are not printed
3 = INFO, WARNING, and ERROR messages are not printed
'''
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
DEBUG = False
class ConvModel(keras.Model):
    def __init__(self, input_shape):
        super().__init__()
        self.input_image_shape = input_shape
        self.model = keras.Sequential([
            layers.Input(shape=input_shape),
            layers.Conv2D(32, 3),
            layers.BatchNormalization(),
            layers.ReLU(),
            layers.MaxPool2D(),
            layers.Conv2D(64, 5),
            layers.BatchNormalization(),
            layers.ReLU(),
            layers.MaxPool2D(),
            layers.Conv2D(128, 3, kernel_regularizer=keras.regularizers.l2(0.01)),
            layers.BatchNormalization(),
            layers.ReLU(),
            layers.Flatten(),
            layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)),
            layers.Dropout(0.5),
            layers.Dense(10)
        ])

    def call(self, inputs):
        return self.model(inputs)
def find_sub_string(string: str, sub_string: str):
    return True if string.find(sub_string) > -1 else False

def get_file_type(file_name: str):
    file_type = None
    if isinstance(file_name, str):
        dot_idx = file_name.find('.')
        if dot_idx > -1:
            file_type = file_name[dot_idx + 1:]
    return file_type
def get_image_from_figure(figure):
    buffer = io.BytesIO()
    plt.savefig(buffer, format='png')
    plt.close(figure)
    buffer.seek(0)
    image = tf.image.decode_png(buffer.getvalue(), channels=4)
    image = tf.expand_dims(image, 0)
    return image
class ConvLayerVis(keras.callbacks.Callback):
    def __init__(self, X, figure_configs: dict, log_dir: str, log_interval: int):
        super().__init__()
        self.X_test = X
        n_dims = len(self.X_test.shape)
        assert 2 < n_dims < 5, f'The number of dimensions of the test image should be less than 5 and greater than 2, but the current shape is {self.X_test.shape}'
        # In case the image is not represented as a batch - add a dimension to the left for the batch
        if len(self.X_test.shape) < 4:
            self.X_test = np.reshape(self.X_test, (1,) + self.X_test.shape)
        self.file_writer = tf.summary.create_file_writer(log_dir)
        self.figure_configs = figure_configs
        self.log_interval = log_interval

    def on_training_begin(self, logs=None):
        pass
    def on_epoch_end(self, epoch, logs=None):
        if epoch % self.log_interval == 0:
            # 1) Get the layers
            output_layer_tuples = [(idx, layer) for idx, layer in enumerate(self.model.model.layers) if find_sub_string(layer.name, 'conv2d') or find_sub_string(layer.name, 'max_pooling2d')]
            output_layers = [layer_tuple[1] for layer_tuple in output_layer_tuples]
            # 2) Get the layer names
            conv_layer_name_tuples = [(layer_tuple[0], f'Layer #{layer_tuple[0]} - Conv 2D ') for layer_tuple in output_layer_tuples if find_sub_string(layer_tuple[1].name, 'conv2d')]
            max_pool_layer_name_tuples = [(layer_tuple[0], f'Layer #{layer_tuple[0]} - Max Pooling 2D') for layer_tuple in output_layer_tuples if find_sub_string(layer_tuple[1].name, 'max_pooling2d')]
            layer_name_tuples = (conv_layer_name_tuples + max_pool_layer_name_tuples)
            layer_name_tuples.sort(key=lambda x: x[0])
            layer_names = [layer_name_tuple[1] for layer_name_tuple in layer_name_tuples]
            # 3) Build partial model
            partial_model = keras.Model(
                inputs=model.model.input,
                outputs=output_layers
            )
            # 4) Get the feature maps
            feature_maps = partial_model.predict(self.X_test)
            # 5) Plot
            rows, cols = self.figure_configs.get('rows'), self.figure_configs.get('cols')
            for feature_map, layer_name in zip(feature_maps, layer_names):
                fig, ax = plt.subplots(rows, cols, figsize=self.figure_configs.get('figsize'))
                for row in range(rows):
                    for col in range(cols):
                        ax[row][col].imshow(feature_map[0, :, :, row + col], cmap=self.figure_configs.get('cmap'))
                fig.suptitle(f'{layer_name}')
                with self.file_writer.as_default():
                    tf.summary.image(f'{layer_name} Feature Maps', get_image_from_figure(figure=fig), step=epoch)
if __name__ == '__main__':
    print(tf.config.list_physical_devices('GPU'))

    # Load the data
    (X, y), (X_test, y_test) = cifar10.load_data()
    X, X_test = X.astype(np.float32) / 255.0, X_test.astype(np.float32) / 255.0
    n, w, h, c = X.shape[0], X.shape[1], X.shape[2], X.shape[3]
    n_test, w_test, h_test, c_test = X_test.shape[0], X_test.shape[1], X_test.shape[2], X_test.shape[3]
    print(f'''
Dataset Stats:
    Number of train images: {n}
    Dimensions:
    > Train:
        width = {w}, height = {h}, channels = {c}
    > Test:
        width = {w_test}, height = {h_test}, channels = {c_test}
''')

    # Model with keras.Sequential
    model = ConvModel(input_shape=(w, h, c))
    model.compile(loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer=keras.optimizers.Adam(learning_rate=3e-4), metrics=['accuracy'])

    log_dir = f'./logs/{dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}'
    callbacks = [
        keras.callbacks.TensorBoard(
            log_dir=log_dir,
            write_images=True
        ),
        ConvLayerVis(
            X=X[0],
            figure_configs=dict(rows=5, cols=5, figsize=(35, 35), cmap='gray'),
            log_dir=f'{log_dir}/train',
            log_interval=3
        )
    ]

    model.fit(
        X,
        y,
        batch_size=64,
        epochs=15,
        callbacks=callbacks
    )
Thanks in advance for any help regarding this issue.
Just figured out the problem:
output_layers = [layer_tuple[1].output for layer_tuple in output_layer_tuples]
This collects the output attribute of each layer, rather than the layer object itself.
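For context, this is roughly how the corrected list plugs into step 3 of on_epoch_end (same names as above):

# Corrected: collect the symbolic output tensors, not the layer objects themselves
output_layers = [layer_tuple[1].output for layer_tuple in output_layer_tuples]

partial_model = keras.Model(
    inputs=self.model.model.input,
    outputs=output_layers
)
feature_maps = partial_model.predict(self.X_test)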

get InvalidArgumentError when using tf.image.resize_bilinear in Keras with multi-gpu environment

I use tf.image.resize_bilinear in a segmentation network. It seems this function is not supported by the multi-GPU model. The following code shows the simplified situation (it can be run directly):
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '0, 1'
from keras.backend.tensorflow_backend import set_session
from keras import backend as K
from keras.utils import multi_gpu_model
from keras.applications.mobilenet_v2 import preprocess_input
import tensorflow as tf
import numpy as np
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)
set_session(sess)
batch = 4
num_classes = 2
size = 128
K.clear_session()
def _GetRandomImg():
    shape = (batch, size, size, 3)
    img = np.random.randint(low=0, high=256, size=shape)
    return preprocess_input(img)

def _GetRandomLabel():
    shape = (batch, size, size, num_classes)
    label = np.random.randint(low=0, high=num_classes, size=shape)
    label = np.exp(label)
    label = label / np.sum(label, axis=-1, keepdims=True)
    return label

def DataGen():
    while True:
        x = _GetRandomImg()
        y = _GetRandomLabel()
        yield x, y
from keras.layers import Input, Conv2D, Lambda
from keras import Model
def GetModel():
    inputs = Input(shape=(size, size, 3))
    f = lambda x: tf.image.resize_bilinear(inputs, (size, size), align_corners=True)
    x = Lambda(f, output_shape=(size, size, 3))(inputs)
    outputs = Conv2D(num_classes, kernel_size=3, padding='same')(x)
    model = Model(inputs=[inputs], outputs=[outputs])
    return model
gen = DataGen()
with tf.device('/cpu:0'):
    model = GetModel()
model = multi_gpu_model(model, gpus=2)
model.compile(loss='categorical_crossentropy', optimizer='sgd')
result = model.fit_generator(gen, epochs=2, verbose=1, steps_per_epoch=100)
It works fine in a single-GPU environment, but in a multi-GPU environment I get the following error:
InvalidArgumentError: Incompatible shapes: [3,128,128,2] vs. [6,128,128,2]
[[{{node loss/conv2d_1_loss/categorical_crossentropy/mul}}]]
[[{{node training/SGD/gradients/conv2d_1_1/concat_grad/Slice_1}}]]
The problem is solved. If a TensorFlow function is used in a custom Lambda layer, you need to explicitly call set_shape():
def MyResizeBilinear(x, height, width):
    rows, cols = 1, 2
    original_shape = K.int_shape(x)
    new_shape = tf.constant(np.array([height, width], dtype='int32'))
    x = tf.image.resize_bilinear(x, new_shape, align_corners=True)
    new_height = None if original_shape[rows] is None else height
    new_width = None if original_shape[cols] is None else width
    output_shape = (None, new_height, new_width, None)
    x.set_shape(output_shape)
    return x
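A sketch of how this wrapper could be wired into GetModel from the question (same size and num_classes as above, with the raw tf call replaced by MyResizeBilinear):

def GetModel():
    inputs = Input(shape=(size, size, 3))
    # The Lambda now calls the shape-aware wrapper instead of tf.image.resize_bilinear directly
    x = Lambda(lambda t: MyResizeBilinear(t, size, size),
               output_shape=(size, size, 3))(inputs)
    outputs = Conv2D(num_classes, kernel_size=3, padding='same')(x)
    return Model(inputs=[inputs], outputs=[outputs])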

Keras pre-trained model switching to varying input size

This is very similar to this question, except I am wondering how I could take my pre-trained model, which had an input size of (128, 128, 3), keep its weights, and use it to predict on images of varying input size.
As it is, I get this when I try to input an image of arbitrary size:
Traceback (most recent call last):
File "arg_test.py", line 127, in <module>
predict(args)
File "arg_test.py", line 71, in predict
predictions.append(model.predict(input_img)[0]) # returns a list of lists, one for each image in the batch
File "C:\Users\payne\Anaconda3\envs\ml-gpu\lib\site-packages\keras\engine\training.py", line 1147, in predict
x, _, _ = self._standardize_user_data(x)
File "C:\Users\payne\Anaconda3\envs\ml-gpu\lib\site-packages\keras\engine\training.py", line 749, in _standardize_user_data
exception_prefix='input')
File "C:\Users\payne\Anaconda3\envs\ml-gpu\lib\site-packages\keras\engine\training_utils.py", line 137, in standardize_input_data
str(data_shape))
ValueError: Error when checking input: expected input_1 to have shape (128, 128, 3) but got array with shape (2736, 3648, 3)
Here is my model:
def setUpModel(x_train, y_train):
    filters = 256
    kernel_size = 3
    strides = 1

    # Head module
    input = Input(shape=(img_height//scale_fact, img_width//scale_fact, img_depth))
    conv0 = Conv2D(filters, kernel_size, strides=strides, padding='same')(input)

    # Body module
    res = Conv2D(filters, kernel_size, strides=strides, padding='same')(conv0)
    act = ReLU()(res)
    res = Conv2D(filters, kernel_size, strides=strides, padding='same')(act)
    res_rec = Add()([conv0, res])
    for i in range(res_blocks):
        res1 = Conv2D(filters, kernel_size, strides=strides, padding='same')(res_rec)
        act = ReLU()(res1)
        res2 = Conv2D(filters, kernel_size, strides=strides, padding='same')(act)
        res_rec = Add()([res_rec, res2])
    conv = Conv2D(filters, kernel_size, strides=strides, padding='same')(res_rec)
    add = Add()([conv0, conv])

    # Tail module
    conv = Conv2D(filters, kernel_size, strides=strides, padding='same')(add)
    act = ReLU()(conv)
    up = UpSampling2D(size=scale_fact if scale_fact != 4 else 2)(act)  # TODO: try "Conv2DTranspose"
    # mul = Multiply([np.zeros((img_width,img_height,img_depth)).fill(0.1), up])(up)

    # When it's a 4X factor, we want the upscale split in two procedures
    if scale_fact == 4:
        conv = Conv2D(filters, kernel_size, strides=strides, padding='same')(up)
        act = ReLU()(conv)
        up = UpSampling2D(size=2)(act)  # TODO: try "Conv2DTranspose"

    output = Conv2D(filters=3,
                    kernel_size=1,
                    strides=1,
                    padding='same')(up)

    model = Model(inputs=input, outputs=output)
This is only the architecture of the model used during training, but the training is already done: I have my model.h5 file obtained through model.save().
Here is how I get predictions:
import argparse
import numpy as np
import matplotlib.pyplot as plt
import skimage.io
from keras.models import load_model
from keras.optimizers import Adam
from keras.optimizers import Adadelta
from constants import save_dir
from constants import model_name
from constants import crops_p_img
from constants import tests_path
from constants import img_height
from constants import img_width
from constants import scale_fact
from utils import float_im
from utils import crop_center
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('-a', '--amount', type=int, default=crops_p_img,
                    help='how many (cropped to 128x128) samples to predict from within the image')
parser.add_argument('image', type=str,
                    help='image name (example: "bird.png") that must be inside the "./input/" folder')
parser.add_argument('-m', '--model', type=str, default=model_name,
                    help='model name (in the "./save/" folder), followed by ".h5"')
parser.add_argument('-r', '--random', action="store_true",  # if var is in args, set to TRUE, else, set to FALSE
                    help='flag that will select a random 128x128 area in the input image instead of the center')
parser.add_argument('-f', '--full', action="store_true",  # if var is in args, set to TRUE, else, set to FALSE
                    help='(WIP) flag that will get the whole image to be processed by the network')
args = parser.parse_args()
def predict(args):
    model = load_model(save_dir + '/' + args.model)

    # Setting up the proper optimizer  TODO: needed?
    if args.model == "my_full_model.h5":
        optimizer = Adadelta(lr=1.0,
                             rho=0.95,
                             epsilon=None,
                             decay=0.0)
    else:
        optimizer = Adam(lr=0.001,
                         beta_1=0.9,
                         beta_2=0.999,
                         epsilon=None,
                         decay=0.0,
                         amsgrad=False)
    model.compile(optimizer=optimizer,
                  loss='mean_squared_error')

    image = skimage.io.imread(tests_path + args.image)
    if image.shape[0] == 128:
        args.amount = 1

    predictions = []
    images = []

    # TODO: integrate FULL IMAGE
    # if args.full:
    #     images.append(image)
    #     # Hack because GPU can only handle one image at a time
    #     input_img = (np.expand_dims(images[0], 0))       # Add the image to a batch where it's the only member
    #     predictions.append(model.predict(input_img)[0])  # returns a list of lists, one for each image in the batch
    # else:
    if True:
        for i in range(args.amount):
            # Cropping to fit input size
            if (args.random or args.amount > 1) and image.shape[0] > 128:
                images.append(random_crop(image))
            else:
                images.append(crop_center(image, img_width//scale_fact, img_height//scale_fact))
            input_img = (np.expand_dims(images[i], 0))
            predictions.append(model.predict(input_img)[0])

    for i in range(len(predictions)):
        show_pred_output(images[i], predictions[i])
# adapted from: https://stackoverflow.com/a/52463034/9768291
def random_crop(img):
    crop_h, crop_w = img_width//scale_fact, img_height//scale_fact
    print("Shape of input image to crop:", img.shape[0], img.shape[1])
    if (img.shape[0] >= crop_h) and (img.shape[1] >= crop_w):
        # Cropping a random part of the image
        rand_h = np.random.randint(0, img.shape[0] - crop_h)
        rand_w = np.random.randint(0, img.shape[1] - crop_w)
        print("Random position for the crop:", rand_h, rand_w)
        tmp_img = img[rand_h:rand_h + crop_h, rand_w:rand_w + crop_w]
        new_img = float_im(tmp_img)  # From [0,255] to [0.,1.]
    else:
        return img
    return new_img
def show_pred_output(input, pred):
    plt.figure(figsize=(20, 20))
    plt.suptitle("Results")

    plt.subplot(1, 2, 1)
    plt.title("Input: 128x128")
    plt.imshow(input, cmap=plt.cm.binary).axes.get_xaxis().set_visible(False)

    plt.subplot(1, 2, 2)
    plt.title("Output: 512x512")
    plt.imshow(pred, cmap=plt.cm.binary).axes.get_xaxis().set_visible(False)

    plt.show()

if __name__ == '__main__':
    print(" - ", args)
    predict(args)
You should replace the Input line with:
input = Input(shape=(None, None, img_depth))
None in a shape means variable size. Since the model is just convolutions, it should work with images of any size.
After training your model with a specific input shape, you can save the trained weights with model.save_weights() and then assign those weights to a model with an unknown input shape using load_weights().
For example, I have trained the model with input shape (28, 28, 1):
model = keras.Sequential([
    keras.Input(shape=(28, 28, 1)),
    keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(10, activation='softmax')
])
After training you can save model weights by
model.save_weights('model-weights')
Then define a model with unknown input shape
model2 = keras.Sequential([
    keras.Input(shape=(None, None, 1)),
    keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(10, activation='softmax')
])
Then assign the saved weights by
model2.load_weights('/content/model-weights')
Now you can predict with model2 without training it. For more details, please refer to this gist. Thank you!
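As a quick sanity check (a sketch using the same model2 as above), any spatial size large enough to survive the two pooling stages now works:

import numpy as np

model2.load_weights('model-weights')

img = np.random.rand(1, 64, 48, 1).astype('float32')  # arbitrary H x W; the channel count must still match
print(model2.predict(img).shape)                      # expected: (1, 10)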
