Keras pre-trained model switching to varying input size - python-3.x

Very similar to this question, except I am wondering how I could take my pre-trained model, which was trained on (128, 128, 3) images, keep its weights, and use it to predict on images of varying size.
This is what I get, as things stand, when I try to feed it an image of arbitrary size:
Traceback (most recent call last):
File "arg_test.py", line 127, in <module>
predict(args)
File "arg_test.py", line 71, in predict
predictions.append(model.predict(input_img)[0]) # returns a list of lists, one for each image in the batch
File "C:\Users\payne\Anaconda3\envs\ml-gpu\lib\site-packages\keras\engine\training.py", line 1147, in predict
x, _, _ = self._standardize_user_data(x)
File "C:\Users\payne\Anaconda3\envs\ml-gpu\lib\site-packages\keras\engine\training.py", line 749, in _standardize_user_data
exception_prefix='input')
File "C:\Users\payne\Anaconda3\envs\ml-gpu\lib\site-packages\keras\engine\training_utils.py", line 137, in standardize_input_data
str(data_shape))
ValueError: Error when checking input: expected input_1 to have shape (128, 128, 3) but got array with shape (2736, 3648, 3)
Here is my model:
def setUpModel(x_train, y_train):
    filters = 256
    kernel_size = 3
    strides = 1

    # Head module
    input = Input(shape=(img_height//scale_fact, img_width//scale_fact, img_depth))
    conv0 = Conv2D(filters, kernel_size, strides=strides, padding='same')(input)

    # Body module
    res = Conv2D(filters, kernel_size, strides=strides, padding='same')(conv0)
    act = ReLU()(res)
    res = Conv2D(filters, kernel_size, strides=strides, padding='same')(act)
    res_rec = Add()([conv0, res])

    for i in range(res_blocks):
        res1 = Conv2D(filters, kernel_size, strides=strides, padding='same')(res_rec)
        act = ReLU()(res1)
        res2 = Conv2D(filters, kernel_size, strides=strides, padding='same')(act)
        res_rec = Add()([res_rec, res2])

    conv = Conv2D(filters, kernel_size, strides=strides, padding='same')(res_rec)
    add = Add()([conv0, conv])

    # Tail module
    conv = Conv2D(filters, kernel_size, strides=strides, padding='same')(add)
    act = ReLU()(conv)
    up = UpSampling2D(size=scale_fact if scale_fact != 4 else 2)(act)  # TODO: try "Conv2DTranspose"
    # mul = Multiply([np.zeros((img_width,img_height,img_depth)).fill(0.1), up])(up)

    # When it's a 4X factor, we want the upscale split in two procedures
    if(scale_fact == 4):
        conv = Conv2D(filters, kernel_size, strides=strides, padding='same')(up)
        act = ReLU()(conv)
        up = UpSampling2D(size=2)(act)  # TODO: try "Conv2DTranspose"

    output = Conv2D(filters=3,
                    kernel_size=1,
                    strides=1,
                    padding='same')(up)

    model = Model(inputs=input, outputs=output)
This is only the architecture of the model that was used during training, but the training itself is already done: I have my model.h5 file, obtained through model.save().
Here is how I get predictions:
import argparse
import numpy as np
import matplotlib.pyplot as plt
import skimage.io
from keras.models import load_model
from keras.optimizers import Adam
from keras.optimizers import Adadelta
from constants import save_dir
from constants import model_name
from constants import crops_p_img
from constants import tests_path
from constants import img_height
from constants import img_width
from constants import scale_fact
from utils import float_im
from utils import crop_center
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

parser.add_argument('-a', '--amount', type=int, default=crops_p_img,
                    help='how many (cropped to 128x128) samples to predict from within the image')
parser.add_argument('image', type=str,
                    help='image name (example: "bird.png") that must be inside the "./input/" folder')
parser.add_argument('-m', '--model', type=str, default=model_name,
                    help='model name (in the "./save/" folder), followed by ".h5"')
parser.add_argument('-r', '--random', action="store_true",  # if var is in args, set to TRUE, else, set to FALSE
                    help='flag that will select a random 128x128 area in the input image instead of the center')
parser.add_argument('-f', '--full', action="store_true",  # if var is in args, set to TRUE, else, set to FALSE
                    help='(WIP) flag that will get the whole image to be processed by the network')

args = parser.parse_args()
def predict(args):
    model = load_model(save_dir + '/' + args.model)

    # Setting up the proper optimizer  TODO: needed?
    if args.model == "my_full_model.h5":
        optimizer = Adadelta(lr=1.0,
                             rho=0.95,
                             epsilon=None,
                             decay=0.0)
    else:
        optimizer = Adam(lr=0.001,
                         beta_1=0.9,
                         beta_2=0.999,
                         epsilon=None,
                         decay=0.0,
                         amsgrad=False)
    model.compile(optimizer=optimizer,
                  loss='mean_squared_error')

    image = skimage.io.imread(tests_path + args.image)
    if image.shape[0] == 128:
        args.amount = 1

    predictions = []
    images = []

    # TODO: integrate FULL IMAGE
    # if args.full:
    #     images.append(image)
    #     # Hack because GPU can only handle one image at a time
    #     input_img = (np.expand_dims(images[0], 0))       # Add the image to a batch where it's the only member
    #     predictions.append(model.predict(input_img)[0])  # returns a list of lists, one for each image in the batch
    # else:
    if True:
        for i in range(args.amount):
            # Cropping to fit input size
            if (args.random or args.amount > 1) and image.shape[0] > 128:
                images.append(random_crop(image))
            else:
                images.append(crop_center(image, img_width//scale_fact, img_height//scale_fact))

            input_img = (np.expand_dims(images[i], 0))
            predictions.append(model.predict(input_img)[0])

    for i in range(len(predictions)):
        show_pred_output(images[i], predictions[i])


# adapted from: https://stackoverflow.com/a/52463034/9768291
def random_crop(img):
    crop_h, crop_w = img_width//scale_fact, img_height//scale_fact
    print("Shape of input image to crop:", img.shape[0], img.shape[1])
    if (img.shape[0] >= crop_h) and (img.shape[1] >= crop_w):
        # Cropping a random part of the image
        rand_h = np.random.randint(0, img.shape[0]-crop_h)
        rand_w = np.random.randint(0, img.shape[1]-crop_w)
        print("Random position for the crop:", rand_h, rand_w)
        tmp_img = img[rand_h:rand_h+crop_h, rand_w:rand_w+crop_w]
        new_img = float_im(tmp_img)  # From [0,255] to [0.,1.]
    else:
        return img
    return new_img


def show_pred_output(input, pred):
    plt.figure(figsize=(20, 20))
    plt.suptitle("Results")

    plt.subplot(1, 2, 1)
    plt.title("Input: 128x128")
    plt.imshow(input, cmap=plt.cm.binary).axes.get_xaxis().set_visible(False)

    plt.subplot(1, 2, 2)
    plt.title("Output: 512x512")
    plt.imshow(pred, cmap=plt.cm.binary).axes.get_xaxis().set_visible(False)

    plt.show()


if __name__ == '__main__':
    print(" - ", args)
    predict(args)

You should replace this line:
input = Input(shape=(img_height//scale_fact, img_width//scale_fact, img_depth))
with:
input = Input(shape=(None, None, img_depth))
None in a shape means variable size. Since the model is just convolutions, it should work with images of any size.
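A minimal, self-contained sketch of that idea (a toy two-layer stand-in, not the question's actual architecture): build the same convolutional stack once with a fixed input and once with shape (None, None, 3), then copy the weights across, since Conv2D kernels do not depend on the spatial size.
import numpy as np
from keras.layers import Input, Conv2D
from keras.models import Model

def build(shape):
    # Same layers every time; only the declared input shape changes.
    inp = Input(shape=shape)
    x = Conv2D(16, 3, padding='same', activation='relu')(inp)
    out = Conv2D(3, 1, padding='same')(x)
    return Model(inp, out)

fixed = build((128, 128, 3))       # stand-in for the trained model
flexible = build((None, None, 3))  # identical layers, variable height/width
flexible.set_weights(fixed.get_weights())

# Arbitrary-sized inputs now pass the input shape check:
print(flexible.predict(np.zeros((1, 300, 200, 3))).shape)  # (1, 300, 200, 3)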

After training your model with a specific input shape, you can save the trained weights with model.save_weights() and then assign those weights to a model defined with an unknown input shape via its load_weights() method.
For example, I have trained a model with input shape (28, 28, 1):
model = keras.Sequential([
    keras.Input(shape=(28,28,1)),
    keras.layers.Conv2D(32, kernel_size=(3,3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2,2)),
    keras.layers.Conv2D(64, kernel_size=(3,3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2,2)),
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(10, activation='softmax')
])
After training you can save model weights by
model.save_weights('model-weights')
Then define a model with unknown input shape
model2 = keras.Sequential([
    keras.Input(shape=(None,None,1)),
    keras.layers.Conv2D(32, kernel_size=(3,3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2,2)),
    keras.layers.Conv2D(64, kernel_size=(3,3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2,2)),
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(10, activation='softmax')
])
Then assign the saved weights by
model2.load_weights('/content/model-weights')
Now you can predict with model2 without training it. For more details, please refer to this gist. Thank you!
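As a quick sanity check (my own addition, not from the linked gist): because every layer in model2 is size-agnostic and GlobalAveragePooling2D collapses the spatial dimensions, inputs of different sizes produce the same (batch, 10) output shape.
import numpy as np

# Both the original 28x28 training size and an unseen 64x64 input go through.
print(model2.predict(np.zeros((1, 28, 28, 1))).shape)  # (1, 10)
print(model2.predict(np.zeros((1, 64, 64, 1))).shape)  # (1, 10)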

Related

How to solve "RuntimeError: Expected target size [6, 1, 224, 224], got [6, 3, 224, 224]"?

I got this error in DeepCrack. I tried several searches but didn't find a solution. How can I overcome the target-size issue? At first I faced a tensor-tuple issue, but applying torch.stack solved that. Now I don't understand the size requirement: I don't know what the actual expected target size for this CNN is. Can anyone please help me?
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import random_split
import matplotlib.pyplot as plt
import time
import os
import copy

cudnn.benchmark = True
plt.ion()   # interactive mode

from random import *
from tqdm.notebook import tqdm, trange
from time import sleep
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms, models
from torchvision.datasets import ImageFolder
from warnings import filterwarnings
filterwarnings('ignore')

# functions to show an image
def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()
    plt.savefig('labels.JPG')

## codes for data augmentation
train_trans = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),      ## tamim: image will move left and right
    transforms.RandomVerticalFlip(p=0.5),        ## tamim: image will come to eye vertically
    transforms.RandomRotation(degrees=(.5, 5)),  ## very small rotation of the cracks
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]
    )
])
test_trans = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),      ## tamim: image will move left and right
    transforms.RandomVerticalFlip(p=0.5),        ## tamim: image will come to eye vertically
    transforms.RandomRotation(degrees=(.5, 5)),  ## very small rotation of the cracks
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]
    )
])

## Load data
from torchvision.datasets import ImageFolder
data = ImageFolder('../Data/Data_Structure(Annotated)', transform=train_trans)
test_folder = ImageFolder("../Data/DATA_iPhone_13_Pro_Max", transform=test_trans)

batch_size = 6
num_classes = 4
learning_rate = 0.01
num_epochs = 10

print("Follwing classes are there : \n", data.classes)
classes = ('Alligator Cracks', 'Delamination', 'Longitudinal Cracks', 'Transverse Cracks')
len(data)

## Splitting Data and Prepare Batches:
## Source: https://medium.com/thecyphy/train-cnn-model-with-pytorch-21dafb918f48
val_size = 127  ## Tamim: 30% data for validation ##
train_size = len(data) - val_size

train_loader, val_loader = random_split(data, [train_size, val_size])  ## To randomly split the images into training and testing, PyTorch provides random_split()
print(f"Length of Train Data : {len(train_loader)}")  ## changed the folder names
print(f"Length of Validation Data : {len(val_loader)}")

# Splitting train and validation data on batches
train_loader = torch.utils.data.DataLoader(train_loader, shuffle=True, batch_size=batch_size)  ## defined train data & val data
val_loader = torch.utils.data.DataLoader(val_loader, shuffle=True, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_folder, shuffle=False, batch_size=batch_size)

# visualize images of a single batch
dataiter = iter(train_loader)
images, labels = next(dataiter)

# show images
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join(f'{classes[labels[j]]:5s}' for j in range(batch_size)))

# model = models.vgg19(pretrained=True)
# print(model)
from torch import nn
import torch
import torch.nn.functional as F

def Conv3X3(in_, out):
    return torch.nn.Conv2d(in_, out, 3, padding=1)

class ConvRelu(nn.Module):
    def __init__(self, in_, out):
        super().__init__()
        self.conv = Conv3X3(in_, out)
        self.activation = torch.nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.activation(x)
        return x

class Down(nn.Module):
    def __init__(self, nn):
        super(Down, self).__init__()
        self.nn = nn
        self.maxpool_with_argmax = torch.nn.MaxPool2d(kernel_size=2, stride=2, return_indices=True)

    def forward(self, inputs):
        down = self.nn(inputs)
        unpooled_shape = down.size()
        outputs, indices = self.maxpool_with_argmax(down)
        return outputs, down, indices, unpooled_shape

class Up(nn.Module):
    def __init__(self, nn):
        super().__init__()
        self.nn = nn
        self.unpool = torch.nn.MaxUnpool2d(2, 2)

    def forward(self, inputs, indices, output_shape):
        outputs = self.unpool(inputs, indices=indices, output_size=output_shape)
        outputs = self.nn(outputs)
        return outputs

class Fuse(nn.Module):
    def __init__(self, nn, scale):
        super().__init__()
        self.nn = nn
        self.scale = scale
        self.conv = Conv3X3(64, 1)

    def forward(self, down_inp, up_inp):
        outputs = torch.cat([down_inp, up_inp], 1)
        outputs = F.interpolate(outputs, scale_factor=self.scale, mode='bilinear')
        outputs = self.nn(outputs)
        return self.conv(outputs)

class DeepCrack(nn.Module):
    def __init__(self, num_classes=1000):
        super(DeepCrack, self).__init__()
        self.down1 = Down(torch.nn.Sequential(
            ConvRelu(3, 64),
            ConvRelu(64, 64),
        ))
        self.down2 = Down(torch.nn.Sequential(
            ConvRelu(64, 128),
            ConvRelu(128, 128),
        ))
        self.down3 = Down(torch.nn.Sequential(
            ConvRelu(128, 256),
            ConvRelu(256, 256),
            ConvRelu(256, 256),
        ))
        self.down4 = Down(torch.nn.Sequential(
            ConvRelu(256, 512),
            ConvRelu(512, 512),
            ConvRelu(512, 512),
        ))
        self.down5 = Down(torch.nn.Sequential(
            ConvRelu(512, 512),
            ConvRelu(512, 512),
            ConvRelu(512, 512),
        ))
        self.up1 = Up(torch.nn.Sequential(
            ConvRelu(64, 64),
            ConvRelu(64, 64),
        ))
        self.up2 = Up(torch.nn.Sequential(
            ConvRelu(128, 128),
            ConvRelu(128, 64),
        ))
        self.up3 = Up(torch.nn.Sequential(
            ConvRelu(256, 256),
            ConvRelu(256, 256),
            ConvRelu(256, 128),
        ))
        self.up4 = Up(torch.nn.Sequential(
            ConvRelu(512, 512),
            ConvRelu(512, 512),
            ConvRelu(512, 256),
        ))
        self.up5 = Up(torch.nn.Sequential(
            ConvRelu(512, 512),
            ConvRelu(512, 512),
            ConvRelu(512, 512),
        ))
        self.fuse5 = Fuse(ConvRelu(512 + 512, 64), scale=16)
        self.fuse4 = Fuse(ConvRelu(512 + 256, 64), scale=8)
        self.fuse3 = Fuse(ConvRelu(256 + 128, 64), scale=4)
        self.fuse2 = Fuse(ConvRelu(128 + 64, 64), scale=2)
        self.fuse1 = Fuse(ConvRelu(64 + 64, 64), scale=1)
        self.final = Conv3X3(5, 1)

    def forward(self, inputs):
        # encoder part
        out, down1, indices_1, unpool_shape1 = self.down1(inputs)
        out, down2, indices_2, unpool_shape2 = self.down2(out)
        out, down3, indices_3, unpool_shape3 = self.down3(out)
        out, down4, indices_4, unpool_shape4 = self.down4(out)
        out, down5, indices_5, unpool_shape5 = self.down5(out)
        # decoder part
        up5 = self.up5(out, indices=indices_5, output_shape=unpool_shape5)
        up4 = self.up4(up5, indices=indices_4, output_shape=unpool_shape4)
        up3 = self.up3(up4, indices=indices_3, output_shape=unpool_shape3)
        up2 = self.up2(up3, indices=indices_2, output_shape=unpool_shape2)
        up1 = self.up1(up2, indices=indices_1, output_shape=unpool_shape1)
        fuse5 = self.fuse5(down_inp=down5, up_inp=up5)
        fuse4 = self.fuse4(down_inp=down4, up_inp=up4)
        fuse3 = self.fuse3(down_inp=down3, up_inp=up3)
        fuse2 = self.fuse2(down_inp=down2, up_inp=up2)
        fuse1 = self.fuse1(down_inp=down1, up_inp=up1)
        output = self.final(torch.cat([fuse5, fuse4, fuse3, fuse2, fuse1], 1))
        return output, fuse5, fuse4, fuse3, fuse2, fuse1

if __name__ == '__main__':
    inp = torch.randn((1, 3, 512, 512))
    model = DeepCrack()
    out = model(inp)

model = DeepCrack()
print(model)
# specify loss function
criterion = nn.CrossEntropyLoss()

# specify optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# number of epochs to train the model
n_epochs = 10

for epoch in range(1, n_epochs+1):
    # monitor training loss
    train_loss = 0.0

    ###################
    # train the model #
    ###################
    for data in train_loader:
        # _ stands in for labels, here
        # no need to flatten images
        images, _ = data
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        outputs = model(images)
        outputs = torch.stack(outputs, dim=0, out=None)  ## Tamim: converted the tuple of tensors to one.
        outputs = outputs  ## Changed shape
        print(outputs.shape)  ## Tamim: printed the target tensor shape to see
        print(outputs)        ## Tamim: printed the target tensors
        # calculate the loss
        loss = criterion(outputs, images)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update running training loss
        train_loss += loss.item()*images.size(0)

    # print avg training statistics
    train_loss = train_loss/len(train_loader)
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(
        epoch,
        train_loss
    ))
Traceback (most recent call last):
File "test_deepcrack.py", line 324, in <module>
loss = criterion(outputs, images)
File "/apps/pkg/pytorch/1.10.2/cuda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/apps/pkg/pytorch/1.10.2/cuda/lib/python3.8/site-packages/torch/nn/modules/loss.py", line 1150, in forward
return F.cross_entropy(input, target, weight=self.weight,
File "/apps/pkg/pytorch/1.10.2/cuda/lib/python3.8/site-packages/torch/nn/functional.py", line 2846, in cross_entropy
return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
RuntimeError: Expected target size [6, 1, 224, 224], got [6, 3, 224, 224]
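For context on that last error (an illustration of the shape contract nn.CrossEntropyLoss enforces for dense predictions, not a fix for DeepCrack): the loss expects raw scores of shape [N, C, H, W] together with an integer class-index target of shape [N, H, W], so passing an image tensor such as [6, 3, 224, 224] as the target produces a target-size mismatch like the one above.
import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()

logits = torch.randn(6, 4, 224, 224)         # [batch, num_classes, H, W]
target = torch.randint(0, 4, (6, 224, 224))  # integer class indices, [batch, H, W]
print(criterion(logits, target))             # valid combination

bad_target = torch.rand(6, 3, 224, 224)      # an image, not class indices
# criterion(logits, bad_target)              # raises a target-size RuntimeError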

OSError: [Errno 9] Bad file descriptor in tensorflow Estimater When deploying model on Multiple GPUs using tensorflow mirror strategy

I am trying to deploy a deep learning model on two GPUs in a single machine, using TensorFlow's mirrored strategy. I am getting the following error:
Traceback (most recent call last):
Code
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
import os
import json

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
tf.logging.set_verbosity(tf.logging.INFO)
from tensorflow.keras.datasets import mnist

def cnn_model_fn(features, labels, mode):
    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])
    input_layer = tf.cast(input_layer, tf.float32)
    labels = tf.cast(labels, tf.int32)
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=32,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=64,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
    dropout = tf.layers.dropout(
        inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)
    logits = tf.layers.dense(inputs=dropout, units=10)

    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "classes": tf.argmax(input=logits, axis=1),
        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
        # `logging_hook`.
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=labels, predictions=predictions["classes"])}
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

def per_device_batch_size(batch_size, num_gpus):
    if num_gpus <= 1:
        return batch_size
    remainder = batch_size % num_gpus
    if remainder:
        err = ('When running with multiple GPUs, batch size '
               'must be a multiple of the number of available GPUs. Found {} '
               'GPUs with a batch size of {}; try --batch_size={} instead.'
               ).format(num_gpus, batch_size, batch_size - remainder)
        raise ValueError(err)
    return int(batch_size / num_gpus)

class InputFnProvider:
    def __init__(self, train_batch_size):
        self.train_batch_size = train_batch_size
        self.__load_data()

    def __load_data(self):
        # Load training and eval data
        (X_train, Y_train), (X_test, Y_test) = mnist.load_data()
        self.train_data = X_train  # Returns np.array
        self.train_labels = Y_train
        self.eval_data = X_test    # Returns np.array
        self.eval_labels = Y_test

    def train_input_fn(self):
        dataset = tf.data.Dataset.from_tensor_slices(({"x": self.train_data}, self.train_labels))
        dataset = dataset.shuffle(1000).repeat().batch(self.train_batch_size)
        return dataset

    def eval_input_fn(self):
        """An input function for evaluation or prediction"""
        dataset = tf.data.Dataset.from_tensor_slices(({"x": self.eval_data}, self.eval_labels))
        dataset = dataset.batch(1)
        return dataset

def main(unused_argv):
    batch_size = 100
    num_gpus = 2

    input_fn_provider = InputFnProvider(per_device_batch_size(batch_size, num_gpus))

    if num_gpus > 1:
        distribution = tf.distribute.MirroredStrategy(devices=["/gpu:0", "/gpu:1"],
                                                      cross_device_ops=tf.distribute.HierarchicalCopyAllReduce())
    else:
        distribution = None

    # Pass to RunConfig
    config = tf.estimator.RunConfig(
        train_distribute=distribution,
        model_dir="/tmp/mnist_convnet_model")

    mnist_classifier = tf.estimator.Estimator(
        model_fn=cnn_model_fn,
        config=config)

    # Train the model
    mnist_classifier.train(
        input_fn=input_fn_provider.train_input_fn,
        steps=1000)

    eval_results = mnist_classifier.evaluate(input_fn=input_fn_provider.eval_input_fn)
    print(eval_results)

if __name__ == "__main__":
    tf.app.run()
Surprisingly, when I deploy the model on a single GPU with the same code, it works; however, when I try to deploy it on two GPUs with the small change shown above, I get the error given above. I don't know what this error means. Can anyone help?

Deep learning for image training

I tried training on images and predicting the text in them. When I trained on all the images together I got an array error, so now I am training on one image per letter, but I am getting the error below. An image generator file (imported below as generators) creates the images and is passed to fit_generator.
Error:
Using TensorFlow backend.
WARNING: Logging before flag parsing goes to stderr.
W0826 09:18:45.040408 3428 deprecation_wrapper.py:119] From C:\Users\workspace\test\venv\lib\site-packages\keras\backend\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.
W0826 09:18:45.056031 3428 deprecation_wrapper.py:119] From C:\Users\workspace\test\venv\lib\site-packages\keras\backend\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.
W0826 09:18:45.071652 3428 deprecation_wrapper.py:119] From C:\Users\workspace\test\venv\lib\site-packages\keras\backend\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.
Traceback (most recent call last):
File "C:/Users/workspace/test/killme.py", line 22, in <module>
o2 = Reshape((len(string.ascii_uppercase), ), name="symbol_{}".format(i+1))(o)
File "C:\Users\workspace\test\venv\lib\site-packages\keras\engine\base_layer.py", line 474, in __call__
output_shape = self.compute_output_shape(input_shape)
File "C:\Users\workspace\test\venv\lib\site-packages\keras\layers\core.py", line 398, in compute_output_shape
input_shape[1:], self.target_shape)
File "C:\Users\workspace\test\venv\lib\site-packages\keras\layers\core.py", line 386, in _fix_unknown_dimension
raise ValueError(msg)
ValueError: total size of new array must be unchanged
Code:
from keras.models import Sequential, Input, Model
from keras.layers import Dense, Reshape
from keras.utils import to_categorical
from keras.layers.convolutional import Conv2D  # to add convolutional layers
from keras.layers.convolutional import MaxPooling2D  # to add pooling layers
from keras.layers import Flatten  # to flatten data for fully connected layers
import string
from generators import ImageGenerator, BasicGenerator
from numpy import reshape

height = 20
width = 200
font_size = 20

i1 = Input(shape=(height, width, 1))
character_count = int(width / font_size)

outputs = []
for i in range(character_count):
    o = Dense(len(string.ascii_uppercase), activation='relu')(i1)
    o2 = Reshape((len(string.ascii_uppercase), ), name="symbol_{}".format(i+1))(o)
    outputs.append(o2)

string_model = Model(inputs=i1, outputs=outputs)
string_model.layers[2].layer.trainable = False

generator = ImageGenerator(height, width, font_size, character_count)

string_model.compile(loss="categorical_crossentropy", optimizer="rmsprop", metrics=["categorical_accuracy"])
string_model.summary()
string_model.fit_generator(generator, epochs=10)
You have to preprocess the text data rather than just feeding it into the output Dense layer; converting it into a vocabulary would be a better idea. Create a CaptionGenerator to keep things simple, as follows.
from vgg16 import VGG16
from keras.applications import inception_v3
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import LSTM, Embedding, TimeDistributed, Dense, RepeatVector, Merge, Activation, Flatten
from keras.preprocessing import image, sequence
from keras.callbacks import ModelCheckpoint
import cPickle as pickle

EMBEDDING_DIM = 128

class CaptionGenerator():

    def __init__(self):
        self.max_cap_len = None
        self.vocab_size = None
        self.index_word = None
        self.word_index = None
        self.total_samples = None
        self.encoded_images = pickle.load(open("encoded_images.p", "rb"))
        self.variable_initializer()

    def variable_initializer(self):
        df = pd.read_csv('Flickr8k_text/flickr_8k_train_dataset.txt', delimiter='\t')
        nb_samples = df.shape[0]
        iter = df.iterrows()
        caps = []
        for i in range(nb_samples):
            x = iter.next()
            caps.append(x[1][1])

        self.total_samples = 0
        for text in caps:
            self.total_samples += len(text.split()) - 1
        print "Total samples : " + str(self.total_samples)

        words = [txt.split() for txt in caps]
        unique = []
        for word in words:
            unique.extend(word)
        unique = list(set(unique))

        self.vocab_size = len(unique)
        self.word_index = {}
        self.index_word = {}
        for i, word in enumerate(unique):
            self.word_index[word] = i
            self.index_word[i] = word

        max_len = 0
        for caption in caps:
            if(len(caption.split()) > max_len):
                max_len = len(caption.split())
        self.max_cap_len = max_len
        print "Vocabulary size: " + str(self.vocab_size)
        print "Maximum caption length: " + str(self.max_cap_len)
        print "Variables initialization done!"

    def data_generator(self, batch_size=32):
        partial_caps = []
        next_words = []
        images = []
        print "Generating data..."
        gen_count = 0
        df = pd.read_csv('Flickr8k_text/flickr_8k_train_dataset.txt', delimiter='\t')
        nb_samples = df.shape[0]
        iter = df.iterrows()
        caps = []
        imgs = []
        for i in range(nb_samples):
            x = iter.next()
            caps.append(x[1][1])
            imgs.append(x[1][0])

        total_count = 0
        while 1:
            image_counter = -1
            for text in caps:
                image_counter += 1
                current_image = self.encoded_images[imgs[image_counter]]
                for i in range(len(text.split())-1):
                    total_count += 1
                    partial = [self.word_index[txt] for txt in text.split()[:i+1]]
                    partial_caps.append(partial)
                    next = np.zeros(self.vocab_size)
                    next[self.word_index[text.split()[i+1]]] = 1
                    next_words.append(next)
                    images.append(current_image)

                    if total_count >= batch_size:
                        next_words = np.asarray(next_words)
                        images = np.asarray(images)
                        partial_caps = sequence.pad_sequences(partial_caps, maxlen=self.max_cap_len, padding='post')
                        total_count = 0
                        gen_count += 1
                        print "yielding count: " + str(gen_count)
                        yield [[images, partial_caps], next_words]
                        partial_caps = []
                        next_words = []
                        images = []

    def load_image(self, path):
        img = image.load_img(path, target_size=(224,224))
        x = image.img_to_array(img)
        return np.asarray(x)

    def create_model(self, ret_model=False):
        #base_model = VGG16(weights='imagenet', include_top=False, input_shape = (224, 224, 3))
        #base_model.trainable=False

        image_model = Sequential()
        #image_model.add(base_model)
        #image_model.add(Flatten())
        image_model.add(Dense(EMBEDDING_DIM, input_dim=4096, activation='relu'))
        image_model.add(RepeatVector(self.max_cap_len))

        lang_model = Sequential()
        lang_model.add(Embedding(self.vocab_size, 256, input_length=self.max_cap_len))
        lang_model.add(LSTM(256, return_sequences=True))
        lang_model.add(TimeDistributed(Dense(EMBEDDING_DIM)))

        model = Sequential()
        model.add(Merge([image_model, lang_model], mode='concat'))
        model.add(LSTM(1000, return_sequences=False))
        model.add(Dense(self.vocab_size))
        model.add(Activation('softmax'))

        print "Model created!"

        if(ret_model==True):
            return model

        model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
        return model

    def get_word(self, index):
        return self.index_word[index]
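A rough usage sketch for this class (my own guess, in the same Python 2 / old-Keras style as the snippet above; it assumes encoded_images.p and the Flickr8k text files are in place):
# Hypothetical usage, not part of the snippet above.
cg = CaptionGenerator()
model = cg.create_model()
model.fit_generator(cg.data_generator(batch_size=32),
                    samples_per_epoch=cg.total_samples,
                    nb_epoch=10,
                    verbose=2)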
For more info, follow this link: https://github.com/arjun-kava/caption_generator

RuntimeError: expected type torch.cuda.FloatTensor but got torch.FloatTensor

I keep getting the error message below and cannot seem to pinpoint the tensor it refers to. Below you'll find the trainer.py and main.py modules. The model I am developing is a GAN trained on the CelebA dataset. I am running the code on a remote server, so I have spent a fair amount of time debugging the model.
This is the full error message:
Traceback (most recent call last):
File "main.py", line 52, in <module>
main(opt)
File "main.py", line 47, in main
trainer.train(train_loader)
File "/home/path/trainer.py", line 45, in train
d_loss_cls = F.binary_cross_entropy_with_logits(out_cls, label_org, size_average=False) / out_cls.size(0)
File "/home/path/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py", line 2077, in binary_cross_entropy_with_logits
return torch.binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction_enum)
RuntimeError: expected type torch.cuda.FloatTensor but got torch.FloatTensor
trainer.py
from tqdm import tqdm
import torch
import torch.nn.functional as F
from model import Discriminator, Generator
from tensorboardX import SummaryWriter

class Trainer():
    def __init__(self, opt):
        # Generator
        self.G = Generator(64, 5, 6)
        # Discriminator
        self.D = Discriminator(128, 64, 5, 6)
        # Generator optimizer
        self.g_optimizer = torch.optim.Adam(self.G.parameters(), opt.lr)
        self.d_optimizer = torch.optim.Adam(self.D.parameters(), opt.lr)
        self.opt = opt

        if self.opt.cuda:
            self.G = self.G.cuda()
            self.D = self.D.cuda()

    def train(self, data_loader):
        """Function to train the model
        """
        print('Training model')
        writer_d = SummaryWriter('runs/disc')  # discriminator writer
        writer_g = SummaryWriter('runs/gen')   # generator writer
        print('Start training...')
        for epoch in tqdm(range(self.opt.epochs)):
            for x_real, label_org in tqdm(data_loader):
                pass

                # Generate target domain labels randomly.
                rand_idx = torch.randperm(label_org.size(0))
                label_trg = label_org[rand_idx]

                c_org = label_org.clone()
                c_trg = label_org.clone()

                if self.opt.cuda:
                    x_real = x_real.cuda()        # Input images
                    c_org = c_org.cuda()          # Original domain labels
                    c_trg = c_trg.cuda()          # Target domain labels
                    label_org = label_org.cuda()  # Labels for computing classification loss
                    label_trg = label_trg.cuda()  # Labels for computing classification loss

                out_src, out_cls = self.D(x_real)
                d_loss_real = - torch.mean(out_src)
                d_loss_cls = F.binary_cross_entropy_with_logits(out_cls, label_org, size_average=False) / out_cls.size(0)

                # Compute loss with fake images
                x_fake = self.G(x_real, c_trg)
                out_src, out_cls = self.D(x_fake.detach())
                d_loss_fake = torch.mean(out_src)

                # Compute loss for gradient penalty
                alpha = torch.rand(x_real.size(0), 1, 1, 1).cuda()
                x_hat = (alpha * x_real.data + (1 - alpha) * x_fake.data).requires_grad_(True)
                out_src, _ = self.D(x_hat)

                # Backward and optimize
                d_loss = d_loss_real + d_loss_fake + d_loss_cls
                self.g_optimizer.zero_grad()
                self.d_optimizer.zero_grad()
                d_loss.backward()
                self.d_optimizer.step()

                if (i + 1) % 2 == 0:
                    # Original-to-target domain
                    x_fake = self.G(x_real, c_trg)
                    out_src, out_cls = self.D(x_fake)
                    g_loss_fake = - torch.mean(out_src)
                    g_loss_cls = F.binary_cross_entropy_with_logits(out_cls, label_trg, size_average=False) / out_cls.size(0)

                    # Target-to-original domain
                    x_reconst = self.G(x_fake, c_org)
                    g_loss_rec = torch.mean(torch.abs(x_real - x_reconst))

                    # Backward and optimize
                    g_loss = g_loss_fake + g_loss_rec
                    self.g_optimizer.zero_grad()
                    self.d_optimizer.zero_grad()
                    g_loss.backward()
                    self.g_optimizer.step()

            # write loss to tensorboard
            writer_d.add_scalar('data/loss', d_loss, epoch)
            writer_d.add_scalar('data/loss', g_loss, epoch)
        print('Finished Training')

    def test(self, data_loader):
        with torch.no_grad():
            for i, (x_real, c_org) in enumerate(data_loader):
                # Prepare input images and target domain labels.
                if self.opt.cuda:
                    x_real = x_real.cuda()

                # Translate images.
                x_fake_list = [x_real]
                for c_trg in c_trg_list:
                    x_fake_list.append(self.G(x_real, c_trg))
main.py
import argparse
import random

import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

from preprocess import pre_process
from celeb_dataset import CelebDataset
from trainer import Trainer

# Setting up the argument parser
parser = argparse.ArgumentParser()
parser.add_argument('--workers', type=int, help='number of data loading workers', default=4)
parser.add_argument('--batchSize', type=int, default=8, help='input batch size')
parser.add_argument('--epochs', type=int, default=20, help='number of epochs to train')
parser.add_argument('--lr', type=float, default=0.0002, help='learning rate')
parser.add_argument('--cuda', action='store_true', help='enables cuda')
parser.add_argument('--manualSeed', type=int, help='manual seed')
parser.add_argument('--dataset_path', type=str, default='./data/celeba', help='dataset path')

opt = parser.parse_args()
print(opt)

if opt.manualSeed is None:
    opt.manualSeed = random.randint(1, 10000)
print("Random Seed: ", opt.manualSeed)

def main(opt):
    # Setup the parameters for the training/testing
    params = {
        'batch_size': opt.batchSize,
        'shuffle': True,
        'num_workers': opt.workers
    }

    # preprocess and setup dataset and dataloader
    processed_data = pre_process(opt.dataset_path)
    train_dataset = CelebDataset(processed_data[:-2000])
    test_dataset = CelebDataset(processed_data[2000:])
    train_loader = DataLoader(train_dataset, **params)
    test_loader = DataLoader(test_dataset, **params)

    trainer = Trainer(opt)
    trainer.train(train_loader)
    trainer.test(test_loader)

if __name__ == "__main__":
    main(opt)
You are getting that error because one of out_cls, label_org is not on the GPU.
Where does your code enact the parser.add_argument('--cuda', action='store_true', help='enables cuda') option?
Perhaps something like:
trainer = Trainer(opt)
if opt.cuda:
trainer = trainer.cuda()
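A more general, device-agnostic pattern that avoids this class of error (a sketch of the idea, not a patch for the exact trainer above): pick one torch.device and move both the model and every batch tensor onto it before computing the loss.
import torch
import torch.nn as nn
import torch.nn.functional as F

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = nn.Linear(10, 5).to(device)                      # stand-in for the Discriminator
x = torch.randn(8, 10).to(device)                        # the batch follows the model's device
labels = torch.randint(0, 2, (8, 5)).float().to(device)  # so do the targets

out = model(x)
loss = F.binary_cross_entropy_with_logits(out, labels)   # no CPU/GPU mismatch
print(loss.item())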

What should the generator passed to predict_generator() return?

I am calling Keras predict_generator() like:
bottleneck_features_train = model.predict_generator(train_gen, len(telemetry))
where train_gen() is defined like
def train_gen():
    # ...
    yield (X, y)
and X is a numpy array with shape (48, 299, 299, 3), y is a numpy array with shape (48,)
I get the error below. What should I do instead?
Otherwise, a link to a working example would help. The only examples I have found are for Keras 1 or use ImageDataGenerator.flow().
I am running Keras 2.0.2.
Here is the error:
Traceback (most recent call last):
File "/home/fanta/workspace/CarND-Behavioral-Cloning-P3/cache.py", line 143, in <module>
tf.app.run()
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/platform/app.py", line 44, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough))
File "/home/fanta/workspace/CarND-Behavioral-Cloning-P3/cache.py", line 138, in main
bottleneck_features_train = model.predict_generator(train_gen, len(telemetry))
File "/usr/local/lib/python3.5/dist-packages/keras/legacy/interfaces.py", line 88, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 2094, in predict_generator
outs = self.predict_on_batch(x)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 1677, in predict_on_batch
self._feed_input_shapes)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 100, in _standardize_input_data
'Found: array with shape ' + str(data.shape))
ValueError: The model expects 0 input arrays, but only received one array. Found: array with shape (48, 299, 299, 3)
Process finished with exit code 1
===== UPDATE =====
The issue is not related to the generator. Below is a short program that reproduces it. Note that if you switch the network from inception to vgg, it works fine.
from keras.applications.inception_v3 import InceptionV3
from keras.applications.vgg16 import VGG16
from keras.layers import Input, AveragePooling2D
from keras.models import Model
from keras.datasets import cifar10
from scipy.misc import imresize
import pickle
import tensorflow as tf
import keras.backend as K
import numpy as np

network = 'inception'  # Must be 'inception' or 'vgg'
dataset = 'cifar10'
batch_size = 64

if network == 'vgg':
    size = (224, 224)
elif network == 'inception':
    size = (299, 299)
else:
    assert False, "network must be either 'inception' or 'vgg'"

def create_model():
    input_tensor = Input(shape=(size[0], size[1], 3))
    if network == 'inception':
        model = InceptionV3(input_tensor=input_tensor, include_top=False)
        x = model.output
        x = AveragePooling2D((8, 8), strides=(8, 8))(x)
        model = Model(model.input, x)
    elif network == 'vgg':
        model = VGG16(input_tensor=input_tensor, include_top=False)
        x = model.output
        x = AveragePooling2D((7, 7))(x)
        model = Model(model.input, x)
    else:
        assert False
    return model

def main():
    # Download and load cifar10 dataset
    (X_train, y_train), (_, _) = cifar10.load_data()

    # Reduce the dataset to the first 1000 entries, to save memory and computation time
    X_train = X_train[0:1000]
    y_train = y_train[0:1000]

    # Resize dataset images to comply with expected input image size
    X_train = [imresize(image, size) for image in X_train]
    X_train = np.array(X_train)

    # File name where to save bottlenecked features
    train_output_file = "{}_{}_{}.p".format(network, dataset, 'bottleneck_features_train')
    print("Saving to", train_output_file)

    with tf.Session() as sess:
        K.set_session(sess)
        K.set_learning_phase(1)
        model = create_model()
        # We skip pre-processing and bottleneck the features
        bottleneck_features_train = model.predict(X_train, batch_size=batch_size, verbose=1)
        data = {'features': bottleneck_features_train, 'labels': y_train}
        pickle.dump(data, open(train_output_file, 'wb'))

if __name__ == '__main__':
    main()
At prediction time, your generator should only yield the inputs, not the targets: only X, not y.
Does that help?
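In other words, a minimal sketch of a prediction-time generator for the setup above (assuming the same (48, 299, 299, 3) batches described in the question, with zeros standing in for real data and model being the model built in the question):
import numpy as np

def predict_gen():
    # Yield only the inputs; there are no targets to pass at prediction time.
    while True:
        X = np.zeros((48, 299, 299, 3))  # stand-in for a real batch of images
        yield X

bottleneck_features_train = model.predict_generator(predict_gen(), steps=10)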
