I am trying to train on images and predict the text in them. When I trained on all the images together I got an array error, so now I am training on one letter image at a time, but I am still getting an error. The image generator file is included; it creates the images and feeds them into fit_generator.
Error:
Using TensorFlow backend.
WARNING: Logging before flag parsing goes to stderr.
W0826 09:18:45.040408 3428 deprecation_wrapper.py:119] From C:\Users\workspace\test\venv\lib\site-packages\keras\backend\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.
W0826 09:18:45.056031 3428 deprecation_wrapper.py:119] From C:\Users\workspace\test\venv\lib\site-packages\keras\backend\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.
W0826 09:18:45.071652 3428 deprecation_wrapper.py:119] From C:\Users\workspace\test\venv\lib\site-packages\keras\backend\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.
Traceback (most recent call last):
File "C:/Users/workspace/test/killme.py", line 22, in <module>
o2 = Reshape((len(string.ascii_uppercase), ), name="symbol_{}".format(i+1))(o)
File "C:\Users\workspace\test\venv\lib\site-packages\keras\engine\base_layer.py", line 474, in __call__
output_shape = self.compute_output_shape(input_shape)
File "C:\Users\workspace\test\venv\lib\site-packages\keras\layers\core.py", line 398, in compute_output_shape
input_shape[1:], self.target_shape)
File "C:\Users\workspace\test\venv\lib\site-packages\keras\layers\core.py", line 386, in _fix_unknown_dimension
raise ValueError(msg)
ValueError: total size of new array must be unchanged
Code:
from keras.models import Sequential, Input, Model
from keras.layers import Dense, Reshape
from keras.utils import to_categorical
from keras.layers.convolutional import Conv2D # to add convolutional layers
from keras.layers.convolutional import MaxPooling2D # to add pooling layers
from keras.layers import Flatten # to flatten data for fully connected layers
import string
from generators import ImageGenerator, BasicGenerator
from numpy import reshape
height=20
width=200
font_size=20
i1=Input(shape=(height, width, 1))
character_count=int(width / font_size)
outputs=[]
for i in range(character_count):
    o = Dense(len(string.ascii_uppercase), activation='relu')(i1)
    o2 = Reshape((len(string.ascii_uppercase), ), name="symbol_{}".format(i+1))(o)
    outputs.append(o2)
string_model = Model(inputs=i1, outputs=outputs)
string_model.layers[2].layer.trainable = False
generator = ImageGenerator(height, width, font_size, character_count)
string_model.compile(loss="categorical_crossentropy", optimizer="rmsprop", metrics=["categorical_accuracy"])
string_model.summary()
string_model.fit_generator(generator,epochs=10)
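The ValueError itself comes from the Reshape layer: Dense is applied to the full (20, 200, 1) input, so each output has shape (20, 200, 26), which cannot be reshaped to (26,). Below is a minimal sketch of one way to make the shapes agree (a sketch, assuming each of the 10 characters is classified independently from the flattened image; softmax is substituted for relu on the classification outputs):

from keras.layers import Input, Dense, Flatten
from keras.models import Model
import string

height, width, font_size = 20, 200, 20
character_count = width // font_size  # 10 characters

i1 = Input(shape=(height, width, 1))
flat = Flatten()(i1)  # (batch, 20*200*1): each Dense now sees a flat vector
outputs = [Dense(len(string.ascii_uppercase), activation='softmax',
                 name="symbol_{}".format(i + 1))(flat)
           for i in range(character_count)]  # one 26-way output per character
string_model = Model(inputs=i1, outputs=outputs)
string_model.compile(loss="categorical_crossentropy", optimizer="rmsprop",
                     metrics=["categorical_accuracy"])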
You have to preprocess the text data instead of feeding it straight into the output Dense layers; converting the text into a vocabulary is a better idea. Create a CaptionGenerator to keep it simple, as follows.
from vgg16 import VGG16
from keras.applications import inception_v3
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import LSTM, Embedding, TimeDistributed, Dense, RepeatVector, Merge, Activation, Flatten
from keras.preprocessing import image, sequence
from keras.callbacks import ModelCheckpoint
import cPickle as pickle
EMBEDDING_DIM = 128
class CaptionGenerator():

    def __init__(self):
        self.max_cap_len = None
        self.vocab_size = None
        self.index_word = None
        self.word_index = None
        self.total_samples = None
        self.encoded_images = pickle.load(open("encoded_images.p", "rb"))
        self.variable_initializer()

    def variable_initializer(self):
        df = pd.read_csv('Flickr8k_text/flickr_8k_train_dataset.txt', delimiter='\t')
        nb_samples = df.shape[0]
        iter = df.iterrows()
        caps = []
        for i in range(nb_samples):
            x = iter.next()
            caps.append(x[1][1])
        self.total_samples = 0
        for text in caps:
            self.total_samples += len(text.split()) - 1
        print "Total samples : " + str(self.total_samples)
        words = [txt.split() for txt in caps]
        unique = []
        for word in words:
            unique.extend(word)
        unique = list(set(unique))
        self.vocab_size = len(unique)
        self.word_index = {}
        self.index_word = {}
        for i, word in enumerate(unique):
            self.word_index[word] = i
            self.index_word[i] = word
        max_len = 0
        for caption in caps:
            if len(caption.split()) > max_len:
                max_len = len(caption.split())
        self.max_cap_len = max_len
        print "Vocabulary size: " + str(self.vocab_size)
        print "Maximum caption length: " + str(self.max_cap_len)
        print "Variables initialization done!"

    def data_generator(self, batch_size=32):
        partial_caps = []
        next_words = []
        images = []
        print "Generating data..."
        gen_count = 0
        df = pd.read_csv('Flickr8k_text/flickr_8k_train_dataset.txt', delimiter='\t')
        nb_samples = df.shape[0]
        iter = df.iterrows()
        caps = []
        imgs = []
        for i in range(nb_samples):
            x = iter.next()
            caps.append(x[1][1])
            imgs.append(x[1][0])
        total_count = 0
        while 1:
            image_counter = -1
            for text in caps:
                image_counter += 1
                current_image = self.encoded_images[imgs[image_counter]]
                for i in range(len(text.split()) - 1):
                    total_count += 1
                    partial = [self.word_index[txt] for txt in text.split()[:i+1]]
                    partial_caps.append(partial)
                    next = np.zeros(self.vocab_size)
                    next[self.word_index[text.split()[i+1]]] = 1
                    next_words.append(next)
                    images.append(current_image)
                    if total_count >= batch_size:
                        next_words = np.asarray(next_words)
                        images = np.asarray(images)
                        partial_caps = sequence.pad_sequences(partial_caps, maxlen=self.max_cap_len, padding='post')
                        total_count = 0
                        gen_count += 1
                        print "yielding count: " + str(gen_count)
                        yield [[images, partial_caps], next_words]
                        partial_caps = []
                        next_words = []
                        images = []

    def load_image(self, path):
        img = image.load_img(path, target_size=(224, 224))
        x = image.img_to_array(img)
        return np.asarray(x)

    def create_model(self, ret_model=False):
        # base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
        # base_model.trainable = False
        image_model = Sequential()
        # image_model.add(base_model)
        # image_model.add(Flatten())
        image_model.add(Dense(EMBEDDING_DIM, input_dim=4096, activation='relu'))
        image_model.add(RepeatVector(self.max_cap_len))

        lang_model = Sequential()
        lang_model.add(Embedding(self.vocab_size, 256, input_length=self.max_cap_len))
        lang_model.add(LSTM(256, return_sequences=True))
        lang_model.add(TimeDistributed(Dense(EMBEDDING_DIM)))

        model = Sequential()
        model.add(Merge([image_model, lang_model], mode='concat'))
        model.add(LSTM(1000, return_sequences=False))
        model.add(Dense(self.vocab_size))
        model.add(Activation('softmax'))

        print "Model created!"

        if ret_model == True:
            return model

        model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
        return model

    def get_word(self, index):
        return self.index_word[index]
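A hypothetical usage sketch (Python 2 / Keras 1, matching the code above, and assuming encoded_images.p and the Flickr8k text files are in place):

cg = CaptionGenerator()
model = cg.create_model()
model.fit_generator(cg.data_generator(batch_size=32),
                    samples_per_epoch=cg.total_samples,
                    nb_epoch=10)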
For more info, follow this link: https://github.com/arjun-kava/caption_generator
Related
I'm trying to run my model once to see if it's working. I've searched the error, and the suggested answer was to instantiate the model once; I already did that. However, even after instantiating the model, it gives an error when I request access to the model parameters. What's the problem?
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from IPython import display
display.set_matplotlib_formats('svg')
data = np.array([[1,1]])
label = np.array([2])
for i in range(-50,51,1):
    data = np.append(data, [[i,i]], axis=0)
    label = np.append(label, [i+i])
# convert to tensor
T_data = torch.tensor(data).float()
T_label = torch.tensor(label).long()
# split data
train_data, test_data, train_label, test_label = train_test_split(T_data, T_label, test_size= .2)
# convert into Pytorch dataset
train_data = TensorDataset(train_data, train_label)
test_data = TensorDataset(test_data, test_label)
# translate into dataloader
batchsize = 32
train_loader = DataLoader(train_data, batch_size= batchsize, shuffle =True, drop_last=True)
test_loader = DataLoader(test_data, batch_size=test_data.tensors[0].shape[0])
class AddNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.input = nn.Linear(2,16)
        ## hidden layer
        self.fc1 = nn.Linear(16,32)
        self.fc2 = nn.Linear(32,1)
        # output layer
        self.output = nn.Linear(1,1)

    # forward pass
    def forward(self, x):
        x = F.relu(self.input(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.output(x)
net =AddNN()
lossfun = nn.MSELoss()
optimizer = torch.optim.Adam(AddNN.parameters(), lr = .05)
It returns TypeError: parameters() missing 1 required positional argument: 'self'.
What does it mean to have a missing required positional argument here?
After instantiating the model, you should call .parameters() on that model object, not on the model class, when you pass it to Adam:
net = AddNN()
lossfun = nn.MSELoss()
optimizer = torch.optim.Adam(net.parameters(), lr = .05)
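From there, a minimal smoke test of one training step (a sketch using the objects defined above; note that MSELoss wants float targets shaped like the model's (batch, 1) output):

for X, y in train_loader:
    yHat = net(X)                                 # forward pass, shape (batch, 1)
    loss = lossfun(yHat, y.float().unsqueeze(1))  # match dtype and shape for MSE
    optimizer.zero_grad()                         # clear stale gradients
    loss.backward()                               # backpropagate
    optimizer.step()                              # update parameters
    break                                         # one batch is enough to verify it runs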
The dataset I am using is the standard chest X-ray dataset: https://www.kaggle.com/datasets/paultimothymooney/chest-xray-pneumonia. I have been getting this error (tuple index out of range) while fitting the CNN model. Is there a way to circumvent this issue? I suppose the "validation_data" argument needs to be adjusted in some way.
import os
import glob
import cv2
import numpy as np
import pandas as pd
from PIL import Image
import tensorflow as tf
import random
#from pathlib import path
import pathlib2 as pathlib
from pathlib2 import Path
#from keras.models import sequential, Model, load_model
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten, Activation
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.callbacks import Callback, EarlyStopping
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import confusion_matrix
from tensorflow.keras import backend as K
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image
%matplotlib inline
import shutup; shutup.please()
# DATA PATH #
print (os.listdir("C:/Users/Syd_R/OneDrive/Desktop/Peeumonia_data/archive/chest_xray/chest_xray/"))
data_dir = Path("C:/Users/Syd_R/OneDrive/Desktop/Peeumonia_data/archive/chest_xray/chest_xray/")
train_dir = data_dir/'train'
val_dir = data_dir/'val'
test_dir = data_dir/'test'
# LOAD TRAINING DATA TO DATAFRAME #
def load_train():
    normal_cases_dir = train_dir/'NORMAL'
    pneumonia_cases_dir = train_dir/'PNEUMONIA'

    # list of all images
    normal_cases = normal_cases_dir.glob('*.jpeg')
    pneumonia_cases = pneumonia_cases_dir.glob('*.jpeg')

    train_data = []
    train_label = []
    for img in normal_cases:
        train_data.append(img)
        train_label.append('NORMAL')
    for img in pneumonia_cases:
        train_data.append(img)
        train_label.append('PNEUMONIA')
    df = pd.DataFrame(train_data)
    df.columns = ['images']
    df['labels'] = train_label
    df = df.sample(frac=1).reset_index(drop=True)
    return df
train_data = load_train()
train_data.shape
# VISUALIZE THE AMOUNT OF TRAINING DATA WITH LABELS #
plt.bar(train_data['labels'].value_counts().index, train_data['labels'].value_counts().values)
plt.show()
# VISUALIZE THE TRAINING IMAGE DATA BY RANDOM SAMPLING #
plt.figure(figsize=(10,5))
for i in range(10):
    ax = plt.subplot(2,5,i+1)
    num = random.randint(0, 5000+i)
    im = train_data.loc[num].at['images']
    im1 = train_data.loc[num].at['labels']
    img = cv2.imread(str(im))
    img = cv2.resize(img, (224,224))
    plt.imshow(img)
    plt.title(im1)
    plt.axis("off")
    print(num)
# DATA PRE-PROCESSING #
def prepare_and_load(isval=True):
    if isval==True:
        normal_dir = val_dir/'NORMAL'
        pneumonia_dir = val_dir/'PNEUMONIA'
    else:
        normal_dir = test_dir/'NORMAL'
        pneumonia_dir = test_dir/'PNEUMONIA'
    normal_cases = normal_dir.glob('*.jpeg')
    pneumonia_cases = pneumonia_dir.glob('*.jpeg')
    data, labels = ([] for x in range(2))

    def prepare(case):
        for img in case:
            img = cv2.imread(str(img))
            img = cv2.resize(img, (224,224))
            if img.shape[2] == 1:
                img = np.dstack([img, img, img])
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = img.astype(np.float32)/255
            if case == normal_cases:
                label = to_categorical(0, num_classes=2)
            else:
                label = to_categorical(1, num_classes=2)
            data.append(img)
            labels.append(label)
        return data, labels

    prepare(normal_cases)
    d, l = prepare(pneumonia_cases)
    d = np.array(d)
    l = np.array(1)
    return d, l
val_data,val_labels = prepare_and_load(isval=True)
test_data,test_labels = prepare_and_load(isval=False)
print('Number of test images -->', len(test_data))
print('Number of validation images -->', len(val_data))
# DEFINE A FUNCTION TO GENERATE BATCHES FROM TRAINING IMAGES #
def data_gen(data, batch_size):
    # Get total number of samples in the data
    n = len(data)
    steps = n//batch_size
    # Define two numpy arrays for containing batch data and labels
    batch_data = np.zeros((batch_size, 224, 224, 3), dtype=np.float32)
    batch_labels = np.zeros((batch_size, 2), dtype=np.float32)
    # Get a numpy array of all the indices of the input data
    indices = np.arange(n)
    # Initialize a counter
    i = 0
    while True:
        np.random.shuffle(indices)
        # Get the next batch
        count = 0
        next_batch = indices[(i*batch_size):(i+1)*batch_size]
        for j, idx in enumerate(next_batch):
            img_name = data.iloc[idx]['images']
            label = data.iloc[idx]['images']
            if label == 'NORMAL':
                label = 0
            else:
                label = 1
            # one hot encoding
            encoded_label = to_categorical(label, num_classes=2)
            # read the image and resize
            img = cv2.imread(str(img_name))
            img = cv2.resize(img, (224,224))
            # check if it's grayscale
            if img.shape[2] == 1:
                img = np.dstack([img, img, img])
            # cv2 reads in BGR mode by default
            orig_imag = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # normalize the image pixels
            orig_img = img.astype(np.float32)/255
            batch_data[count] = orig_img
            batch_labels[count] = encoded_label
            count += 1
            if count == batch_size-1:
                break
        i += 1
        yield batch_data, batch_labels
        if i >= steps:
            i = 0
# DEFINE THE CNN MODEL #
model = Sequential()
model.add(Conv2D(32, (3,3), input_shape=(224, 224, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(32, (3,3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(64, (3,3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten()) # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(2))
model.add(Activation('softmax'))
# DEFINE PARAMETERS FOR THE CNN MODEL #
batch_size = 64
nb_epochs = 3
# Get a train data generator
train_data_gen = data_gen(data= train_data, batch_size=batch_size)
# DEFINE THE NUMBER OF TRAINING STEPS #
nb_train_steps = train_data.shape[0]//batch_size
print("Number of training and validation steps: {} and {}".format(nb_train_steps, len(val_data)))
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
# FIT THE MODEL #
history = model.fit_generator(train_data_gen,
                              epochs=nb_epochs,
                              steps_per_epoch=nb_train_steps,
                              validation_data=(val_data, val_labels))
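Two details in the code above look suspect and would each produce shape problems of this kind: prepare_and_load() returns l = np.array(1), a zero-dimensional array, so validation_data receives labels with no batch axis, and data_gen() reads the label from the 'images' column. A sketch of the presumable intent, not a confirmed fix:

d = np.array(d)
l = np.array(l)  # wrap the accumulated label list, not the literal 1
return d, l

and, inside data_gen():

label = data.iloc[idx]['labels']  # read from the 'labels' column, not 'images'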
I use tf.image.resize_bilinear in a segmentation network, but it seems this function is not supported by the multi-GPU model. The following code shows the simplified situation (it can be run directly):
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '0, 1'
from keras.backend.tensorflow_backend import set_session
from keras import backend as K
from keras.utils import multi_gpu_model
from keras.applications.mobilenet_v2 import preprocess_input
import tensorflow as tf
import numpy as np
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)
set_session(sess)
batch = 4
num_classes = 2
size = 128
K.clear_session()
def _GetRandomImg():
    shape = (batch, size, size, 3)
    img = np.random.randint(low=0, high=256, size=shape)
    return preprocess_input(img)

def _GetRandomLabel():
    shape = (batch, size, size, num_classes)
    label = np.random.randint(low=0, high=num_classes, size=shape)
    label = np.exp(label)
    label = label / np.sum(label, axis=-1, keepdims=True)
    return label

def DataGen():
    while True:
        x = _GetRandomImg()
        y = _GetRandomLabel()
        yield x, y
from keras.layers import Input, Conv2D, Lambda
from keras import Model
def GetModel():
    inputs = Input(shape=(size, size, 3))
    f = lambda x: tf.image.resize_bilinear(inputs, (size, size), align_corners=True)
    x = Lambda(f, output_shape=(size, size, 3))(inputs)
    outputs = Conv2D(num_classes, kernel_size=3, padding='same')(x)
    model = Model(inputs=[inputs], outputs=[outputs])
    return model
gen = DataGen()
with tf.device('/cpu:0'):
    model = GetModel()
model = multi_gpu_model(model, gpus=2)
model.compile(loss='categorical_crossentropy', optimizer='sgd')
result = model.fit_generator(gen, epochs=2, verbose = 1, steps_per_epoch = 100)
It works fine in a single-GPU environment, but in a multi-GPU environment I get the following error:
InvalidArgumentError: Incompatible shapes: [3,128,128,2] vs. [6,128,128,2]
[[{{node loss/conv2d_1_loss/categorical_crossentropy/mul}}]]
[[{{node training/SGD/gradients/conv2d_1_1/concat_grad/Slice_1}}]]
The problem is solved. If a TensorFlow function is used inside a custom Lambda layer, you need to call set_shape() explicitly:
def MyResizeBilinear(x, height, width):
    rows, cols = 1, 2
    original_shape = K.int_shape(x)
    new_shape = tf.constant(np.array([height, width], dtype='int32'))
    x = tf.image.resize_bilinear(x, new_shape, align_corners=True)
    new_height = None if original_shape[rows] is None else height
    new_width = None if original_shape[cols] is None else width
    output_shape = (None, new_height, new_width, None)
    x.set_shape(output_shape)
    return x
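For completeness, here is how this helper might replace the inline lambda in GetModel() from the question (a sketch, reusing the same names):

def GetModel():
    inputs = Input(shape=(size, size, 3))
    # the helper pins a fully defined static shape onto the resized tensor
    x = Lambda(lambda t: MyResizeBilinear(t, size, size))(inputs)
    outputs = Conv2D(num_classes, kernel_size=3, padding='same')(x)
    return Model(inputs=[inputs], outputs=[outputs])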
Very much similar to this question, except I am wondering how I could take my pre-trained model, which had an input size of (128, 128, 3) images, keep its weights, and use it to predict on images of varying input sizes.
As it is, I get the following when I try to input an image of arbitrary size:
Traceback (most recent call last):
File "arg_test.py", line 127, in <module>
predict(args)
File "arg_test.py", line 71, in predict
predictions.append(model.predict(input_img)[0]) # returns a list of lists, one for each image in the batch
File "C:\Users\payne\Anaconda3\envs\ml-gpu\lib\site-packages\keras\engine\training.py", line 1147, in predict
x, _, _ = self._standardize_user_data(x)
File "C:\Users\payne\Anaconda3\envs\ml-gpu\lib\site-packages\keras\engine\training.py", line 749, in _standardize_user_data
exception_prefix='input')
File "C:\Users\payne\Anaconda3\envs\ml-gpu\lib\site-packages\keras\engine\training_utils.py", line 137, in standardize_input_data
str(data_shape))
ValueError: Error when checking input: expected input_1 to have shape (128, 128, 3) but got array with shape (2736, 3648, 3)
Here is my model:
def setUpModel(x_train, y_train):
    filters = 256
    kernel_size = 3
    strides = 1

    # Head module
    input = Input(shape=(img_height//scale_fact, img_width//scale_fact, img_depth))
    conv0 = Conv2D(filters, kernel_size, strides=strides, padding='same')(input)

    # Body module
    res = Conv2D(filters, kernel_size, strides=strides, padding='same')(conv0)
    act = ReLU()(res)
    res = Conv2D(filters, kernel_size, strides=strides, padding='same')(act)
    res_rec = Add()([conv0, res])

    for i in range(res_blocks):
        res1 = Conv2D(filters, kernel_size, strides=strides, padding='same')(res_rec)
        act = ReLU()(res1)
        res2 = Conv2D(filters, kernel_size, strides=strides, padding='same')(act)
        res_rec = Add()([res_rec, res2])

    conv = Conv2D(filters, kernel_size, strides=strides, padding='same')(res_rec)
    add = Add()([conv0, conv])

    # Tail module
    conv = Conv2D(filters, kernel_size, strides=strides, padding='same')(add)
    act = ReLU()(conv)
    up = UpSampling2D(size=scale_fact if scale_fact != 4 else 2)(act)  # TODO: try "Conv2DTranspose"
    # mul = Multiply([np.zeros((img_width,img_height,img_depth)).fill(0.1), up])(up)

    # When it's a 4X factor, we want the upscale split in two procedures
    if scale_fact == 4:
        conv = Conv2D(filters, kernel_size, strides=strides, padding='same')(up)
        act = ReLU()(conv)
        up = UpSampling2D(size=2)(act)  # TODO: try "Conv2DTranspose"

    output = Conv2D(filters=3,
                    kernel_size=1,
                    strides=1,
                    padding='same')(up)

    model = Model(inputs=input, outputs=output)
This was only the architecture of the model used during training, but the training itself is done: I have my model.h5 file, obtained through model.save().
Here is how I get predictions:
import argparse
import numpy as np
import matplotlib.pyplot as plt
import skimage.io
from keras.models import load_model
from keras.optimizers import Adam
from keras.optimizers import Adadelta
from constants import save_dir
from constants import model_name
from constants import crops_p_img
from constants import tests_path
from constants import img_height
from constants import img_width
from constants import scale_fact
from utils import float_im
from utils import crop_center
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('-a', '--amount', type=int, default=crops_p_img,
                    help='how many (cropped to 128x128) samples to predict from within the image')
parser.add_argument('image', type=str,
                    help='image name (example: "bird.png") that must be inside the "./input/" folder')
parser.add_argument('-m', '--model', type=str, default=model_name,
                    help='model name (in the "./save/" folder), followed by ".h5"')
parser.add_argument('-r', '--random', action="store_true",  # if var is in args, set to TRUE, else, set to FALSE
                    help='flag that will select a random 128x128 area in the input image instead of the center')
parser.add_argument('-f', '--full', action="store_true",  # if var is in args, set to TRUE, else, set to FALSE
                    help='(WIP) flag that will get the whole image to be processed by the network')
args = parser.parse_args()
def predict(args):
    model = load_model(save_dir + '/' + args.model)

    # Setting up the proper optimizer TODO: needed?
    if args.model == "my_full_model.h5":
        optimizer = Adadelta(lr=1.0,
                             rho=0.95,
                             epsilon=None,
                             decay=0.0)
    else:
        optimizer = Adam(lr=0.001,
                         beta_1=0.9,
                         beta_2=0.999,
                         epsilon=None,
                         decay=0.0,
                         amsgrad=False)
    model.compile(optimizer=optimizer,
                  loss='mean_squared_error')

    image = skimage.io.imread(tests_path + args.image)
    if image.shape[0] == 128:
        args.amount = 1

    predictions = []
    images = []

    # TODO: integrate FULL IMAGE
    # if args.full:
    #     images.append(image)
    #     # Hack because GPU can only handle one image at a time
    #     input_img = (np.expand_dims(images[0], 0))       # Add the image to a batch where it's the only member
    #     predictions.append(model.predict(input_img)[0])  # returns a list of lists, one for each image in the batch
    # else:
    if True:
        for i in range(args.amount):
            # Cropping to fit input size
            if (args.random or args.amount > 1) and image.shape[0] > 128:
                images.append(random_crop(image))
            else:
                images.append(crop_center(image, img_width//scale_fact, img_height//scale_fact))
            input_img = (np.expand_dims(images[i], 0))
            predictions.append(model.predict(input_img)[0])

    for i in range(len(predictions)):
        show_pred_output(images[i], predictions[i])
# adapted from: https://stackoverflow.com/a/52463034/9768291
def random_crop(img):
    crop_h, crop_w = img_width//scale_fact, img_height//scale_fact
    print("Shape of input image to crop:", img.shape[0], img.shape[1])
    if (img.shape[0] >= crop_h) and (img.shape[1] >= crop_w):
        # Cropping a random part of the image
        rand_h = np.random.randint(0, img.shape[0]-crop_h)
        rand_w = np.random.randint(0, img.shape[1]-crop_w)
        print("Random position for the crop:", rand_h, rand_w)
        tmp_img = img[rand_h:rand_h+crop_h, rand_w:rand_w+crop_w]
        new_img = float_im(tmp_img)  # From [0,255] to [0.,1.]
    else:
        return img
    return new_img
def show_pred_output(input, pred):
    plt.figure(figsize=(20, 20))
    plt.suptitle("Results")

    plt.subplot(1, 2, 1)
    plt.title("Input: 128x128")
    plt.imshow(input, cmap=plt.cm.binary).axes.get_xaxis().set_visible(False)

    plt.subplot(1, 2, 2)
    plt.title("Output: 512x512")
    plt.imshow(pred, cmap=plt.cm.binary).axes.get_xaxis().set_visible(False)

    plt.show()

if __name__ == '__main__':
    print(" - ", args)
    predict(args)
You should replace the Input line with:
input = Input(shape=(None, None, img_depth))
None in a shape means variable size. Since the model is all convolutions, it should work with images of any size.
After training your model with a specific input shape, you can save the trained weights with model.save_weights() and then assign those weights to a model that has an unknown input shape with model2.load_weights().
For example, I have trained a model with input shape (28, 28, 1):
model = keras.Sequential([
    keras.Input(shape=(28,28,1)),
    keras.layers.Conv2D(32, kernel_size=(3,3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2,2)),
    keras.layers.Conv2D(64, kernel_size=(3,3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2,2)),
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(10, activation='softmax')
])
After training, you can save the model weights with:
model.save_weights('model-weights')
Then define a model with an unknown input shape:
model2 = keras.Sequential([
    keras.Input(shape=(None,None,1)),
    keras.layers.Conv2D(32, kernel_size=(3,3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2,2)),
    keras.layers.Conv2D(64, kernel_size=(3,3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2,2)),
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(10, activation='softmax')
])
Then assign the saved weights with:
model2.load_weights('/content/model-weights')
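As a quick sanity check (hypothetical input sizes; the spatial dimensions differ but the output stays (1, 10) thanks to the global pooling):

import numpy as np
print(model2.predict(np.random.rand(1, 28, 28, 1)).shape)   # (1, 10)
print(model2.predict(np.random.rand(1, 64, 112, 1)).shape)  # (1, 10)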
Now you can predict with model2 without training it. For more details, please refer to this gist. Thank you!
I am calling Keras predict_generator() like:
bottleneck_features_train = model.predict_generator(train_gen, len(telemetry))
where train_gen() is defined like
def train_gen():
    # ...
    yield (X, y)
and X is a numpy array with shape (48, 299, 299, 3), y is a numpy array with shape (48,)
I get the error below. What should I do instead?
Otherwise, a link to a working example would help. The only examples I have found are for Keras 1 or use ImageDataGenerator.flow().
I am running Keras 2.0.2.
Here the error:
Traceback (most recent call last):
File "/home/fanta/workspace/CarND-Behavioral-Cloning-P3/cache.py", line 143, in <module>
tf.app.run()
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/platform/app.py", line 44, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough))
File "/home/fanta/workspace/CarND-Behavioral-Cloning-P3/cache.py", line 138, in main
bottleneck_features_train = model.predict_generator(train_gen, len(telemetry))
File "/usr/local/lib/python3.5/dist-packages/keras/legacy/interfaces.py", line 88, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 2094, in predict_generator
outs = self.predict_on_batch(x)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 1677, in predict_on_batch
self._feed_input_shapes)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 100, in _standardize_input_data
'Found: array with shape ' + str(data.shape))
ValueError: The model expects 0 input arrays, but only received one array. Found: array with shape (48, 299, 299, 3)
Process finished with exit code 1
===== UPDATE =====
The issue is not related to the generator. Below is a short program that reproduces it. Note that if you switch the network from inception to vgg, it works fine.
from keras.applications.inception_v3 import InceptionV3
from keras.applications.vgg16 import VGG16
from keras.layers import Input, AveragePooling2D
from keras.models import Model
from keras.datasets import cifar10
from scipy.misc import imresize
import pickle
import tensorflow as tf
import keras.backend as K
import numpy as np
network='inception' # Must be 'inception' or 'vgg'
dataset='cifar10'
batch_size=64
if network == 'vgg':
    size = (224, 224)
elif network == 'inception':
    size = (299, 299)
else:
    assert False, "network must be either 'inception' or 'vgg'"

def create_model():
    input_tensor = Input(shape=(size[0], size[1], 3))
    if network == 'inception':
        model = InceptionV3(input_tensor=input_tensor, include_top=False)
        x = model.output
        x = AveragePooling2D((8, 8), strides=(8, 8))(x)
        model = Model(model.input, x)
    elif network == 'vgg':
        model = VGG16(input_tensor=input_tensor, include_top=False)
        x = model.output
        x = AveragePooling2D((7, 7))(x)
        model = Model(model.input, x)
    else:
        assert False
    return model
def main():
    # Download and load cifar10 dataset
    (X_train, y_train), (_, _) = cifar10.load_data()

    # Reduce the dataset to the first 1000 entries, to save memory and computation time
    X_train = X_train[0:1000]
    y_train = y_train[0:1000]

    # Resize dataset images to comply with expected input image size
    X_train = [imresize(image, size) for image in X_train]
    X_train = np.array(X_train)

    # File name where to save bottlenecked features
    train_output_file = "{}_{}_{}.p".format(network, dataset, 'bottleneck_features_train')
    print("Saving to", train_output_file)

    with tf.Session() as sess:
        K.set_session(sess)
        K.set_learning_phase(1)
        model = create_model()
        # We skip pre-processing and bottleneck the features
        bottleneck_features_train = model.predict(X_train, batch_size=batch_size, verbose=1)
        data = {'features': bottleneck_features_train, 'labels': y_train}
        pickle.dump(data, open(train_output_file, 'wb'))

if __name__ == '__main__':
    main()
At the prediction step, your generator should only yield the inputs and not the targets: only the X, not the y.
Does that help?
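For reference, a minimal sketch of that change (mirroring the train_gen above):

def predict_gen():
    # ...
    yield X  # inputs only: no targets at prediction time

bottleneck_features_train = model.predict_generator(predict_gen(), len(telemetry))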