OSError: cannot identify image file <_io.BufferedReader - python-3.x

I am porting code to train a neural network. I wrote the code as part of an Udacity project and it worked fine in the Udacity environment.
Now I am porting the code to an Nvidia Jetson Nano running Ubuntu 18.04 and Python 3.6.8.
When iterating through the training data, a "._" prefix somehow sneaks into the file path just before the file name, and an error is raised.
When I run the file, I get following error message:
Traceback (most recent call last):
File "train_rev6.py", line 427, in <module>
main()
File "train_rev6.py", line 419, in main
train_model(in_args)
File "train_rev6.py", line 221, in train_model
for inputs, labels in trainloader:
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 560, in __next__
batch = self.collate_fn([self.dataset[i] for i in indices])
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 560, in <listcomp>
batch = self.collate_fn([self.dataset[i] for i in indices])
File "/usr/local/lib/python3.6/dist-packages/torchvision/datasets/folder.py", line 132, in __getitem__
sample = self.loader(path)
File "/usr/local/lib/python3.6/dist-packages/torchvision/datasets/folder.py", line 178, in default_loader
return pil_loader(path)
File "/usr/local/lib/python3.6/dist-packages/torchvision/datasets/folder.py", line 160, in pil_loader
img = Image.open(f)
File "/usr/local/lib/python3.6/dist-packages/PIL/Image.py", line 2705, in open
% (filename if filename else fp))
OSError: cannot identify image file <_io.BufferedReader name='/home/mme/Documents/001_UdacityFinalProjectFlowersRev2/flowers/train/40/._image_04589.jpg'>
I suspect the error is due to the "._" before the file name "image...", as this prefix is not part of the file name: when I run
sudo find / -name image_00824.jpg
I get the correct path:
/home/mme/Documents/001_UdacityFinalProjectFlowersRev2/flowers/train/81/image_00824.jpg
without "._" prior the file name.
My issue here seems the same as in
OSError: cannot identify image file
(Adjusting and running from PIL import Image;Image.open(open("path/to/file", 'rb')) as suggested in the answer does not issue an error message.)
The file path is given on the command line:
python3 train_rev6.py --file_path "/home/mme/Documents/001_UdacityFinalProjectFlowersRev2/flowers" --arch "vgg16" --epochs 5 --gpu "gpu" --running_loss True --valid_loss True --valid_accuracy True --test True
The code below shows the two relevant functions.
Any idea how I get rid of this "._"?
def load_data(in_args):
    """
    Build the datasets, dataloaders and label mapping used by the script.

    Steps:
    - Derive the train / valid / test directories from ``in_args.file_path``.
    - Define the transforms for the training, validation and test sets.
    - Load the datasets with ImageFolder, skipping every file whose name
      starts with "._". Such files (typically AppleDouble side-car files left
      behind when a folder is copied from macOS) carry an image extension but
      are not images, so PIL fails with "cannot identify image file".
    - Wrap the datasets in dataloaders.
    - Read the label mapping from "cat_to_name.json".

    The results are published as module-level globals (train_dataset,
    valid_dataset, test_dataset, trainloader, validloader, testloader and
    cat_to_name); the function itself returns None.
    """
    # Local import so this function does not depend on the file's import block.
    import os

    global train_dataset, valid_dataset, test_dataset
    global trainloader, validloader, testloader
    global cat_to_name

    # Specify directories for training, validation and test set.
    data_dir = in_args.file_path
    train_dir = os.path.join(data_dir, "train")
    valid_dir = os.path.join(data_dir, "valid")
    test_dir = os.path.join(data_dir, "test")

    # Passing is_valid_file to ImageFolder replaces its built-in extension
    # check, so the predicate has to test the extension itself as well.
    image_extensions = (".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm",
                        ".tif", ".tiff", ".webp")

    def is_real_image(path):
        """Accept files with an image extension, reject "._*" side-car files."""
        file_name = os.path.basename(path)
        if file_name.startswith("._"):
            return False
        return file_name.lower().endswith(image_extensions)

    # Means: [0.485, 0.456, 0.406]. Standard deviations [0.229, 0.224, 0.225].
    # Calculated by ImageNet images.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    # Training set: random rotation, random resized crop to 224 x 224 pixels,
    # random horizontal and vertical flip, transform to a tensor and normalize.
    train_transforms = transforms.Compose([
        transforms.RandomRotation(23),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])

    def build_eval_transforms():
        """Resize, center crop to 224 x 224 pixels, to tensor, normalize."""
        return transforms.Compose([
            transforms.Resize(255),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ])

    # Validation and test set share the same deterministic pipeline.
    valid_transforms = build_eval_transforms()
    test_transforms = build_eval_transforms()

    # Load the datasets with ImageFolder.
    train_dataset = datasets.ImageFolder(train_dir, transform=train_transforms,
                                         is_valid_file=is_real_image)
    valid_dataset = datasets.ImageFolder(valid_dir, transform=valid_transforms,
                                         is_valid_file=is_real_image)
    test_dataset = datasets.ImageFolder(test_dir, transform=test_transforms,
                                        is_valid_file=is_real_image)

    # Using the image datasets and the transforms, define the dataloaders.
    # Only the training data is shuffled.
    trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=64)
    testloader = torch.utils.data.DataLoader(test_dataset, batch_size=64)

    # Label mapping.
    with open("cat_to_name.json", "r") as f:
        cat_to_name = json.load(f)

    print("Done loading data...")
    return
def train_model(in_args):
"""
Train the model and record training loss, validation loss and validation accuracy.

Reads these attributes of in_args: epochs, test, running_loss, valid_loss
and valid_accuracy. Uses names that are not defined inside this function:
trainloader, validloader, model, criterion, optimizer and device.

Publishes epochs, list_running_loss, list_valid_loss and list_valid_accuracy
as globals. Returns None.

NOTE(review): the indentation of this copy was lost, so whether the
validation pass runs once per epoch or once per training batch cannot be
read off the code here -- confirm against the original file.
"""
# Number of epochs.
global epochs
epochs = in_args.epochs
# Set running_loss to 0
running_loss = 0
# Prepare lists that collect the losses and accuracies of every reporting pass.
global list_running_loss
global list_valid_loss
global list_valid_accuracy
list_running_loss, list_valid_loss, list_valid_accuracy = [], [], []
# If in testing mode, set up the loop counter used to return prematurely to main().
if in_args.test == True:
loop_counter = 0
# for loop to train model.
for epoch in range(epochs):
# for loop to iterate through training dataloader.
for inputs, labels in trainloader:
# If in testing mode, increase the loop counter and return to main() once it reaches 5.
if in_args.test == True:
loop_counter +=1
if loop_counter == 5:
return
# Move input and label tensors to the default device.
inputs, labels = inputs.to(device), labels.to(device)
# Set gradients to 0 to avoid accumulation
optimizer.zero_grad()
# Forward pass, back propagation, gradient descent and updating weights and bias.
# Forward pass through model; the output is treated as log-probabilities
# (torch.exp is applied to it in the validation pass).
log_ps = model.forward(inputs)
# Calculate loss of model output based on model prediction and labels.
loss = criterion(log_ps, labels)
# Back propagation of loss through model / gradient descent.
loss.backward()
# Update weights / gradient descent.
optimizer.step()
# Accumulate the training loss; it is reported as a sum (not divided by the number of batches).
running_loss += loss.item()
# Calculate loss for validation image set and accuracy for print out in terminal.
# Validation pass and print out the validation accuracy.
# Set loss of validation set and accuracy to 0.
valid_loss = 0
# test_loss = 0
valid_accuracy = 0
# test_accuracy = 0
# Set model to evaluation mode (presumably to turn off dropout -- depends on the model definition).
model.eval()
# Turn off gradients for validation, saves memory and computations.
with torch.no_grad():
# for loop to evaluate loss of validation image set and its accuracy.
for valid_inputs, valid_labels in validloader:
# Move input and label tensors to the default device.
valid_inputs, valid_labels = valid_inputs.to(device), valid_labels.to(device)
# Run validation image set through model.
valid_log_ps = model.forward(valid_inputs)
# Calculate loss for validation image set.
valid_batch_loss = criterion(valid_log_ps, valid_labels)
# Accumulate loss for validation image set.
valid_loss += valid_batch_loss.item()
# Calculate probabilities
valid_ps = torch.exp(valid_log_ps)
# Get the most likely class using the ps.topk method.
valid_top_k, valid_top_class = valid_ps.topk(1, dim=1)
# Check if the predicted classes match the labels.
valid_equals = valid_top_class == valid_labels.view(*valid_top_class.shape)
# Add this batch's fraction of correct predictions; divided by the number of batches below.
valid_accuracy += torch.mean(valid_equals.type(torch.FloatTensor)).item()
# Print out losses and accuracies
# Each of the three strings is empty unless the matching in_args flag equals True.
# Create string for running_loss.
str1 = ["Train loss: {:.3f} ".format(running_loss) if in_args.running_loss == True else ""]
str1 = "".join(str1)
# Create string for valid_loss (mean over validation batches).
str2 = ["Valid loss: {:.3f} ".format(valid_loss/len(validloader)) if in_args.valid_loss == True else ""]
str2 = "".join(str2)
# Create string for valid_accuracy (mean over validation batches).
str3 = ["Valid accuracy: {:.3f} ".format(valid_accuracy/len(validloader)) if in_args.valid_accuracy == True else ""]
str3 = "".join(str3)
# Print strings
print(f"{epoch+1}/{epochs} " + str1 + str2 + str3)
# Append current losses and accuracy to the global history lists.
list_running_loss.append(running_loss)
list_valid_loss.append(valid_loss/len(validloader))
list_valid_accuracy.append(valid_accuracy/len(validloader))
# Set running_loss to 0.
running_loss = 0
# Set model back to train mode.
model.train()
print("Done training model...")
return

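A colleague at work pointed out that on Linux, files whose names begin with a period are hidden files. So I selected "show hidden files" in the file explorer and there they were: hidden "._image_*.jpg" files sitting next to the real images (files with a "._" prefix are typically metadata side-car files created when a folder is copied from macOS). I deleted them, which resolved the issue (see commands below).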
Find and display all files beginning with "._" in all subfolders (display the selected files first to make sure these are the files you want to delete):
find test -name '._*' -print
Find and delete all files beginning with "._" in all subfolders:
find test -name '._*' -delete

Related

Pytorch freezes when checking dataloader

I am running this block of PyTorch code and it seems to run forever/freeze in my notebook. I suspect it has something to do with my dataloader, but I can't figure out what is wrong. I am running this in a GPU environment, and I have previously run a TensorFlow v2 Keras CNN model there without problems.
In addition I have also tried to do model.train() and it was also stuck at the first epoch.
Code I am running
import time
start_time = time.time()
for data, label in train_dataloader:
print(data.size())
print(label.size())
break
print("Time taken: ", time.time() - start_time)
The dataloader is implemented with these lines of code
train_dataset = ChestXrayDataset("dataset/CheXpert-v1.0-small/train/train", train_data, IMAGE_SIZE, True)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
These are the parameters
IMAGE_SIZE = 224 # Image size (224x224)
IMAGENET_MEAN = [0.485, 0.456, 0.406] # Mean of ImageNet dataset (used for normalization)
IMAGENET_STD = [0.229, 0.224, 0.225] # Std of ImageNet dataset (used for normalization)
BATCH_SIZE = 96
LEARNING_RATE = 0.001
LEARNING_RATE_SCHEDULE_FACTOR = 0.1 # Parameter used for reducing learning rate
LEARNING_RATE_SCHEDULE_PATIENCE = 5 # Parameter used for reducing learning rate
MAX_EPOCHS = 100 # Maximum number of training epochs
I have checked the dataloader and this is what I got
<torch.utils.data.dataloader.DataLoader at 0x1f96cd5f6a0>
The class for ChestXrayDataset is shown here
class ChestXrayDataset(Dataset):
    """Dataset that pairs image files with binary multi-label targets."""

    def __init__(self, folder_dir, dataframe, image_size, normalization):
        """
        Collect image paths and labels and build the image transformation.

        Parameters
        ----------
        folder_dir: str
            folder that contains all images; joined with each row's ``Path``
        dataframe: pandas.DataFrame
            one row per image; ``row.Path`` is the relative image path and the
            fields from position 5 onwards are read as labels
        image_size: int
            images are resized to (image_size, image_size)
        normalization: bool
            whether to append normalization with IMAGENET_MEAN / IMAGENET_STD
        """
        # Resizing and tensor conversion always run; normalization is optional.
        pipeline = [
            transforms.Resize((image_size, image_size)),
            transforms.ToTensor(),
        ]
        if normalization:
            pipeline.append(transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD))
        self.image_transformation = transforms.Compose(pipeline)

        self.image_paths = []   # List of image paths
        self.image_labels = []  # List of image labels
        for _, record in dataframe.iterrows():
            self.image_paths.append(os.path.join(folder_dir, record.Path))
            if len(record) < 14:
                # Rows with fewer than 14 fields get an all-zero label vector.
                targets = [0] * 14
            else:
                # A label is 1 only where the field equals 1; anything else is 0.
                targets = [1 if value == 1 else 0 for value in record[5:]]
            self.image_labels.append(targets)

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, index):
        """
        Load the image at ``index`` and return (image tensor, label tensor).
        """
        # Open the file and force three RGB channels.
        rgb_image = Image.open(self.image_paths[index]).convert("RGB")
        # TODO: Image augmentation code would be placed here
        # Resize and convert image to torch tensor
        image_tensor = self.image_transformation(rgb_image)
        label_tensor = torch.FloatTensor(self.image_labels[index])
        return image_tensor, label_tensor
Checking the length of dataframe.iterrows() and row[5:] would help.

Why concatenate cause shape error in Keras even the inputs are right?

I am new to Keras and I have a code for the model part:
# make inputs
self.input_samples = Input(shape=(self.input_shape, ))
self.input_labels = Input(shape=(self.nClass, ))
# Encoder for samples
self.E = self.encoder()(self.input_samples)
# Encoder for labels
self.E_LBLs = self.encoder4lbls()(self.input_labels)
# Decoder for reconstruction
self.D = self.decoder()(self.E)
# Task network
task_net = self.taskOut()
self.T = task_net(self.E)
self.T_LBLS = task_net(self.E_LBLs)
# define GAN for prior matching for samples and labels
self.A = self.adversarial() # This is the discriminator for latent code matching
print(type(self.E))
self.Adv = self.A(concatenate([self.E, self.E_LBLs], axis=0)) # logits for samples and labels
self.A.compile('Adam', loss='binary_crossentropy', metrics=['acc'])
# define MMD loss
# self.merge_embeds = concatenate([self.E, self.E_LBLs], axis=0, name='mmd')
model = Model([self.input_samples, self.input_labels], [self.E, self.E_LBLs, self.Adv])
When I try to output self.Adv using model.predict([inputs1, inputs2]), the concat operation in concatenate([self.E, self.E_LBLs], axis=0) always seems to fail.
The error message is:
res_list = model.predict([trainSamples, trainLabels])
File "/DB/rhome/xchen/anaconda2/envs/Conda_python3_5/lib/python3.5/site-packages/keras/engine/training.py", line 1835, in predict
verbose=verbose, steps=steps)
File "/DB/rhome/xchen/anaconda2/envs/Conda_python3_5/lib/python3.5/site-packages/keras/engine/training.py", line 1339, in _predict_loop
outs[i][batch_start:batch_end] = batch_out
ValueError: could not broadcast input array from shape (64,1) into shape (32,1)
I am sure that self.E and self.E_LBLs are right. And their shapes are [N1x2000] and [N2x2000] respectively.
Do you have any idea? I cannot solve it.
Thanks.

Implementing Batch for Image Segmentation

I wrote a Python 3.5 script for doing street segmentation. Since I'm new to image segmentation, I did not use the predefined dataloaders from PyTorch; instead I wrote them myself (for better understanding). Until now I have only used a batch size of 1. Now I want to generalize this to arbitrary batch sizes.
This is a snippet of my Dataloader:
def augment_data(batch_size):
    """
    Load, augment, split and batch the segmentation samples.

    Every input image is paired with the label image that has the same frame
    number, augmented with generate_augmented_images, converted to tensors
    (with a leading dimension added via unsqueeze_(0)) and assigned at random
    to the training set (draw <= 0.7) or the validation set.

    Returns a dict {"train": [...], "val": [...]} in which every entry is one
    batch: a list of up to ``batch_size`` distinct [input_tensor,
    label_tensor] pairs. The last batch of a split may be shorter. (The
    previous version filled each batch with ``batch_size`` copies of the same
    sample instead of grouping different samples.)

    Raises ValueError if batch_size is smaller than 1.
    """
    # Local import so this function does not depend on the file's import block.
    import os

    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # [...] defining some paths and data transformation (including ToTensor() function)

    def frame_number(path):
        """Frame number encoded in the file name, e.g. '17.png' -> 17."""
        # os.path.basename uses the platform's separators instead of a
        # hard-coded backslash, so this also works outside Windows.
        return int(os.path.basename(path).split('.')[0])

    # The images are named by numbers (frame numbers); this allows finding the
    # correct label image for a given input image.
    all_input_image_paths = {frame_number(path): path for path in glob.glob(input_dir + "*")}
    all_label_image_paths = {frame_number(path): path for path in glob.glob(label_dir + "*")}

    dataloader = {"train": [], "val": []}
    for key in all_input_image_paths:
        input_img = Image.open(all_input_image_paths[key])
        label_img = Image.open(all_label_image_paths[key])
        # The augmentation function crops input and label at the same position
        # (among other things) and returns a list of new augmented pairs.
        augmented_images = generate_augmented_images(input_img, label_img)
        for elem in augmented_images:
            input_as_tensor = data_transforms['norm'](elem[0])
            label_as_tensor = data_transforms['val'](elem[1])
            input_as_tensor.unsqueeze_(0)
            label_as_tensor.unsqueeze_(0)
            # Random split between training and validation data.
            is_training_data = random.uniform(0.0, 1.0)
            if is_training_data <= 0.7:
                dataloader["train"].append([input_as_tensor, label_as_tensor])
            else:
                dataloader["val"].append([input_as_tensor, label_as_tensor])

    shuffle(dataloader["train"])
    shuffle(dataloader["val"])

    def to_batches(samples):
        """Split samples into consecutive chunks of at most batch_size items."""
        return [samples[start:start + batch_size]
                for start in range(0, len(samples), batch_size)]

    # Group the data into batches of the given size.
    dataloader_batched = {
        "train": to_batches(dataloader["train"]),
        "val": to_batches(dataloader["val"]),
    }
    return dataloader_batched
This is a snippet of my training method with batch size 1:
while epoch <= num_epochs:
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
scheduler.step(3)
model.train() # Set model to training mode
else:
model.eval() # Set model to evaluate mode
running_loss = 0.0
counter = 0
# Iterate over data.
for inputs, labels in dataloaders[phase]:
counter += 1
max_num = len(dataloaders[phase])
inputs = inputs.to(device)
labels = labels.to(device)
# zero the parameter gradients
optimizer.zero_grad()
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'train'):
outputs = model(inputs)
loss = criterion(outputs, labels)
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.item() * inputs.size(0)
epoch_loss = running_loss / dataset_sizes[phase]
If I execute this, I get of course the error:
for inputs, labels in dataloaders[phase]:
ValueError: not enough values to unpack (expected 2, got 1)
I understand why, because now I have a list of images and not only an input and label image as before. So guessed I need a second for loop which iterates over these batches. So I tried this:
# Iterate over data.
for elem in dataloaders[phase]:
for inputs, labels in elem:
counter += 1
max_num = len(dataloaders[phase])
inputs = inputs.to(device)
labels = labels.to(device)
# zero the parameter gradients
optimizer.zero_grad()
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'train'):
outputs = model(inputs)
# _, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
But for me it looks like the optimization step (back-prop) is only applied on the last image of the batch. Is that true? And if so, how can I fix this? I guess if I indent the with-Block, then I get again a batch size 1 optimization.
Thanks in advance
But for me it looks like the optimization step (back-prop) is only applied on the last image of the batch.
It should not apply only based on the last image. It should apply based on the batch size.
If you set bs=2, it should apply to the batch of two images.
Optimization step actually will update the params of your network. Backprop is a fancy name for PyTorch autograd system that computes the first order gradients.

This Keras model works when created, but fails when loaded. Tensor splitting suspected

I'm experimenting with LSTMs, specifically, inputting a sequence into an LSTM, transferring the states into another LSTM, and decoding the sequence. I added an autoencoder between the two LSTMs, encoding and then decoding the transferred states via a lower dimensional latent space.
This works fine when I create the model and fit it. However, if I save this model, and then either try to continue training it, or even just use it without additional training, the model does not run and I get the following warning:
Traceback (most recent call last):
File "s2s_AE_2.py", line 140, in <module>
model.fit_generator(train_generator(),callbacks=[checkpointer], steps_per_epoch=30, epochs=2000, verbose=1,validation_data=val_generator(),validation_steps=30)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 2224, in fit_generator
class_weight=class_weight)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 1877, in train_on_batch
class_weight=class_weight)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 1476, in _standardize_user_data
exception_prefix='input')
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 86, in _standardize_input_data
str(len(data)) + ' arrays: ' + str(data)[:200] + '...')
ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 1 array(s), but instead got the following list of 2 arrays: [array([[[ 0.47338937, 0.75865918, 0.37731877, 0.63840222,
0.14653083],
[ 0.52119932, 0.78308798, 0.45885839, 0.66738276,
0.20393343],
[ 0.5674261 , 0.806364...
My code is as follows:
from keras.models import Model
from keras.layers import Input, LSTM, Dense, TimeDistributed,Lambda, Dropout, Activation ,RepeatVector
from keras.callbacks import ModelCheckpoint
import numpy as np
from keras.layers import Lambda, Concatenate
from keras import backend as K
from keras.models import load_model
import os
# Model dimensions.
seq_length = 150       # number of timesteps per sequence
features_num = 5       # number of features per timestep
LSTM_latent_dim = 40   # size of each LSTM state (h and c)
AE_latent_dim = 10     # size of the state autoencoder bottleneck

# Encoder LSTM: only its final states (h, c) are used further on.
encoder_inputs = Input(shape=(seq_length, features_num))
encoder = LSTM(LSTM_latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)

# State autoencoder: concatenate h and c, squeeze them through the
# bottleneck and expand them back to 2 * LSTM_latent_dim values.
merged_encoder_states = Concatenate(axis=-1)([state_h, state_c])
encoded_states = Dense(AE_latent_dim, activation='relu')(merged_encoder_states)
decoded_states = Dense(LSTM_latent_dim * 2, activation='relu')(encoded_states)

decoder_inputs = Input(shape=(1, features_num))
decoder_lstm = LSTM(LSTM_latent_dim, return_sequences=True, return_state=True)
decoder_dense = Dense(features_num)
all_outputs = []
inputs = decoder_inputs

# Split the reconstructed vector back into the two decoder start states.
# The slicing is wrapped in Lambda layers so that it is part of the layer
# graph that gets saved and reloaded; the split size is handed over through
# `arguments` so the layer does not rely on a module-level name when the
# model is loaded again.
split_head = Lambda(lambda t, size: t[:, :size],
                    arguments={'size': LSTM_latent_dim})
split_tail = Lambda(lambda t, size: t[:, size:],
                    arguments={'size': LSTM_latent_dim})
states = [split_head(decoded_states), split_tail(decoded_states)]

for _ in range(seq_length):
    # Run the decoder on one timestep
    outputs, state_h, state_c = decoder_lstm(inputs, initial_state=states)
    outputs = decoder_dense(outputs)
    # Store the current prediction (we will concatenate all predictions later)
    all_outputs.append(outputs)
    # Reinject the outputs as inputs for the next loop iteration
    # as well as update the states
    inputs = outputs
    states = [state_h, state_c]

# Concatenate all predictions
decoder_outputs = Lambda(lambda x: K.concatenate(x, axis=1))(all_outputs)
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
#model = load_model('pre_model.h5')
filepath_for_w = 'AE2_p2p_s2s_model.h5'
try:
    # If the model was previously run, continue from the saved file.
    model = load_model(filepath_for_w)
    print("loaded model")
except Exception:
    # Best effort: any load failure falls back to the freshly built model.
    # `Exception` (instead of a bare except) lets Ctrl-C and SystemExit through.
    print("new model")
print(model.summary())
model.compile(loss='mean_squared_error', optimizer='adam')
def create_wavelength(min_wavelength, max_wavelength, fluxes_in_wavelength, category):
    """
    Draw a random wavelength whose fractional part encodes ``category``.

    The result is a random grid value (a multiple of 1/fluxes_in_wavelength
    offset from min_wavelength) plus an offset taken from the category's
    sixth of a grid step.

    category: 0 - train; 2 - validate; 4 - test. 1, 3, 5 - dead space.
    """
    # First random draw: position inside the category's sixth of a grid step.
    category_offset = (category + np.random.random()) / 6
    steps = fluxes_in_wavelength
    span = max_wavelength - min_wavelength
    # Second random draw: whole number of grid steps away from min_wavelength.
    whole_steps = np.trunc(steps * np.random.random() * span)
    grid_value = (whole_steps + steps * min_wavelength) / steps
    return grid_value + category_offset / steps
def make_line(length, category):
    """
    Return ``length`` samples of a sine wave with random phase and wavelength.

    The wavelength comes from create_wavelength(30, 10, 1, category).
    """
    # The phase is drawn before the wavelength.
    phase = np.random.random()
    wavelength = create_wavelength(30, 10, 1, category)
    sample_positions = np.arange(length)
    return np.sin(sample_positions / wavelength + phase)
def make_data(seq_num, seq_len, dim, category):
    """
    Generate ``seq_num`` random multi-feature sine sequences.

    Returns a float array of shape (seq_num, seq_len, dim) in which
    ``data[i, :, j]`` is an independent make_line(seq_len, category) wave.

    The array is allocated once and filled in place; growing it with
    np.append inside the loops copies the whole array on every step.
    """
    data = np.empty((seq_num, seq_len, dim))
    # Same call order as before (sequence outer, feature inner), so the same
    # random draws end up in the same positions.
    for i in range(seq_num):
        for j in range(dim):
            data[i, :, j] = make_line(seq_len, category)
    return data
def train_generator():
    """
    Endlessly yield training batches of 1000 freshly generated sequences.

    Each item is ([encoder_input, decoder_input], target): the sequences are
    generated with seq_length + 1 steps; the first step feeds the decoder and
    the remaining seq_length steps serve as encoder input and as target.
    """
    while True:
        total_steps = seq_length + 1
        batch = make_data(1000, total_steps, features_num, 0)  # category=0 in train
        # Everything after the first timestep.
        encoder_input = batch[:, 1:, :]
        # The first timestep, kept as a length-1 sequence: (n, 1, features).
        decoder_input = np.expand_dims(batch[:, 0, :], axis=1)
        # The model has to reproduce the encoder input.
        yield [encoder_input, decoder_input], encoder_input
def val_generator():
    """
    Endlessly yield validation batches of 1000 freshly generated sequences.

    Same layout as the training batches, but generated with category 2:
    ([encoder_input, decoder_input], target), where the first generated step
    feeds the decoder and the remaining seq_length steps are both encoder
    input and target.
    """
    while True:
        total_steps = seq_length + 1
        batch = make_data(1000, total_steps, features_num, 2)  # category=2 in val
        # Everything after the first timestep.
        encoder_input = batch[:, 1:, :]
        # The first timestep, kept as a length-1 sequence: (n, 1, features).
        decoder_input = np.expand_dims(batch[:, 0, :], axis=1)
        # The model has to reproduce the encoder input.
        yield [encoder_input, decoder_input], encoder_input
# Checkpoint callback: monitors val_loss every epoch (period=1) and, because of
# save_best_only=True, only writes to filepath_for_w when it improves.
checkpointer=ModelCheckpoint(filepath_for_w, monitor='val_loss', verbose=0, save_best_only=True, mode='auto', period=1)
# Train from the generators: 30 training and 30 validation steps per epoch, 2000 epochs.
model.fit_generator(train_generator(),callbacks=[checkpointer], steps_per_epoch=30, epochs=2000, verbose=1,validation_data=val_generator(),validation_steps=30)
# Save the final model to the same path the checkpoint callback writes to.
model.save(filepath_for_w)
def predict_wave(input_wave, input_for_decoder):
    """
    Run the global model on one encoder input / decoder input pair.

    Both arguments are passed to model.predict unchanged, as a two-element
    list; the prediction is returned as is.
    """
    model_inputs = [input_wave, input_for_decoder]
    return model.predict(model_inputs)
def predict_many_waves_from_input(x):
    """
    Predict every instance of a dataset one at a time.

    ``x`` is a pair (encoder_input_data, decoder_input_data). Returns an
    array of shape (instances, seq_length, features) holding the prediction
    of predict_wave for each instance.
    """
    encoder_data, decoder_data = x
    instance_count = encoder_data.shape[0]
    predictions = np.zeros((instance_count, seq_length, encoder_data.shape[2]))
    for n in range(instance_count):
        # Slicing with n:n+1 keeps the leading batch dimension of size 1.
        single_wave = encoder_data[n:n + 1, :, :]
        single_decoder_input = decoder_data[n:n + 1, :, :]
        predictions[n, :, :] = predict_wave(single_wave, single_decoder_input)
    return predictions
def test_maker():
    """
    Generate one test set of 470 sequences (category 4).

    Returns ([encoder_input, decoder_input], target) with the same layout the
    generators use: the first generated step feeds the decoder and the
    remaining seq_length steps are both encoder input and target.
    """
    total_steps = seq_length + 1
    batch = make_data(470, total_steps, features_num, 4)  # category=4 in test
    # Everything after the first timestep.
    encoder_input = batch[:, 1:, :]
    # The first timestep, kept as a length-1 sequence: (n, 1, features).
    decoder_input = np.expand_dims(batch[:, 0, :], axis=1)
    return [encoder_input, decoder_input], encoder_input
# Build the test set and predict every instance.
x, y = test_maker()
a = predict_many_waves_from_input(x)
# Keep only the encoder input of the (encoder, decoder) pair.
x = x[0]

# Report the shapes of input, target and prediction.
for array in (x, y, a):
    print(array.shape)

# Store prediction, target and input for later inspection.
for file_name, array in (('a.npy', a), ('y.npy', y), ('x.npy', x)):
    np.save(file_name, array)

# Mean absolute error of the prediction, printed separately for features 0..4.
for feature in range(5):
    print(np.mean(np.absolute(y[:, :, feature] - a[:, :, feature])))
The culprit might be this line:
states=[decoded_states[:,:LSTM_latent_dim],decoded_states[:,LSTM_latent_dim:]]
After combining the states of the encoding LSTM and passing them through the autoencoder, I split them back into c and h (the cell state and the hidden state, respectively) and feed them into the decoder LSTM.
It seems reasonable to me that this step occurs correctly when the initial model is used, but is somehow incorrectly saved into the model file (or incorrectly loaded from the model file), resulting in a defective loaded model.
Further supporting my assessment, in my opinion, is the fact that when this line is replaced with
states= [state_h, state_c]
, the loaded model is able to run correctly (fitting and predicting), but of course this does away with the state autoencoder so I cannot use it except for zooming in on the bug.
So, I ask your help regarding two questions:
Why does this problem occur?
How do I solve it?
A possible partial solution is to forgo the saving of the model in its entirety, and just save (and load) the model's weights.
Replacing the lines
model = load_model(filepath_for_w)
...
checkpointer=ModelCheckpoint(filepath_for_w, monitor='val_loss', verbose=0, save_best_only=True, mode='auto', period=1)
...
model.save(filepath_for_w)
with
model.load_weights(filepath_for_w)
...
checkpointer=ModelCheckpoint(filepath_for_w, save_weights_only=True, monitor='val_loss', verbose=0, save_best_only=True, mode='auto', period=1)
...
model.save_weights(filepath_for_w)
does the trick. The model can be loaded for further fitting and for prediction.
However, this does not allow saving the entire model; I still need to keep the architecture in the code in order to populate it with the weights. It also does not explain why this problem occurs to begin with.

TF | How to predict from CNN after training is done

Trying to work with the framework provided in the course Stanford cs231n, given the code below.
I can see the accuracy getting better, so the net is being trained. However, after the training process and after checking the results on the validation set, how would I go about feeding a single image into the model and seeing its prediction?
I have searched around and couldn't find some built in predict function in tensorflow as there is in keras.
Initializing the net and its parameters
# clear old variables
tf.reset_default_graph()
# setup input (e.g. the data that changes every batch)
# The first dim is None, and gets sets automatically based on batch size fed in
X = tf.placeholder(tf.float32, [None, 30, 30, 1])
y = tf.placeholder(tf.int64, [None])
is_training = tf.placeholder(tf.bool)
def simple_model(X, y):
    """
    Build a one-conv-layer network and return its output tensor.

    X is the image input; y is accepted but not used. The graph is
    conv 7x7 (stride 2, VALID padding, 32 filters) -> ReLU -> flatten ->
    affine layer with 360 outputs.
    """
    # Variables, created in the same order and under the same names as before.
    conv_kernel = tf.get_variable("Wconv1", shape=[7, 7, 1, 32])  # 32 filters of size 7x7 over 1 input channel
    conv_bias = tf.get_variable("bconv1", shape=[32])
    # 4608 = 12 * 12 * 32: a 7x7 VALID convolution with stride 2 turns a
    # 30x30 input into a 12x12 map for each of the 32 filters.
    dense_weights = tf.get_variable("W1", shape=[4608, 360])
    dense_bias = tf.get_variable("b1", shape=[360])

    # Graph definition.
    conv_out = tf.nn.conv2d(X, conv_kernel, strides=[1, 2, 2, 1], padding='VALID')
    activations = tf.nn.relu(conv_out + conv_bias)
    flattened = tf.reshape(activations, [-1, 4608])
    return tf.matmul(flattened, dense_weights) + dense_bias
y_out = simple_model(X,y)
# define our loss
total_loss = tf.losses.hinge_loss(tf.one_hot(y,360),logits=y_out)
mean_loss = tf.reduce_mean(total_loss)
# define our optimizer
optimizer = tf.train.AdamOptimizer(5e-4) # select optimizer and set learning rate
train_step = optimizer.minimize(mean_loss)
Function for evaluating the model whether for training or validation and plots the results:
def run_model(session, predict, loss_val, Xd, yd,
              epochs=1, batch_size=64, print_every=100,
              training=None, plot_losses=False):
    """
    Evaluate the model on (Xd, yd) and, if ``training`` is given, train it.

    Parameters:
    - session: TensorFlow session used to run the graph.
    - predict: output tensor of the model; argmax over axis 1 is the predicted class.
    - loss_val: loss tensor that is evaluated for every minibatch. (It was
      previously ignored in favour of the global ``mean_loss``.)
    - Xd, yd: data and labels, indexable by an array of row indices.
    - epochs, batch_size: number of passes over the data and minibatch size.
    - print_every: while training, print minibatch statistics every this many iterations.
    - training: optional training op; when given it is run with every minibatch.
    - plot_losses: plot the collected minibatch losses after every epoch.

    Uses the module-level placeholders X, y and is_training for feeding.

    Returns (total_loss, total_correct) of the last epoch: the loss averaged
    over all samples and the fraction of correct predictions.
    """
    # Have tensorflow compute accuracy
    correct_prediction = tf.equal(tf.argmax(predict, 1), y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Shuffle indices once; every epoch walks through the same permutation.
    train_indicies = np.arange(Xd.shape[0])
    np.random.shuffle(train_indicies)

    training_now = training is not None

    # Things to compute per minibatch. When a training op is given it takes
    # the place of the accuracy tensor so the step is run with the batch.
    variables = [loss_val, correct_prediction, accuracy]
    if training_now:
        variables[-1] = training

    # counter
    iter_cnt = 0
    for e in range(epochs):
        # keep track of losses and accuracy
        correct = 0
        losses = []
        # make sure we iterate over the dataset once
        for i in range(int(math.ceil(Xd.shape[0] / batch_size))):
            # generate indices for the batch
            start_idx = (i * batch_size) % Xd.shape[0]
            idx = train_indicies[start_idx:start_idx + batch_size]
            batch_labels = yd[idx]

            # create a feed dictionary for this batch
            feed_dict = {X: Xd[idx, :],
                         y: batch_labels,
                         is_training: training_now}
            # The last batch of an epoch may be smaller than batch_size.
            actual_batch_size = batch_labels.shape[0]

            # have tensorflow compute loss and correct predictions
            # and (if given) perform a training step
            loss, corr, _ = session.run(variables, feed_dict=feed_dict)

            # aggregate performance stats
            losses.append(loss * actual_batch_size)
            correct += np.sum(corr)

            # print every now and then
            if training_now and (iter_cnt % print_every) == 0:
                print("Iteration {0}: with minibatch training loss = {1:.3g} and accuracy of {2:.2g}"
                      .format(iter_cnt, loss, np.sum(corr) / actual_batch_size))
            iter_cnt += 1
        total_correct = correct / Xd.shape[0]
        total_loss = np.sum(losses) / Xd.shape[0]
        print("Epoch {2}, Overall loss = {0:.3g} and accuracy of {1:.3g}"
              .format(total_loss, total_correct, e + 1))
        if plot_losses:
            plt.plot(losses)
            plt.grid(True)
            plt.title('Epoch {} Loss'.format(e + 1))
            plt.xlabel('minibatch number')
            plt.ylabel('minibatch loss')
            plt.show()
    return total_loss, total_correct
The functions calls that trains the model
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
print('Training')
run_model(sess,y_out,mean_loss,x_train,y_train,1,64,100,train_step,True)
print('Validation')
run_model(sess,y_out,mean_loss,x_val,y_val,1,64)
You do not need to go far, you simply pass your new (test) feature matrix X_test into your network and perform a forward pass - the output layer is the prediction. So the code is something like this
session.run(y_out, feed_dict={X: X_test})

Resources