Use a generator for Keras model.fit_generator - python-3.x

I originally tried to use generator syntax when writing a custom generator for training a Keras model. So I yielded from __next__. However, when I would try to train my mode with model.fit_generator I would get an error that my generator was not an iterator. The fix was to change yield to return which also necessitated rejiggering the logic of __next__ to track state. It's quite cumbersome compared to letting yield do the work for me.
Is there a way I can make this work with yield? I will need to write several more iterators that will have to have very clunky logic if I have to use a return statement.

I can't help debug your code since you didn't post it, but I abbreviated a custom data generator I wrote for a semantic segmentation project for you to use as a template:
def generate_data(directory, batch_size):
"""Replaces Keras' native ImageDataGenerator."""
i = 0
file_list = os.listdir(directory)
while True:
image_batch = []
for b in range(batch_size):
if i == len(file_list):
i = 0
random.shuffle(file_list)
sample = file_list[i]
i += 1
image = cv2.resize(cv2.imread(sample[0]), INPUT_SHAPE)
image_batch.append((image.astype(float) - 128) / 128)
yield np.array(image_batch)
Usage:
model.fit_generator(
generate_data('~/my_data', batch_size),
steps_per_epoch=len(os.listdir('~/my_data')) // batch_size)

I have recently played with the generators for Keras and I finally managed to prepare an example. It uses random data, so trying to teach NN on it makes no sense, but it's a good illustration of using a python generator for Keras.
Generate some data
import numpy as np
import pandas as pd
data = np.random.rand(200,2)
expected = np.random.randint(2, size=200).reshape(-1,1)
dataFrame = pd.DataFrame(data, columns = ['a','b'])
expectedFrame = pd.DataFrame(expected, columns = ['expected'])
dataFrameTrain, dataFrameTest = dataFrame[:100],dataFrame[-100:]
expectedFrameTrain, expectedFrameTest = expectedFrame[:100],expectedFrame[-100:]
Generator
def generator(X_data, y_data, batch_size):
samples_per_epoch = X_data.shape[0]
number_of_batches = samples_per_epoch/batch_size
counter=0
while 1:
X_batch = np.array(X_data[batch_size*counter:batch_size*(counter+1)]).astype('float32')
y_batch = np.array(y_data[batch_size*counter:batch_size*(counter+1)]).astype('float32')
counter += 1
yield X_batch,y_batch
#restart counter to yeild data in the next epoch as well
if counter >= number_of_batches:
counter = 0
Keras model
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten, Reshape
from keras.layers.convolutional import Convolution1D, Convolution2D, MaxPooling2D
from keras.utils import np_utils
model = Sequential()
model.add(Dense(12, activation='relu', input_dim=dataFrame.shape[1]))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adadelta', metrics=['accuracy'])
#Train the model using generator vs using the full batch
batch_size = 8
model.fit_generator(
generator(dataFrameTrain,expectedFrameTrain,batch_size),
epochs=3,
steps_per_epoch = dataFrame.shape[0]/batch_size,
validation_data = generator(dataFrameTest,expectedFrameTest,batch_size*2),
validation_steps = dataFrame.shape[0]/batch_size*2
)
#without generator
#model.fit(
# x = np.array(dataFrame),
# y = np.array(expected),
# batch_size = batch_size,
# epochs = 3
#)
Output
Epoch 1/3
25/25 [==============================] - 3s - loss: 0.7297 - acc: 0.4750 -
val_loss: 0.7183 - val_acc: 0.5000
Epoch 2/3
25/25 [==============================] - 0s - loss: 0.7213 - acc: 0.3750 -
val_loss: 0.7117 - val_acc: 0.5000
Epoch 3/3
25/25 [==============================] - 0s - loss: 0.7132 - acc: 0.3750 -
val_loss: 0.7065 - val_acc: 0.5000

This is the way I implemented it for reading files any size. And it works like a charm.
import pandas as pd
hdr=[]
for i in range(num_labels+num_features):
hdr.append("Col-"+str(i)) # data file do not have header so I need to
# provide one for pd.read_csv by chunks to work
def tgen(filename):
csvfile = open(filename)
reader = pd.read_csv(csvfile, chunksize=batch_size,names=hdr,header=None)
while True:
for chunk in reader:
W=chunk.values # labels and features
Y =W[:,:num_labels] # labels
X =W[:,num_labels:] # features
X= X / 255 # any required transformation
yield X, Y
csvfile = open(filename)
reader = pd.read_csv(csvfile, chunksize=batchz,names=hdr,header=None)
The back in the main I have
nval=number_of_validation_samples//batchz
ntrain=number_of_training_samples//batchz
ftgen=tgen("training.csv")
fvgen=tgen("validation.csv")
history = model.fit_generator(ftgen,
steps_per_epoch=ntrain,
validation_data=fvgen,
validation_steps=nval,
epochs=number_of_epochs,
callbacks=[checkpointer, stopper],
verbose=2)

I would like to upgrade Vaasha's code with TensorFlow 2.x to achieve training efficiencies as well as ease of data processing. This is particularly useful for image processing.
Process the data using Generator function as Vaasha had generated in the above example or using tf.data.dataset API. The latter approach is very useful when processing any datasets with metadata. For example, MNIST data can be loaded and processed with a few statements.
import tensorflow as tf # Ensure that TensorFlow 2.x is used
tf.compat.v1.enable_eager_execution()
import tensorflow_datasets as tfds # Needed if you are using any of the tf datasets such as MNIST, CIFAR10
mnist_train = tfds.load(name="mnist", split="train")
Use tfds.load the datasets. Once data is loaded and processed (for example, converting categorical variables, resizing, etc.).
Now upgrading keras model using TensorFlow 2.x
model = tf.keras.Sequential() # Tensorflow 2.0 upgrade
model.add(tf.keras.layers.Dense(12, activation='relu', input_dim=dataFrame.shape[1]))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
optimizer='adadelta',
metrics=['accuracy'])
#Train the model using generator vs using the full batch
batch_size = 8
model.fit_generator(generator(dataFrameTrain,expectedFrameTrain,batch_size),
epochs=3,
steps_per_epoch=dataFrame.shape[0]/batch_size,
validation_data=generator(dataFrameTest,expectedFrameTest,batch_size*2),
validation_steps=dataFrame.shape[0]/batch_size*2)
This will upgrade the model to run in TensorFlow 2.x

Related

Why i've got a three different MSE values

I wrote an mlp and want start to tune it to fit a best results. But i've stucked with several different MSE.
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn import metrics
import numpy
import joblib
# load dataset
#dataframe = read_csv("housing.csv", delim_whitespace=True, header=None)
dataframe = read_csv("100.csv", header=None)
dataset = dataframe.values
# split into input (X) and output (Y) variables
X = dataset[:,0:6]
Y = dataset[:,6]
# define the model
def larger_model():
# create model
model = Sequential()
model.add(Dense(20, input_dim=6, kernel_initializer='normal', activation='relu'))
model.add(Dense(50, kernel_initializer='normal', activation='relu'))
model.add(Dense(1, kernel_initializer='normal', activation='linear'))
# Compile model
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae','mse'])
return model
# evaluate model with standardized dataset
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=larger_model, epochs=100, batch_size=5, verbose=1)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=2)
results = cross_val_score(pipeline, X, Y, cv=kfold)
pipeline.fit(X, Y)
prediction = pipeline.predict(X)
result_test = Y
print("%.2f (%.2f) MSE" % (results.mean(), results.std()))
print('Mean Absolute Error:', metrics.mean_absolute_error(prediction, result_test))
print('Mean Squared Error:', metrics.mean_squared_error(prediction, result_test))
Gives me that result:
Epoch 98/100
200/200 [==============================] - 0s 904us/step - loss: 0.0086 - mae: 0.0669 - mse: 0.0086
Epoch 99/100
200/200 [==============================] - 0s 959us/step - loss: 0.0032 - mae: 0.0382 - mse: 0.0032
Epoch 100/100
200/200 [==============================] - 0s 894us/step - loss: 0.0973 - mae: 0.2052 - mse: 0.0973
200/200 [==============================] - 0s 600us/step
21.959478
-0.03 (0.02) MSE
Mean Absolute Error: 0.1959771416462339
Mean Squared Error: 0.0705598179059006
So i see here a 3 different mse results. Why so and which one i should take in mind to understand an overall model score when i willbe tune it?
Basically what I understood was if you print the results variable then you will get 2 MSE because you used n_splits=2.
-0.03 (0.02) MSE
Above output is the mean or average of the results(MSE) and std of the results(MSE).
Epoch 100/100
200/200 [==============================] - 0s 894us/step - loss: 0.0973 - mae: 0.2052 - mse: 0.0973
Above outputs mse = 0.0973 this is I think for split=2 and it will take only 50% of whole data(X) because remaining 50% it will take as validation data.
Mean Squared Error: 0.0705598179059006
Above output is coming where you are predicting on whole data, not 50% by using best model so obviously, you will get 3 different MSEs for the above 3 prints.
I am also solving a very similar kind of problem, so do one thing divide the dataset into train and test and use train data for training and when you are predicting use test dataset then calculate MSE on test data or else keep this as it is and take Mean Squared Error: 0.0705598179059006 as your final mse.

What is this AssertionError trying to tell me and how do I fix it?

I'm trying to implement an artificial neural network (autoencoder) on my dataset. The error I mentioned is occurring in the fit statement. All the data sets referenced in the code are numpy.ndarrays of float64 type.
The data set was a dataframe initially and then after I split it into train and test I converted them into ndarrays because I believe that's required to run the autoencoder code.
import pandas as pd
import numpy as np
from scipy import stats
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_recall_curve
from sklearn.metrics import recall_score, classification_report, auc, roc_curve
from sklearn.metrics import precision_recall_fscore_support, f1_score
from sklearn.preprocessing import StandardScaler
from pylab import rcParams
from keras.models import Model, load_model
from keras.layers import Input, Dense
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras import regularizers
attempt = ads
msk = np.random.rand(len(attempt)) < 0.8
train_x = attempt[msk]
test_x = attempt[~msk]
train_x = train_x[train_x.Successful_Quote == 0] #where normal transactions
train_x = train_x.drop(['Successful_Quote'], axis=1) #drop the class column
test_y = test_x['Successful_Quote'] #save the class column for the test set
test_x = test_x.drop(['Successful_Quote'], axis=1) #drop the class column
train_x = train_x.values #transform to ndarray
test_x = test_x.values
nb_epoch = 100
batch_size = 128
input_dim = train_x.shape[1] #num of columns, 30
encoding_dim = 14
hidden_dim = int(encoding_dim / 2) #i.e. 7
learning_rate = 1e-7
input_layer = Input(shape=(input_dim, ))
encoder = Dense(encoding_dim, activation="tanh", activity_regularizer=regularizers.l1(learning_rate))(input_layer)
encoder = Dense(hidden_dim, activation="relu")(encoder)
decoder = Dense(hidden_dim, activation='tanh')(encoder)
decoder = Dense(input_dim, activation='relu')(decoder)
autoencoder = Model(inputs=input_layer, outputs=decoder)
autoencoder.compile(metrics=['accuracy'],
loss='mean_squared_error',
optimizer='adam')
cp = ModelCheckpoint(filepath=r"C:\Users\kartik.kumar\Desktop\Pricing Project",
save_best_only=True,
verbose=0)
tb = TensorBoard(log_dir='./logs',
histogram_freq=0,
write_graph=True,
write_images=True)
history = autoencoder.fit(train_x, train_x,
epochs=nb_epoch,
batch_size=batch_size,
shuffle=True,
validation_data=(test_x, test_x),
verbose=1,
callbacks=[cp, tb]).history
Error message:
Train on 4203 samples, validate on 1517 samples
Epoch 1/100
Traceback (most recent call last):
File "<ipython-input-153-aea5b9ca41ad>", line 7, in <module>
callbacks=[cp, tb]).history
File "C:\Anaconda3\lib\site-packages\keras\engine\training.py", line 1039, in fit
validation_steps=validation_steps)
File "C:\Anaconda3\lib\site-packages\keras\engine\training_arrays.py", line 176, in fit_loop
np.random.shuffle(index_array)
File "mtrand.pyx", line 4823, in mtrand.RandomState.shuffle
File "C:\Anaconda3\lib\site-packages\numpy\core\_internal.py", line 298, in __init__
assert self._data.value == ptr
AssertionError
line 298 mentioned in the traceback leads to this:
if ctypes:
self._ctypes = ctypes
# get a void pointer to the buffer, which keeps the array alive
self._data = _get_void_ptr(array)
assert self._data.value == ptr
else:
# fake a pointer-like object that holds onto the reference
self._ctypes = _missing_ctypes()
self._data = self._ctypes.c_void_p(ptr)
self._data._objects = array
I expected the output to start training the model and give me something like this:
Train on 227468 samples, validate on 56962 samples
Epoch 1/100
227468/227468 [==============================] - 7s 29us/step - loss: 0.8688 - acc: 0.4782 - val_loss: 0.8266 - val_acc: 0.5893
Epoch 2/100
227468/227468 [==============================] - 5s 20us/step - loss: 0.7767 - acc: 0.6053 - val_loss: 0.7980 - val_acc: 0.6191
Epoch 3/100
227468/227468 [==============================] - 4s 19us/step - loss: 0.7575 - acc: 0.6291 - val_loss: 0.7855 - val_acc: 0.6376
Epoch 4/100
227468/227468 [==============================] - 4s 19us/step - loss: 0.7473 - acc: 0.6395 - val_loss: 0.7781 - val_acc: 0.6412
My only guess is there is an issue with the datatypes that I've used here.
Not sure if this is helpful, but: My python version is 3.7 and I'm using the latest tensorflow, keras, numpy and sklearn libraries.
I'm getting the same error when I run this as well:
RANDOM_SEED = 314 #used to help randomly select the data points
TEST_PCT = 0.2 # 20% of the data
train_x, test_x = train_test_split(attempt, test_size=TEST_PCT, random_state=RANDOM_SEED)

Compute mse after keras model. Prediction looks to be wrong--updated need to reshape array first

So I would like to compute R2 = 1 - residual_ss/y_ss after keras. I used the prediction model.predict() to compute residual_ss. However, the residual_ss is much larger than y_ss which results in a negative R2. Since residual_ss = n*mse and mse is also the loss function, the code shows the computation for mse after the model:
import keras
keras.__version__
from keras.datasets import boston_housing
import pandas as pd
import numpy as np
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()
mean = train_data.mean(axis=0)
train_data -= mean
std = train_data.std(axis=0)
train_data /= std
test_data -= mean
test_data /= std
from keras import models
from keras import layers
def build_model():
# Because we will need to instantiate
# the same model multiple times,
# we use a function to construct it.
model = models.Sequential()
model.add(layers.Dense(64, activation='relu',
input_shape=(train_data.shape[1],)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
return model
model=build_model()
model.fit(train_data, train_targets, epochs=200, batch_size=32)
#try to get mse
y_pred = model.predict(train_data)
mse=np.mean((train_targets-y_pred)*(train_targets-y_pred))
print(mse)
Here is last 3 epochs and the mse in the end
Epoch 198/200
404/404 [=======] - 0s 17us/step - loss: 3.4695 - mean_absolute_error: 1.3338
Epoch 199/200
404/404 [=======] - 0s 22us/step - loss: 3.5412 - mean_absolute_error: 1.3260
Epoch 200/200
404/404 [=======] - 0s 20us/step - loss: 3.2775 - mean_absolute_error: 1.2858
162.25934358457062
I only use train_data and train_targets here. Why I got a mse not even close to the loss (mse) reported in each epoch? So the prediction is not close to the target. Please help.

Why my validation accuracy is much higher than train accuracy, but the test accuracy is only 0.5?

I am doing some image classification using inception_v3 model in keras, however, my train accuracy is lower than validation during the whole training process. And my validation accuracy is above 0.95 from the first epoch. I also find that train loss is much higher than validation loss. In the end, the test accuracy is 0.5, which is pretty bad.
At first, my optimizer is Adam with learning rate equals to 0.00001, the result is bad. Then I change it to SGD with learning rate of 0.00001, which doesn't make any change to the bad result. I also tried to increase the learning rate to 0.1, but the test accuracy is still around 0.5
import numpy as np
import pandas as pd
import keras
from keras import layers
from keras.applications.inception_v3 import preprocess_input
from keras.models import Model
from keras.layers.core import Dense
from keras.layers import GlobalAveragePooling2D
from keras.optimizers import Adam, SGD, RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from keras.utils import plot_model
from keras.models import model_from_json
from sklearn.metrics import confusion_matrix
import itertools
import matplotlib.pyplot as plt
import math
import copy
import pydotplus
train_path = 'data/train'
valid_path = 'data/validation'
test_path = 'data/test'
top_model_weights_path = 'model_weigh.h5'
# number of epochs to train top model
epochs = 100
# batch size used by flow_from_directory and predict_generator
batch_size = 2
img_width, img_height = 299, 299
fc_size = 1024
nb_iv3_layers_to_freeze = 172
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input,
rotation_range=30,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
# this is the augmentation configuration we will use for testing:
# only rescaling
valid_datagen = ImageDataGenerator(preprocessing_function=preprocess_input,
rotation_range=30,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
train_batches =
train_datagen.flow_from_directory(train_path,
target_size=(img_width, img_height),
classes=None,
class_mode='categorical',
batch_size=batch_size,
shuffle=True)
valid_batches =
valid_datagen.flow_from_directory(valid_path,
target_size=(img_width,img_height),
classes=None,
class_mode='categorical',
batch_size=batch_size,
shuffle=True)
test_batches =
ImageDataGenerator().flow_from_directory(test_path,
target_size=(img_width,
img_height),
classes=None,
class_mode='categorical',
batch_size=batch_size,
shuffle=False)
nb_train_samples = len(train_batches.filenames)
# get the size of the training set
nb_classes_train = len(train_batches.class_indices)
# get the number of classes
predict_size_train = int(math.ceil(nb_train_samples / batch_size))
nb_valid_samples = len(valid_batches.filenames)
nb_classes_valid = len(valid_batches.class_indices)
predict_size_validation = int(math.ceil(nb_valid_samples / batch_size))
nb_test_samples = len(test_batches.filenames)
nb_classes_test = len(test_batches.class_indices)
predict_size_test = int(math.ceil(nb_test_samples / batch_size))
def add_new_last_layer(base_model, nb_classes):
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(fc_size, activation='relu')(x)
pred = Dense(nb_classes, activation='softmax')(x)
model = Model(input=base_model.input, output=pred)
return model
# freeze base_model layer in order to get the bottleneck feature
def setup_to_transfer_learn(model, base_model):
for layer in base_model.layers:
layer.trainable = False
model.compile(optimizer=Adam(lr=0.00001),
loss='categorical_crossentropy',
metrics=['accuracy'])
base_model = keras.applications.inception_v3.InceptionV3(weights='imagenet', include_top=False)
model = add_new_last_layer(base_model, nb_classes_train)
setup_to_transfer_learn(model, base_model)
model.summary()
train_labels = train_batches.classes
train_labels = to_categorical(train_labels, num_classes=nb_classes_train)
validation_labels = valid_batches.classes
validation_labels = to_categorical(validation_labels, num_classes=nb_classes_train)
history = model.fit_generator(train_batches,
epochs=epochs,
steps_per_epoch=nb_train_samples // batch_size,
validation_data=valid_batches,
validation_steps=nb_valid_samples // batch_size,
class_weight='auto')
# save model to json
model_json = model.to_json()
with open("model.json", "w") as json_file:
json_file.write(model_json)
# serialize model to HDF5
model.save_weights(top_model_weights_path)
print("Saved model to disk")
# model visualization
plot_model(model,
show_shapes=True,
show_layer_names=True,
to_file='model.png')
(eval_loss, eval_accuracy) = model.evaluate_generator(
valid_batches,
steps=nb_valid_samples // batch_size,
verbose=1)
print("[INFO] evaluate accuracy: {:.2f}%".format(eval_accuracy * 100))
print("[INFO] evaluate loss: {}".format(eval_loss))
test_batches.reset()
predictions = model.predict_generator(test_batches,
steps=nb_test_samples / batch_size,
verbose=0)
# print(predictions)
predicted_class_indices = np.argmax(predictions, axis=1)
# print(predicted_class_indices)
labels = train_batches.class_indices
labels = dict((v, k) for k, v in labels.items())
final_predictions = [labels[k] for k in predicted_class_indices]
# print(final_predictions)
# save as csv file
filenames = test_batches.filenames
results = pd.DataFrame({"Filename": filenames,
"Predictions": final_predictions})
results.to_csv("results.csv", index=False)
# evaluation test result
(test_loss, test_accuracy) = model.evaluate_generator(
test_batches,
steps=nb_train_samples // batch_size,
verbose=1)
print("[INFO] test accuracy: {:.2f}%".format(test_accuracy * 100))
print("[INFO] test loss: {}".format(test_loss))
Here is a brief summary of training process:
Epoch 1/100
2000/2000 [==============================] - 146s 73ms/step - loss: 0.4941 - acc: 0.7465 - val_loss: 0.1612 - val_acc: 0.9770
Epoch 2/100
2000/2000 [==============================] - 140s 70ms/step - loss: 0.4505 - acc: 0.7725 - val_loss: 0.1394 - val_acc: 0.9765
Epoch 3/100
2000/2000 [==============================] - 139s 70ms/step - loss: 0.4505 - acc: 0.7605 - val_loss: 0.1643 - val_acc: 0.9560
......
Epoch 98/100
2000/2000 [==============================] - 141s 71ms/step - loss: 0.1348 - acc: 0.9467 - val_loss: 0.0639 - val_acc: 0.9820
Epoch 99/100
2000/2000 [==============================] - 140s 70ms/step - loss: 0.1495 - acc: 0.9365 - val_loss: 0.0780 - val_acc: 0.9770
Epoch 100/100
2000/2000 [==============================] - 138s 69ms/step - loss: 0.1401 - acc: 0.9458 - val_loss: 0.0471 - val_acc: 0.9890
Here is the result that I get:
[INFO] evaluate accuracy: 98.55%
[INFO] evaluate loss: 0.05201659869024259
2000/2000 [==============================] - 47s 23ms/step
[INFO] test accuracy: 51.70%
[INFO] test loss: 7.737395915810134
I wish someone can help me deal with this problem.
As the code is now, you're not freezing the layers of the model for transfer learning. In the setup_to_transfer_learn you're freezing the layer in base_model, and then compiling the new model (containing layers from the base model), but not actually freezing on the new model. Just change setup_to_transfer_learn:
def setup_to_transfer_learn(model):
for layer in model.layers[:-3]: # since you added three new layers (which should not freeze)
layer.trainable = False
model.compile(optimizer=Adam(lr=0.00001),
loss='categorical_crossentropy',
metrics=['accuracy'])
Then call the function like this:
model = add_new_last_layer(base_model, nb_classes_train)
setup_to_transfer_learn(model)
You should see a large difference in the number of trainable parameters when calling model.summary()
Finally, I solved the problem. I forget to do image preprocessing to my test data. After I add this, everything works really fine.
I change this:
test_batches = ImageDataGenerator().flow_from_directory(test_path,
target_size=(img_width, img_height),
classes=None,
class_mode='categorical',
batch_size=batch_size,
shuffle=False)
to this:
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_batches = test_datagen.flow_from_directory(test_path,
target_size=(img_width, img_height),
classes=None,
class_mode='categorical',
batch_size=batch_size,
shuffle=False)
And the test accuracy is 0.98, test loss is 0.06.
What actually happens is that when you use preprocessing the model may actually start learning those techniques. One way to check if your model is learning good features is using Grad-CAM

Keras fit_generator(), is this the correct usage?

So far I have come up with this hacky code here, this code runs and outputs
Epoch 10/10
1/3000 [..............................] - ETA: 27s - loss: 0.3075 - acc: 0.7270
6/3000 [..............................] - ETA: 54s - loss: 0.3075 - acc: 0.7355
.....
2996/3000 [============================>.] - ETA: 0s - loss: 0.3076 - acc: 0.7337
2998/3000 [============================>.] - ETA: 0s - loss: 0.3076 - acc: 0.7337
3000/3000 [==============================] - 59s - loss: 0.3076 - acc: 0.7337
Traceback (most recent call last):
File "C:/Users/Def/PycharmProjects/KerasUkExpenditure/TweetParsing.py", line 140, in <module>
(loss, acc) = model.fit_generator(generator(tokenizer=t, startIndex=startIndex,batchSize=amountOfData),
TypeError: 'History' object is not iterable
Process finished with exit code 1
I'm confused by "'History' object is not iterable", what does this mean?
This is the first time I've tried to do batch training and testing and I'm not sure i've implemented it correctly as most the examples I've seen online are for images. Here is the code
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.preprocessing.text import Tokenizer
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import re
"""
amount of samples out to the 1 million to use, my 960m 2GB can only handel
about 30,000ish at the moment depending on the amount of neurons in the
deep layer and the amount fo layers.
"""
maxSamples = 3000
#Load the CSV and get the correct columns
data = pd.read_csv("C:\\Users\\Def\\Desktop\\Sentiment Analysis Dataset1.csv")
dataX = pd.DataFrame()
dataY = pd.DataFrame()
dataY[['Sentiment']] = data[['Sentiment']]
dataX[['SentimentText']] = data[['SentimentText']]
dataY = dataY.iloc[0:maxSamples]
dataX = dataX.iloc[0:maxSamples]
testY = dataY.iloc[-1: -maxSamples]
testX = dataX.iloc[-1: -maxSamples]
"""
here I filter the data and clean it up bu remove # tags and hyper links and
also any characters that are not alpha numeric, I then add it to the vec list
"""
def removeTagsAndLinks(dataframe):
vec = []
for x in dataframe.iterrows():
#Removes Hyperlinks
zero = re.sub("(http|ftp|https)://([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,#?^=%&:/~+#-]*[\w#?^=%&/~+#-])?", "", x[1].values[0])
#Removes # tags
one = re.sub("#\\w+", '', zero)
#keeps only alpha-numeric chars
two = re.sub("\W+", ' ', one)
vec.append(two)
return vec
vec = removeTagsAndLinks(dataX)
xTest = removeTagsAndLinks(testX)
yTest = removeTagsAndLinks(testY)
"""
This loop looks for any Tweets with characters shorter than 2 and once found write the
index of that Tweet to an array so I can remove from the Dataframe of sentiment and the
list of Tweets later
"""
indexOfBlankStrings = []
for index, string in enumerate(vec):
if len(string) < 2:
del vec[index]
indexOfBlankStrings.append(index)
for row in indexOfBlankStrings:
dataY.drop(row, axis=0, inplace=True)
"""
This makes a BOW model out of all the tweets then creates a
vector for each of the tweets containing all the words from
the BOW model, each vector is the same size becuase the
network expects it
"""
def vectorise(tokenizer, list):
tokenizer.fit_on_texts(list)
return tokenizer.texts_to_matrix(list)
#Make BOW model and vectorise it
t = Tokenizer(lower=False, num_words=1000)
dim = vectorise(t, vec)
xTest = vectorise(t, xTest)
"""
Here im experimenting with multiple layers of the total
amount of words in the syllabus divided by ^2 - This
has given me quite accurate results compared to random guess's
of amount of neron's and amounts of layers.
"""
l1 = int(len(dim[0]) / 4) #To big for my GPU
l2 = int(len(dim[0]) / 8) #To big for my GPU
l3 = int(len(dim[0]) / 16)
l4 = int(len(dim[0]) / 32)
l5 = int(len(dim[0]) / 64)
l6 = int(len(dim[0]) / 128)
#Make the model
model = Sequential()
model.add(Dense(l1, input_dim=dim.shape[1]))
model.add(Dropout(0.15))
model.add(Dense(l2))
model.add(Dense(l1))
model.add(Dense(l3))
model.add(Dropout(0.2))
model.add(Dense(l4))
model.add(Dense(1, activation='relu'))
#Compile the model
model.compile(optimizer='RMSProp', loss='binary_crossentropy', metrics=['acc'])
"""
This here will use multiple batches to train the model.
startIndex:
This is the starting index of the array for which you want to
start training the network from.
dataRange:
The number of elements use to train the network in each batch so
since dataRange = 1000 this mean it goes from
startIndex...dataRange OR 0...1000
amountOfEpochs:
This is kinda self explanitory, the more Epochs the more it
is supposed to learn AKA updates the optimisation algo numbers
"""
amountOfEpochs = 10
dataRange = 1000
startIndex = 0
def generator(tokenizer, batchSize, totalSize=maxSamples, startIndex=0):
f = tokenizer.texts_to_sequences(vec[startIndex:totalSize])
l = np.asarray(dataY.iloc[startIndex:totalSize])
while True:
for i in range(1000, totalSize, batchSize):
batch_features = tokenizer.sequences_to_matrix(f[startIndex: batchSize])
batch_labels = l[startIndex: batchSize]
yield batch_features, batch_labels
##This runs the model for batch AKA load a little them process then load a little more
for amountOfData in range(1000, maxSamples, 1000):
#(loss, acc) = model.train_on_batch(x=dim[startIndex:amountOfData], y=np.asarray(dataY.iloc[startIndex:amountOfData]))
(loss, acc) = model.fit_generator(generator(tokenizer=t, startIndex=startIndex,batchSize=amountOfData),
steps_per_epoch=maxSamples, epochs=amountOfEpochs,
validation_data=(np.array(xTest), np.array(yTest)))
startIndex += 1000
The part towards the bottom is where I've tried to implement the fit_generator() and make my own generator, I wanted to load say 75,000 maxSamples then train the network 1000 samples at a time until it reaches the maxSample var which is why I've setup range to do the (0, maxSample, 1000) which I use in the generator() was this the correct use?
I ask because my network is not using the validation data and it seems to fit to the data extremely quickly which suggests overfitting or just using a very small dataset. am I iterating over all the maxSamples int he correct way? or am I just looping over the first iterations several times?
Thanks
The problem lies in this line:
(loss, acc) = model.fit_generator(...)
as fit_generator returns a single object of keras.callbacks.history class. That's why you have this error as singe object is not iterable. In order to get loss lists you need to retrieve them from history field in this callback which is a dictionary of recorded losses:
history = model.fit_generator(...)
loss = history.history["loss"]

Resources