Im using the same convolutional neural network structure on Keras and Lasagne.
Right now, i just changed to a simple network to see if it changed anything, but it didnt.
On Keras it works fine, it outputs values between 0 and 1 with a good accuracy.
On lasagne, the values dont come mostly wrong. It seems the output is the same as the input.
Basically : It outputs and train fine on keras. But not on my lasagne Version
Structure On Lasagne :
def structure(w=5, h=5):
try:
input_var = T.tensor4('inputs')
target_var = T.bmatrix('targets')
network = lasagne.layers.InputLayer(shape=(None, 1, h, w), input_var=input_var)
network = lasagne.layers.Conv2DLayer(
network, num_filters=64, filter_size=(3, 3), stride=1, pad=0,
nonlinearity=lasagne.nonlinearities.rectify,
W=lasagne.init.GlorotUniform())
network = lasagne.layers.Conv2DLayer(
network, num_filters=64, filter_size=(3, 3), stride=1, pad=0,
nonlinearity=lasagne.nonlinearities.rectify,
W=lasagne.init.GlorotUniform())
network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2), stride=None, pad=(0, 0), ignore_border=True)
network = lasagne.layers.DenseLayer(
lasagne.layers.dropout(network, p=0.5),
num_units=256,
nonlinearity=lasagne.nonlinearities.rectify, W=lasagne.init.GlorotUniform())
network = lasagne.layers.DenseLayer(
lasagne.layers.dropout(network, p=0.5),
num_units=1,
nonlinearity=lasagne.nonlinearities.sigmoid)
print "...Output", lasagne.layers.get_output_shape(network)
return network, input_var, target_var
except Exception as inst:
print ("Failure to Build NN !", inst.message, (type(inst)), (inst.args), (inst))
return None
On Keras :
def getModel(w,h):
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
model = Sequential()
model.add(Convolution2D(64, 3, 3, border_mode='valid', input_shape=(1, h, w)))
model.add(Activation('relu'))
model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Convolution2D(128, 3, 3, border_mode='valid'))
model.add(Activation('relu'))
model.add(Convolution2D(128, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
#
model.add(Flatten())
#
model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dropout(0.25))
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.25))
#
model.add(Dense(1))
model.add(Activation('sigmoid'))
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='binary_crossentropy', optimizer='sgd')
return model
And to train on Keras..
model.fit(x, y, batch_size=512, nb_epoch=500, verbose=2, validation_split=0.2, shuffle=True, show_accuracy=True)
And to train and predict on Lasagne :
To Train :
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.binary_crossentropy(prediction, target_var)
loss = loss.mean()
params = lasagne.layers.get_all_params(network, trainable=True)
# updates = lasagne.updates.sgd(loss, params, learning_rate=learning_rate)
updates = lasagne.updates.nesterov_momentum(loss_or_grads=loss, params=params, learning_rate=learning_rate, momentum=momentum_rho)
#
test_prediction = lasagne.layers.get_output(network, deterministic=True)
test_prediction = lasagne.layers.get_output(network, deterministic=True)
test_loss = lasagne.objectives.binary_crossentropy(test_prediction, target_var)
test_loss = test_loss.mean()
# Accuracy
test_acc = lasagne.objectives.binary_accuracy(test_prediction, target_var)
test_acc = test_acc.mean()
train_fn = theano.function([input_var, target_var], loss, updates=updates)
val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
And im using these iterators which i hope isnt the cause of it.. Maybe it is?
def iterate_minibatches_getOutput(self, inputs, batchsize):
for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
excerpt = slice(start_idx, start_idx + batchsize)
yield inputs[excerpt]
def iterate_minibatches(self, inputs, targets, batchsize, shuffle=False):
assert len(inputs) == len(targets)
if shuffle:
indices = np.arange(len(inputs))
np.random.shuffle(indices)
for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
if shuffle:
excerpt = indices[start_idx:start_idx + batchsize]
else:
excerpt = slice(start_idx, start_idx + batchsize)
yield inputs[excerpt], targets[excerpt]
To Predict :
test_prediction = lasagne.layers.get_output(self.network, deterministic=True)
predict_fn = theano.function([self.input_var], test_prediction)
index = 0
for batch in self.iterate_minibatches_getOutput(inputs=submission_feature_x, batchsize=self.batch_size):
inputs = batch
y = predict_fn(inputs)
start = index * self.batch_size
end = (index + 1) * self.batch_size
predictions[index * self.batch_size:self.batch_size * (index + 1)] = y
index += 1
print "debug -->", predictions[0:10]
print "debug max ---->", np.max(predictions)
print "debug min ----->", np.min(predictions)
This Prints :
debug --> [[ 0.3252553 ]
[ 0.3252553 ]
[ 0.3252553 ]
[ 0.3252553 ]
[ 0.3252553 ]
[ 0.3252553 ]
[ 0.3252553 ]
[ 0.3252553 ]
[ 0.3252553 ]
[ 0.32534513]]
debug max ----> 1.0
debug min -----> 0.0
The results are totally wrong.
However, what confuses me, is that it outputs fine on keras.
Also, the validation acc never changes:
Epoch 2 of 30 took 9.5846s
Training loss: 0.22714619
Validation loss: 0.17278196
Validation accuracy: 95.85454545 %
Epoch 3 of 30 took 9.6437s
Training loss: 0.22646923
Validation loss: 0.17249792
Validation accuracy: 95.85454545 %
Epoch 4 of 30 took 9.6464s
Training loss: 0.22563262
Validation loss: 0.17235395
Validation accuracy: 95.85454545 %
Epoch 5 of 30 took 10.5069s
Training loss: 0.22464556
Validation loss: 0.17226825
Validation accuracy: 95.85454545 %
...
Please Help!
What am i doing wrong?
These are the shapes being used:
x_train.shape (102746, 1, 17, 17)
y_train.shape (102746, 1)
x_val.shape (11416, 1, 17, 17)
y_val.shape (11416, 1)
THe problem was :
target_var = T.bmatrix('targets')
Should be :
target_var = T.fmatrix('targets')
Also, the learning rate was too low.
And on the Keras Script, there was another error :
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='binary_crossentropy', optimizer='sgd')
Should be :
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='binary_crossentropy', optimizer=sgd)
Related
I am trying to predict the outcome of a football fixture (using backpropagation) across 3 classes: home team wins, draw or away team wins; they are encoded as 0, 1, and 2, respectively.
Features: home_team, away_team, home_score, away_score, home_adv, match_imp
Target: outcome_final
Training, validation and test tensors:
X_train: torch.Size([25365, 554])
y_train: torch.Size([25365])
X_test: torch.Size([5436, 554])
y_test: torch.Size([5436])
X_val: torch.Size([5436, 554])
y_val: torch.Size([5436])
Network architecture:
Net(
(fc1): Linear(in_features=555, out_features=100, bias=True)
(fc2): Linear(in_features=100, out_features=3, bias=True)
(dropout): Dropout(p=0.2, inplace=False)
)
Weights and biases are generated at first:
fc1.weight: torch.Size([100, 554])
fc1.bias: torch.Size([100])
fc2.weight: torch.Size([3, 100])
fc2.bias: torch.Size([3])
ReLU activation function is used for the hidden layer, and Softmax activation function is used for the output layer.
The following code returns the error below.
# Creating the class for the neural network
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.fc1 = nn.Linear(554, 100)
self.fc2 = nn.Linear(100, 3)
self.dropout = nn.Dropout(p = 0.2)
def forward(self, x):
x = F.relu(self.fc1(x))
x = self.dropout(x)
x = F.softmax(self.fc2(x), dim = 1)
return x
# Initializing model
model = Net().to(device)
X_train.to(device)
y_train.to(device)
X_val.to(device)
y_val.to(device)
# Initializing weights and biases
model.fc1.weight.data.normal_(0, 0.01)
model.fc1.bias.data.normal_(0, 0.01)
model.fc2.weight.data.normal_(0, 0.01)
model.fc2.bias.data.normal_(0, 0.01)
# TRAIN the model
def train_model(model, X_train, y_train, X_val, y_val, epochs = 10, learning_rate = 0.003):
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
# Losses and accuracies
train_losses = []
val_losses = []
train_accs = []
val_accs = []
# Training happens here
for epoch in range(epochs):
# Shuffling data
permutation = torch.randperm(X_train.size()[0]).to(device)
X_train = X_train[permutation]
y_train = y_train[permutation]
# Creating batches
batch_size = 5
n_batches = X_train.size()[0] // batch_size
for i in range(n_batches):
# Zeroing gradients
optimizer.zero_grad()
# Forward pass
output = model(X_train[i * batch_size : (i + 1) * batch_size])
loss = criterion(output, y_train[i * batch_size : (i + 1) * batch_size].long())
# Backward pass
loss.backward()
# Updating weights and biases
optimizer.step()
# Sending to CPU
model.to('cpu')
# Training loss and accuracy
train_loss = criterion(model(X_train), y_train.long())
train_losses.append(train_loss)
train_acc = accuracy_score(y_train, torch.argmax(model(X_train), dim = 1))
train_accs.append(train_acc)
print('Epoch: ', epoch + 1, 'Training Loss: ', train_loss, 'Training Accuracy: ', train_acc)
# Validation loss and accuracy
val_loss = criterion(model(X_val), y_val.long())
val_losses.append(val_loss)
val_acc = accuracy_score(y_val, torch.argmax(model(X_val), dim = 1))
val_accs.append(val_acc)
print('Epoch: ', epoch + 1, 'Validation Loss: ', val_loss, 'Validation Accuracy: ', val_acc)
# Sending back to GPU
model.to(device)
X_train.to(device)
y_train.to(device)
X_val.to(device)
y_val.to(device)
return train_losses, val_losses, train_accs, val_accs
# Let's train the model
model = Net().to(device)
train_losses, val_losses, train_accs, val_accs = train_model(model, X_train, y_train, X_val, y_val)
ERROR:
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_addmm)
I have tried ensuring all training and validation sets are converted as tensors and sent to the GPU. Yet, I am still getting this error.
Am I missing something here? Thanks in advance.
I build my CNN by using the the code:
def arbitrary_functionality(tensor):
return tf.abs(tensor)
def my_filter(shape, dtype=None):
f = np.array([
[[[-1]], [[2]], [[-2]], [[2]], [[-1]]],
[[[2]], [[-6]], [[8]], [[-6]], [[2]]],
[[[-2]], [[8]], [[-12]], [[8]], [[-2]]],
[[[2]], [[-6]], [[8]], [[-6]], [[2]]],
[[[-1]], [[2]], [[-2]], [[2]], [[-1]]]])
assert f.shape == shape
return K.variable(f, dtype='float32')
input_layer = Input(shape=(256, 256, 1))
conv = Conv2D(1, [5, 5], kernel_initializer=my_filter, input_shape=(256, 256, 1), trainable=True, padding='same')(input_layer)
conv = Conv2D(8, (5, 5), padding='same', strides=1, use_bias=False)(conv)
lambda_layer = Lambda(arbitrary_functionality)(conv)
output_layer = Activation(activation='tanh')(lambda_layer)
output_layer = AveragePooling2D(pool_size= (5, 5), strides=2)(output_layer)
hidden = Dense(256)(output_layer)
hidden = LeakyReLU(alpha=0.2)(hidden)
output = Dense(2, activation='softmax')(hidden)
model = Model(inputs=input_layer, outputs=output)
# Callback for loss logging per epoch
class LossHistory(Callback):
def on_train_begin(self, logs={}):
self.losses = []
self.val_losses = []
def on_epoch_end(self, batch, logs={}):
self.losses.append(logs.aget('loss'))
self.val_losses.append(logs.get('val_loss'))
history = LossHistory()
tensorboard = TensorBoard (log_dir='E:/logs/trail' , histogram_freq=0, write_graph=True , write_images=False)
adam = keras.optimizers.Adam(lr= lrate, beta_1= 0.9, beta_2= 0.999, epsilon= 1e-08, decay= decay)
model.compile(loss = 'binary_crossentropy', optimizer = adam, metrics = ['accuracy', 'mse'])
batch_si = 64
fitted_model = model.fit(X_train, y_train, batch_size= batch_si, callbacks=[tensorboard], epochs=epochs, verbose=1, validation_split= 0.2 , shuffle=True)
# Save Model
model.save('E:/models/trail.h5', overwrite = True)
model.save_weights('E:/models/weights_trail.hdf5', overwrite=True)
# Evaluate the model
scores = model.evaluate(X_test, y_test, batch_size=batch_si, verbose=1)
print("Model Accuracy: {:5.2f}%".format(100*scores[1]))
# Load and Evaluate the Model
new_model = tf.keras.models.load_model('E:/models/trail.h5', custom_objects={'tf': tf})
new_model.load_weights('E:/models/trail.hdf5')
new_model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy', 'mse'])
scores = new_model.evaluate(X_test, y_test, verbose=1)
print("Accuracy After Model Reloaded: {:5.2f}%".format(100*scores[1]))
Now problem is, I can evaluate my output successfully before save an reload the model. But when i reload the trained model file and try to evaluate output, I am getting error the following error:
ValueError: Unknown initializer: my_filter
You have to register custom function name (see here: https://www.tensorflow.org/guide/keras/save_and_serialize#custom_objects):
new_model = tf.keras.models.load_model('E:/models/trail.h5', custom_objects={'my_filter': my_filter, 'tf': tf})
I am training a small network and the training seems to go fine, the val loss decreases, I reach validation accuracy around 80, and it actually stops training once there is no more improvement (patience=10). It trained for 40 epochs. However, it keeps predicting only one class for every test image! I tried to initialize the conv layers randomly, I added regularizers, I switched from Adam to SGD, I added clipvalue, I added dropouts. I also switched to softmax (I have only two labels but I saw some recommendation on using softmax and Dense layer with 2 neurons). Some or one of these helped with the overfitting, but nothing worked for the prediction problem. The data is balanced, though it is a small dataset, so it doesn't make sense that it reaches 80% if it predicts the same labels for evaluation set as well.
What is wrong with my model and how can I fix it? Any comments are welcome.
#Import some packages to use
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
import os
from keras.regularizers import l2
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.initializers import RandomNormal
os.environ["CUDA_VISIBLE_DEVICES"]="0"
epochs = 200
callbacks = []
#schedule = None
decay = 0.0
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
mcp_save = ModelCheckpoint('.mdl_wts.hdf5', save_best_only=True, monitor='val_loss', mode='min')
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, epsilon=1e-5, mode='min')
train_dir = '/home/d/Desktop/s/data/train'
eval_dir = '/home/d/Desktop/s/data/eval'
test_dir = '/home/d/Desktop/s/data/test'
# create a data generator
train_datagen = ImageDataGenerator(rescale=1./255, #Scale the image between 0 and 1
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,)
val_datagen = ImageDataGenerator(rescale=1./255) #We do not augment validation data. we only perform rescale
test_datagen = ImageDataGenerator(rescale=1./255) #We do not augment validation data. we only perform rescale
# load and iterate training dataset
train_generator = train_datagen.flow_from_directory(train_dir, target_size=(224,224),class_mode='categorical', batch_size=16, shuffle='True', seed=42)
# load and iterate validation dataset
val_generator = val_datagen.flow_from_directory(eval_dir, target_size=(224,224),class_mode='categorical', batch_size=16, shuffle='True', seed=42)
# load and iterate test dataset
test_generator = test_datagen.flow_from_directory(test_dir, target_size=(224,224), class_mode=None, batch_size=1, shuffle='False', seed=42)
#We will use a batch size of 32. Note: batch size should be a factor of 2.***4,8,16,32,64...***
#batch_size = 4
#from keras import layers
from keras import models
from keras import optimizers
#from keras.layers import Dropout
#from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array, load_img
model = models.Sequential()
model.add(Conv2D(64, (3, 3), activation='relu', name='block1_conv1', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05), input_shape=(224, 224, 3)))
model.add(Conv2D(64, (3, 3), activation='relu', name='block1_conv2', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.2))
model.add(Conv2D(128, (3, 3), activation='relu', name='block2_conv1', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(Conv2D(128, (3, 3), activation='relu', name='block2_conv2',kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(MaxPooling2D((2, 2), name='block2_pool'))
model.add(Dropout(0.2))
model.add(Conv2D(256, (3, 3), activation='relu', name='block3_conv1', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(Conv2D(256, (3, 3), activation='relu', name='block3_conv2', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(Conv2D(256, (3, 3), activation='relu', name='block3_conv3', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(MaxPooling2D((2, 2), name='block3_pool'))
model.add(Dropout(0.2))
#model.add(layers.Conv2D(512, (3, 3), activation='relu', name='block4_conv1'))
#model.add(layers.Conv2D(512, (3, 3), activation='relu', name='block4_conv2'))
#model.add(layers.Conv2D(512, (3, 3), activation='relu', name='block4_conv3'))
#model.add(layers.MaxPooling2D((2, 2), name='block4_pool'))
model.add(Flatten())
model.add(Dense(256, kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01), activation='relu', kernel_initializer='he_uniform'))
model.add(Dropout(0.5))
model.add(Dense(2, kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01), activation='softmax'))
#Lets see our model
model.summary()
#We'll use the RMSprop optimizer with a learning rate of 0.0001
#We'll use binary_crossentropy loss because its a binary classification
#model.compile(loss='binary_crossentropy', optimizer=optimizers.SGD(lr=1e-5, momentum=0.9), metrics=['acc'])
model.compile(loss='categorical_crossentropy',
#optimizer=optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=1e-08, decay=decay),
optimizer=optimizers.SGD(lr= 0.0001, clipvalue = 0.5, decay=1e-6, momentum=0.9, nesterov=True),
metrics=['accuracy'])
#The training part
#We train for 64 epochs with about 100 steps per epoch
history = model.fit_generator(train_generator,
steps_per_epoch=train_generator.n // train_generator.batch_size,
epochs=epochs,
validation_data=val_generator,
validation_steps=val_generator.n // val_generator.batch_size,
callbacks=[earlyStopping, mcp_save]) #, reduce_lr_loss])
#Save the model
model.save_weights('/home/d/Desktop/s/categorical_weights.h5')
model.save('/home/d/Desktop/s/categorical_model_keras.h5')
#lets plot the train and val curve
#get the details form the history object
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
#Train and validation accuracy
plt.plot(epochs, acc, 'b', label='Training accuracy')
plt.plot(epochs, val_acc, 'r', label='Validation accuracy')
plt.title('Training and Validation accuracy')
plt.legend()
plt.figure()
#Train and validation loss
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and Validation loss')
plt.legend()
plt.show()
model.evaluate_generator(generator=val_generator, steps=val_generator.n // val_generator.batch_size)
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
test_generator.reset()
pred=model.predict_generator(test_generator,
steps=STEP_SIZE_TEST,
verbose=1)
predicted_class_indices=np.argmax(pred,axis=1)
labels = (train_generator.class_indices)
np.save('/home/d/Desktop/s/classes', labels)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]
filenames=test_generator.filenames
results=pd.DataFrame({"Filename":filenames,
"Predictions":predictions})
results.to_csv("categorical_results.csv",index=False)
One of the problems that could lead to such behavior is imbalanced dataset. Your model found out that if it predicts the dominant class each time, it would get a good results.
There are many ways to tackle an imbalance dataset. Here is a good tutorial.
One of the easiest yet powerful solution is to apply higher penalty to your loss if it wrongly predicted the smaller class. This can be implemented in keras by setting the parameter class_weight in the fitor fit_generator function.
It can be a dictionary of example:
class_weight = {0: 0.75, 1: 0.25} # does not necessarily add to up 1.
history = model.fit_generator(train_generator,
steps_per_epoch=train_generator.n // train_generator.batch_size,
epochs=epochs,
class_weight= class_weight, # this is the important part
validation_data=val_generator,
validation_steps=val_generator.n // val_generator.batch_size,
callbacks=[earlyStopping, mcp_save]) #, reduce_lr_loss])
Adding to Coderji's answer, it might also prove advantageous to counter class imbalance using stratified k-fold cross-validation, with k = 5 being common practice. This basically splits your data set up into k splits like regular cross-validation, but also stratifies these splits. In the case of class imbalance, each of these splits contain over-/undersampled classes compensating for their lower/higher occurence within the data set.
As of yet Keras does not have it's own way to use stratified k-fold cross-validation. Instead it's advised to use sklearn's StratifiedKFold. This article gives a detailed overview how to achieve this in Keras,
with the gist of it being:
from sklearn.model_selection import StratifiedKFold# Instantiate the cross validator
skf = StratifiedKFold(n_splits=kfold_splits, shuffle=True)# Loop through the indices the split() method returns
for index, (train_indices, val_indices) in enumerate(skf.split(X, y)):
print "Training on fold " + str(index+1) + "/10..." # Generate batches from indices
xtrain, xval = X[train_indices], X[val_indices]
ytrain, yval = y[train_indices], y[val_indices] # Clear model, and create it
model = None
model = create_model()
# Debug message I guess
# print "Training new iteration on " + str(xtrain.shape[0]) + " training samples, " + str(xval.shape[0]) + " validation samples, this may be a while..."
history = train_model(model, xtrain, ytrain, xval, yval)
accuracy_history = history.history['acc']
val_accuracy_history = history.history['val_acc']
print "Last training accuracy: " + str(accuracy_history[-1]) + ", last validation accuracy: " + str(val_accuracy_history[-1])
create_model() returns a compiled Keras model
train_model() returns last history object of its last model.fit() operation
I'm trying to transition from Keras to PYTorch.
After reading tutorials and similar questions, I came up with the following simple models to test. However, the two models below gives me very different scores: Keras (0.9), PyTorch (0.03).
Could someone give me guidance?
Basically my dataset has 120 features and multilabels with 3 classes that look like below.
[
[1,1,1],
[0,1,1],
[1,0,0],
...
]
def score(true, pred):
lrl = label_ranking_loss(true, pred)
lrap = label_ranking_average_precision_score(true, pred)
print('LRL:', round(lrl), 'LRAP:', round(lrap))
#Keras:
model= Sequential()
model.add(Dense(60, activation="relu", input_shape=(120,)))
model.add(Dense(30, activation="relu"))
model.add(Dense(3, activation="sigmoid"))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=32, epochs=100)
pred = model.predict(x_test)
score(y_test, pred)
#PyTorch
model = torch.nn.Sequential(
torch.nn.Linear(120, 60),
torch.nn.ReLU(),
torch.nn.Linear(60, 30),
torch.nn.ReLU(),
torch.nn.Linear(30, 3),
torch.nn. Sigmoid())
loss_fn = torch.nn. BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 100
batch_size = 32
n_batch = int(x_train.shape[0]/batch_size)
for epoch in range(epochs):
avg_cost = 0
for i in range(n_batch):
x_batch = x_train[i*batch_size:(i+1)*batch_size]
y_batch = y_train[i*batch_size:(i+1)*batch_size]
x, y = Variable(torch.from_numpy(x_batch).float()), Variable(torch.from_numpy(y_batch).float(), requires_grad=False)
pred = model(x)
loss = loss_fn(pred, y)
loss.backward()
optimizer.step()
avg_cost += loss.item()/n_batch
print(epoch, avg_cost)
x, y = Variable(torch.from_numpy(x_test).float()), Variable(torch.from_numpy(y_test).float(), requires_grad=False)
pred = model(x)
score(y_test, pred.data.numpy())
You need to call optimizer.zero_grad() at the start of each iteration, otherwise the gradients from different batches just keep getting accumulated.
I'm new to keras. I've adopted my model from one of the examples and it worked surprisinly well.
Suddenly my model started to predict always only one class from many. I think, the only change I made to the model is adding more samples.
Now I'm lost and have no idea, how to find out the cause.
Could someone tell me where to start and what to check?
Just in case, here is the code. The application learns from vew sorted images to sort remaining unsorted.
The images look like this, 800 per class in training set and 300 per class in test set. Validation and test are the same sets.
import numpy as np
from keras.models import model_from_json
from keras.preprocessing import image
import os
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
from shutil import copyfile
# dimensions of the images.
img_width, img_height = 64, 64
input_dirs = [
#list of dirs to sort with model,
]
out_directory = 'dir_of_auto_sorted_images'
train_base = 'dir_with_training_images'
train_dir = 'train'
val_dir = 'test'
test_dir = 'test'
nb_train_samples = 800*5 #800 training images per class, flipped V/H and zoomed in generator
nb_validation_samples = 1000 #just a number
nb_test_samples = 300*5
epochs = 100
batch_size = 32
#Read class names from training dir. In test in val they should be the same
tmp = os.path.join(train_base, train_dir)
classes = os.listdir(tmp)
classes.sort() #Classes in prediction seem to be sorted by dir_name
for c in classes: #Create output for predicted, where to copy unsorted from model
try:
os.makedirs(os.path.join(out_directory,c))
except:
pass
#############################
# Is right got gray-scale images? Shouldn't be 1 instead of 3
if K.image_data_format() == 'channels_first':
input_shape = (1, img_width, img_height)
else:
input_shape = (img_width, img_height, 1)
# Define model as proposed in keras tutorials
model = Sequential()
model.add(Conv2D(64, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(len(classes), activation='softmax'))
model.add(Activation('sigmoid'))
CreateModel = 1
if CreateModel:
model.compile(loss='categorical_crossentropy',
optimizer='rmsprop',
metrics=['accuracy'])
test_datagen = ImageDataGenerator(
zoom_range=0.2,
horizontal_flip=True,
vertical_flip=True)
train_datagen = ImageDataGenerator(
zoom_range=0.2,
horizontal_flip=True,
vertical_flip=True)
train_generator = train_datagen.flow_from_directory(
os.path.join(train_base, train_dir),
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='categorical',
color_mode='grayscale',
#save_to_dir=out_directory+"\\gendebug"
)
validation_generator = train_datagen.flow_from_directory(
os.path.join(train_base, val_dir),
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='categorical',
color_mode='grayscale'
)
test_generator = test_datagen.flow_from_directory(
os.path.join(train_base, test_dir)
, target_size=(img_width, img_height)
, batch_size=batch_size
, class_mode='categorical',
color_mode='grayscale'
# ,save_to_dir=out_dir
)
model.fit_generator(
train_generator,
steps_per_epoch=nb_train_samples // batch_size,
epochs=epochs,
validation_data=validation_generator,
validation_steps=nb_validation_samples // batch_size)
# Save model architecture
model_json = model.to_json()
json_file = open(train_base+"/keras_model.json", "w")
json_file.write(model_json)
json_file.close()
# Save model weights
model.save_weights(train_base+"/keras_model.h5")
print("Finished saving")
score = model.evaluate_generator(test_generator, nb_test_samples // batch_size)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
score = model.predict_generator(train_generator, nb_test_samples // batch_size)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
print('Score', score)
#############################
json_file = open(train_base+"/keras_model.json", "r")
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights(train_base+"/keras_model.h5")
loaded_model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
def get_class(prediction):
m=max(prediction)
if m < 0.6:
return -1
for i in range(0, len(prediction)):
if prediction[i] == m:
return i
return -1
right = 0
wrong = 0
for input_directory in input_dirs:
print(input_directory)
cnt=0
for filename in os.listdir(input_directory):
os.sys.stdout.write('.')
os.sys.stdout.flush()
fn = os.path.join(input_directory, filename)
img = image.load_img(fn, target_size=(64, 64), color_mode='grayscale')
x = image.img_to_array(img)
x = x.astype('float32')
x /= 255
x = np.expand_dims(x, axis=0)
prediction = loaded_model.predict(x)[0]
c = get_class(prediction)
if c >= 0 and c < len(prediction):
predicted = classes[c]
print(fn, prediction)
copyfile(fn, os.path.join(out_directory,predicted, filename))
cnt += 1
if cnt > 100:
break