Training and evaluating accuracies different in keras LSTM model - python-3.x

I am training a LSTM model using keras. Training the training set gives accuracy:83% in the last epoch, but evaluating or predicting the model with the same training set gives accuracy:47%. I cant figure out the problem.
Also, I have commented out the training part in the following code, since i save the model & weights on disk, but I used it before to train i.e. model.fit(...) etc.
import pandas as pd
import Preprocessing as pre
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.utils import shuffle
import pickle
import numpy as np
import sys
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.layers import LSTM
from keras.preprocessing.sequence import pad_sequences
from keras.models import model_from_json
from keras.preprocessing.text import Tokenizer
df = pd.read_csv('tweets.csv',header=None,encoding = "ISO-8859-1")
df=shuffle(df)
length=df.size
train=[]
test=[]
Y=[]
Y2=[]
count=450000
for a in range(450000): #loading data
# b=pre.preprocess_tweet(df[1][a])
label=df[0][a]
# train.append(b)
Y.append(label)
count-=1
print("Loading training data...", count)
# with open('training_data_no_stopwords.obj', 'wb') as fp:
# pickle.dump(train, fp)
with open ('training_data.obj', 'rb') as fp:
train = pickle.load(fp)
count=156884
for a in range(450000,606884): #loading testin data
# b = pre.preprocess_tweet(df[1][a])
label=df[0][a]
# test.append(b)
Y2.append(label)
count-=1
print("Loading testing data...", count)
# with open('testing_data_no_stopwords.obj', 'wb') as fp:
# pickle.dump(test, fp)
with open ('testing_data.obj', 'rb') as fp:
test = pickle.load(fp)
# vectorizer = CountVectorizer(analyzer = "word",tokenizer = None, preprocessor = None, stop_words = None, max_features = 3500)
# # # fit_transform() does two functions: First, it fits the model
# # # and learns the vocabulary; second, it transforms our training data
# # # into feature vectors. The input to fit_transform should be a list of
# # # strings.
#
# train = vectorizer.fit_transform(train)
# test = vectorizer.transform(test)
tokenizer = Tokenizer(split=' ')
tokenizer.fit_on_texts(train)
train = tokenizer.texts_to_sequences(train)
max_words = 134
train = pad_sequences(train, maxlen=max_words)
tokenizer.fit_on_texts(test)
test = tokenizer.texts_to_sequences(test)
test = pad_sequences(test, maxlen=max_words)
print ('Extracting features & training batches')
print("Training...")
embedding_size=32
model = Sequential()
model.add(Embedding(606884, 32, input_length=134))
model.add(Dropout(0.4))
model.add(LSTM(128))
model.add(Dense(64))
model.add(Dropout(0.5))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation('sigmoid'))
print(model.summary())
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# batch_size = 501
# num_epochs = 3
#
# model.fit(train, np.array(Y), batch_size=batch_size, epochs=num_epochs ,validation_split=0.2,shuffle=True)
# # Save the weights
# model.save_weights('LSTM_model_weights.h5')
#
# # Save the model architecture
# with open('LSTM_model.json', 'w') as f:
# f.write(model.to_json())
# Model reconstruction from JSON file
with open('LSTM_model.json', 'r') as f:
model = model_from_json(f.read())
# Load weights into the new model
model.load_weights('LSTM_model_weights.h5')
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
scores = model.evaluate(train, np.array(Y))
print('Test accuracy:', scores[1])

Maybe try training with Stratified K-Fold. This probably happens because your test data is very different from training data. Also, you can try shuffling the data before train-test splitting.

Related

Transferlearning ResNet Model does not learn

I trained ResNet-50 model to classify images from 6 classes (my own dataset) and saved it. But the model did not learn properly and predictions are incorrect. What would be the reason for this poor learning?
Here is my code, and the output plots using Keras and TensorFlow backend. How can I solve this?
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.layers import Dense, Dropout
from keras.models import Model
from keras.optimizers import Adam, SGD
from keras.preprocessing.image import ImageDataGenerator, image
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from keras import backend as K
import numpy as np
import matplotlib.pyplot as plt
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGE = True
# Define some constant needed throughout the script
N_CLASSES = 6
EPOCHS = 20
PATIENCE = 5
TRAIN_PATH= '/Train/'
VALID_PATH = '/Test/'
MODEL_CHECK_WEIGHT_NAME = 'resnet_monki_v1_chk.h5'
# Define model to be used we freeze the pre trained resnet model weight, and add few layer on top of it to utilize our custom dataset
K.set_learning_phase(0)
model = ResNet50(input_shape=(224,224,3),include_top=False, weights='imagenet', pooling='avg')
K.set_learning_phase(1)
x = model.output
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(N_CLASSES, activation='softmax', name='custom_output')(x)
custom_resnet = Model(inputs=model.input, outputs = output)
for layer in model.layers:
layer.trainable = False
custom_resnet.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
custom_resnet.summary()
# 4. Load dataset to be used
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
traingen = datagen.flow_from_directory(TRAIN_PATH, target_size=(224,224), batch_size=32, class_mode='categorical')
validgen = datagen.flow_from_directory(VALID_PATH, target_size=(224,224), batch_size=32, class_mode='categorical', shuffle=False)
# 5. Train Model we use ModelCheckpoint to save the best model based on validation accuracy
es_callback = EarlyStopping(monitor='val_acc', patience=PATIENCE, mode='max')
mc_callback = ModelCheckpoint(filepath=MODEL_CHECK_WEIGHT_NAME, monitor='val_acc', save_best_only=True, mode='max')
train_history = custom_resnet.fit_generator(traingen, steps_per_epoch=len(traingen), epochs= EPOCHS, validation_data=traingen, validation_steps=len(validgen), verbose=2, callbacks=[es_callback, mc_callback])
custom_resnet.save('custom_resnet.h5')
Here are the plots, I had to put the links, the site does not let me put a pic
enter image description here

ValueError: You are trying to load a weight file containing 58 layers into a model with 55 layers

I trained my model and saved the model in .h5 format. Trained by freezing the last layer of the mobilenet imagenet model.
Loading the model and trying prediction makes error stating ValueError: You are trying to load a weight file containing 58 layers into a model with 55 layers.
Training code :
# coding: utf-8
# In[1]:
import pandas as pd
import numpy as np
import os
import keras
import matplotlib.pyplot as plt
from keras.layers import Dense,GlobalAveragePooling2D
from keras.applications import MobileNet
from keras.preprocessing import image
from keras.applications.mobilenet import preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.optimizers import Adam
# In[2]:
base_model=MobileNet(weights='imagenet',include_top=False) #imports the mobilenet model and discards the last 1000 neuron layer.
x=base_model.output
x=GlobalAveragePooling2D()(x)
x=Dense(1024,activation='relu')(x) #we add dense layers so that the model can learn more complex functions and classify for better results.
x=Dense(1024,activation='relu')(x) #dense layer 2
x=Dense(512,activation='relu')(x) #dense layer 3
preds=Dense(2,activation='softmax')(x) #final layer with softmax activation
# In[3]:
model=Model(inputs=base_model.input,outputs=preds)
#specify the inputs
#specify the outputs
#now a model has been created based on our architecture
# In[4]:
for layer in model.layers[:20]:
layer.trainable=False
for layer in model.layers[20:]:
layer.trainable=True
# In[5]:
train_datagen=ImageDataGenerator(preprocessing_function=preprocess_input) #included in our dependencies
train_generator=train_datagen.flow_from_directory('./train/', # this is where you specify the path to the main data folder
target_size=(224,224),
color_mode='rgb',
batch_size=64,
class_mode='categorical',
shuffle=True)
# In[33]:
model.compile(optimizer='Adam',loss='categorical_crossentropy',metrics=['accuracy'])
# Adam optimizer
# loss function will be categorical cross entropy
# evaluation metric will be accuracy
step_size_train=train_generator.n//train_generator.batch_size
model.fit_generator(generator=train_generator,
steps_per_epoch=step_size_train,
epochs=10)
# serialize model to JSON
model_json = model.to_json()
with open("mobilenet_2.json", "w") as json_file:
json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("mobilenet_2.h5")
print("Saved model to disk")
Prediciton code :
import keras
from keras import backend as K
from keras.layers.core import Dense, Activation
from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.models import Model
from keras.applications import imagenet_utils
from keras.layers import Dense,GlobalAveragePooling2D
from keras.applications import MobileNet
from keras.applications.mobilenet import preprocess_input
import numpy as np
from keras.optimizers import Adam
from keras.models import load_model
model = load_model("mobilenet_1.h5")
#mobile = keras.applications.mobilenet.MobileNet(weights="imagenet")
def prepare_image(file):
img_path = ''
img = image.load_img("/home/christie/mobilenet/transfer-learning/" + file, target_size=(224, 224))
img_array = image.img_to_array(img)
img_array_expanded_dims = np.expand_dims(img_array, axis=0)
return keras.applications.mobilenet.preprocess_input(img_array_expanded_dims)
'''
lookup_list = ["banana","banana_palenkodan","banana_red","banana_nendran","banana_karpooravalli"]
#print(lookup_list)
if ans not in lookup_list:sx
print("Not found")
return "[None]"
'''
preprocessed_image = prepare_image('test.jpg')
predictions = model.predict(preprocessed_image)
results = imagenet_utils.decode_predictions(predictions)
print(results)
Error log :
ValueError: You are trying to load a weight file containing 58 layers
into a model with 55 layers.
The model is converted to JSON format and written to mobilenet_2.json in the local directory. The network weights are written to mobilenet_2.h5 in the local directory.
Similarly you have to load the json and its corresponding weights.
Try editing as below :
# serialize model to JSON
model_json = model.to_json()
with open("mobilenet_2.json", "w") as json_file:
json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("mobilenet_2.h5")
print("Saved model to disk")
# later...
# load json and create model
json_file = open('mobilenet_2.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("mobilenet_2.h5")
print("Loaded model from disk")
You are saving just the weights but trying to load the model architecture and weights. If you would like to save weights and model architecture together and later load, then try the below code -
# save model and architecture to single file
model.save("model.h5")
# later...
# load model
model = load_model('model.h5')

InvalidArgumentError Sentiment analyser with keras

I have built a sentiment analyzer using Keras as a binary classification problem. I am using the Imdb dataset using GRU.
My code is:
# coding=utf-8
# ==========
# MODEL
# ==========
# imports
from __future__ import print_function
from timeit import default_timer as timer
from datetime import timedelta
from keras.models import Sequential
from keras.preprocessing import sequence
from keras import regularizers
from keras.layers import Dense, Embedding
from keras.layers import GRU, LeakyReLU, Bidirectional
from keras.datasets import imdb
#start a timer
start = timer()
# Hyperparameters
Model_Name = 'my_model.h5'
vocab_size = 5000
maxlen = 1000
batch_size = 512
hidden_layer_size = 2
test_split = 0.3
dropout = 0.1
num_epochs = 1
alpha = 0.2
validation_split = 0.25
l1 = 0.01
l2 = 0.01
# Dataset loading
print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(path="imdb.npz",
maxlen=maxlen)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')
# Data preprocessing
# Sequence padding
print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
# Network building
print('Build model...')
model = Sequential()
model.add(Embedding(vocab_size, hidden_layer_size))
model.add(Bidirectional(GRU(hidden_layer_size, kernel_initializer='uniform', kernel_regularizer=regularizers.l1_l2(l1=l1,l2=l2), dropout=dropout, recurrent_dropout=dropout,return_sequences=True)))
model.add(LeakyReLU())
model.add(Bidirectional(GRU(hidden_layer_size, kernel_initializer='uniform', dropout=dropout, kernel_regularizer=regularizers.l1_l2(l1=l1,l2=l2), recurrent_dropout=dropout)))
model.add(LeakyReLU())
model.add(Dense(1, activation='softmax', kernel_initializer='uniform', kernel_regularizer=regularizers.l1_l2(l1=l1,l2=l2)))
# Compile my model
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
print('Train...')
# Fit the model
history = model.fit(x_train, y_train, batch_size=batch_size, epochs=num_epochs, validation_split=validation_split)
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=1)
# Create a summary, a plot and print the scores of the model
model.summary()
print('Test score:', score)
print('Test accuracy:', acc)
# Save model architecture, weights, training configuration (loss,optimizer),
# and also the state of the optimizer, so you can resume where you stopped
model.save(Model_Name)
end = timer()
print('Running time: ' + str(timedelta(seconds=(end - start))) + ' in Hours:Minutes:Seconds')
I keep receiving an Error message which I don't completely understand:
InvalidArgumentError (see above for traceback): indices[502,665] = 5476 is not in [0, 5000)
[[Node: embedding_1/Gather = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, validate_indices=true, _device=/job:localhost/replica:0/task:0/device:CPU:0](embedding_1/embeddings/read, embedding_1/Cast)]]
Can anyone help me understand what causes this error and how to solve it?
The error complains about a non-existent word index. That's because you are only limiting the number of Emedding features (i.e. there is a word with index 5476 which is not in the range [0, 5000), which 5000 refers to the vocab_size you have set). To resolve this, you also need to pass the vocab_size as num_words argument of load_data function, like this:
... = imdb.load_data(num_words=vocab_size, ...)
This way you are limiting the words to the most frequent words (i.e. top vocab_size words with the most frequency in the dataset) with their indices in range [0, vocab_size).

multi-label text classification. I have a text/label csv. Text is pure text, labels are alphanumeric

import keras
import keras.backend as K
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Dense
from keras.layers.core import Activation
from keras.preprocessing.text import Tokenizer # for
tokenizing text
from keras.preprocessing.sequence import pad_sequences # for
padding sentences with zeros. To make the sentence length same
from keras.utils import to_categorical # for one-
hot encoding of the labels
from keras.layers import Dense, Input, Flatten, Dropout,
BatchNormalization
from keras.layers import Conv1D, MaxPooling1D, Embedding
from keras.models import Sequential
from sklearn.model_selection import train_test_split
MAX_SEQUENCE_LENGTH = 300
MAX_NB_WORDS = 20000
#Reading the data
raw_data=pd.read_csv("/home/riaz.k/Desktop/TRAIN.csv")
raw_data.head()
# create training and testing vars
train, test = train_test_split(raw_data, test_size=0.3)
train.head()
test.head()
tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(train.Procedure)
train_sequences = tokenizer.texts_to_sequences(train.Procedure)
test_sequences = tokenizer.texts_to_sequences(test.Procedure)
word_index = tokenizer.word_index
containing words and their index
# print(tokenizer.word_index)
print('Found %s unique tokens.' % len(word_index))
train_data = pad_sequences(train_sequences,
maxlen=MAX_SEQUENCE_LENGTH)
train
test_data=pad_sequences(test_sequences,maxlen=MAX_SEQUENCE_LENGTH)
test
print(train_data.shape)
print(test_data.shape)
print (word_index)
train_labels = train['dxcode']
test_labels = test['dxcode']
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder() # converts the character
array to numeric array.
Assigns levels to unique labels.
le.fit(train_labels)
le.fit(test_labels)
train_labels = le.transform(train_labels)
test_labels = le.transform(test_labels)
print(le.classes_)
print(np.unique(train_labels, return_counts=True))
print(np.unique(test_labels, return_counts=True))
le.inverse_transform(1)
labels_train = to_categorical(np.asanyarray(train_labels))
labels_test = to_categorical(np.asarray(test_labels))
print('Shape of data tensor:', train_data.shape)
print('Shape of label tensor:', labels_train.shape)
print('Shape of label tensor:', labels_test.shape)
EMBEDDING_DIM = 100
print(MAX_SEQUENCE_LENGTH)
print('Training model.')
model = Sequential()
model.add(Embedding(MAX_NB_WORDS,
EMBEDDING_DIM,
input_length=MAX_SEQUENCE_LENGTH
))
model.add(Dropout(0.2))
model.add(Conv1D(128, 5, activation='relu'))
model.add(MaxPooling1D(5))
model.add(Dropout(0.5))
model.add(BatchNormalization())
model.add(Conv1D(128, 5, activation='relu'))
model.add(MaxPooling1D(5))
model.add(Dropout(0.5))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(23, activation='softmax'))
model.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['acc'],)
model.fit(train_data, labels_train,
batch_size=32,
epochs=10,
validation_data=(test_data, labels_test))
model.evaluate(test_data, labels_test)
pred = model.predict(test_data)
pred
# print(model.layers)
for layer in model.layers:
print(layer)
import keras.backend as K
emd = K.function(inputs=[model.layers[0].input],
outputs=[model.layers[0].output])
rbind = np.concatenate((train_data, test_data), axis=0)
print(rbind.shape)
### Submissions file
test_results = model.predict_classes(rbind)
#print(test_results)
test_labels = le.inverse_transform(test_results)
#test_labels = [le.inverse_transform(i) for i in test_results]
submissions_CNN =
pd.DataFrame({'id':raw_data['Claimno'],"label":test_labels})
submissions_CNN.to_csv("/home/riaz.k/Desktop/submissions.csv",index=False)
The text document can be labelled with more than one label, so how can I do a multi-label classification on this dataset? I've read a lot of documentation from sklearn, but I can't seem to find the right way to do multi-label classification. Thanks in advance for any help.
Are you getting the error on this line:
train_labels = le.transform(train_labels)
If yes, then its because in the line just above it, you are doing this:
le.fit(test_labels)
What this does is it forgets the previous data (previous call to fit() on the line above it) and only remembers the data in the test_labels. So when a new label (which is present in train but not in test) comes, it will throw this error.
You need to replave the lines:
le.fit(train_labels)
le.fit(test_labels)
with this:
# I am using .tolist() because I observe that your
# train_labels, test_labels are pandas Series objects
le.fit(train_labels.tolist() + test_labels.tolist())

keras gridSearchCV on sklearn One hot Encoded Data

The problem with this code is that I am giving classifier,
One hot encoded data:
Means:
X-train, X-test, y_train, y_test is one hot encoded.
But the classifier is predicting the output:
y_pred_test, y_pred_train in Numerical form
(which I think is incorrect as well). Can anyone help with this?
This is a dummy example so no concern over low accuracy but just to know why it's predicting the output in not One Hot encoded form.
Thanks !
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
x=pd.DataFrame()
x['names']= np.arange(1,10)
x['Age'] = np.arange(1,10)
y=pd.DataFrame()
y['target'] = np.arange(1,10)
from sklearn.preprocessing import OneHotEncoder, Normalizer
ohX= OneHotEncoder()
x_enc = ohX.fit_transform(x).toarray()
ohY = OneHotEncoder()
y_enc = ohY.fit_transform(y).toarray()
print (x_enc)
print("____")
print (y_enc)
import keras
from keras import regularizers
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.models import load_model
from keras.layers.advanced_activations import LeakyReLU
marker="-------"
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
def create_model(learn_rate=0.001):
model = Sequential()
model.add(Dense(units = 15, input_dim =18,kernel_initializer= 'normal', activation="tanh"))
model.add(Dense(units=9, activation = "softmax"))
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['accuracy'])
return model
if __name__=="__main__":
X_train, X_test, y_train, y_test = train_test_split(x_enc, y_enc, test_size=0.33, random_state=42)
print ("\n\n",marker*5," Classification\nX_train shape is: ",X_train.shape,"\tX_test shape is:",X_test.shape)
print ("\ny_train shape is: ",y_train.shape,"\t y_test shape is:",y_test.shape,"\n\n")
norm = Normalizer()
#model
X_train = norm.fit_transform(X_train)
X_test = norm.transform(X_test)
earlyStopping=keras.callbacks.EarlyStopping(monitor='val_loss', patience=0, verbose=0, mode='auto')
model = KerasClassifier(build_fn=create_model, verbose=0)
fit_params={'callbacks': [earlyStopping]}
#grid
# batch_size =[50,100,200, 300,400]
epochs = [2,5]
learn_rate=[0.1,0.001]
param_grid = dict( epochs = epochs, learn_rate = learn_rate)
grid = GridSearchCV(estimator = model, param_grid = param_grid, n_jobs=1)
#Predicting
print (np.shape(X_train), np.shape(y_train))
y_train = np.reshape(y_train, (-1,np.shape(y_train)[1]))
print ("y_train shape after reshaping", np.shape(y_train))
grid_result = grid.fit(X_train, y_train, callbacks=[earlyStopping])
print ("grid score using params: ", grid_result.best_score_, " ",grid_result.best_params_)
#scores
print("SCORES")
print (grid_result.score(X_test,y_test))
# summarize results
#print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
#means = grid_result.cv_results_['mean_test_score']
#stds = grid_result.cv_results_['std_test_score']
#params = grid_result.cv_results_['params']
#for mean, stdev, param in zip(means, stds, params):
# print("%f (%f) with: %r" % (mean, stdev, param))
print("\n\n")
print("y_test is",y_test)
y_hat_test = grid.predict(X_test)
y_hat_train = grid.predict(X_train)
print("y_hat_test is ", y_hat_test)

Resources