Tuple index out of range when trying to fit the CNN model - python-3.x

The dataset that I am using is the standard chest X-ray dataset https://www.kaggle.com/datasets/paultimothymooney/chest-xray-pneumonia. I have been getting this error (tuple index out of range) while fitting the CNN model. Is there a way to circumvent this issue? I suppose the "validation_data" argument needs to be passed in some other form.
import os
import glob
import cv2
import numpy as np
import pandas as pd
from PIL import Image
import tensorflow as tf
import random
#from pathlib import path
import pathlib2 as pathlib
from pathlib2 import Path
#from keras.models import sequential, Model, load_model
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten, Activation
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.callbacks import Callback, EarlyStopping
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import confusion_matrix
from tensorflow.keras import backend as K
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image
%matplotlib inline
import shutup; shutup.please()
# DATA PATH #
print (os.listdir("C:/Users/Syd_R/OneDrive/Desktop/Peeumonia_data/archive/chest_xray/chest_xray/"))
data_dir = Path("C:/Users/Syd_R/OneDrive/Desktop/Peeumonia_data/archive/chest_xray/chest_xray/")
train_dir = data_dir/'train'
val_dir = data_dir/'val'
test_dir = data_dir/'test'
# LOAD TRAINING DATA TO DATAFRAME #
def load_train():
    normal_cases_dir = train_dir / 'NORMAL'
    pneumonia_cases_dir = train_dir / 'PNEUMONIA'
    # list of all images
    normal_cases = normal_cases_dir.glob('*.jpeg')
    pneumonia_cases = pneumonia_cases_dir.glob('*.jpeg')
    train_data = []
    train_label = []
    for img in normal_cases:
        train_data.append(img)
        train_label.append('NORMAL')
    for img in pneumonia_cases:
        train_data.append(img)
        train_label.append('PNEUMONIA')
    df = pd.DataFrame(train_data)
    df.columns = ['images']
    df['labels'] = train_label
    df = df.sample(frac=1).reset_index(drop=True)
    return df
train_data = load_train()
train_data.shape
# VISUALIZE THE AMOUNT OF TRAINING DATA PER LABEL #
plt.bar(train_data['labels'].value_counts().index,train_data['labels'].value_counts().values)
plt.show()
# VISUALIZE THE TRAINING IMAGE DATA BY RANDOM SAMPLING #
plt.figure(figsize=(10, 5))
for i in range(10):
    ax = plt.subplot(2, 5, i + 1)
    num = random.randint(0, 5000 + i)
    im = train_data.loc[num].at['images']
    im1 = train_data.loc[num].at['labels']
    img = cv2.imread(str(im))
    img = cv2.resize(img, (224, 224))
    plt.imshow(img)
    plt.title(im1)
    plt.axis("off")
    print(num)
# DATA PRE-PROCESSING #
def prepare_and_load(isval=True):
    if isval == True:
        normal_dir = val_dir / 'NORMAL'
        pneumonia_dir = val_dir / 'PNEUMONIA'
    else:
        normal_dir = test_dir / 'NORMAL'
        pneumonia_dir = test_dir / 'PNEUMONIA'
    normal_cases = normal_dir.glob('*.jpeg')
    pneumonia_cases = pneumonia_dir.glob('*.jpeg')
    data, labels = ([] for x in range(2))
    def prepare(case):
        for img in case:
            img = cv2.imread(str(img))
            img = cv2.resize(img, (224, 224))
            if img.shape[2] == 1:
                img = np.dstack([img, img, img])
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = img.astype(np.float32) / 255
            if case == normal_cases:
                label = to_categorical(0, num_classes=2)
            else:
                label = to_categorical(1, num_classes=2)
            data.append(img)
            labels.append(label)
        return data, labels
    prepare(normal_cases)
    d, l = prepare(pneumonia_cases)
    d = np.array(d)
    l = np.array(1)
    return d, l
val_data,val_labels = prepare_and_load(isval=True)
test_data,test_labels = prepare_and_load(isval=False)
print('Number of test images -->', len(test_data))
print('Number of validation images -->', len(val_data))
# DEFINE A FUNCTION TO GENERATE BATCHES FROM TRAINING IMAGES #
def data_gen(data, batch_size):
    # Get total number of samples in the data
    n = len(data)
    steps = n // batch_size
    # Define two numpy arrays for containing batch data and labels
    batch_data = np.zeros((batch_size, 224, 224, 3), dtype=np.float32)
    batch_labels = np.zeros((batch_size, 2), dtype=np.float32)
    # Get a numpy array of all the indices of the input data
    indices = np.arange(n)
    # Initialize a counter
    i = 0
    while True:
        np.random.shuffle(indices)
        # Get the next batch
        count = 0
        next_batch = indices[(i * batch_size):(i + 1) * batch_size]
        for j, idx in enumerate(next_batch):
            img_name = data.iloc[idx]['images']
            label = data.iloc[idx]['images']
            if label == 'NORMAL':
                label = 0
            else:
                label = 1
            # one hot encoding
            encoded_label = to_categorical(label, num_classes=2)
            # read the image and resize
            img = cv2.imread(str(img_name))
            img = cv2.resize(img, (224, 224))
            # check if it's grayscale
            if img.shape[2] == 1:
                img = np.dstack([img, img, img])
            # cv2 reads in BGR mode by default
            orig_imag = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # normalize the image pixels
            orig_img = img.astype(np.float32) / 255
            batch_data[count] = orig_img
            batch_labels[count] = encoded_label
            count += 1
            if count == batch_size - 1:
                break
        i += 1
        yield batch_data, batch_labels
        if i >= steps:
            i = 0
# DEFINE THE CNN MODEL #
model = Sequential()
model.add(Conv2D(32, (3,3), input_shape=(224, 224, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(32, (3,3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(64, (3,3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten()) # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(2))
model.add(Activation('softmax'))
# DEFINE PARAMETERS FOR THE CNN MODEL #
batch_size = 64
nb_epochs = 3
# Get a train data generator
train_data_gen = data_gen(data= train_data, batch_size=batch_size)
# DEFINE THE NUMBER OF TRAINING STEPS #
nb_train_steps = train_data.shape[0]//batch_size
print("Number of training and validation steps: {} and {}".format(nb_train_steps, len(val_data)))
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
# FIT THE MODEL #
history = model.fit_generator(train_data_gen,
                              epochs=nb_epochs,
                              steps_per_epoch=nb_train_steps,
                              validation_data=(val_data, val_labels))
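A hedged guess at the cause, based only on the code above: prepare_and_load collects the one-hot labels into a list but then returns l = np.array(1), a zero-dimensional array, so val_labels has no usable shape and fit_generator raises tuple index out of range when it inspects validation_data. A minimal sketch of the intended ending of that function (a hypothetical fix, not verified against the full dataset):

    # end of prepare_and_load(), after both prepare() calls
    d = np.array(d)   # images, shape (n_samples, 224, 224, 3)
    l = np.array(l)   # one-hot labels, shape (n_samples, 2); was np.array(1)
    return d, l

With arrays of those shapes, validation_data=(val_data, val_labels) can stay as written in the fit_generator call.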

Related

Multi-Class Text Classification with BERT null prediction error

I am new to multi-class text classification with BERT. I have been following a tutorial (https://towardsdatascience.com/multi-label-multi-class-text-classification-with-bert-transformer-and-keras-c6355eccb63a) for learning purposes.
I am able to get the script below running up to calculating the confusion matrix. The classification report also does not work. I would be grateful if someone can help me. My apologies if this question has already been asked. I searched everywhere and could not find an answer.
The error is here: y_predicted = numpy.argmax(predicted_raw, axis = 1). The error message says "axis 1 is out of bounds for array of dimension 1". When I change the axis to zero, the new error message is "Singleton array 0 cannot be considered a valid collection." I think what the axis=0 error means is that y_predicted is null; I double-checked that with an if statement.
import pandas
import numpy
import re
import nltk
# for plotting
import matplotlib.pyplot as plt
import seaborn as sns
input_dataframe = pandas.read_csv('tutorial6.csv')
fig, ax = plt.subplots()
fig.suptitle("Product", fontsize=12)
input_dataframe["Product"].reset_index().groupby("Product").count().sort_values(by=
"index").plot(kind="barh", legend=False,
ax=ax).grid(axis='x')
plt.show()
def utils_preprocess_text(text, flg_stemm=False, flg_lemm=True, lst_stopwords=None):
    ## clean (convert to lowercase and remove punctuations and characters and then strip)
    text = re.sub(r'[^\w\s]', '', str(text).lower().strip())
    ## Tokenize (convert from string to list)
    lst_text = text.split()
    ## remove Stopwords
    if lst_stopwords is not None:
        lst_text = [word for word in lst_text if word not in lst_stopwords]
    ## Stemming (remove -ing, -ly, ...)
    if flg_stemm == True:
        ps = nltk.stem.porter.PorterStemmer()
        lst_text = [ps.stem(word) for word in lst_text]
    ## Lemmatisation (convert the word into root word)
    if flg_lemm == True:
        lem = nltk.stem.wordnet.WordNetLemmatizer()
        lst_text = [lem.lemmatize(word) for word in lst_text]
    ## back to string from list
    text = " ".join(lst_text)
    return text
lst_stopwords = nltk.corpus.stopwords.words("english")
input_dataframe["text_clean"] = input_dataframe ["Consumer_Complaint"].apply(lambda x:
utils_preprocess_text(x, flg_stemm=False, flg_lemm=True,
lst_stopwords=lst_stopwords))
from tensorflow.keras.utils import to_categorical
possible_labels = input_dataframe.Product.unique()
label_dict = {}
for index, possible_label in enumerate(possible_labels):
    label_dict[possible_label] = index
print(label_dict)
input_dataframe['label'] = input_dataframe.Product.replace(label_dict)
# Split into train and test - stratify over Issue
from sklearn.model_selection import train_test_split
data_train, data_test = train_test_split(input_dataframe, test_size = 0.2,stratify = input_dataframe[["label"]])
# Load Huggingface transformers
from transformers import TFBertModel, BertConfig, BertTokenizerFast
# Then what you need from tensorflow.keras
from tensorflow.keras.layers import Input, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.utils import to_categorical
### --------- Setup BERT ---------- ###
# Name of the BERT model to use
model_name = 'bert-base-uncased'
# Max length of tokens
max_length = 100
# Load transformers config and set output_hidden_states to False
config = BertConfig.from_pretrained(model_name)
config.output_hidden_states = False
# Load BERT tokenizer
tokenizer = BertTokenizerFast.from_pretrained(pretrained_model_name_or_path = model_name, config = config)
# Load the Transformers BERT model
transformer_model = TFBertModel.from_pretrained(model_name, config = config)
### ------- Build the model ------- ###
# TF Keras documentation: https://www.tensorflow.org/api_docs/python/tf/keras/Model
# Load the MainLayer
bert = transformer_model.layers[0]
# Build your model input
input_ids = Input(shape=(max_length,), name='input_ids', dtype='int32')
inputs = {'input_ids': input_ids}
# Load the Transformers BERT model as a layer in a Keras model
bert_model = bert(inputs)[1]
dropout = Dropout(config.hidden_dropout_prob, name='pooled_output')
pooled_output = dropout(bert_model, training=False)
# Then build your model output
product = Dense(8, kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='product')(pooled_output)
outputs = {'product': product}
# And combine it all in a model object
model = Model(inputs=inputs, outputs=outputs, name='BERT_MultiLabel_MultiClass')
# Take a look at the model
model.summary()
# Set an optimizer
optimizer = Adam()
# Set loss and metrics
loss = {'product': CategoricalCrossentropy(from_logits = True)}
metric = {'product': CategoricalAccuracy('accuracy')}
# Compile the model
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=metric)
# Ready output data for the model
y_train = to_categorical(data_train['label'],8)
y_test = to_categorical(data_test['label'],8)
x_train = tokenizer(
    text=data_train['Consumer_Complaint'].to_list(),
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True,
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=False,
    verbose=True)
x_test = tokenizer(
    text=data_test['Consumer_Complaint'].to_list(),
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True,
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=False,
    verbose=True)
# Fit the model
history = model.fit(
    x={'input_ids': x_train['input_ids']},
    y={'product': y_train},
    validation_split=0.2,
    batch_size=64,
    epochs=1)
### ----- Evaluate the model ------ ###
model_eval = model.evaluate(
    x={'input_ids': x_test['input_ids']},
    y={'product': y_test})
print("This is evaluation: ", model_eval)
accr = model.evaluate(x_test['input_ids'],y_test)
print('Test set\n Loss: {:0.3f}\n Accuracy: {:0.3f}'.format(accr[0],accr[1]))
from matplotlib import pyplot as plt
plt.title('Loss')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
plt.show();
# plot loss and accuracy
metrics = [k for k in history.history.keys() if ("loss" not in k) and ("val" not in k)]
fig, ax = plt.subplots(nrows=1, ncols=2, sharey=True)
ax[0].set(title="Training")
ax11 = ax[0].twinx()
ax[0].plot(history.history['loss'], color='black')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('Loss', color='black')
for metric in metrics:
    ax11.plot(history.history[metric], label=metric)
ax11.set_ylabel("Score", color='steelblue')
ax11.legend()
ax[1].set(title="Validation")
ax22 = ax[1].twinx()
ax[1].plot(history.history['val_loss'], color='black')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Loss', color='black')
for metric in metrics:
    ax22.plot(history.history['val_' + metric], label=metric)
ax22.set_ylabel("Score", color="steelblue")
plt.show()
#Testing our model on the test data.
predicted_raw = model.predict({'input_ids':x_test['input_ids']})
print(type(predicted_raw))
predicted_raw=list(predicted_raw)
predicted_raw=numpy.array(predicted_raw)
y_predicted = numpy.argmax(predicted_raw, axis = 1)
y_true = data_test.label
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
confusionmatrix = confusion_matrix(y_predicted,y_true)
I am trying to get the confusion matrix and classification report working.
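One possibility, offered as a hedged guess rather than a verified fix: because the model is built with dict outputs ({'product': product}), model.predict may return a dict of arrays rather than a single array, so list(predicted_raw) collects the dict keys and numpy.array() ends up one-dimensional. Pulling the 'product' array out before the argmax would look roughly like this:

predicted_raw = model.predict({'input_ids': x_test['input_ids']})
product_scores = predicted_raw['product']            # expected shape (n_samples, 8)
y_predicted = numpy.argmax(product_scores, axis=1)   # one class index per sample
y_true = data_test.label
print(confusion_matrix(y_true, y_predicted))
print(classification_report(y_true, y_predicted))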

ValueError: Could not interpret optimizer identifier: []

Hi, I'm working on image classification using XGBoost with VGG16 (ImageNet weights) as a feature extractor; here's my code.
I tried to implement the feature extractor on the CK+48 image dataset from Kaggle with tensorflow 2.11.0. I'm facing this error:
ValueError: Could not interpret optimizer identifier: []
I have tried a lot to solve it and would appreciate any help.
import numpy as np
import matplotlib.pyplot as plt
import glob
import cv2
import keras
from tensorflow.keras import Model
#from tensorflow.python.keras.layers import Dense, Flatten, GlobalAveragePooling2D, Activatation
from keras.models import Model, Sequential
from keras.models import load_model
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
import os
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, BatchNormalization
from keras.callbacks import TensorBoard
from keras.callbacks import ModelCheckpoint
#from keras.optimizers import adam
import seaborn as sns
from keras.applications.vgg16 import VGG16
# Read input images and assign labels based on folder names
print(os.listdir("C:/Users/Tanzeel ur Rehman/Desktop/CK+48"))
SIZE = 256 #Resize
#Capture training data and labels into respective lists
train_images = []
train_labels = []
for directory_path in glob.glob("C:/Users/Tanzeel ur Rehman/Desktop/CK+48/train"):
    label = directory_path.split("\\")[-1]
    print(label)
    for img_path in glob.glob(os.path.join(directory_path, "*.jpg")):
        print(img_path)
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img = cv2.resize(img, (SIZE, SIZE))
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        train_images.append(img)
        train_labels.append(label)
#Convert lists to arrays
train_images = np.array(train_images)
train_labels = np.array(train_labels)
# Capture test/validation data and labels into respective lists
test_images = []
test_labels = []
for directory_path in glob.glob("C:/Users/Tanzeel ur Rehman/Desktop/CK+48/test"):
    fruit_label = directory_path.split("\\")[-1]
    for img_path in glob.glob(os.path.join(directory_path, "*.jpg")):
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img = cv2.resize(img, (SIZE, SIZE))
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        test_images.append(img)
        test_labels.append(fruit_label)
#Convert lists to arrays
test_images = np.array(test_images)
test_labels = np.array(test_labels)
#Encode labels from text to integers.
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
le.fit(test_labels)
test_labels_encoded = le.transform(test_labels)
le.fit(train_labels)
train_labels_encoded = le.transform(train_labels)
#Split data into test and train datasets (already split but assigning to meaningful convention)
x_train, y_train, x_test, y_test = train_images, train_labels_encoded, test_images, test_labels_encoded
###################################################################
# Normalize pixel values to between 0 and 1
x_train, x_test = x_train / 255.0, x_test / 255.0
#One hot encode y values for neural network.
#from keras.utils import to_categorical
#y_train_one_hot = to_categorical(y_train)
#y_test_one_hot = to_categorical(y_test)
#############################
#Load model wothout classifier/fully connected layers
VGG_model = VGG16(weights='imagenet', include_top=False, input_shape=(SIZE, SIZE, 3))
#Make loaded layers as non-trainable. This is important as we want to work with pre-trained weights
for layer in VGG_model.layers:
    layer.trainable = True
VGG_model.summary() #Trainable parameters will be 0
#Now, let us use features from convolutional network for RF
feature_extractor=VGG_model.predict(x_train)
The error is raised in the last line of the code, when I try to extract features from the convolutional network for Random Forests on the training dataset of images.
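One frequently reported trigger for this error is mixing the standalone keras package with tensorflow.keras objects in the same script; for the code above that is a guess, not a confirmed diagnosis. A sketch that keeps every import on tensorflow.keras and freezes the VGG16 weights (as the comments intend) might look like:

import numpy as np
from tensorflow.keras.applications.vgg16 import VGG16

SIZE = 256
VGG_model = VGG16(weights='imagenet', include_top=False, input_shape=(SIZE, SIZE, 3))
for layer in VGG_model.layers:
    layer.trainable = False                            # freeze the pre-trained weights
VGG_model.summary()

# x_train prepared as above: shape (n_samples, SIZE, SIZE, 3), scaled to [0, 1]
features = VGG_model.predict(x_train)                  # (n_samples, 8, 8, 512)
features = features.reshape(features.shape[0], -1)    # flatten for XGBoost / Random Forest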

how to change fashion_mnist shape

I loaded the Fashion-MNIST dataset through "fashion_mnist.load_data()" and I tried to train a ResNet50 neural network. But I don't know how to reshape the dataset images from (28,28,1) to (224,224,3), as required as input by ResNet.
I am using Python 3, Keras 2.2.4
This is my code:
from __future__ import absolute_import, division, print_function
import tensorflow as tf
from tensorflow import keras  # Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import time
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Flatten, Dense, Dropout
from tensorflow.python.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.python.keras.optimizers import Adam
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.keras.preprocessing import image
from PIL import Image
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat','Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
IMAGE_SIZE = (224,224)
NUM_CLASSES = 10
BATCH_SIZE = 8 # try reducing batch size or freeze more layers if your GPU runs out of memory
FREEZE_LAYERS = 2 # freeze the first this many layers for training
NUM_EPOCHS = 20
WEIGHTS_FINAL = 'model_fashion_resnet.h5'
train_images = preprocess_input(train_images)
train_images = np.expand_dims(train_images, axis=0)
train_labels = preprocess_input(train_labels)
train_labels = np.expand_dims(train_labels, axis=0)
test_images = preprocess_input(test_images)
test_images = np.expand_dims(test_images, axis=0)
net = ResNet50(include_top=False, weights='imagenet', input_tensor=None,
               input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))
x = net.output
x = Flatten()(x)
x = Dropout(0.5)(x)
output_layer = Dense(NUM_CLASSES, activation='softmax', name='softmax')(x)
model = Model(inputs=net.input, outputs=output_layer)
for layer in model.layers[:FREEZE_LAYERS]:
    layer.trainable = False
for layer in model.layers[FREEZE_LAYERS:]:
    layer.trainable = True
model.compile(optimizer=Adam(lr=1e-5), loss='categorical_crossentropy', metrics=['accuracy'])
print(model.summary())
inizio=time.time()
datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)
model.fit_generator(datagen.flow(train_images, train_labels, batch_size=BATCH_SIZE),
                    steps_per_epoch=len(train_images) / BATCH_SIZE, epochs=NUM_EPOCHS)
And this is what I receive after running it:
ValueError: Error when checking input: expected input_1 to have shape (224, 224, 3) but got array with shape (60000, 28, 28)
How to change MNIST images so that they can input in the ResNet50 neural network?
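A hedged sketch of one way to do this, assuming the arrays come straight from fashion_mnist.load_data(): upscale each 28x28 grayscale image to 224x224 (28 x 8 = 224, so nearest-neighbour repetition works) and copy it across three channels before handing it to ResNet50.

import numpy as np

def to_resnet_input(images):                           # images: (n, 28, 28) uint8
    x = images.astype('float32')
    x = np.repeat(np.repeat(x, 8, axis=1), 8, axis=2)  # (n, 224, 224), nearest-neighbour
    x = np.stack([x, x, x], axis=-1)                   # (n, 224, 224, 3)
    return preprocess_input(x)                         # ResNet50 preprocessing imported above

Converting all 60,000 training images at once needs tens of gigabytes of RAM, so in practice it is safer to convert one batch at a time inside a generator, and to one-hot encode the labels with to_categorical rather than running preprocess_input on them.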

Error when checking input: expected lstm_132_input to have 3 dimensions, but got array with shape (23, 1, 3, 1)

I have a dataset that includes temperature, humidity and wind. I want to predict the temperature value in the next hour.
I used an LSTM to predict the future temperature value.
But when I run the model it shows this error: Error when checking input: expected lstm_132_input to have 3 dimensions, but got array with shape (23, 1, 3, 1)
Can anyone help me solve this problem?
Here is my code:
import datetime
import time
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from keras.layers.core import Dense, Dropout, Activation
from keras.activations import linear
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler
data = pd.read_csv('data6.csv' , sep=',')
data['date'] = pd.to_datetime(data['date'] + " " + data['time'], format='%m/%d/%Y %H:%M:%S')
data.set_index('time', inplace=True)
data = data.values
data = data.astype('float32')
# normalize the dataset
def create_data(train, X, n_out=1):
    #data = np.reshape(train, (train.shape[0], train_shape[1], train_shape[2]))
    x, y = list(), list()
    start = 0
    for _ in range(len(data)):
        in_end = start + X
        out_end = in_end + n_out
        if out_end < len(data):
            x_input = data[start:in_end]
            x.append(x_input)
            y.append(data[in_end:out_end, 0])
        start += 1
    return np.array(x), np.array(y)
scaler = MinMaxScaler()
data = scaler.fit_transform(data)
# split into train and test sets
train = int(len(data) * 0.6)
test = len(data) - train
train, test = data[0:train,:], data[train:len(data),:]
X=1
x_train, y_train = create_data(train,X)
x_test, y_test = create_data(test,X)
x_train=x_train.reshape(x_train.shape +(1,))
x_test=x_test.reshape(x_test.shape + (1,))
n_timesteps, n_features, n_outputs = x_train.shape[1], x_train.shape[2], x_train.shape[1]
model = Sequential()
model.add(LSTM(8, activation='relu', input_shape=(n_timesteps, n_features)))
model.add(Dense(8,activation='relu'))
model.add(Dense(n_outputs))
model.compile(loss='mse', optimizer='adam')
# fit network
model.fit(x_train,y_train, epochs=10,batch_size=1, verbose=0)
(The CSV sample, the error traceback and the model summary were attached to the original post as images.)
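A hedged reading of the error itself: the extra reshape lines add a fourth axis, while an LSTM layer expects input of shape (samples, timesteps, features), which create_data already produces. A sketch under that assumption:

x_train, y_train = create_data(train, X)   # x_train shape: (samples, 1, 3)
x_test, y_test = create_data(test, X)
# the reshape to (samples, 1, 3, 1) is what trips the 3-dimensions check,
# so these two lines can simply be dropped:
# x_train = x_train.reshape(x_train.shape + (1,))
# x_test = x_test.reshape(x_test.shape + (1,))
model.fit(x_train, y_train, epochs=10, batch_size=1, verbose=0)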
You need to add an activation to your last layer:
model = Sequential()
model.add(LSTM(8, activation='relu', input_shape=(n_timesteps, n_features)))
model.add(Dense(8,activation='relu'))
# here
model.add(Dense(n_outputs,activation='relu'))
model.compile(loss='mse', optimizer='adam')
# fit network
model.fit(x_train,y_train, epochs=10,batch_size=1, verbose=0)

Training and evaluating accuracies different in keras LSTM model

I am training an LSTM model using Keras. Training on the training set gives 83% accuracy in the last epoch, but evaluating or predicting with the same training set gives 47% accuracy. I can't figure out the problem.
Also, I have commented out the training part in the following code, since I save the model and weights to disk, but I used it earlier to train, i.e. model.fit(...) etc.
import pandas as pd
import Preprocessing as pre
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.utils import shuffle
import pickle
import numpy as np
import sys
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.layers import LSTM
from keras.preprocessing.sequence import pad_sequences
from keras.models import model_from_json
from keras.preprocessing.text import Tokenizer
df = pd.read_csv('tweets.csv',header=None,encoding = "ISO-8859-1")
df=shuffle(df)
length=df.size
train=[]
test=[]
Y=[]
Y2=[]
count=450000
for a in range(450000):  # loading data
    # b = pre.preprocess_tweet(df[1][a])
    label = df[0][a]
    # train.append(b)
    Y.append(label)
    count -= 1
    print("Loading training data...", count)
# with open('training_data_no_stopwords.obj', 'wb') as fp:
# pickle.dump(train, fp)
with open('training_data.obj', 'rb') as fp:
    train = pickle.load(fp)
count=156884
for a in range(450000, 606884):  # loading testing data
    # b = pre.preprocess_tweet(df[1][a])
    label = df[0][a]
    # test.append(b)
    Y2.append(label)
    count -= 1
    print("Loading testing data...", count)
# with open('testing_data_no_stopwords.obj', 'wb') as fp:
# pickle.dump(test, fp)
with open('testing_data.obj', 'rb') as fp:
    test = pickle.load(fp)
# vectorizer = CountVectorizer(analyzer = "word",tokenizer = None, preprocessor = None, stop_words = None, max_features = 3500)
# # # fit_transform() does two functions: First, it fits the model
# # # and learns the vocabulary; second, it transforms our training data
# # # into feature vectors. The input to fit_transform should be a list of
# # # strings.
#
# train = vectorizer.fit_transform(train)
# test = vectorizer.transform(test)
tokenizer = Tokenizer(split=' ')
tokenizer.fit_on_texts(train)
train = tokenizer.texts_to_sequences(train)
max_words = 134
train = pad_sequences(train, maxlen=max_words)
tokenizer.fit_on_texts(test)
test = tokenizer.texts_to_sequences(test)
test = pad_sequences(test, maxlen=max_words)
print ('Extracting features & training batches')
print("Training...")
embedding_size=32
model = Sequential()
model.add(Embedding(606884, 32, input_length=134))
model.add(Dropout(0.4))
model.add(LSTM(128))
model.add(Dense(64))
model.add(Dropout(0.5))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation('sigmoid'))
print(model.summary())
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# batch_size = 501
# num_epochs = 3
#
# model.fit(train, np.array(Y), batch_size=batch_size, epochs=num_epochs ,validation_split=0.2,shuffle=True)
# # Save the weights
# model.save_weights('LSTM_model_weights.h5')
#
# # Save the model architecture
# with open('LSTM_model.json', 'w') as f:
# f.write(model.to_json())
# Model reconstruction from JSON file
with open('LSTM_model.json', 'r') as f:
    model = model_from_json(f.read())
# Load weights into the new model
model.load_weights('LSTM_model_weights.h5')
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
scores = model.evaluate(train, np.array(Y))
print('Test accuracy:', scores[1])
Maybe try training with Stratified K-Fold; a rough sketch is below. This probably happens because your test data is very different from the training data. Also, you can try shuffling the data before the train-test split.
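A rough sketch of stratified K-fold training, reusing the padded train sequences and the label list Y from the question (those names are assumed from the posted code, not verified):

import numpy as np
from sklearn.model_selection import StratifiedKFold

X_all = np.array(train)
y_all = np.array(Y)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for fold, (tr_idx, va_idx) in enumerate(skf.split(X_all, y_all)):
    # each fold keeps the class balance in both partitions;
    # in practice, rebuild and recompile the model at the top of every fold
    model.fit(X_all[tr_idx], y_all[tr_idx],
              validation_data=(X_all[va_idx], y_all[va_idx]),
              batch_size=501, epochs=3)
    loss, acc = model.evaluate(X_all[va_idx], y_all[va_idx], verbose=0)
    print("Fold {}: validation accuracy {:.3f}".format(fold, acc))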
