Adding a layer stops learning Keras - keras

Code
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential,Model
from keras.layers import LeakyReLU,Dropout, Flatten, Dense,Input
from keras import applications
from keras.preprocessing import image
from keras import backend as K
from keras import regularizers
from keras.optimizers import adam
K.set_image_dim_ordering('tf')
input_tensor = Input(shape=(150,150,3))
img_width, img_height = 150,150
top_model_weights_path = 'bottleneck_fc_model.h5'
train_data_dir = 'Cats and Dogs Dataset/train'
validation_data_dir = 'Cats and Dogs Dataset/validation'
nb_train_samples = 20000
nb_validation_samples = 5000
epochs = 50
batch_size = 128
base_model=applications.inception_v3.InceptionV3(include_top=False, weights='imagenet', input_tensor=input_tensor, pooling=None)
i=0;
for layer in base_model.layers:
layer.trainable = False
i+=1
base_model.output
top_model=Sequential()
top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
top_model.add(Dense(1024,activation="relu"))
top_model.add(Dropout(0.5))
top_model.add(Dense(10,activation="relu"))//Layer with issue
top_model.add(Dropout(0.8))//
top_model.add(Dense(2, activation='softmax'))
model = Model(inputs=base_model.input,outputs=top_model(base_model.output))
model.summary
datagen = ImageDataGenerator(rescale=1. / 255)
train_data = datagen.flow_from_directory(train_data_dir,target_size=(img_width, img_height),batch_size=batch_size,classes=[ 'cats','dogs'])#,class_mode="binary",shuffle=True)
validation_data = datagen.flow_from_directory(validation_data_dir,target_size=(img_width, img_height), batch_size=batch_size,classes=['cats','dogs'])#,class_mode="binary",shuffle=True)
adm=adam(lr=0.02)
model.compile(optimizer=adm,loss='categorical_crossentropy', metrics=['accuracy'])
model.fit_generator(train_data, steps_per_epoch=nb_train_samples//batch_size, epochs=epochs,validation_data=validation_data, shuffle=True,verbose=1)
I have implemented a Image Classifier on the cats and dogs Dataset(https://www.kaggle.com/c/dogs-vs-cats/data) using keras(transfer learned using the inception netowrk). The code runs without errors but the accuracy is stuck at 50% for the validation set and the training set from the first epoch and the loss isnt decreasing. I am using Atom with hydrogen.
The issue goes away when I remove the marked layer , I cant seem to understand why this is happening.
What I have tried to fix this
different batch sizes - 4,16,64,256
change optimizer - tried adam ,rmsprop , sgd with modified learning rates
tried different activations for the layer - relu,sigmoid and leakyrelu
changed the dropout - the issue vanishes when dropout is 0.9(i.e. make the
layer useless, this works for obvious reason but also points out there is something that i am missing )
changed the final activation to sigmoid
Can someone please tell me what i am missing because i cant think of any reason why adding a layer stops learning

import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential,Model
from keras.layers import LeakyReLU,Dropout, Flatten, Dense,Input
from keras import applications
from keras.preprocessing import image
from keras import backend as K
from keras import regularizers
from keras.optimizers import adam
K.set_image_dim_ordering('tf')
input_tensor = Input(shape=(150,150,3))
img_width, img_height = 150,150
top_model_weights_path = 'bottleneck_fc_model.h5'
train_data_dir = 'Cats and Dogs Dataset/train'
validation_data_dir = 'Cats and Dogs Dataset/validation'
nb_train_samples = 20000
nb_validation_samples = 5000
epochs = 50
batch_size = 64
base_model=applications.inception_v3.InceptionV3(include_top=False, weights='imagenet', input_tensor=input_tensor, pooling=None)
i=0;
for layer in base_model.layers:
layer.trainable = False
i+=1
base_model.output
top_model=Sequential()
top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
top_model.add(Dense(512,activation="relu")) //decrease in units
top_model.add(Dropout(0.4)) // change the drop out
top_model.add(Dense(128,activation="relu")) //increase in units
top_model.add(Dropout(0.2)) // decrease in dropout
top_model.add(Dense(2, activation='softmax'))
model = Model(inputs=base_model.input,outputs=top_model(base_model.output))
model.summary
datagen = ImageDataGenerator(rescale=1. / 255)
train_data = datagen.flow_from_directory(train_data_dir,target_size=(img_width, img_height),batch_size=batch_size,classes=[ 'cats','dogs'])#,class_mode="binary",shuffle=True)
validation_data = datagen.flow_from_directory(validation_data_dir,target_size=(img_width, img_height), batch_size=batch_size,classes=['cats','dogs'])#,class_mode="binary",shuffle=True)
adm=adam(lr=0.02)
model.compile(optimizer=adm,loss='categorical_crossentropy', metrics=['accuracy'])
model.fit_generator(train_data, steps_per_epoch=nb_train_samples//batch_size, epochs=epochs,validation_data=validation_data, shuffle=True,verbose=1)
I have reduce the number of units in first dense layer while increase the number of units in 2nd dense layer .. and also deceasing the drop out rate .. run this code and let me know. one more thing more complex the network is higher the chance of over-fitting .. increase in dropout value might result in no learning for that layer. try to keep you network simple .

Related

Questions about Multitask deep neural network modeling using Keras

I'm trying to develop a multitask deep neural network (MTDNN) to make prediction on small molecule bioactivity against kinase targets and something is definitely wrong with my model structure but I can't figure out what.
For my training data (highly imbalanced data with 0 as inactive and 1 as active), I have 423 unique kinase targets (tasks) and over 400k unique compounds. I first calculate the ECFP fingerprint using smiles, and then I randomly split the input data into train, test, and valid sets based on 8:1:1 ratio using RandomStratifiedSplitter from deepchem package. After training my model using the train set and I want to make prediction on the test set to check model performance.
Here's what my data looks like (screenshot example):
(https://i.stack.imgur.com/8Hp36.png)
Here's my code:
# Import Packages
import numpy as np
import pandas as pd
import deepchem as dc
from sklearn.metrics import roc_auc_score, roc_curve, auc, confusion_matrix
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import initializers, regularizers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input, Dropout, Reshape
from tensorflow.keras.optimizers import SGD
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors
# Build Model
inputs = keras.Input(shape = (1024, ))
x = keras.layers.Dense(2000, activation='relu', name="dense2000",
kernel_initializer=initializers.RandomNormal(stddev=0.02),
bias_initializer=initializers.Ones(),
kernel_regularizer=regularizers.L2(l2=.0001))(inputs)
x = keras.layers.Dropout(rate=0.25)(x)
x = keras.layers.Dense(500, activation='relu', name='dense500')(x)
x = keras.layers.Dropout(rate=0.25)(x)
x = keras.layers.Dense(846, activation='relu', name='output1')(x)
logits = Reshape([423, 2])(x)
outputs = keras.layers.Softmax(axis=2)(logits)
Model1 = keras.Model(inputs=inputs, outputs=outputs, name='MTDNN')
Model1.summary()
opt = keras.optimizers.SGD(learning_rate=.0003, momentum=0.9)
def loss_function (output, labels):
loss = tf.nn.softmax_cross_entropy_with_logits(output,labels)
return loss
loss_fn = loss_function
Model1.compile(loss=loss_fn, optimizer=opt,
metrics=[keras.metrics.Accuracy(),
keras.metrics.AUC(),
keras.metrics.Precision(),
keras.metrics.Recall()])
for train, test, valid in split2:
trainX = pd.DataFrame(train.X)
trainy = pd.DataFrame(train.y)
trainy2 = tf.one_hot(trainy,2)
testX = pd.DataFrame(test.X)
testy = pd.DataFrame(test.y)
testy2 = tf.one_hot(testy,2)
validX = pd.DataFrame(valid.X)
validy = pd.DataFrame(valid.y)
validy2 = tf.one_hot(validy,2)
history = Model1.fit(x=trainX, y=trainy2,
shuffle=True,
epochs=10,
verbose=1,
batch_size=100,
validation_data=(validX, validy2))
y_pred = Model1.predict(testX)
y_pred2 = y_pred[:, :, 1]
y_pred3 = np.round(y_pred2)
# Check the # of nonzero in assay
(y_pred3!=0).sum () #all 0s
My questions are:
The roc and precision recall are all extremely high (>0.99), but the prediction result of test set contains all 0s, no actives at all. I also use the randomized dataset with same active:inactive ratio for each task to test if those values are too good to be true, and turns out all values are still above 0.99, including roc which is expected to be 0.5.
Can anyone help me to identify what is wrong with my model and how should I fix it please?
Can I use built-in functions in sklearn to calculate roc/accuracy/precision-recall? Or should I manually calculate the metrics based on confusion matrix on my own for multitasking purpose. Why and why not?

Transferlearning ResNet Model does not learn

I trained ResNet-50 model to classify images from 6 classes (my own dataset) and saved it. But the model did not learn properly and predictions are incorrect. What would be the reason for this poor learning?
Here is my code, and the output plots using Keras and TensorFlow backend. How can I solve this?
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.layers import Dense, Dropout
from keras.models import Model
from keras.optimizers import Adam, SGD
from keras.preprocessing.image import ImageDataGenerator, image
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from keras import backend as K
import numpy as np
import matplotlib.pyplot as plt
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGE = True
# Define some constant needed throughout the script
N_CLASSES = 6
EPOCHS = 20
PATIENCE = 5
TRAIN_PATH= '/Train/'
VALID_PATH = '/Test/'
MODEL_CHECK_WEIGHT_NAME = 'resnet_monki_v1_chk.h5'
# Define model to be used we freeze the pre trained resnet model weight, and add few layer on top of it to utilize our custom dataset
K.set_learning_phase(0)
model = ResNet50(input_shape=(224,224,3),include_top=False, weights='imagenet', pooling='avg')
K.set_learning_phase(1)
x = model.output
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(N_CLASSES, activation='softmax', name='custom_output')(x)
custom_resnet = Model(inputs=model.input, outputs = output)
for layer in model.layers:
layer.trainable = False
custom_resnet.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
custom_resnet.summary()
# 4. Load dataset to be used
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
traingen = datagen.flow_from_directory(TRAIN_PATH, target_size=(224,224), batch_size=32, class_mode='categorical')
validgen = datagen.flow_from_directory(VALID_PATH, target_size=(224,224), batch_size=32, class_mode='categorical', shuffle=False)
# 5. Train Model we use ModelCheckpoint to save the best model based on validation accuracy
es_callback = EarlyStopping(monitor='val_acc', patience=PATIENCE, mode='max')
mc_callback = ModelCheckpoint(filepath=MODEL_CHECK_WEIGHT_NAME, monitor='val_acc', save_best_only=True, mode='max')
train_history = custom_resnet.fit_generator(traingen, steps_per_epoch=len(traingen), epochs= EPOCHS, validation_data=traingen, validation_steps=len(validgen), verbose=2, callbacks=[es_callback, mc_callback])
custom_resnet.save('custom_resnet.h5')
Here are the plots, I had to put the links, the site does not let me put a pic
enter image description here

MNIST and transfer learning with VGG16 in Keras- low validation accuracy

I recently started taking advantage of Keras's flow_from_dataframe() feature for a project, and decided to test it with the MNIST dataset. I have a directory full of the MNIST samples in png format, and a dataframe with the absolute directory for each in one column and the label in the other.
I'm also using transfer learning, importing VGG16 as a base, and adding my own 512 node relu dense layer and 0.5 drop-out before a softmax layer of 10. (For digits 0-9). I'm using rmsprop (lr=1e-4) as the optimizer.
When I launch my environment, it calls the latest version of keras_preprocessing from Git, which has support for absolute directories and capitalized file extensions.
My problem is that I have a very high training accuracy, and a terribly low validation accuracy. By my final epoch (10), I had a training accuracy of 0.94 and a validation accuracy of 0.01.
I'm wondering if there's something fundamentally wrong with my script? With another dataset, I'm even getting NaNs for both my training and validation loss values after epoch 4. (I checked the relevant columns, there aren't any null values!)
Here's my code. I'd be deeply appreciative is someone could glance through it and see if anything jumped out at them.
import pandas as pd
import numpy as np
import keras
from keras_preprocessing.image import ImageDataGenerator
from keras import applications
from keras import optimizers
from keras.models import Model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras import backend as k
from keras.callbacks import ModelCheckpoint, CSVLogger
from keras.applications.vgg16 import VGG16, preprocess_input
# INITIALIZE MODEL
img_width, img_height = 32, 32
model = VGG16(weights = 'imagenet', include_top=False, input_shape = (img_width, img_height, 3))
# freeze all layers
for layer in model.layers:
layer.trainable = False
# Adding custom Layers
x = model.output
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(10, activation="softmax")(x)
# creating the final model
model_final = Model(input = model.input, output = predictions)
# compile the model
rms = optimizers.RMSprop(lr=1e-4)
#adadelta = optimizers.Adadelta(lr=0.001, rho=0.5, epsilon=None, decay=0.0)
model_final.compile(loss = "categorical_crossentropy", optimizer = rms, metrics=["accuracy"])
# LOAD AND DEFINE SOURCE DATA
train = pd.read_csv('MNIST_train.csv', index_col=0)
val = pd.read_csv('MNIST_test.csv', index_col=0)
nb_train_samples = 60000
nb_validation_samples = 10000
batch_size = 60
epochs = 10
# Initiate the train and test generators
train_datagen = ImageDataGenerator()
test_datagen = ImageDataGenerator()
train_generator = train_datagen.flow_from_dataframe(dataframe=train,
directory=None,
x_col='train_samples',
y_col='train_labels',
has_ext=True,
target_size = (img_height,
img_width),
batch_size = batch_size,
class_mode = 'categorical',
color_mode = 'rgb')
validation_generator = test_datagen.flow_from_dataframe(dataframe=val,
directory=None,
x_col='test_samples',
y_col='test_labels',
has_ext=True,
target_size = (img_height,
img_width),
batch_size = batch_size,
class_mode = 'categorical',
color_mode = 'rgb')
# GET CLASS INDICES
print('****************')
for cls, idx in train_generator.class_indices.items():
print('Class #{} = {}'.format(idx, cls))
print('****************')
# DEFINE CALLBACKS
path = './chk/epoch_{epoch:02d}-valLoss_{val_loss:.2f}-valAcc_{val_acc:.2f}.hdf5'
chk = ModelCheckpoint(path, monitor = 'val_acc', verbose = 1, save_best_only = True, mode = 'max')
logger = CSVLogger('./chk/training_log.csv', separator = ',', append=False)
nPlus = 1
samples_per_epoch = nb_train_samples * nPlus
# Train the model
model_final.fit_generator(train_generator,
steps_per_epoch = int(samples_per_epoch/batch_size),
epochs = epochs,
validation_data = validation_generator,
validation_steps = int(nb_validation_samples/batch_size),
callbacks = [chk, logger])
Have you tried explicitly defining the classes of the images? as such:
train_generator=image.ImageDataGenerator().flow_from_dataframe(classes=[0,1,2,3,4,5,6,7,8,9])
in both the train and validation generators.
I have found that sometimes the train and validation generators create different correspondence dictionaries.

OOM during prediction with batchSize that works during training

I'm confused by a Keras behavior I'm seeing. I'm finetuning the resnet50 model.
from keras import applications
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten
from regressionGenerator import regressionGenerator
from keras.optimizers import SGD
from keras.callbacks import TensorBoard
from keras.regularizers import l2
batchSize = 32
numObservations = 948
iterationsPerEpoch = numObservations // batchSize
targetSize = (1024,1024) # Keep aspect ratio, approximately .12 of
original image size
preprocessFcn = applications.resnet50.preprocess_input
base_model = applications.ResNet50(weights='imagenet', include_top=False, input_shape=(targetSize[0],targetSize[1], 3),pooling='none')
x = base_model.output
x = Flatten()(x)
x = Dense(2048,activation='relu',kernel_regularizer=l2(0.0001))(x)
x = Dropout(0.5)(x)
predictions = Dense(5)(x)
# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)
for layer in base_model.layers:
layer.trainable = False
# compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer=SGD(lr=0.00001, momentum=0.9), loss='mean_squared_error')
train_generator = regressionGenerator(batchSize,targetSize,preprocessFcn)
# Train for a few epochs to initialize top of network
model.fit_generator(
train_generator,
steps_per_epoch=iterationsPerEpoch,
epochs=15)
This model trains, and fits in memory. When I predict, also from a generator, I cannot train with the same batchSize of 32, or I receive OOM. I have to lower the batchSize to 8 to fit in memory on the same GPU.
import numpy as np
from keras.models import load_model
from predictionGenerator import predictionGenerator
from keras import applications
model = load_model('')
preprocessFcn = applications.resnet50.preprocess_input
batchSize = 8
targetSize = (1024,1024)
test_generator,idxList,numSteps = predictionGenerator(batchSize, targetSize, preprocessFcn)
Ypred = model.predict_generator(test_generator, numSteps)
Ypred = np.around(Ypred)
Ypred = np.clip(Ypred, 0, np.inf) # Clip negative counts
Ypred = Ypred[idxList, :] # Sort according to order of observations in generator
numObservations = len(idxList)
fileindices = np.reshape(np.arange(numObservations),(numObservations,1))
Ypred = np.hstack((fileindices,Ypred))
How is it possible that I can use a batchSize of 32 and train, but have to lower the batchSize by a factor of 4 during prediction?

Using model.pop() changes the model's summary but does not effect the output

I am trying to remove the top layers from a model I have previously trained.
This is the code I use:
import os
import h5py
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Activation, Dropout, Flatten, Dense
# KERAS_BACKEND=theano python
import keras
keras.backend.set_image_dim_ordering("th")
img_width, img_height = 150, 150
data_dir = '//shared_directory/projects/try_CD/data/validation'
nb_train_samples = 2000
nb_validation_samples = 800
nb_epoch = 50
def make_bottleneck_features(model):
datagen = ImageDataGenerator(rescale=1./255)
generator = datagen.flow_from_directory(
data_dir,
target_size=(img_width, img_height),
batch_size=32,
class_mode=None,
shuffle=False)
bottleneck_features = model.predict_generator(generator, nb_validation_samples)
return (bottleneck_features)
model=keras.models.load_model('/shared_directory/projects/think_exp/CD_M1.h5')
A = make_bottleneck_features(model)
model.summary()
for i in range (6):
model.pop()
B = make_bottleneck_features(model)
model.summary()
Judging comparing the results of the two calls to model.summary(), I can see that indeed the 6 topmost layers were removed.
However, the model's output (saved to A and B) does not change after discarding these layers.
What is the source of that discrepancy?
How can I retrieve the output of the desired layer instead of that of the entire model?
Thanks in advance!
You can't drop layers like that, in order for it to have an effect, you need to recompile the model (AKA model.compile).
But that's not the best way to obtain outputs from intermediate layers, you can just use K.function (where K is keras.backend) to build a function from the input to one of the layers and then call the function. More details are available in this answer.

Resources