Failed to find data adapter that can handle input: <class 'NoneType'>, <class 'NoneType'> validation_generator - conv-neural-network

I have been trying to create model to classify a set of images into 10 classes. I am using CNN to do the same. I am stuck with the above error and I am not able to proceed further. Please help me out. I think the problem is with the validation_generator. But I tried everything possible and I am not able to fix it.
The code that I have written:
import tensorflow as tf
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageEnhance
import tensorflow.keras as keras
train_path = "/Users/user/Desktop/train"
test_path = "/Users/user/Desktop/val"
model = tf.keras.models.Sequential([
# Note the input shape is the desired size of the image 256x256 with 3 bytes color
# This is the first convolution
tf.keras.layers.Conv2D(64, (3,3), activation='relu', input_shape=(256, 256, 3)),
tf.keras.layers.MaxPooling2D(2, 2),
# The second convolution
tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
tf.keras.layers.MaxPooling2D(2,2),
# The third convolution
tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
tf.keras.layers.MaxPooling2D(2,2),
# The fourth convolution
tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
tf.keras.layers.MaxPooling2D(2,2),
# Flatten the results to feed into a DNN
tf.keras.layers.Flatten(),
tf.keras.layers.Dropout(0.5),
# 512 neuron hidden layer
tf.keras.layers.Dense(512, activation='relu'),
tf.keras.layers.Dense(10, activation='softmax')
])
# Print the model summary
model.summary()
# Set the training parameters
model.compile(loss = 'categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
from keras_preprocessing.image import ImageDataGenerator
TRAINING_DIR = train_path
training_datagen = ImageDataGenerator(
rescale = 1./255,
rotation_range=10,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0,
zoom_range=0.2,
horizontal_flip=False,
fill_mode='nearest')
VALIDATION_DIR = test_path
validation_datagen = ImageDataGenerator(rescale = 1./255)
train_generator = training_datagen.flow_from_directory(
TRAINING_DIR,
target_size=(256,256),
class_mode='categorical',
batch_size=64
)
validation_generator = validation_datagen.flow_from_directory(
VALIDATION_DIR,
target_size=(256,256),
class_mode='categorical',
batch_size=64
)
# the error raises here
history = model.fit(epochs=25, validation_data = validation_generator, verbose = 1, validation_steps=3)

Related

Unexpected outputs from model.predict when using rescaling

First off: I'm aware of this post, but it doesn't provide an answer.
I am building my model like this:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from keras.preprocessing.image import ImageDataGenerator # for data augmentation
import pandas as pd # to save .csv files
from time import perf_counter # to track runtime
from keras.metrics import TrueNegatives, TruePositives, FalseNegatives, FalsePositives
def build_model(dimension):
model = Sequential()
model.add(Conv2D(32, (11,11), activation='relu',
input_shape=(dimension, dimension, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten()) # to prepare for dropout
model.add(Dropout(0.2)) # to prevent overfitting
model.add(Dense(256, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='rmsprop',
loss='binary_crossentropy',
metrics=['accuracy',
TruePositives(),
TrueNegatives(),
FalsePositives(),
FalseNegatives()
]
)
return model
def train_model(epoch, batch_size, run, subrun):
dimension = 200
model = build_model(dimension)
train_datagen = ImageDataGenerator(validation_split=0.2,
# samplewise_std_normalization=True,
rotation_range=40,
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,
width_shift_range=0.2,
height_shift_range=0.2,
fill_mode='nearest'
)
training_set = train_datagen.flow_from_directory('train6',
target_size=(dimension, dimension),
color_mode='rgb', # default
class_mode='binary',
batch_size=batch_size,
save_to_dir=None,
interpolation='nearest',
subset='training')
validation_set = train_datagen.flow_from_directory('train6',
target_size=(dimension, dimension),
color_mode='rgb', # default
class_mode='binary',
batch_size=batch_size,
save_to_dir=None,
# if 'str', saves augmented images for visualisation
interpolation='nearest',
subset='validation')
start_time = perf_counter() # start counting
history = model.fit_generator(training_set,
epochs=epoch,
steps_per_epoch=training_set.samples // batch_size,
validation_data=validation_set,
validation_steps=validation_set.samples // batch_size,
verbose=2)
stop_time = perf_counter() # stop counting
# saving trained model & history file
model.save_weights('models/cat_dog_classifier_{0}_{1}.h5'.format(run, subrun)) # save model weights
hist_pd = pd.DataFrame(history.history) # making panda file of history.history
hist_csv_file = 'histories/history_{0}_{1}.csv'.format(run, subrun) # defining name for csv file
with open(hist_csv_file, mode='w') as f: # saving the pd file as csv
hist_pd.to_csv(f)
return stop_time - start_time
And I use the following code to get the probabilities:
from build_model import build_model
from keras.preprocessing import image
import numpy as np
run = 'A28'
subrun = 1
dimension = 200
# build model
model = build_model(dimension)
model.load_weights('models/cat_dog_classifier_{0}_{1}.h5'.format(run, subrun))
# Get test image ready
amount_of_images = 10
predictions = np.zeros((amount_of_images, 2))
labels = np.zeros(amount_of_images)
for i in range(amount_of_images):
image_name = 1 + i # choose what image to start from
test_image = image.load_img('test1/{}.jpg'.format(image_name), target_size=(dimension, dimension))
test_image = image.img_to_array(test_image)
test_image = np.expand_dims(test_image, axis=0)
label = model.predict_classes(test_image, batch_size=1)
labels[i] = label
prediction = model.predict(test_image, batch_size=1)
print(prediction)
print(labels)
When I train my model without using the rescaling or the normalisation, the predictions are probabilities as expected. However, when I use either, it returns only 0s and 1s (the same labels as predict_classes). I have tried to run the dummy code provided in the link above, and it worked as expected; which makes sense I suppose, as the script also runs fine when I haven't used rescaling. However, I would really like to be able to use rescaling. Does anyone have any idea what is going wrong?

Why does my model predict the same label?

I am training a small network and the training seems to go fine, the val loss decreases, I reach validation accuracy around 80, and it actually stops training once there is no more improvement (patience=10). It trained for 40 epochs. However, it keeps predicting only one class for every test image! I tried to initialize the conv layers randomly, I added regularizers, I switched from Adam to SGD, I added clipvalue, I added dropouts. I also switched to softmax (I have only two labels but I saw some recommendation on using softmax and Dense layer with 2 neurons). Some or one of these helped with the overfitting, but nothing worked for the prediction problem. The data is balanced, though it is a small dataset, so it doesn't make sense that it reaches 80% if it predicts the same labels for evaluation set as well.
What is wrong with my model and how can I fix it? Any comments are welcome.
#Import some packages to use
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
import os
from keras.regularizers import l2
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.initializers import RandomNormal
os.environ["CUDA_VISIBLE_DEVICES"]="0"
epochs = 200
callbacks = []
#schedule = None
decay = 0.0
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
mcp_save = ModelCheckpoint('.mdl_wts.hdf5', save_best_only=True, monitor='val_loss', mode='min')
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, epsilon=1e-5, mode='min')
train_dir = '/home/d/Desktop/s/data/train'
eval_dir = '/home/d/Desktop/s/data/eval'
test_dir = '/home/d/Desktop/s/data/test'
# create a data generator
train_datagen = ImageDataGenerator(rescale=1./255, #Scale the image between 0 and 1
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,)
val_datagen = ImageDataGenerator(rescale=1./255) #We do not augment validation data. we only perform rescale
test_datagen = ImageDataGenerator(rescale=1./255) #We do not augment validation data. we only perform rescale
# load and iterate training dataset
train_generator = train_datagen.flow_from_directory(train_dir, target_size=(224,224),class_mode='categorical', batch_size=16, shuffle='True', seed=42)
# load and iterate validation dataset
val_generator = val_datagen.flow_from_directory(eval_dir, target_size=(224,224),class_mode='categorical', batch_size=16, shuffle='True', seed=42)
# load and iterate test dataset
test_generator = test_datagen.flow_from_directory(test_dir, target_size=(224,224), class_mode=None, batch_size=1, shuffle='False', seed=42)
#We will use a batch size of 32. Note: batch size should be a factor of 2.***4,8,16,32,64...***
#batch_size = 4
#from keras import layers
from keras import models
from keras import optimizers
#from keras.layers import Dropout
#from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array, load_img
model = models.Sequential()
model.add(Conv2D(64, (3, 3), activation='relu', name='block1_conv1', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05), input_shape=(224, 224, 3)))
model.add(Conv2D(64, (3, 3), activation='relu', name='block1_conv2', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.2))
model.add(Conv2D(128, (3, 3), activation='relu', name='block2_conv1', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(Conv2D(128, (3, 3), activation='relu', name='block2_conv2',kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(MaxPooling2D((2, 2), name='block2_pool'))
model.add(Dropout(0.2))
model.add(Conv2D(256, (3, 3), activation='relu', name='block3_conv1', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(Conv2D(256, (3, 3), activation='relu', name='block3_conv2', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(Conv2D(256, (3, 3), activation='relu', name='block3_conv3', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(MaxPooling2D((2, 2), name='block3_pool'))
model.add(Dropout(0.2))
#model.add(layers.Conv2D(512, (3, 3), activation='relu', name='block4_conv1'))
#model.add(layers.Conv2D(512, (3, 3), activation='relu', name='block4_conv2'))
#model.add(layers.Conv2D(512, (3, 3), activation='relu', name='block4_conv3'))
#model.add(layers.MaxPooling2D((2, 2), name='block4_pool'))
model.add(Flatten())
model.add(Dense(256, kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01), activation='relu', kernel_initializer='he_uniform'))
model.add(Dropout(0.5))
model.add(Dense(2, kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01), activation='softmax'))
#Lets see our model
model.summary()
#We'll use the RMSprop optimizer with a learning rate of 0.0001
#We'll use binary_crossentropy loss because its a binary classification
#model.compile(loss='binary_crossentropy', optimizer=optimizers.SGD(lr=1e-5, momentum=0.9), metrics=['acc'])
model.compile(loss='categorical_crossentropy',
#optimizer=optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=1e-08, decay=decay),
optimizer=optimizers.SGD(lr= 0.0001, clipvalue = 0.5, decay=1e-6, momentum=0.9, nesterov=True),
metrics=['accuracy'])
#The training part
#We train for 64 epochs with about 100 steps per epoch
history = model.fit_generator(train_generator,
steps_per_epoch=train_generator.n // train_generator.batch_size,
epochs=epochs,
validation_data=val_generator,
validation_steps=val_generator.n // val_generator.batch_size,
callbacks=[earlyStopping, mcp_save]) #, reduce_lr_loss])
#Save the model
model.save_weights('/home/d/Desktop/s/categorical_weights.h5')
model.save('/home/d/Desktop/s/categorical_model_keras.h5')
#lets plot the train and val curve
#get the details form the history object
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
#Train and validation accuracy
plt.plot(epochs, acc, 'b', label='Training accuracy')
plt.plot(epochs, val_acc, 'r', label='Validation accuracy')
plt.title('Training and Validation accuracy')
plt.legend()
plt.figure()
#Train and validation loss
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and Validation loss')
plt.legend()
plt.show()
model.evaluate_generator(generator=val_generator, steps=val_generator.n // val_generator.batch_size)
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
test_generator.reset()
pred=model.predict_generator(test_generator,
steps=STEP_SIZE_TEST,
verbose=1)
predicted_class_indices=np.argmax(pred,axis=1)
labels = (train_generator.class_indices)
np.save('/home/d/Desktop/s/classes', labels)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]
filenames=test_generator.filenames
results=pd.DataFrame({"Filename":filenames,
"Predictions":predictions})
results.to_csv("categorical_results.csv",index=False)
One of the problems that could lead to such behavior is imbalanced dataset. Your model found out that if it predicts the dominant class each time, it would get a good results.
There are many ways to tackle an imbalance dataset. Here is a good tutorial.
One of the easiest yet powerful solution is to apply higher penalty to your loss if it wrongly predicted the smaller class. This can be implemented in keras by setting the parameter class_weight in the fitor fit_generator function.
It can be a dictionary of example:
class_weight = {0: 0.75, 1: 0.25} # does not necessarily add to up 1.
history = model.fit_generator(train_generator,
steps_per_epoch=train_generator.n // train_generator.batch_size,
epochs=epochs,
class_weight= class_weight, # this is the important part
validation_data=val_generator,
validation_steps=val_generator.n // val_generator.batch_size,
callbacks=[earlyStopping, mcp_save]) #, reduce_lr_loss])
Adding to Coderji's answer, it might also prove advantageous to counter class imbalance using stratified k-fold cross-validation, with k = 5 being common practice. This basically splits your data set up into k splits like regular cross-validation, but also stratifies these splits. In the case of class imbalance, each of these splits contain over-/undersampled classes compensating for their lower/higher occurence within the data set.
As of yet Keras does not have it's own way to use stratified k-fold cross-validation. Instead it's advised to use sklearn's StratifiedKFold. This article gives a detailed overview how to achieve this in Keras,
with the gist of it being:
from sklearn.model_selection import StratifiedKFold# Instantiate the cross validator
skf = StratifiedKFold(n_splits=kfold_splits, shuffle=True)# Loop through the indices the split() method returns
for index, (train_indices, val_indices) in enumerate(skf.split(X, y)):
print "Training on fold " + str(index+1) + "/10..." # Generate batches from indices
xtrain, xval = X[train_indices], X[val_indices]
ytrain, yval = y[train_indices], y[val_indices] # Clear model, and create it
model = None
model = create_model()
# Debug message I guess
# print "Training new iteration on " + str(xtrain.shape[0]) + " training samples, " + str(xval.shape[0]) + " validation samples, this may be a while..."
history = train_model(model, xtrain, ytrain, xval, yval)
accuracy_history = history.history['acc']
val_accuracy_history = history.history['val_acc']
print "Last training accuracy: " + str(accuracy_history[-1]) + ", last validation accuracy: " + str(val_accuracy_history[-1])
create_model() returns a compiled Keras model
train_model() returns last history object of its last model.fit() operation

Keras Transfer Learning Issue

I have trained & saved a smaller network on my small dataset, and I want to use transfer learning.
I want to use this saved network on top of the conv part of the pretrained VGG16, specifically I want to freeze some layers of VGG but not all then I want to use the fc that I have already trained on my smaller dataset, and learn a model which is a combination of both with transferred weights.
I am following a mish and mash of tutorials: https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html and https://machinelearningmastery.com/how-to-develop-a-convolutional-neural-network-to-classify-photos-of-dogs-and-cats/ I do not just want to use pretrained features, and I do not just want to add two new layers to the VGG's conv net, as I mentioned, I want to transfer the fc layers of the smaller network and freeze all blocks of conv layers but one of VGGs and train again. Below is my code but I get an error (no matter how I tried to change around the code, I get a similar error)
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense
# path to the model weights files.
weights_path = '/home/d/Desktop/s/vgg16_weights.h5'
top_model_weights_path = '/home/d/Desktop/s/model_weights.h5'
# dimensions of our images.
img_width, img_height = 256, 256
# build the VGG16 network
base_model = applications.VGG16(weights='imagenet', include_top=False, input_shape=(256, 256, 3))
print('Model loaded.')
# set the first 25 layers (up to the last conv block)
# to non-trainable (weights will not be updated)
for layer in base_model.layers[:15]:
layer.trainable = False
# build a classifier model to put on top of the convolutional model
top_model = Sequential()
top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(1, activation='sigmoid'))
# note that it is necessary to start with a fully-trained
# classifier, including the top classifier,
# in order to successfully do fine-tuning
top_model.load_weights(top_model_weights_path)
model= Model(inputs=base_model.input, outputs=top_model(base_model.output))
# add the model on top of the convolutional base
#model.add(top_model)
print(top_model.summary())
# compile the model with a SGD/momentum optimizer
# and a very slow learning rate.
model.compile(loss='binary_crossentropy',
optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
metrics=['accuracy'])
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
train_dir = '/home/d/Desktop/s/data/train'
eval_dir = '/home/d/Desktop/s/data/eval'
test_dir = '/home/d/Desktop/s/data/test'
# create a data generator
train_datagen = ImageDataGenerator(rescale=1./255, #Scale the image between 0 and 1
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,)
val_datagen = ImageDataGenerator(rescale=1./255) #We do not augment validation data. we only perform rescale
test_datagen = ImageDataGenerator(rescale=1./255) #We do not augment validation data. we only perform rescale
# load and iterate training dataset
train_generator = train_datagen.flow_from_directory(train_dir, class_mode='binary', batch_size=16, shuffle='True', seed=42)
# load and iterate validation dataset
val_generator = val_datagen.flow_from_directory(eval_dir, class_mode='binary', batch_size=16, shuffle='True', seed=42)
# load and iterate test dataset
test_generator = test_datagen.flow_from_directory(test_dir, class_mode=None, batch_size=1, shuffle='False', seed=42)
#The training part
#We train for 64 epochs with about 100 steps per epoch
history = model.fit_generator(train_generator,
steps_per_epoch=train_generator.n // train_generator.batch_size,
epochs=6,
validation_data=val_generator,
validation_steps=val_generator.n // val_generator.batch_size)
The error I am getting is:
Model loaded.
Traceback (most recent call last):
File "/home/d/Desktop/s/transferLearningS.py", line 33, in <module>
top_model.load_weights(top_model_weights_path)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/network.py", line 1166, in load_weights
f, self.layers, reshape=reshape)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/saving.py", line 1030, in load_weights_from_hdf5_group
str(len(filtered_layers)) + ' layers.')
ValueError: You are trying to load a weight file containing 6 layers into a model with 2 layers.
And my smaller network is built this way:
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(256, 256, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(Dropout(0.2))
model.add(layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(Dropout(0.2))
model.add(layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(Dropout(0.2))
model.add(layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(Dropout(0.2))
model.add(layers.Flatten())
model.add(layers.Dropout(0.5)) #Dropout for regularization
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid')) #Sigmoid function at the end because we have just two classes
Any recommendations how I can fix this issue?

How do I change the last layer in Keras to get logits rather than probabilities from model?

My goal is to retrieve logits from a Keras neural network model. I read here: Keras - how to get unnormalized logits instead of probabilities
That I need to change the last activation layer to "linear." Here's my code
from __future__ import absolute_import, division, print_function
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
#Preprocessing
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
train_images = train_images / 255.0
test_images = test_images / 255.0
#Preprocessing
#Generate the Model
model = keras.Sequential([
keras.layers.Flatten(input_shape=(28, 28)),
keras.layers.Dense(128, activation=tf.nn.relu),
keras.layers.Dense(10, activation=tf.nn.linear)
])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
model.fit(train_images, train_labels, epochs=5)
The problem happens on this line. Is there a better way to get the logits? If not how do I get the activation to be linear?
keras.layers.Dense(10, activation=tf.nn.linear)
You can use Activation layer
#Generate the Model
model = keras.Sequential([
keras.layers.Flatten(input_shape=(28, 28)),
keras.layers.Dense(128, activation=tf.nn.relu),
keras.layers.Dense(10, activation=None),
keras.layers.Activation('relu')
])
Using linear activation is correct for getting the logits. As you're developing keras models, use the activations from keras.
Replace
keras.layers.Dense(128, activation=tf.nn.relu),
keras.layers.Dense(10, activation=tf.nn.linear)
with
keras.layers.Dense(128, activation=keras.activations.relu),
keras.layers.Dense(10, activation=keras.activations.linear)
Alternately, not specifying an activation also defaults to linear.
keras.layers.Dense(10)

Keras Conv1D for Time Series

I am just a novice in area of deep learning.
I made my first basic attempt with Keras Conv1D. Not sure what I did and whether I did it right. My input data is simply total sales by every week (total of 313 weeks), for stores across US and with a time step of 1.
Here is my code:
from pandas import read_csv
import matplotlib.pyplot as plt
import numpy
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return numpy.array(dataX), numpy.array(dataY)
seed = 7
numpy.random.seed(seed)
dataframe = read_csv('D:/MIS793/Dataset/Academic Dataset External 2/Python scripts/totalsale _byweek.csv', usecols=[1], engine='python')
plt.plot(dataframe)
plt.show()
dataset = dataframe.values
dataset = dataset.astype('float32')
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
# reshape into X=t and Y=t+1
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
trainX = trainX.reshape(trainX.shape[0], trainX.shape[1], 1).astype('float32')
testX = testX.reshape(testX.shape[0], testX.shape[1], 1).astype('float32')
model = Sequential()
model.add(Conv1D(filters=10, kernel_size=1, padding='same', strides=1, activation='relu',input_shape=(1,1)))
model.add(MaxPooling1D(pool_size=1))
model.add(Flatten())
model.add(Dense(250, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(loss='mse', optimizer='adam', metrics=['mae'])
print(model.summary())
model.fit(trainX, trainY, validation_data=(testX, testY), epochs=10, batch_size=100)
scores = model.evaluate(testX, testY, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))
Not sure about few things here:
Reshaping of trainX and testX.
Value of kernel_size and input_shape
My idea here is it's just one vector of sales value. 10 filters, each of size 1 move from one value to another. Input shape is of the format time step, dimensions.
I only got accuracy of 10.91%! So my first question is whether I am feeding in the right parameters.
Thanks
ASC
With model.metrics_names you can get the labels of your scores variable.
In your case it will be ['loss', 'mean_absolute_error'].
So what you are printing is not the accuracy, but the mae, multiplied by 100.
I tried using accuracy instead of mae. However I got accuracy as 0%. Just wondering as this was about predicting numerical values, should I really use accuracy? Here is my latest code.
from pandas import read_csv
import matplotlib.pyplot as plt
import numpy
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
def create_dataset(dataset, look_back=1):
dataX, dataY = [], []
for i in range(len(dataset)-look_back):
a = dataset[i:(i+look_back), 0]
dataX.append(a)
dataY.append(dataset[i + look_back, 0])
return numpy.array(dataX), numpy.array(dataY)
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
dataframe = read_csv('D:/MIS793/Dataset/Academic Dataset External 2/Python scripts/totalsale _byweek.csv', usecols=[1], engine='python')
plt.plot(dataframe)
plt.show()
dataset = dataframe.values
dataset = dataset.astype('float32')
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
# reshape into X=t and Y=t+1
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
trainX = trainX.reshape(trainX.shape[0], trainX.shape[1],1).astype('float32')
testX = testX.reshape(testX.shape[0], testX.shape[1],1).astype('float32')
model = Sequential()
model.add(Conv1D(filters=20, kernel_size=1, padding='same', strides=1, activation='relu',input_shape=(1,1)))
model.add(MaxPooling1D(pool_size=1))
model.add(Conv1D(filters=10, kernel_size=1, padding='same', strides=1, activation='relu'))
model.add(MaxPooling1D(pool_size=1))
model.add(Flatten())
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(trainX, trainY, validation_data=(testX, testY), epochs=10, batch_size=100)
scores = model.evaluate(testX, testY, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))
OR should I go with MAE?
If I go with MAE, my scores will look like below:
[0.12740663779013364, 0.31208728355111426]
First one is loss and second one is MAE. Isn't that a better metrics in this case?
The final line will be like this:
print("MAE: %.2f%%" % (scores[1]))
Thanks
Anindya

Resources