I am getting into a similar problem with reshaping data for 1-D CNN:
I am loading data (training and testing data sets ) from a csv file with 24,325 lines. Each line is a vector of 256 numbers - independent variables plus 11 numbers of expected outcome ( labels ) [0,0,0,0,1,0,0,0,0,0,0]
I am using TensorFlow backend.
The code looks like that:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
#Importing training set
training_set = pd.read_csv("Data30.csv")
X_train = training_set.iloc[:20000, 3 :-11].values
y_train = training_set.iloc[:20000, -11:-1].values
#Importing test set
test_set = pd.read_csv("Data30.csv")
X_test = training_set.iloc[ 20001:, 3 :-11].values
y_test = training_set.iloc[ 20001:, -11:].values
X_train /= np.max(X_train) # Normalise data to [0, 1] range
X_test /= np.max(X_test) # Normalise data to [0, 1] range
print("X_train.shape[0] = " + str(X_train.shape[0]))
print("X_train.shape[1] = " + str(X_train.shape[1]))
print("y_train.shape[0] = " + str(y_train.shape[0]))
print("y_train.shape[1] = " + str(y_train.shape[1]))
print("X_test.shape[0] = " + str(X_test.shape[0]))
print("X_test.shape[1] = " + str(X_test.shape[1]))
This is what I get:
X_train.shape[0] = 20000
X_train.shape1 = 256
y_train.shape[0] = 20000
y_train.shape1 = 11
X_test.shape[0] = 4325
X_test.shape1 = 256
#Convert data into 3d tensor
# Old Version
# X_train = np.reshape(X_train,(1,X_train.shape[0],X_train.shape[1]))
# X_test = np.reshape(X_test,(1,X_test.shape[0],X_test.shape[1]))
**# New Correct Version based on the Answer:**
X_train = np.reshape(X_train,( X_train.shape[0],X_train.shape[1], 1 ))
X_test = np.reshape(X_test,( X_test.shape[0],X_test.shape[1], 1 ))
print("X_train.shape[0] = " + str(X_train.shape[0]))
print("X_train.shape[1] = " + str(X_train.shape[1]))
print("X_test.shape[0] = " + str(X_test.shape[0]))
print("X_test.shape[1] = " + str(X_test.shape[1]))
This is result of the reshaping:
X_train.shape[0] = 1
X_train.shape1 = 20000
X_test.shape[0] = 1
X_test.shape1 = 4325
#Importing convolutional layers
from keras.models import Sequential
from keras.layers import Convolution1D
from keras.layers import MaxPooling1D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers.normalization import BatchNormalization
#Initialising the CNN
classifier = Sequential()
#1.Multiple convolution and max pooling
classifier.add(Convolution1D(filters=8, kernel_size=11, activation="relu", input_shape=( 256, 1 )))
classifier.add(MaxPooling1D(strides=4))
classifier.add(BatchNormalization())
classifier.add(Convolution1D(filters=16, kernel_size=11, activation='relu'))
classifier.add(MaxPooling1D(strides=4))
classifier.add(BatchNormalization())
classifier.add(Convolution1D(filters=32, kernel_size=11, activation='relu'))
classifier.add(MaxPooling1D(strides=4))
classifier.add(BatchNormalization())
#classifier.add(Convolution1D(filters=64, kernel_size=11,activation='relu'))
#classifier.add(MaxPooling1D(strides=4))
#2.Flattening
classifier.add(Flatten())
#3.Full Connection
classifier.add(Dropout(0.5))
classifier.add(Dense(64, activation='relu'))
classifier.add(Dropout(0.25))
classifier.add(Dense(64, activation='relu'))
classifier.add(Dense(1, activation='sigmoid'))
#Configure the learning process
classifier.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
#Train!
classifier.fit_generator(training_set,
steps_per_epoch= 100,
nb_epoch = 200,
validation_data = (X_test,y_test),
validation_steps = 40)
score = classifier.evaluate(X_test, y_test)
This is the error I get:
Traceback (most recent call last):
File "C:/Conda/ML_Folder/CNN Data30.py", line 85, in
classifier.fit_generator(X_train, steps_per_epoch=10, epochs=10, validation_data=(X_test,y_test))
File "C:\Conda\lib\site-packages\keras\legacy\interfaces.py", line 87, in wrapper
return func(*args, **kwargs)
File "C:\Conda\lib\site-packages\keras\models.py", line 1121, in fit_generator
initial_epoch=initial_epoch)
File "C:\Conda\lib\site-packages\keras\legacy\interfaces.py", line 87, in wrapper
return func(*args, **kwargs)
File "C:\Conda\lib\site-packages\keras\engine\training.py", line 1978, in fit_generator
val_x, val_y, val_sample_weight)
File "C:\Conda\lib\site-packages\keras\engine\training.py", line 1378, in _standardize_user_data
exception_prefix='input')
File "C:\Conda\lib\site-packages\keras\engine\training.py", line 144, in _standardize_input_data
str(array.shape))
ValueError: Error when checking input: expected conv1d_1_input to have shape (None, 256, 1) but got array with shape (1, 4325, 256)
Can you please help me to fix the code?
Shapes should be (batchSize, length, channels)
So: (20000,256,1) and (20000,11)
Detail: your last Dense must output 11, so: Dense(11,...)
Related
I am trying to train a 1D-CNN on tabular data to then use LRP on it (as it has been done here). I am stuck at the model.fit() part of the implementation. Up to that point everything seems to work.
Here is the error I get:
model_history = model.fit(X_train_smote, y_train_smote, batch_size=100, epochs=100, validation_split = 0.2)
Epoch 1/100
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [60], in <cell line: 1>()
----> 1 model_history = model.fit(X_train_smote, y_train_smote, batch_size=100, epochs=100, validation_split = 0.2)
File D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\utils\traceback_utils.py:67, in filter_traceback.<locals>.error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
File ~\AppData\Local\Temp\__autograph_generated_filelswqetod.py:15, in outer_factory.<locals>.inner_factory.<locals>.tf__train_function(iterator)
13 try:
14 do_return = True
---> 15 retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
16 except:
17 do_return = False
ValueError: in user code:
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 1051, in train_function *
return step_function(self, iterator)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 1040, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 1030, in run_step **
outputs = model.train_step(data)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 890, in train_step
loss = self.compute_loss(x, y, y_pred, sample_weight)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 948, in compute_loss
return self.compiled_loss(
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\compile_utils.py", line 201, in __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\losses.py", line 139, in __call__
losses = call_fn(y_true, y_pred)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\losses.py", line 243, in call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\losses.py", line 1787, in categorical_crossentropy
return backend.categorical_crossentropy(
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\backend.py", line 5119, in categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
ValueError: Shapes (None,) and (None, 6, 2) are incompatible
Yet, for the sake of clarity here is the rest of my current implementation (adapted from a Kaggle notebook). The model structure is based on the feedback of my previous question here on SO.
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import ListedColormap
from sklearn.metrics import plot_confusion_matrix
from scipy.stats import norm, boxcox
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from collections import Counter
from scipy import stats
import tensorflow as tf
import warnings
warnings.simplefilter(action='ignore', category=Warning)
dataset = pd.read_csv('F:/Programmieren/this_one/data/Churn_Modelling.csv')
# split into variables and target
X = dataset.iloc[:, 3:-1].values
y = dataset.iloc[:, -1].values
# here gender is encoded
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
X[:, 2] = le.fit_transform(X[:, 2])
# one hot encoding the country (as explained in Ullah et al.)
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
X = np.array(ct.fit_transform(X))
# split the data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= .2)
# upsample minority class with SMOTE
import imblearn
from imblearn.over_sampling import SMOTENC
#get original class distribution
counter = Counter(y)
print(counter)
#SMOTENC is used instead of SMOTE because there are multiple categorical features in the dataset
oversample = SMOTENC(categorical_features=[0, 1, 2, 4, 9, 10])
X_train_smote, y_train_smote = oversample.fit_resample(X_train, y_train)
#get new class distribution
counter = Counter(y_train_smote)
print(counter)
# normalize values to range 0-1 as explained in Ullah et al.
from sklearn.preprocessing import MinMaxScaler
mms = MinMaxScaler()
X_train_smote = mms.fit_transform(X_train_smote)
X_test = mms.transform(X_test)
#record-wise normalization for relative value comparison as stated by one of the authors I was in contact with
from sklearn.preprocessing import normalize
X_train_smote = normalize(X_train_smote, axis=1, norm='l1')
X_test = normalize(X_test, axis=1, norm='l1')
#reshape data for CNN
sample_size = X_train_smote.shape[0] # number of samples in train set
time_steps = X_train_smote.shape[1] # number of features in train set
input_dimension = 1
train_data_reshaped = X_train_smote.reshape(sample_size,time_steps,input_dimension)
print("After reshape train data set shape:\n", train_data_reshaped.shape)
print("1 Sample shape:\n",train_data_reshaped[0].shape)
print("An example sample:\n", train_data_reshaped[0])
#reshape test data as well
test_data_reshaped = X_test.reshape(X_test.shape[0],X_test.shape[1],1)
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv1D
#create model as explained in the paper
model = Sequential()
model.add(Conv1D(filters=25, kernel_size=3, activation='relu', input_shape=(12,1)))
model.add(Conv1D(50, 3))
model.add(Conv1D(100, 3))
model.add(Dense(2200, activation='relu'))
model.add(Dense(2, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.summary()
#output of model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv1d (Conv1D) (None, 10, 25) 100
conv1d_1 (Conv1D) (None, 8, 50) 3800
conv1d_2 (Conv1D) (None, 6, 100) 15100
dense (Dense) (None, 6, 2200) 222200
dense_1 (Dense) (None, 6, 2) 4402
dense_2 (Dense) (None, 6, 2) 6
=================================================================
Total params: 245,608
Trainable params: 245,608
Non-trainable params: 0
_________________________________________________________________
Are there any major flaws in my current approach that I am not aware of?
You should add a Flatten() layer before the output layer as the output shape is (6,2) and not (2,).
#Add Flatten layer
model.add(Flatten())
model.add(Dense(2, activation='softmax'))
Also kindly change the loss to sparse_categorical_crossentopy because you are using integers(0,1) as the labels.
#change loss to sparse_categorical_crossentropy
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])
Please refer to the gist for working code. Thank you!
This was the code for building the artificial neural network and the classifier. It was simple churn modelling for determining whether a customer will leave a bank or not.
#Building the ANN
import keras
from keras.models import Sequential
from keras.layers import Dense
#Initializing the ANN
classifier = Sequential()
#Adding input layer and hidden layer iinto ANN
classifier.add(Dense(6, kernel_initializer = 'glorot_uniform', activation = 'relu', input_shape =
(11,)))
#Adding second hidden layer
classifier.add(Dense(6, kernel_initializer = 'glorot_uniform', activation = 'relu'))
#Adding the output/final layer
classifier.add(Dense(1, kernel_initializer = 'glorot_uniform', activation = 'sigmoid'))
#Compiling the ANN
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics=('accuracy'))
#Fitting the ANN on trainig set using fit method
classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)
#Making prediction and analyzing the dataset
y_prediction = classifier.predict(X_test)
#Converting the probablities into definite results for model validation
y_prediction = (y_prediction > 0.5)
#Making confusion matrix for evaluating the resuts
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_prediction)
#Evaluating, improving and tuning the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from keras.models import Sequential
from keras.layers import Dense
def build_classifier():
classifier = Sequential()
classifier.add(Dense(6, kernel_initializer = 'glorot_uniform', activation = 'relu', input_shape =
(11,)))
classifier.add(Dense(6, kernel_initializer = 'glorot_uniform', activation = 'relu'))
classifier.add(Dense(1, kernel_initializer = 'glorot_uniform', activation = 'sigmoid'))
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics=('accuracy'))
return classifier
classifier = KerasClassifier(build_fn = build_classifier(), batch_size = 10, epochs = 100)
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10, n_jobs = -1)
---
And this was the error
RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Users\BSNL\anaconda3\lib\site-packages\joblib\externals\loky\backend\queues.py", line 153, in _ feed
obj = dumps(obj, reducers=reducers)
File "C:\Users\BSNL\anaconda3\lib\site-packages\joblib\externals\loky\backend\reduction.py", line 271,
in dumps
dump(obj, buf, reducers=reducers, protocol=protocol)
File "C:\Users\BSNL\anaconda3\lib\site-packages\joblib\externals\loky\backend\reduction.py", line 264,
in dump
_LokyPickler(file, reducers=reducers, protocol=protocol).dump(obj)
File "C:\Users\BSNL\anaconda3\lib\site-packages\joblib\externals\cloudpickle\cloudpickle_fast.py",
line 563, in dump
return Pickler.dump(self, obj)
TypeError: cannot pickle 'weakref' object
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\BSNL\Documents\Deep_Learning_A_Z\Volume 1 - Supervised Deep Learning\Part 1 -
Artificial Neural Networks (ANN)\Section 4 - Building an ANN\ANN.py", line 78, in
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10, n_jobs = -1)
File "C:\Users\BSNL\anaconda3\lib\site-packages\sklearn\utils\validation.py", line 72, in inner_f
return f(**kwargs)
File "C:\Users\BSNL\anaconda3\lib\site-packages\sklearn\model_selection_validation.py", line 401, in
cross_val_score
cv_results = cross_validate(estimator=estimator, X=X, y=y, groups=groups,
File "C:\Users\BSNL\anaconda3\lib\site-packages\sklearn\utils\validation.py", line 72, in inner_f
return f(**kwargs)
File "C:\Users\BSNL\anaconda3\lib\site-packages\sklearn\model_selection_validation.py", line 242, in
cross_validate
scores = parallel(
File "C:\Users\BSNL\anaconda3\lib\site-packages\joblib\parallel.py", line 1061, in call
self.retrieve()
File "C:\Users\BSNL\anaconda3\lib\site-packages\joblib\parallel.py", line 940, in retrieve
self._output.extend(job.get(timeout=self.timeout))
File "C:\Users\BSNL\anaconda3\lib\site-packages\joblib_parallel_backends.py", line 542, in
wrap_future_result
return future.result(timeout=timeout)
File "C:\Users\BSNL\anaconda3\lib\concurrent\futures_base.py", line 432, in result
return self.__get_result()
File "C:\Users\BSNL\anaconda3\lib\concurrent\futures_base.py", line 388, in __get_result
raise self._exception
PicklingError: Could not pickle the task to send it to the workers.
putting n_jobs = 1 worked because I had also did a mistake of putting brackets in Kerasclassifier's build_fn argument i.e. use build_classifier instead of build_classifier().
I am trying to build a CNN that classifies cats and dogs images but when I try and run the code to resume training of my model an error is thrown up. Here is my code for resume training of the model:
from keras import Sequential
from keras_preprocessing.image import ImageDataGenerator
from keras.layers import *
from keras.callbacks import ModelCheckpoint
from keras.optimizers import *
from keras.models import *
import keras
import numpy as np
import os
img_size = 200 # number of pixels for width and height
#Random Seed
np.random.seed(17897)
training_path = os.getcwd() + "/cats and dogs images/train"
testing_path = os.getcwd() + "/cats and dogs images/test"
#Loads the Model
model = load_model('trained_model.h5')
#Scales the pixel values to between 0 to 1
datagen = ImageDataGenerator(rescale=1.0/255.0)
Batch_size = 10
#Prepares Training Data
training_dataset = datagen.flow_from_directory(directory = training_path,
target_size=(img_size,img_size),
classes = ["cat","dog"],
class_mode = "categorical",
batch_size = Batch_size)
#Prepares Testing Data
testing_dataset = datagen.flow_from_directory(directory = testing_path,
target_size=(img_size,img_size),
classes = ["cat","dog"],
class_mode = "categorical",
batch_size = Batch_size)
#Recompiles model
#model.compile(loss="binary_crossentropy", optimizer="adam", metrics=['accuracy'])
#Checkpoint
filepath = os.getcwd() + "/trained_model.h5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min', save_freq=1)
#Fitting the model to the dataset (Training the Model)
model.fit(x = training_dataset, steps_per_epoch = 400, validation_data=testing_dataset, validation_steps=100, epochs = 10, callbacks=[checkpoint], verbose = 1)
# evaluate model on training dataset
_,acc = model.evaluate(training_dataset, steps=len(training_dataset), verbose=1)
print("Accuracy on training dataset:")
print('> %.3f' % float(acc * 100.0))
#evaluate model on testing dataset
_,acc = model.evaluate(testing_dataset, steps=len(testing_dataset), verbose=1)
print("Accuracy on testing dataset:")
print('> %.3f' % (acc * 100.0))
However when I run the code I get this error:
Found 4000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.
Epoch 1/10
Traceback (most recent call last):
File "C:\Users\Jackson\Documents\Programming\Python Projects\Neural Network That Deteremines Cats and Dogs\Retraining Network.py", line 52, in <module>
model.fit(x = training_dataset, steps_per_epoch = 400, validation_data=testing_dataset, validation_steps=100, epochs = 10, callbacks=[checkpoint], verbose = 1)
File "C:\Users\Jackson\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\training.py", line 108, in _method_wrapper
return method(self, *args, **kwargs)
File "C:\Users\Jackson\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\training.py", line 1098, in fit
tmp_logs = train_function(iterator)
File "C:\Users\Jackson\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\eager\def_function.py", line 780, in __call__
result = self._call(*args, **kwds)
File "C:\Users\Jackson\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\eager\def_function.py", line 846, in _call
return self._concrete_stateful_fn._filtered_call(canon_args, canon_kwds) # pylint: disable=protected-access
File "C:\Users\Jackson\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\eager\function.py", line 1843, in _filtered_call
return self._call_flat(
File "C:\Users\Jackson\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\eager\function.py", line 1923, in _call_flat
return self._build_call_outputs(self._inference_function.call(
File "C:\Users\Jackson\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\eager\function.py", line 545, in call
outputs = execute.execute(
File "C:\Users\Jackson\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\eager\execute.py", line 59, in quick_execute
tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 6400000 values, but the requested shape requires a multiple of 1000000
[[node sequential/flatten/Reshape (defined at C:\Users\Jackson\Documents\Programming\Python Projects\Neural Network That Deteremines Cats and Dogs\Retraining Network.py:52) ]] [Op:__inference_train_function_674]
Function call stack:
train_function
What am I doing wrong as the code seems to be exactly the same as my training code that builds the model and trains it:
from keras import Sequential
from keras_preprocessing.image import ImageDataGenerator
from keras.layers import *
from keras.callbacks import ModelCheckpoint
from keras.optimizers import *
import keras
import numpy as np
import os
img_size = 250 # number of pixels for width and height
#Random Seed
np.random.seed(123456789)
training_path = os.getcwd() + "/cats and dogs images/train"
testing_path = os.getcwd() + "/cats and dogs images/test"
#Defines the Model
model = Sequential([
Conv2D(filters=128, kernel_size=(3,3), activation="relu", padding="same", input_shape=(img_size,img_size,3)),
MaxPool2D(pool_size=(2,2), strides=2),
Conv2D(filters=64, kernel_size=(3,3), activation="relu", padding="same"),
Flatten(),
Dense(32, activation="relu"),
Dense(2, activation="softmax")
])
#Scales the pixel values to between 0 to 1
datagen = ImageDataGenerator(rescale=1.0/255.0)
Batch_size = 10
#Prepares Training Data
training_dataset = datagen.flow_from_directory(directory = training_path,
target_size=(img_size,img_size),
classes = ["cat","dog"],
class_mode = "categorical",
batch_size = Batch_size)
#Prepares Testing Data
testing_dataset = datagen.flow_from_directory(directory = testing_path,
target_size=(img_size,img_size),
classes = ["cat","dog"],
class_mode = "categorical",
batch_size = Batch_size)
#Compiles the model
#model.compile(loss="categorical_crossentropy", optimizer="sgd", metrics=['accuracy'])
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=['accuracy'])
#model.compile(loss="mse", optimizer="sgd", metrics=[keras.metrics.MeanSquaredError()])
#Checkpoint
filepath = os.getcwd() + "/trained_model.h5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min', save_freq=1)
#Fitting the model to the dataset (Training the Model)
model.fit(x = training_dataset, steps_per_epoch = 400,
validation_data=testing_dataset, validation_steps=100,
epochs = 10, callbacks=[checkpoint], verbose = 1)
# evaluate model on training dataset
_,acc = model.evaluate_generator(training_dataset, steps=len(training_dataset), verbose=0)
print("Accuracy on training dataset:")
print('> %.3f' % (acc * 100.0))
#evaluate model on testing dataset
_,acc = model.evaluate_generator(testing_dataset, steps=len(testing_dataset), verbose=0)
print("Accuracy on testing dataset:")
print('> %.3f' % (acc * 100.0))
And this code runs perfectly without any errors so why doesn't the code above work?
I have a question about coding CNN using Keras.
The shape of input data(adj) is (20000, 50, 50); 20000 is the number of samples, 50 x 50 are 2-D data (like images). Batch size is 100.
(actually, there are two inputs: adj=(20000, 50, 50), features=(20000, 50, 52).
The issued part is like below:
from keras.layers import Conv2D, MaxPool2D, Flatten
adj_visible1 = Input(shape=(50, 50, 1))
conv11 = Conv2D(16, kernel_size=5, activation='relu')(adj_visible1)
pool11 = MaxPool2D(pool_size=(2, 2))(conv11)
conv12 = Conv2D(8, kernel_size=5, activation='relu')(pool11)
pool12 = MaxPool2D(pool_size=(2, 2))(conv12)
flat1 = Flatten()(pool12)
But an error message occurred like below:
ValueError: Input 0 is incompatible with layer conv2d_1: expected ndim=4, found ndim=3
I found similar cases that print the same message, however, most of the reason is that they didn't consider the filter like (50, 50), not (50, 50, "1") for input shape.
In my case, I used the shape (50, 50, 1) not (50, 50). However, it still prints the same error message.
What should I do?
I'm attaching the full code as follows:
from sklearn.cross_validation import train_test_split
from keras.models import Sequential
from keras.layers.core import Dense, Dropout
from keras.optimizers import RMSprop, Adam, Adadelta
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input, Flatten, MaxPool2D
from keras.layers.convolutional import Conv2D
from keras.layers.merge import concatenate
from keras.callbacks import CSVLogger
#Settings
epoch = 100
batch_size = 100
test_size = 10000
# Load data
adj = np.load('adj.npy') #(20000, 50, 50)
features = np.load('features.npy') #(20000, 50, 52)
Prop = np.load('Properties.npy') #(20000, 1)
database = np.dstack((adj, features)) #(20000, 50, 102)
#Train/Test split
X_tr, X_te, Y_tr, Y_te = train_test_split(database, Prop, test_size=test_size)
X_tr_adj, X_tr_features = X_tr[:, :, 0:50], X_tr[:, :, 50:]
X_te_adj, X_te_features = X_te[:, :, 0:50], X_te[:, :, 50:]
def create_model():
# first input model
adj_visible1 = Input(shape=(50, 50, 1))
conv11 = Conv2D(16, kernel_size=5, activation='relu')(adj_visible1)
pool11 = MaxPool2D(pool_size=(2, 2))(conv11)
conv12 = Conv2D(8, kernel_size=5, activation='relu')(pool11)
pool12 = MaxPool2D(pool_size=(2, 2))(conv12)
flat1 = Flatten()(pool12)
# second input model
features_visible2 = Input(shape=(50, 52, 1))
conv21 = Conv2D(16, kernel_size=5, activation='relu')(features_visible2)
pool21 = MaxPool2D(pool_size=(2, 2))(conv21)
conv22 = Conv2D(8, kernel_size=5, activation='relu')(pool21)
pool22 = MaxPool2D(pool_size=(2, 2))(conv22)
flat2 = Flatten()(pool22)
# merge input models
merge = concatenate([flat1, flat2])
# interpretation model
hidden1 = Dense(128, activation='relu')(merge)
hidden2 = Dense(32, activation='relu')(hidden1)
output = Dense(1, activation='linear')(hidden2)
model = Model(inputs=[adj_visible1, features_visible2], outputs=output)
model.compile(loss='mean_squared_error', optimizer=Adam())
# summarize layers
print(model.summary())
return model
def train_model(batch_size = 100, nb_epoch = 20):
model = create_model()
csv_logger = CSVLogger('CNN trial.csv')
history = model.fit([X_tr_adj, X_tr_features], Y_tr,
batch_size=batch_size,
epochs=nb_epoch,
verbose=1,
validation_data=([X_te_adj, X_te_features], Y_te),
callbacks=[csv_logger])
predictions_valid = model.predict(X_te_adj, X_te_features, batch_size=batch_size, verbose=1)
return model
train_model(nb_epoch = epoch)
I wrote the code with reference to the following material: https://machinelearningmastery.com/keras-functional-api-deep-learning/
You have to use conv1D and MaxPool1D instead of conv2D and MaxPool2D cause your dataset is a single-channel image instead of 3 channel image. In conv1D layer expect the input to be in format Batch X Height X Width, while in conv2D it expects the input to dimension = 4 i.e Batch X Height X Width X Channels.
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers.core import Dense, Dropout
from keras.optimizers import RMSprop, Adam, Adadelta
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input, Flatten, MaxPool1D
from keras.layers.convolutional import Conv1D
from keras.layers.merge import concatenate
from keras.callbacks import CSVLogger
import numpy as np
epoch = 100
batch_size = 100
test_size = 10000
adj = np.random.randint(0,high=100, size=(20000, 50, 50)) #(20000, 50, 50)
features = np.random.randint(0,high=100, size=(20000, 50, 52)) #(20000, 50, 52)
Prop = np.random.randint(0,high=100, size=(20000,)) #(20000, 1)
database = np.dstack((adj, features)) #(20000, 50, 102)
print( " shape of database :", database.shape)
t
X_tr, X_te, Y_tr, Y_te = train_test_split(database, Prop, test_size=test_size)
X_tr_adj, X_tr_features = X_tr[:, :, 0:50], X_tr[:, :, 50:]
X_te_adj, X_te_features = X_te[:, :, 0:50], X_te[:, :, 50:]
def create_model():
# first input model
adj_visible1 = Input(shape=(50, 50))
conv11 = Conv1D(16, kernel_size=5, activation='relu')(adj_visible1)
pool11 = MaxPool1D(pool_size=2)(conv11)
conv12 = Conv1D(8, kernel_size=5, activation='relu')(pool11)
pool12 = MaxPool1D(pool_size=2)(conv12)
flat1 = Flatten()(pool12)
# second input model
features_visible2 = Input(shape=(50, 52))
conv21 = Conv1D(16, kernel_size=5, activation='relu')(features_visible2)
pool21 = MaxPool1D(pool_size=2)(conv21)
conv22 = Conv1D(8, kernel_size=5, activation='relu')(pool21)
pool22 = MaxPool1D(pool_size=2)(conv22)
flat2 = Flatten()(pool22)
# merge input models
merge = concatenate([flat1, flat2])
# interpretation model
hidden1 = Dense(128, activation='relu')(merge)
hidden2 = Dense(32, activation='relu')(hidden1)
output = Dense(1, activation='linear')(hidden2)
model = Model(inputs=[adj_visible1, features_visible2], outputs=output)
model.compile(loss='mean_squared_error', optimizer=Adam())
# summarize layers
print(model.summary())
return model
def train_model(batch_size = 100, nb_epoch = 20):
model = create_model()
csv_logger = CSVLogger('CNN trial.csv')
history = model.fit([X_tr_adj, X_tr_features], Y_tr,
batch_size=batch_size,
epochs=nb_epoch,
verbose=1,
validation_data=([X_te_adj, X_te_features], Y_te),
callbacks=[csv_logger])
return model
train_model(nb_epoch = 10)
I put together a VAE using Dense Neural Networks in Keras. During model.fit I get a dimension mismatch, but not sure what is throwing the code off. Below is what my code looks like
from keras.layers import Lambda, Input, Dense
from keras.models import Model
from keras.datasets import mnist
from keras.losses import mse, binary_crossentropy
from keras.utils import plot_model
from keras import backend as K
import keras
import numpy as np
import matplotlib.pyplot as plt
import argparse
import os
(x_train, y_train), (x_test, y_test) = mnist.load_data()
image_size = x_train.shape[1]
original_dim = image_size * image_size
x_train = np.reshape(x_train, [-1, original_dim])
x_test = np.reshape(x_test, [-1, original_dim])
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
# network parameters
input_shape = (original_dim, )
intermediate_dim = 512
batch_size = 128
latent_dim = 2
epochs = 50
x = Input(batch_shape=(batch_size, original_dim))
h = Dense(intermediate_dim, activation='relu')(x)
z_mean = Dense(latent_dim)(h)
z_log_sigma = Dense(latent_dim)(h)
def sampling(args):
z_mean, z_log_sigma = args
#epsilon = K.random_normal(shape=(batch, dim))
epsilon = K.random_normal(shape=(batch_size, latent_dim))
return z_mean + K.exp(z_log_sigma) * epsilon
# note that "output_shape" isn't necessary with the TensorFlow backend
# so you could write `Lambda(sampling)([z_mean, z_log_sigma])`
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_sigma])
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='sigmoid')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)
print('X Decoded Mean shape: ', x_decoded_mean.shape)
# end-to-end autoencoder
vae = Model(x, x_decoded_mean)
# encoder, from inputs to latent space
encoder = Model(x, z_mean)
# generator, from latent space to reconstructed inputs
decoder_input = Input(shape=(latent_dim,))
_h_decoded = decoder_h(decoder_input)
_x_decoded_mean = decoder_mean(_h_decoded)
generator = Model(decoder_input, _x_decoded_mean)
def vae_loss(x, x_decoded_mean):
xent_loss = keras.metrics.binary_crossentropy(x, x_decoded_mean)
kl_loss = - 0.5 * K.mean(1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma), axis=-1)
return xent_loss + kl_loss
vae.compile(optimizer='rmsprop', loss=vae_loss)
print('X train shape: ', x_train.shape)
print('X test shape: ', x_test.shape)
vae.fit(x_train, x_train,
shuffle=True,
epochs=epochs,
batch_size=batch_size,
validation_data=(x_test, x_test))
Here is the stack trace that I see when model.fit is called.
File "/home/asattar/workspace/projects/keras-examples/blogautoencoder/VariationalAutoEncoder.py", line 81, in <module>
validation_data=(x_test, x_test))
File "/usr/local/lib/python2.7/dist-packages/Keras-2.2.4-py2.7.egg/keras/engine/training.py", line 1047, in fit
validation_steps=validation_steps)
File "/usr/local/lib/python2.7/dist-packages/Keras-2.2.4-py2.7.egg/keras/engine/training_arrays.py", line 195, in fit_loop
outs = fit_function(ins_batch)
File "/usr/local/lib/python2.7/dist-packages/Keras-2.2.4-py2.7.egg/keras/backend/tensorflow_backend.py", line 2897, in __call__
return self._call(inputs)
File "/usr/local/lib/python2.7/dist-packages/Keras-2.2.4-py2.7.egg/keras/backend/tensorflow_backend.py", line 2855, in _call
fetched = self._callable_fn(*array_vals)
File "/home/asattar/.local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1439, in __call__
run_metadata_ptr)
File "/home/asattar/.local/lib/python2.7/site-packages/tensorflow/python/framework/errors_impl.py", line 528, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [128,784] vs. [96,784]
[[{{node training/RMSprop/gradients/loss/dense_5_loss/logistic_loss/mul_grad/BroadcastGradientArgs}} = BroadcastGradientArgs[T=DT_INT32, _class=["loc:#train...ad/Reshape"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](training/RMSprop/gradients/loss/dense_5_loss/logistic_loss/mul_grad/Shape, training/RMSprop/gradients/loss/dense_5_loss/logistic_loss/mul_grad/Shape_1)]]
Please note the "Incompatible shapes: [128,784] vs. [96,784]" in the stack trace" towards the end of the trace.
According to Keras: What if the size of data is not divisible by batch_size?, one should better use model.fit_generator rather than model.fit here.
To use model.fit_generator, one should define one's own generator object.
Following is an example:
from keras.utils import Sequence
import math
class Generator(Sequence):
# Class is a dataset wrapper for better training performance
def __init__(self, x_set, y_set, batch_size=256):
self.x, self.y = x_set, y_set
self.batch_size = batch_size
self.indices = np.arange(self.x.shape[0])
def __len__(self):
return math.floor(self.x.shape[0] / self.batch_size)
def __getitem__(self, idx):
inds = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
batch_x = self.x[inds]
batch_y = self.y[inds]
return batch_x, batch_y
def on_epoch_end(self):
np.random.shuffle(self.indices)
train_datagen = Generator(x_train, x_train, batch_size)
test_datagen = Generator(x_test, x_test, batch_size)
vae.fit_generator(train_datagen,
steps_per_epoch=len(x_train)//batch_size,
validation_data=test_datagen,
validation_steps=len(x_test)//batch_size,
epochs=epochs)
Code adopted from How to shuffle after each epoch using a custom generator?.
Just tried to replicate and found out that when you define
x = Input(batch_shape=(batch_size, original_dim))
you're setting the batch size and it's causing a mismatch when it starts to validate. Change to
x = Input(shape=input_shape)
and you should be all set.