Model evaluation throwing "required broadcastable shapes" error - python-3.x

I am trying to evaluate a model using model.evaluate(X_test, Y_test), where X_test and Y_test are the test sets obtained from the CIFAR-10 dataset. While evaluating the model, I get the error below. I'm not sure why. Any help will be appreciated.
Node: 'mean_squared_error/SquaredDifference'
required broadcastable shapes
[[{{node mean_squared_error/SquaredDifference}}]] [Op:__inference_test_function_1521711]
The way that I'm obtaining X_test and Y_test is given below:
# import all required libraries
from keras.datasets import cifar10
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, BatchNormalization
from keras.models import Model, Sequential
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import accuracy_score
from keras.models import load_model
from keras.losses import msle
import numpy as np # linear algebra
import matplotlib.pyplot as plt #visualization library
#load training and test dataset from cifar10 dataset
(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()
# Unit normalizing
X_train = X_train.astype('float32')/255
X_test = X_test.astype('float32')/255
# Reshaping training and test datasets
X_train = X_train.reshape(len(X_train),X_train.shape[1],X_train.shape[2],3)
print(X_train.shape) #(50000, 32, 32, 3)
print(X_test.shape) #(10000, 32, 32, 3)
print(Y_test.shape) #(10000, 1)
def build_autoencoder(img_shape):
    # The encoder network
    encoder = Sequential()
    encoder.add(Conv2D(32, kernel_size=3, strides=1, padding='same', activation='relu', input_shape=img_shape)) # 32x32x32
    encoder.add(BatchNormalization()) # 32x32x32
    encoder.add(MaxPooling2D(2, padding='same')) # 16x16x32
    encoder.add(Conv2D(16, kernel_size=3, strides=1, padding='same', activation='relu')) # 16x16x16
    encoder.add(BatchNormalization()) # 16x16x16
    encoder.add(Conv2D(8, kernel_size=3, strides=1, padding='same', activation='relu')) # 16x16x8
    encoder.add(BatchNormalization()) # 16x16x8
    encoder.add(Conv2D(8, kernel_size=3, strides=1, padding='same', activation='relu')) # 16x16x8

    # The decoder network
    decoder = Sequential()
    decoder.add(Conv2D(32, kernel_size=3, strides=1, padding='same', activation='relu')) # 16x16x32
    decoder.add(BatchNormalization()) # 16x16x32
    decoder.add(UpSampling2D()) # 32x32x32
    decoder.add(Conv2D(16, kernel_size=3, strides=2, padding='same', activation='relu')) # 16x16x16
    decoder.add(BatchNormalization()) # 16x16x16
    decoder.add(UpSampling2D()) # 32x32x16
    decoder.add(Conv2D(16, kernel_size=3, strides=1, padding='same', activation='relu')) # 32x32x16
    decoder.add(BatchNormalization()) # 32x32x16
    decoder.add(Conv2D(3, kernel_size=1, strides=1, padding='same', activation='sigmoid')) # 32x32x3
    return encoder, decoder
IMG_SHAPE = X_train.shape[1:] #(32, 32, 3)
input_img = Input(shape=IMG_SHAPE) #create image input
encoder, decoder = build_autoencoder(IMG_SHAPE)
code = encoder(input_img) #encode image
reconstruction = decoder(code) #decoder image
autoencoder = Model(input_img,reconstruction) #create autoencoder model
autoencoder.compile(optimizer='adam', metrics=['accuracy'], loss='mean_squared_error') # compile with the Adam optimizer and mean squared error loss
print(autoencoder.summary())
Here's the summary of the model:
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 32, 32, 3)] 0
sequential (Sequential) (None, 16, 16, 8) 7488
sequential_1 (Sequential) (None, 32, 32, 3) 9587
=================================================================
Total params: 17,075
Trainable params: 16,835
Non-trainable params: 240
_________________________________________________________________
autoencoder.fit(x=X_train, y=X_train,
                validation_data=[X_test, X_test], batch_size=32, epochs=200)
#evaluate the model
autoencoder.evaluate(X_test, Y_test)
The error that it throws:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-29-976ff2f8b7a2> in <module>()
----> 1 autoencoder.evaluate(X_test, Y_test)
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
53 ctx.ensure_initialized()
54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
InvalidArgumentError: Graph execution error:
Detected at node 'mean_squared_error/SquaredDifference' defined at (most recent call last):
File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.7/dist-packages/traitlets/config/application.py", line 846, in launch_instance
app.start()
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelapp.py", line 499, in start
self.io_loop.start()
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 132, in start
self.asyncio_loop.run_forever()
File "/usr/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
self._run_once()
File "/usr/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
handle._run()
File "/usr/lib/python3.7/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 122, in _handle_events
handler_func(fileobj, events)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 577, in _handle_events
self._handle_recv()
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 606, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 556, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2828, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-29-976ff2f8b7a2>", line 1, in <module>
autoencoder.evaluate(X_test, Y_test)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1716, in evaluate
tmp_logs = self.test_function(iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1525, in test_function
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1514, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1507, in run_step
outputs = model.test_step(data)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1473, in test_step
self.compute_loss(x, y, y_pred, sample_weight)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 919, in compute_loss
y, y_pred, sample_weight, regularization_losses=self.losses)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py", line 201, in __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 141, in __call__
losses = call_fn(y_true, y_pred)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 245, in call
return ag_fn(y_true, y_pred, **self._fn_kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 1329, in mean_squared_error
return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)
Node: 'mean_squared_error/SquaredDifference'
required broadcastable shapes
[[{{node mean_squared_error/SquaredDifference}}]] [Op:__inference_test_function_1521711]
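Editor's note: the autoencoder was trained with the images as both input and target (fit(x=X_train, y=X_train)), so its output has shape (None, 32, 32, 3), while Y_test has shape (10000, 1); mean squared error cannot broadcast those two shapes against each other. A minimal sketch of the likely fix, assuming the goal is reconstruction error on the test images:
# Evaluate the autoencoder the same way it was trained: images serve as both
# input and target, so shapes are (None, 32, 32, 3) on both sides of the loss.
autoencoder.evaluate(X_test, X_test)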

Related

ValueError when fitting keras model

I have the following code:
from sklearn.datasets import fetch_openml
import numpy as np
import keras
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist["data"], mnist["target"]
y = y.astype(np.uint8)
X_digits = [np.array(X.iloc[i]) for i in range(len(X))]
X = np.array([some_digit.reshape(28, 28) for some_digit in X_digits])
X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]
model = keras.models.Sequential([
    keras.layers.Conv2D(64, 7, activation="relu", padding="same",
                        input_shape=[28, 28, 1]),
    keras.layers.MaxPooling2D(2),
    keras.layers.Conv2D(128, 3, activation="relu", padding="same"),
    keras.layers.Conv2D(128, 3, activation="relu", padding="same"),
    keras.layers.MaxPooling2D(2),
    keras.layers.Conv2D(256, 3, activation="relu", padding="same"),
    keras.layers.Conv2D(256, 3, activation="relu", padding="same"),
    keras.layers.MaxPooling2D(2),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation="relu"),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(64, activation="relu"),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(10, activation="softmax")
])
model.compile(loss="categorical_crossentropy")
That all seems to work fine. But then on this line:
model.fit(X_train, y_train)
I get this error:
ValueError Traceback (most recent call last)
<ipython-input-19-d768f88d541e> in <module>()
----> 1 model.fit(X_train, y_train)
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in autograph_handler(*args, **kwargs)
1127 except Exception as e: # pylint:disable=broad-except
1128 if hasattr(e, "ag_error_metadata"):
-> 1129 raise e.ag_error_metadata.to_exception(e)
1130 else:
1131 raise
ValueError: in user code:
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 878, in train_function *
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 867, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 860, in run_step **
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 810, in train_step
y, y_pred, sample_weight, regularization_losses=self.losses)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py", line 201, in __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 141, in __call__
losses = call_fn(y_true, y_pred)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 245, in call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 1665, in categorical_crossentropy
y_true, y_pred, from_logits=from_logits, axis=axis)
File "/usr/local/lib/python3.7/dist-packages/keras/backend.py", line 4994, in categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
ValueError: Shapes (32, 1) and (32, 10) are incompatible
What is going wrong here?
As @Dr. Snoopy mentioned, the shape of your labels is not correct.
After you split your data into train and test sets, you should make sure that your labels are properly one-hot encoded with the number of classes you want to have (in this case 10).
Simply put this after your split and it should work:
from tensorflow.keras.utils import to_categorical
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)
y_train.shape
Output should be:
(60000, 10)
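Alternatively (a sketch, not part of the original answer, reusing the model and data from the question): you can keep the integer labels as they are and switch to the sparse variant of the loss, which accepts labels of shape (60000,) directly:
# Assumption: same model and data as above. sparse_categorical_crossentropy
# takes integer class labels, so no to_categorical call is needed.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.fit(X_train, y_train)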

Tensorflow 2.0 InvalidArgumentError: assertion failed: [Condition x == y did not hold element-wise:]

I am training an MNIST CNN. When I run my code, the problem below appears. I tried other answers but they did not work. I am new to TensorFlow, so can someone explain this error to me? Here is my code. I am using PyCharm 2020.2 and Python 3.6 in Anaconda. I could not find any help on this.
import tensorflow as tf
from tensorflow.keras.models import Sequential
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = tf.keras.utils.normalize(x_train, axis=1)
x_test = tf.keras.utils.normalize(x_train, axis=1)
model = Sequential()
model.add(tf.keras.layers.Dense(256))
model.add(tf.keras.layers.Conv1D(kernel_size=4, strides=1, filters=4, activation="relu"))
model.add(tf.keras.layers.Conv1D(kernel_size=3, strides=1, activation="relu", filters=3))
model.add(tf.keras.layers.Dense(128, activation="relu"))
model.add(tf.keras.layers.Conv1D(kernel_size=2, filters=2, strides=1, activation="relu"))
model.add(tf.keras.layers.Conv1D(kernel_size=1, filters=1, strides=1, activation="relu"))
model.add(tf.keras.layers.Dense(64, activation="relu"))
model.add(tf.keras.layers.MaxPool1D(pool_size=2, strides=1))
model.add(tf.keras.layers.Dense(256, activation="relu"))
model.add(tf.keras.layers.Conv1D(kernel_size=4, filters=4, strides=1, activation="relu"))
model.add(tf.keras.layers.Conv1D(kernel_size=3, filters=3, strides=1, activation="relu"))
model.add(tf.keras.layers.MaxPool1D(pool_size=2, strides=1))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(128, activation="relu"))
model.add(tf.keras.layers.Conv1D(kernel_size=2, filters=2, strides=1, activation="relu"))
model.add(tf.keras.layers.Conv1D(kernel_size=1, filters=1, strides=1, activation="relu"))
model.add(tf.keras.layers.Dense(64, activation="relu"))
model.add(tf.keras.layers.Dense(16, activation="softmax"))
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x=x_train, y=y_train, batch_size=64, epochs=5, shuffle=True, validation_split=0.1)
model.summary()
It gives this error:
Train on 54000 samples, validate on 6000 samples
Epoch 1/5
2020-09-09 15:16:16.953428: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cublas64_10.dll
2020-09-09 15:16:17.146701: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudnn64_7.dll
2020-09-09 15:16:17.741916: W tensorflow/stream_executor/gpu/redzone_allocator.cc:312] Internal: Invoking GPU asm compilation is supported on Cuda non-Windows platforms only
Relying on driver to perform ptx compilation. This message will be only logged once.
2020-09-09 15:16:18.085250: W tensorflow/core/common_runtime/base_collective_executor.cc:217] BaseCollectiveExecutor::StartAbort Invalid argument: assertion failed: [Condition x == y did not hold element-wise:] [x (loss/output_1_loss/SparseSoftmaxCrossEntropyWithLogits/Shape_1:0) = ] [64 1] [y (loss/output_1_loss/SparseSoftmaxCrossEntropyWithLogits/strided_slice:0) = ] [64 14]
[[{{node loss/output_1_loss/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert}}]]
64/54000 [..............................] - ETA: 39:34
Traceback (most recent call last):
File "F:\anaconda\envs\tensorflow1\lib\site-packages\IPython\core\interactiveshell.py", line 3331, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-2-d2317d03e1c1>", line 1, in <module>
runfile('F:/Pycharm_projects/my_fun_project/Fake or real news/fake-or-real-news/bitcoin.py', wdir='F:/Pycharm_projects/my_fun_project/Fake or real news/fake-or-real-news')
File "C:\Program Files\JetBrains\PyCharm Community Edition 2019.3.3\plugins\python-ce\helpers\pydev\_pydev_bundle\pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "C:\Program Files\JetBrains\PyCharm Community Edition 2019.3.3\plugins\python-ce\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "F:/Pycharm_projects/my_fun_project/Fake or real news/fake-or-real-news/bitcoin.py", line 41, in <module>
model.fit(x=x_train, y=y_train, batch_size=64, epochs=5, shuffle=True, validation_split=0.1)
File "F:\anaconda\envs\tensorflow1\lib\site-packages\tensorflow_core\python\keras\engine\training.py", line 819, in fit
use_multiprocessing=use_multiprocessing)
File "F:\anaconda\envs\tensorflow1\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py", line 342, in fit
total_epochs=epochs)
File "F:\anaconda\envs\tensorflow1\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py", line 128, in run_one_epoch
batch_outs = execution_function(iterator)
File "F:\anaconda\envs\tensorflow1\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py", line 98, in execution_function
distributed_function(input_fn))
File "F:\anaconda\envs\tensorflow1\lib\site-packages\tensorflow_core\python\eager\def_function.py", line 568, in __call__
result = self._call(*args, **kwds)
File "F:\anaconda\envs\tensorflow1\lib\site-packages\tensorflow_core\python\eager\def_function.py", line 632, in _call
return self._stateless_fn(*args, **kwds)
File "F:\anaconda\envs\tensorflow1\lib\site-packages\tensorflow_core\python\eager\function.py", line 2363, in __call__
return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
File "F:\anaconda\envs\tensorflow1\lib\site-packages\tensorflow_core\python\eager\function.py", line 1611, in _filtered_call
self.captured_inputs)
File "F:\anaconda\envs\tensorflow1\lib\site-packages\tensorflow_core\python\eager\function.py", line 1692, in _call_flat
ctx, args, cancellation_manager=cancellation_manager))
File "F:\anaconda\envs\tensorflow1\lib\site-packages\tensorflow_core\python\eager\function.py", line 545, in call
ctx=ctx)
File "F:\anaconda\envs\tensorflow1\lib\site-packages\tensorflow_core\python\eager\execute.py", line 67, in quick_execute
six.raise_from(core._status_to_exception(e.code, message), None)
File "<string>", line 3, in raise_from
tensorflow.python.framework.errors_impl.InvalidArgumentError: assertion failed: [Condition x == y did not hold element-wise:] [x (loss/output_1_loss/SparseSoftmaxCrossEntropyWithLogits/Shape_1:0) = ] [64 1] [y (loss/output_1_loss/SparseSoftmaxCrossEntropyWithLogits/strided_slice:0) = ] [64 14]
[[node loss/output_1_loss/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert (defined at F:/Pycharm_projects/my_fun_project/Fake or real news/fake-or-real-news/bitcoin.py:41) ]] [Op:__inference_distributed_function_2970]
Function call stack:
distributed_function
The error occurs because your output shape and label shape don't match.
This is the architecture of the model you created:
[model summary image]
As you can see, your model outputs (batch_size, 14, 16), but the sparse labels you provide have a shape of (batch_size, 1).
To fix this, add a Flatten layer before your final Dense layers.
Code:
model = Sequential()
model.add(tf.keras.layers.Dense(256, input_shape = (28,28)))
model.add(tf.keras.layers.Conv1D(kernel_size=4, strides=1, filters=4, activation="relu"))
model.add(tf.keras.layers.Conv1D(kernel_size=3, strides=1, activation="relu", filters=3))
model.add(tf.keras.layers.Dense(128, activation="relu"))
model.add(tf.keras.layers.Conv1D(kernel_size=2, filters=2, strides=1, activation="relu"))
model.add(tf.keras.layers.Conv1D(kernel_size=1, filters=1, strides=1, activation="relu"))
model.add(tf.keras.layers.Dense(64, activation="relu"))
model.add(tf.keras.layers.MaxPool1D(pool_size=2, strides=1))
model.add(tf.keras.layers.Dense(256, activation="relu"))
model.add(tf.keras.layers.Conv1D(kernel_size=4, filters=4, strides=1, activation="relu"))
model.add(tf.keras.layers.Conv1D(kernel_size=3, filters=3, strides=1, activation="relu"))
model.add(tf.keras.layers.MaxPool1D(pool_size=2, strides=1))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(128, activation="relu"))
model.add(tf.keras.layers.Conv1D(kernel_size=2, filters=2, strides=1, activation="relu"))
model.add(tf.keras.layers.Conv1D(kernel_size=1, filters=1, strides=1, activation="relu"))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(64, activation="relu"))
model.add(tf.keras.layers.Dense(16, activation="softmax"))
Now your model architecture looks like this:
[updated model summary image]
Your model now has matching shapes and will train without any issues.
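A quick sanity check (a sketch, assuming the architecture above with input_shape=(28,28) on the first Dense layer): after Flatten, the model's 2-D output matches the integer labels expected by sparse_categorical_crossentropy.
# With the Flatten layer in place, the output is rank 2 and matches sparse labels.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
print(model.output_shape)  # expected: (None, 16)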

LSTM prediction model : the loss value doesn't change

I'm trying to implement a simple LSTM prediction model in Keras for time series. I have 10 time series with a lookback window of 28, and the number of features is 1. I need to predict the next value (timesteps=28, n_features=1). Here is my model and the way I tried to train it:
model = Sequential()
model.add(LSTM(28, batch_input_shape=(49,28,1), stateful=True, return_sequences=True))
model.add(LSTM(14, stateful=True))
model.add(Dense(1, activation='relu'))
earlyStopping = callbacks.EarlyStopping(monitor='val_loss', patience=100, verbose=1, mode='auto')
model.compile(loss='mean_squared_error', optimizer='adam')
history = model.fit(train_data, train_y,
                    epochs=1000,
                    callbacks=[earlyStopping],
                    batch_size=49,
                    validation_data=(validation_data, validation_y),
                    verbose=1,
                    shuffle=False)
prediction_result = model.predict(test_data, batch_size=49)
I'm not resetting the states after an epoch, nor shuffling, because the order in the time series is important and there is a connection between them. The problem is that the loss value sometimes changes slightly only after the first epoch and then remains constant; most of the time it doesn't change at all. I tried a different optimizer (RMSprop), changed its learning rate, removed early stopping to let it train longer, changed the batch_size, trained without batching, tried the same model stateless, set shuffle=True, added more layers to make it deeper... but none of it made any difference. What am I doing wrong? Any suggestions?
P.S. My data consists of 10 time series, each of length 567:
timeseries#1: 451, 318, 404, 199, 225, 158, 357, 298, 339, 155, 135, 239, 306, ....
timeseries#2: 304, 274, 150, 143, 391, 357, 278, 557, 98, 106, 305, 288, 325, ....
...
timeseries#10: 208, 138, 201, 342, 280, 282, 280, 140, 124, 261, 193, .....
My lookback window is 28, so I generated the following sequences with 28 timesteps:
[451, 318, 404, 199, 225, 158, 357, 298, 339, 155, 135, 239, 306, .... ]
[318, 404, 199, 225, 158, 357, 298, 339, 155, 135, 239, 306, 56, ....]
[404, 199, 225, 158, 357, 298, 339, 155, 135, 239, 306, 56, 890, ....]
...
[304, 274, 150, 143, 391, 357, 278, 557, 98, 106, 305, 288, 325, ....]
[274, 150, 143, 391, 357, 278, 557, 98, 106, 305, 288, 325, 127, ....]
[150, 143, 391, 357, 278, 557, 98, 106, 305, 288, 325, 127, 798, ....]
...
[208, 138, 201, 342, 280, 282, 280, 140, 124, 261, 193, .....]
[138, 201, 342, 280, 282, 280, 140, 124, 261, 193, 854, .....]
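For reference, a minimal sketch of this sliding-window construction (make_windows is a hypothetical helper, not from the original post; each length-567 series yields 567 - 28 = 539 windows, matching the 539 per series used in the split below):
import numpy as np

def make_windows(series, lookback=28):
    # Each window holds `lookback` consecutive values; the target is the next value.
    X = np.array([series[i:i + lookback] for i in range(len(series) - lookback)])
    y = np.array([series[i + lookback] for i in range(len(series) - lookback)])
    return X[..., np.newaxis], y  # shapes (539, 28, 1) and (539,) for a length-567 series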
Then I split my data as follows (data.shape=(5390,28,1): 5390 windows for the 10 time series):
num_training_ts = int(data.shape[0] / 539 * (1 - config['validation_split_ratio']))
train_size = num_training_ts * 539
train_data = data[:train_size, :, :]
train_y = y[:train_size]
validation_data = data[train_size:-1*539, :, :]
validation_y = y[train_size:-1*539]
test_data = data[-1*539:, :, :] # The last timeseries
test_y = y[-1*539:]
I scaled the data between -1 and 1 using MinMaxScaler, but here for simplicity I'm showing the actual values. At the end I have the following:
train_data.shape=(3234,28,1)
train_y.shape=(3234,)
test_data.shape=(539,28,1)
test_y.shape=(539,)
validation_data.shape=(1617,28,1)
validation_y.shape=(1617,)
When I encounter this kind of issue, I first focus on the data: is the data scaled? Is there enough data for this model?
Then I move on to the model. In your case it seems that all the learning happens in the first iteration, so why don't you try changing the learning rate and the decay of your optimizer?
With Keras that's easy. First define your optimizer (in your code I see you used 'adam'):
my_adam_optimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
then use it in the compile function:
model.compile(loss='mean_squared_error', optimizer=my_adam_optimizer)
UPDATE:
The final relu layer 'cuts' negative values, so if your target contains negatives the model cannot predict them. Earlier in the topic you said you used MinMaxScaler between -1 and 1, and that certainly causes a problem here. Removing the activation parameter falls back to the default, which is 'linear'.
Removing the relu activation from the last layer should fix the problem.
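A sketch of the suggested fix, reusing the asker's architecture but with a linear output layer:
from keras.models import Sequential
from keras.layers import LSTM, Dense

model = Sequential()
model.add(LSTM(28, batch_input_shape=(49, 28, 1), stateful=True, return_sequences=True))
model.add(LSTM(14, stateful=True))
model.add(Dense(1))  # no activation: linear output can reach targets scaled to [-1, 1]
model.compile(loss='mean_squared_error', optimizer='adam')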

Memory error while using keras

I am using Keras for a CNN, but the problem is that it runs out of memory. The error is:
anushreej#cpusrv-gpu-109:~/12EC35005/MTP_Workspace/MTP$ python cnn_implement.py
Using Theano backend.
[INFO] compiling model...
Traceback (most recent call last):
File "cnn_implement.py", line 23, in <module>
model = CNNModel.build(width=150, height=150, depth=3)
File "/home/ms/anushreej/12EC35005/MTP_Workspace/MTP/cnn/networks/model_define.py", line 27, in build
model.add(Dense(depth*height*width))
File "/home/ms/anushreej/anaconda3/lib/python3.5/site-packages/keras/models.py", line 146, in add
output_tensor = layer(self.outputs[0])
File "/home/ms/anushreej/anaconda3/lib/python3.5/site-packages/keras/engine/topology.py", line 458, in __call__
self.build(input_shapes[0])
File "/home/ms/anushreej/anaconda3/lib/python3.5/site-packages/keras/layers/core.py", line 604, in build
name='{}_W'.format(self.name))
File "/home/ms/anushreej/anaconda3/lib/python3.5/site-packages/keras/initializations.py", line 61, in glorot_uniform
return uniform(shape, s, name=name)
File "/home/ms/anushreej/anaconda3/lib/python3.5/site-packages/keras/initializations.py", line 32, in uniform
return K.variable(np.random.uniform(low=-scale, high=scale, size=shape),
File "mtrand.pyx", line 1255, in mtrand.RandomState.uniform (numpy/random/mtrand/mtrand.c:13575)
File "mtrand.pyx", line 220, in mtrand.cont2_array_sc (numpy/random/mtrand/mtrand.c:2902)
MemoryError
Now I am unable to understand why this is happening. My training images are very small, of size 150*150*3.
The code is:
# import the necessary packages
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dense

class CNNModel:
    @staticmethod
    def build(width, height, depth):
        # initialize the model
        model = Sequential()
        # first set of CONV => RELU
        model.add(Convolution2D(50, 5, 5, border_mode="same", batch_input_shape=(None, depth, height, width)))
        model.add(Activation("relu"))
        # second set of CONV => RELU
        # model.add(Convolution2D(50, 5, 5, border_mode="same"))
        # model.add(Activation("relu"))
        # third set of CONV => RELU
        # model.add(Convolution2D(50, 5, 5, border_mode="same"))
        # model.add(Activation("relu"))
        model.add(Flatten())
        model.add(Dense(depth*height*width))
        # if weightsPath is not None:
        #     model.load_weights(weightsPath)
        return model
I faced the same problem. I think the issue is that the number of data points just before the Flatten layer is more than your system can handle (I tried on different systems: one with more RAM worked, and one with less RAM gave this error). Just add more CNN layers to reduce the size before the Flatten layer, and it works.
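To see why, here is a back-of-the-envelope count for the asker's model (my own arithmetic, not from the original answer): the single 'same'-padded Conv2D keeps the 150x150 spatial size, so Flatten feeds 1,125,000 units into a Dense layer of 67,500 units.
# Rough weight count for Dense(depth*height*width) after Flatten:
flatten_units = 150 * 150 * 50   # conv output: 150x150 spatial, 50 filters -> 1,125,000
dense_units = 3 * 150 * 150      # Dense(depth*height*width)               ->    67,500
weights = flatten_units * dense_units
print(f"{weights:,}")            # 75,937,500,000 weights (~300 GB as float32)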
This gave me an error:
model = Sequential()
model.add(Convolution2D(32, 3, 3,border_mode='same',input_shape=(1, 96, 96),activation='relu'))
model.add(Convolution2D(64, 3, 3,border_mode='same',activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2)))
model.add(Flatten())
model.add(Dense(1000,activation='relu'))
model.add(Dense(97,activation='softmax'))
This didn't give an error:
model = Sequential()
model.add(Convolution2D(32, 3, 3,border_mode='same',input_shape=(1, 96, 96),activation='relu'))
model.add(Convolution2D(64, 3, 3,border_mode='same',activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2)))
model.add(Convolution2D(64, 3, 3,border_mode='same',activation='relu'))
model.add(Convolution2D(128, 3, 3,border_mode='same',activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2)))
model.add(Flatten())
model.add(Dense(1000,activation='relu'))
model.add(Dense(97,activation='softmax'))
Hope it helps.

AssertionError when I use deep learning library Keras

Can someone tell me what causes this error? I am using a VGG16 network to do face recognition.
ERROR (theano.gof.opt): Optimization failure due to: LocalOptGroup(local_abstractconv_gemm,local_abstractconv_gradinputs_gemm,local_abstractconv_gradweight_gemm,local_conv2d_cpu,local_conv2d_gradinputs_cpu,local_conv2d_gradweight_cpu)
ERROR (theano.gof.opt): node: AbstractConv2d{border_mode='valid', subsample=(1, 1), filter_flip=True, imshp=(None, None, None, None), kshp=(512, 512, 3, 3)}(IncSubtensor{Set;::, ::, int64:int64:, int64:int64:}.0, convolution2d_26_W)
ERROR (theano.gof.opt): TRACEBACK:
ERROR (theano.gof.opt): Traceback (most recent call last):
  File "D:\Anaconda2\lib\site-packages\theano\gof\opt.py", line 1772, in process_node
    replacements = lopt.transform(node)
  File "D:\Anaconda2\lib\site-packages\theano\gof\opt.py", line 1223, in transform
    repl = opt.transform(node)
  File "D:\Anaconda2\lib\site-packages\theano\tensor\nnet\opt.py", line 153, in local_conv2d_cpu
    subsample=node.op.subsample)
  File "D:\Anaconda2\lib\site-packages\theano\tensor\nnet\conv.py", line 132, in conv2d
    assert image_shape[1] == filter_shape[1]
AssertionError

image [None, None, None, None] filters [512, 512, 3, 3]

Traceback (most recent call last):
  File "", line 1, in <module>
    runfile('E:/Deep Learning/vgg.py', wdir='E:/Deep Learning')
  File "D:\Anaconda2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 699, in runfile
    execfile(filename, namespace)
  File "D:\Anaconda2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 74, in execfile
    exec(compile(scripttext, filename, 'exec'), glob, loc)
  File "E:/Deep Learning/vgg.py", line 110, in <module>
    model.fit(data,label,batch_size=100,nb_epoch=10,shuffle=True,verbose=1,show_accuracy=True,validation_split=0.2)
  File "D:\Anaconda2\lib\site-packages\keras\models.py", line 402, in fit
    sample_weight=sample_weight)
  File "D:\Anaconda2\lib\site-packages\keras\engine\training.py", line 999, in fit
    self._make_test_function()
  File "D:\Anaconda2\lib\site-packages\keras\engine\training.py", line 666, in _make_test_function
    **self._function_kwargs)
  File "D:\Anaconda2\lib\site-packages\keras\backend\theano_backend.py", line 503, in function
    return Function(inputs, outputs, updates=updates, **kwargs)
  File "D:\Anaconda2\lib\site-packages\keras\backend\theano_backend.py", line 489, in __init__
    **kwargs)
  File "D:\Anaconda2\lib\site-packages\theano\compile\function.py", line 320, in function
    output_keys=output_keys)
  File "D:\Anaconda2\lib\site-packages\theano\compile\pfunc.py", line 479, in pfunc
    output_keys=output_keys)
  File "D:\Anaconda2\lib\site-packages\theano\compile\function_module.py", line 1776, in orig_function
    output_keys=output_keys).create(
  File "D:\Anaconda2\lib\site-packages\theano\compile\function_module.py", line 1456, in __init__
    optimizer_profile = optimizer(fgraph)
  File "D:\Anaconda2\lib\site-packages\theano\gof\opt.py", line 101, in __call__
    return self.optimize(fgraph)
  File "D:\Anaconda2\lib\site-packages\theano\gof\opt.py", line 89, in optimize
    ret = self.apply(fgraph, *args, **kwargs)
  File "D:\Anaconda2\lib\site-packages\theano\gof\opt.py", line 230, in apply
    sub_prof = optimizer.optimize(fgraph)
  File "D:\Anaconda2\lib\site-packages\theano\gof\opt.py", line 89, in optimize
    ret = self.apply(fgraph, *args, **kwargs)
  File "D:\Anaconda2\lib\site-packages\theano\gof\opt.py", line 2196, in apply
    lopt_change = self.process_node(fgraph, node, lopt)
  File "D:\Anaconda2\lib\site-packages\theano\gof\opt.py", line 1777, in process_node
    lopt, node)
  File "D:\Anaconda2\lib\site-packages\theano\gof\opt.py", line 1673, in warn_inplace
    return NavigatorOptimizer.warn(exc, nav, repl_pairs, local_opt, node)
  File "D:\Anaconda2\lib\site-packages\theano\gof\opt.py", line 1659, in warn
    raise exc
AssertionError
This is my code:
def VGG_16(weights_path=None):
    model = Sequential()
    model.add(ZeroPadding2D((1,1), input_shape=(3,64,64)))
    model.add(Convolution2D(64, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(64, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(128, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(128, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    if weights_path:
        import h5py
        f = h5py.File(weights_path)
        for k in range(f.attrs['nb_layers']):
            if k >= len(model.layers):
                # we don't look at the last (fully-connected) layers in the savefile
                break
            g = f['layer_{}'.format(k)]
            weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
            model.layers[k].set_weights(weights)
        f.close()
        print('Model loaded.')

    return model

if __name__ == "__main__":
    train_data = np.empty((5800,3,64,64), dtype='float32')
    train_label = np.empty((5800,), dtype="uint8")
    data, label = load_data(r'E:\test\face_64_64\target\train.csv', train_data, train_label)
    # Test pretrained model
    label = np_utils.to_categorical(label, 58)

    model = VGG_16()
    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(58, activation='softmax'))

    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss='categorical_crossentropy', class_mode="categorical")
    model.fit(data, label, batch_size=100, nb_epoch=10, shuffle=True, verbose=1, show_accuracy=True, validation_split=0.2)
I've faced the same issue with Keras/Theano. The error was solved by upgrading Theano to version 0.8.2. Please check that your Theano version is >= 0.8.2, and if it is not, try upgrading. For example, using pip:
pip install theano==0.8.2
