what does set_memory_growth do in tensorflow2 - keras

I'm training a CNN on an Ubuntu server with Keras and TensorFlow 2. If I run my code without the memory-growth code added below, it throws the error shown at the end of this post. I checked my GPU memory usage with nvidia-smi (output below), and it looks like my graphics card was running out of memory. My original code and the error are also posted below. I got the memory-growth code from the SO post referenced below. That post says it allows the GPU memory to grow; what does that mean? Also, when I check after running the additional code, it still shows that I have a GPU device enabled. Does that mean my code is still being run on the GPU after I run the additional code to allow GPU memory growth?
post referenced:
AttributeError: module 'tensorflow' has no attribute 'ConfigProto'
added code:
import tensorflow as tf
physical_devices = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)
code:
nvidia-smi
output:
Tue Mar 8 14:52:55 2022
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.103.01 Driver Version: 470.103.01 CUDA Version: 11.4 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA GeForce ... Off | 00000000:42:00.0 Off | N/A |
| 0% 37C P8 10W / 260W | 7970MiB / 7979MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 2607 C ...a3/envs/tf-gpu/bin/python 7967MiB |
+-----------------------------------------------------------------------------+
original code:
code:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten
model = Sequential()
# CONVOLUTIONAL LAYER
model.add(Conv2D(filters=32, kernel_size=(4,4),input_shape=(28, 28, 1), activation='relu',))
# POOLING LAYER
model.add(MaxPool2D(pool_size=(2, 2)))
# FLATTEN THE 12 x 12 x 32 FEATURE MAPS INTO A 4608-ELEMENT VECTOR BEFORE THE DENSE LAYERS
model.add(Flatten())
# 128 NEURONS IN DENSE HIDDEN LAYER (YOU CAN CHANGE THIS NUMBER OF NEURONS)
model.add(Dense(128, activation='relu'))
# LAST LAYER IS THE CLASSIFIER, THUS 10 POSSIBLE CLASSES
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy',
optimizer='rmsprop',
metrics=['accuracy'])
model.summary()
output:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 25, 25, 32) 544
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 32) 0
_________________________________________________________________
flatten (Flatten) (None, 4608) 0
_________________________________________________________________
dense (Dense) (None, 128) 589952
_________________________________________________________________
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 591,786
Trainable params: 591,786
Non-trainable params: 0
code:
model.fit(x_train,y_cat_train,epochs=10)
error:
---------------------------------------------------------------------------
UnknownError Traceback (most recent call last)
<ipython-input-19-bed1e94810c6> in <module>
----> 1 model.fit(x_train,y_cat_train,epochs=10)
~/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
64 def _method_wrapper(self, *args, **kwargs):
65 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
---> 66 return method(self, *args, **kwargs)
67
68 # Running inside `run_distribute_coordinator` already.
~/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
846 batch_size=batch_size):
847 callbacks.on_train_batch_begin(step)
--> 848 tmp_logs = train_function(iterator)
849 # Catch OutOfRangeError for Datasets of unknown size.
850 # This blocks until the batch has finished executing.
~/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
578 xla_context.Exit()
579 else:
--> 580 result = self._call(*args, **kwds)
581
582 if tracing_count == self._get_tracing_count():
~/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
642 # Lifting succeeded, so variables are initialized and we can run the
643 # stateless function.
--> 644 return self._stateless_fn(*args, **kwds)
645 else:
646 canon_args, canon_kwds = \
~/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/tensorflow/python/eager/function.py in __call__(self, *args, **kwargs)
2418 with self._lock:
2419 graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 2420 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
2421
2422 @property
~/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _filtered_call(self, args, kwargs)
1659 `args` and `kwargs`.
1660 """
-> 1661 return self._call_flat(
1662 (t for t in nest.flatten((args, kwargs), expand_composites=True)
1663 if isinstance(t, (ops.Tensor,
~/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1743 and executing_eagerly):
1744 # No tape is watching; skip to running the function.
-> 1745 return self._build_call_outputs(self._inference_function.call(
1746 ctx, args, cancellation_manager=cancellation_manager))
1747 forward_backward = self._select_forward_and_backward_functions(
~/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/tensorflow/python/eager/function.py in call(self, ctx, args, cancellation_manager)
591 with _InterpolateFunctionError(self):
592 if cancellation_manager is None:
--> 593 outputs = execute.execute(
594 str(self.signature.name),
595 num_outputs=self._num_outputs,
~/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
57 try:
58 ctx.ensure_initialized()
---> 59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
[[node sequential/conv2d/Conv2D (defined at <ipython-input-19-bed1e94810c6>:1) ]] [Op:__inference_train_function_753]
Function call stack:
train_function

Related

Unable to find a valid cuDNN algorithm to run convolution

I just got this message when trying to run a feed forward torch.nn.Conv2d, getting the following stacktrace:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-26-04bd4a00565d> in <module>
3
4 # call training function
----> 5 losses = train(D, G, n_epochs=n_epochs)
<ipython-input-24-b539315e0aa0> in train(D, G, n_epochs, print_every)
46 real_images = real_images.cuda()
47
---> 48 D_real = D(real_images)
49 d_real_loss = real_loss(D_real, True) # smoothing label 1 => 0.9
50
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
<ipython-input-14-bf68e57c25ff> in forward(self, x)
48 """
49
---> 50 x = self.leaky_relu(self.conv1(x))
51 x = self.leaky_relu(self.conv2(x))
52 x = self.leaky_relu(self.conv3(x))
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/container.py in forward(self, input)
98 def forward(self, input):
99 for module in self:
--> 100 input = module(input)
101 return input
102
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/conv.py in forward(self, input)
347
348 def forward(self, input):
--> 349 return self._conv_forward(input, self.weight)
350
351 class Conv3d(_ConvNd):
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight)
344 _pair(0), self.dilation, self.groups)
345 return F.conv2d(input, weight, self.bias, self.stride,
--> 346 self.padding, self.dilation, self.groups)
347
348 def forward(self, input):
RuntimeError: Unable to find a valid cuDNN algorithm to run convolution
Running nvidia-smi shows:
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.33.01 Driver Version: 440.33.01 CUDA Version: 10.2 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|===============================+======================+======================|
| 0 GeForce GTX 770 On | 00000000:01:00.0 N/A | N/A |
| 38% 50C P8 N/A / N/A | 624MiB / 4034MiB | N/A Default |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| 0 Not Supported |
+-----------------------------------------------------------------------------+
I'm using Python 3.7 and PyTorch 1.5, and the GPU is an Nvidia GeForce GTX 770, running on Ubuntu 18.04.2. I haven't found that error message anywhere. Does it ring a bell?
Thanks a lot in advance.
According to this answer to a similar issue with TensorFlow, it can occur because the VRAM limit was hit (which is rather non-intuitive given the error message).
For my case with PyTorch model training, decreasing batch size helped. You could try this or maybe decrease your model size to consume less VRAM.
This error can be quite tricky. In some circumstances, running out of memory will also produce this error message.
I got this error when testing inference speed on different EC2 instance types. When I dug through the logs, I found this:
(pid=20839) /home/ubuntu/src/skai-ml/venv/lib/python3.7/site-packages/torch/cuda/__init__.py:87: UserWarning:
(pid=20839) Found GPU0 GRID K520 which is of cuda capability 3.0.
(pid=20839) PyTorch no longer supports this GPU because it is too old.
(pid=20839) The minimum cuda capability that we support is 3.5.
Lesson learned: don't use g2.XX instance types for PyTorch models. g3.XX and p series worked fine.
Check the number of classes you assign in the code.
This error appeared to me when I tried to run the code on Cifar100 instead of Cifar10 but forgot to change the num_classes from 10 to 100.
The problem is that you are using torch.nn.Module for the feed-forward pass but returning the result of the functional F.conv2d(). Change your return code to use nn.Conv2d().
This will probably help you more: https://pytorch.org/docs/stable/nn.html?highlight=conv2d#torch.nn.Conv2d
This has happened to me a couple of times. It may be a little basic, but shutting down other running kernels helped me a lot. After shutting down the other kernels, memory was restored almost completely and the problem was gone.

Tensorflow.keras: AlreadyExistsError

I am building a model using TensorFlow. I trained my model and it worked normally. Then I modified my code, and now when I try to train my model I get an AlreadyExistsError. I restarted my Jupyter Notebook but I'm still getting the same error. I need some help, please.
Here is the piece of code where I build the network and train it. The problem occurs in the last line.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dropout, Dense, Flatten, LSTM, MaxPooling1D, Bidirectional
from tensorflow.keras.optimizers import Adam
from keras.callbacks import EarlyStopping, TensorBoard
model = Sequential()
model.add(Conv1D(32, kernel_size=3, activation='elu', padding='same',
input_shape=(vector_size, 1)))
model.add(Conv1D(32, kernel_size=3, activation='elu', padding='same'))
model.add(Conv1D(32, kernel_size=3, activation='relu', padding='same'))
model.add(MaxPooling1D(pool_size=3))
model.add(Bidirectional(LSTM(512, dropout=0.2, recurrent_dropout=0.3)))
model.add(Dense(512, activation='sigmoid'))
model.add(Dropout(0.2))
model.add(Dense(512, activation='sigmoid'))
model.add(Dropout(0.25))
model.add(Dense(512, activation='sigmoid'))
model.add(Dropout(0.25))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.0001, decay=1e-6), metrics=['accuracy'])
tensorboard = TensorBoard(log_dir='logs/', histogram_freq=0, write_graph=True, write_images=True)
model.summary()
model.fit(np.array(x_train), np.array(y_train), batch_size=batch_size, epochs=no_epochs,
validation_data=(np.array(x_test), np.array(y_test)), callbacks=[tensorboard, EarlyStopping(min_delta=0.0001, patience=3)])
Train on 90000 samples, validate on 10000 samples Epoch 1/10
500/90000 [..............................] - ETA: 2:00:49
/anaconda3/lib/python3.7/site-packages/keras/callbacks/callbacks.py:846:
RuntimeWarning: Early stopping conditioned on metric val_loss which
is not available. Available metrics are: (self.monitor,
','.join(list(logs.keys()))), RuntimeWarning
--------------------------------------------------------------------------- AlreadyExistsError Traceback (most recent call
last) in
1 model.fit(np.array(x_train), np.array(y_train), batch_size=batch_size, epochs=no_epochs,
----> 2 validation_data=(np.array(x_test), np.array(y_test)), callbacks=[tensorboard, EarlyStopping(min_delta=0.0001, patience=3)])
3 print('You can continue')
/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py
in fit(self, x, y, batch_size, epochs, verbose, callbacks,
validation_split, validation_data, shuffle, class_weight,
sample_weight, initial_epoch, steps_per_epoch, validation_steps,
validation_freq, max_queue_size, workers, use_multiprocessing,
**kwargs)
817 max_queue_size=max_queue_size,
818 workers=workers,
--> 819 use_multiprocessing=use_multiprocessing)
820
821 def evaluate(self,
/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py
in fit(self, model, x, y, batch_size, epochs, verbose, callbacks,
validation_split, validation_data, shuffle, class_weight,
sample_weight, initial_epoch, steps_per_epoch, validation_steps,
validation_freq, max_queue_size, workers, use_multiprocessing,
**kwargs)
340 mode=ModeKeys.TRAIN,
341 training_context=training_context,
--> 342 total_epochs=epochs)
343 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
344
/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py
in run_one_epoch(model, iterator, execution_function, dataset_size,
batch_size, strategy, steps_per_epoch, num_samples, mode,
training_context, total_epochs)
126 step=step, mode=mode, size=current_batch_size) as batch_logs:
127 try:
--> 128 batch_outs = execution_function(iterator)
129 except (StopIteration, errors.OutOfRangeError):
130 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py
in execution_function(input_fn)
96 # numpy translates Tensors to values in Eager mode.
97 return nest.map_structure(_non_none_constant_value,
---> 98 distributed_function(input_fn))
99
100 return execution_function
/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py
in call(self, *args, **kwds)
566 xla_context.Exit()
567 else:
--> 568 result = self._call(*args, **kwds)
569
570 if tracing_count == self._get_tracing_count():
/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py
in _call(self, *args, **kwds)
630 # Lifting succeeded, so variables are initialized and we can run the
631 # stateless function.
--> 632 return self._stateless_fn(*args, **kwds)
633 else:
634 canon_args, canon_kwds = \
/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py
in call(self, *args, **kwargs) 2361 with self._lock:
2362 graph_function, args, kwargs =
self._maybe_define_function(args, kwargs)
-> 2363 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access 2364 2365 @property
/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py
in _filtered_call(self, args, kwargs) 1609 if
isinstance(t, (ops.Tensor, 1610
resource_variable_ops.BaseResourceVariable))),
-> 1611 self.captured_inputs) 1612 1613 def _call_flat(self, args, captured_inputs, cancellation_manager=None):
/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py
in _call_flat(self, args, captured_inputs, cancellation_manager)
1690 # No tape is watching; skip to running the function.
1691 return
self._build_call_outputs(self._inference_function.call(
-> 1692 ctx, args, cancellation_manager=cancellation_manager)) 1693
forward_backward = self._select_forward_and_backward_functions(
1694 args,
/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py
in call(self, ctx, args, cancellation_manager)
543 inputs=args,
544 attrs=("executor_type", executor_type, "config_proto", config),
--> 545 ctx=ctx)
546 else:
547 outputs = execute.execute_with_cancellation(
/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py
in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
65 else:
66 message = e.message
---> 67 six.raise_from(core._status_to_exception(e.code, message), None)
68 except TypeError as e:
69 keras_symbolic_tensors = [
/anaconda3/lib/python3.7/site-packages/six.py in raise_from(value,
from_value)
AlreadyExistsError: Resource
__per_step_0/sequential/bidirectional/forward_lstm/while_grad/body/_429/gradients/AddN_13/tmp_var/N10tensorflow19TemporaryVariableOp6TmpVarE
[[{{node
sequential/bidirectional/forward_lstm/while_grad/body/_429/gradients/AddN_13/tmp_var}}]]
[Op:__inference_distributed_function_12060]
Function call stack: distributed_function
There is probably state left over from a previous run of the same architecture that is still being seen during training.
This should reset the Keras session:
from tensorflow.keras import backend
backend.clear_session()
In my case I got the problem when using EMR on AWS, and I solved it just by uninstalling Keras. If you have TensorFlow you already have Keras, but there are other packages that depend on the old standalone Keras library, so you need to uninstall the old Keras to avoid this problem.
!pip uninstall keras

Non-Stateful LSTM Issues with Keras

Good day,
I am trying to create an LSTM model (stateful or non-stateful) but am running into several issues.
I am attempting to add a layer using:
model = Sequential()
...
model.add(LSTM(c['num_rnn_unit'],
activation=c['rnn_activation'],
dropout=c['dropout_rnn_input'],
recurrent_dropout=c['dropout_rnn_recurrent'],
return_sequences=True,
stateful=False,
#batch_input_shape=(c['batch_size'],c['num_steps'], c['input_dim'])
))
where:
'num_rnn_unit': np.random.choice([16, 32, 64, 128, 256, 512, 1024])
'rnn_activation': np.random.choice(['tanh', 'sigmoid'])
'dropout_rnn_input': 0
'batch_size': np.random.choice([64, 128])
'num_steps':np.random.choice([5, 10, 15])
'input_dim': 64
I experimented with "stateful=True" and used the commented out "batch_input_shape" but this caused additional errors, which others have had as well but found no workable solution.
So I stuck with trying to make "stateful=False" work, but it yields the error below.
Any thoughts on why this error is coming up? Thanks in advance!
Here is the traceback:
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\engine\sequential.py in add(self, layer)
180 self.inputs = network.get_source_inputs(self.outputs[0])
181 elif self.outputs:
--> 182 output_tensor = layer(self.outputs[0])
183 if isinstance(output_tensor, list):
184 raise TypeError('All layers in a Sequential model '
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\layers\recurrent.py in __call__(self, inputs, initial_state, constants, **kwargs)
539
540 if initial_state is None and constants is None:
--> 541 return super(RNN, self).__call__(inputs, **kwargs)
542
543 # If any of `initial_state` or `constants` are specified and are Keras
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\backend\tensorflow_backend.py in symbolic_fn_wrapper(*args, **kwargs)
73 if _SYMBOLIC_SCOPE.value:
74 with get_graph().as_default():
---> 75 return func(*args, **kwargs)
76 else:
77 return func(*args, **kwargs)
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\engine\base_layer.py in __call__(self, inputs, **kwargs)
487 # Actually call the layer,
488 # collecting output(s), mask(s), and shape(s).
--> 489 output = self.call(inputs, **kwargs)
490 output_mask = self.compute_mask(inputs, previous_mask)
491
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\layers\recurrent.py in call(self, inputs, mask, training, initial_state)
1689 mask=mask,
1690 training=training,
-> 1691 initial_state=initial_state)
1692
1693 @property
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\layers\recurrent.py in call(self, inputs, mask, training, initial_state, constants)
635 mask = mask[0]
636
--> 637 if len(initial_state) != len(self.states):
638 raise ValueError('Layer has ' + str(len(self.states)) +
639 ' states but was passed ' +
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\layers\recurrent.py in states(self)
436 num_states = 1
437 else:
--> 438 num_states = len(self.cell.state_size)
439 return [None for _ in range(num_states)]
440 return self._states
TypeError: object of type 'numpy.int32' has no len()
Would this be the first layer "input_shape", with batch_normalization=True:
if c['batch_normalization']:
model.add(BatchNormalization(input_shape=(c['num_steps'], c['input_dim'])))
model.add(TimeDistributed(Dropout(c['dropout_input']),
input_shape=(c['num_steps'], c['input_dim'])))

Tensorflow Keras - Error while stacking LSTM layers

I have the following sequence of layers. Adding additional LSTMs in the mix yields the following error which I cannot really understand.
I'm using python 3.7.3 on Linux Ubuntu x64
GCC 7.4.0
tensorflow-gpu='2.0.0'
print(x_train_uni.shape) # (299980, 20, 1)
simple_lstm_model = tf.keras.models.Sequential([
tf.keras.layers.LSTM(128, input_shape=x_train_uni.shape[-2:]),
tf.keras.layers.LSTM(64),
tf.keras.layers.LSTM(32),
tf.keras.layers.Dropout(0.25),
tf.keras.layers.LSTM(16),
tf.keras.layers.LSTM(8),
tf.keras.layers.Dense(1, activation='tanh')
])
simple_lstm_model.compile(optimizer='adam', loss='mae')
which yields:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-32-ba40f416ca84> in <module>
6 tf.keras.layers.LSTM(16),
7 tf.keras.layers.LSTM(8),
----> 8 tf.keras.layers.Dense(1, activation='tanh')
9 ])
10
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/sequential.py in __init__(self, layers, name)
112 tf_utils.assert_no_legacy_layers(layers)
113 for layer in layers:
--> 114 self.add(layer)
115
116 @property
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/sequential.py in add(self, layer)
194 # If the model is being built continuously on top of an input layer:
195 # refresh its output.
--> 196 output_tensor = layer(self.outputs[0])
197 if len(nest.flatten(output_tensor)) != 1:
198 raise TypeError('All layers in a Sequential model '
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/layers/recurrent.py in __call__(self, inputs, initial_state, constants, **kwargs)
621
622 if initial_state is None and constants is None:
--> 623 return super(RNN, self).__call__(inputs, **kwargs)
624
625 # If any of `initial_state` or `constants` are specified and are Keras
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
810 # are casted, not before.
811 input_spec.assert_input_compatibility(self.input_spec, inputs,
--> 812 self.name)
813 graph = backend.get_graph()
814 with graph.as_default(), backend.name_scope(self._name_scope()):
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/input_spec.py in assert_input_compatibility(input_spec, inputs, layer_name)
175 'expected ndim=' + str(spec.ndim) + ', found ndim=' +
176 str(ndim) + '. Full shape received: ' +
--> 177 str(x.shape.as_list()))
178 if spec.max_ndim is not None:
179 ndim = x.shape.ndims
ValueError: Input 0 of layer lstm_19 is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: [None, 128]
If however I change the model like so it actually works.
simple_lstm_model = tf.keras.models.Sequential([
tf.keras.layers.LSTM(128, input_shape=x_train_uni.shape[-2:]),
# tf.keras.layers.LSTM(64),
# tf.keras.layers.LSTM(32),
# tf.keras.layers.Dropout(0.25),
# tf.keras.layers.LSTM(16),
# tf.keras.layers.LSTM(8),
tf.keras.layers.Dense(1, activation='tanh')
])
simple_lstm_model.compile(optimizer='adam', loss='mae')
What is it that I'm missing? Why can't two or more LSTM layers be stacked one on top of another?
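An LSTM layer requires a sequence as its input (a 3D tensor). However, the default setting in Keras is to return only the output of the final time step (a 2D tensor) rather than the full sequence.
Hence the second LSTM in the proposed architecture is fed a 2D tensor instead of the required sequence, which is exactly what the error reports (expected ndim=3, found ndim=2).
The solution is to use the return_sequences=True flag on every LSTM layer that feeds another LSTM layer (see LSTM arguments in docs):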
import tensorflow as tf
x_train_uni = tf.zeros((100, 20, 1))
simple_lstm_model = tf.keras.models.Sequential([
tf.keras.layers.LSTM(128, input_shape=x_train_uni.shape[-2:], return_sequences=True),
tf.keras.layers.LSTM(64, return_sequences=True),
tf.keras.layers.LSTM(32, return_sequences=True),
tf.keras.layers.Dropout(0.25),
tf.keras.layers.LSTM(16, return_sequences=True),
tf.keras.layers.LSTM(8),
tf.keras.layers.Dense(1, activation='tanh')
])
simple_lstm_model.compile(optimizer='adam', loss='mae')

How can I create a path to my data for my CNN in Jupyter Notebook

Intro and setup
So I have been trying for some time now to make a simple convolutional neural network. I followed a simple tutorial, which can be found here (link).
It is a simple cat vs dog test (2 categories)
I have set my jupyter/tensorflow/keras up in
C:\Users\labadmin
What I have understood is that I just have to give the path relative to labadmin in order to load my data for testing and training.
Since I am not sure what is causing the error, I have pasted the whole code and error; I think it is about the system not finding the data.
The data folder is set up as follows:
labadmin has a folder called data, and within that there are two folders:
training
test
Cat images and dog images are shuffled together in both folders. There are 10000+ pictures in each folder, so there should be enough.
This is my code:
from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
classifier = Sequential()
classifier.add(Convolution2D(32, 3, 3, input_shape = (64, 64, 3), activation = 'relu'))
classifier.add(MaxPooling2D(pool_size = (2,2)))
classifier.add(Flatten())
classifier.add(Dense(output_dim = 128, activation = 'relu'))
classifier.add(Dense(output_dim = 1, activation = 'sigmoid'))
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics=['accuracy'])
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1./255)
training_set = train_datagen.flow_from_directory(
'data\\training',
target_size=(64, 64),
batch_size=32,
class_mode='categorical',
shuffle=False)
test_set = test_datagen.flow_from_directory(
'data\\test',
target_size=(64, 64),
batch_size=32,
class_mode='categorical',
shuffle=False)
from IPython.display import display
from PIL import Image
classifier.fit_generator(
training_set,
steps_per_epoch=8000,
epochs=10,
validation_data = test_set,
validation_steps = 800)
import numpy as np
from keras_preprocessing import image
test_image = image.load_img('data\\random.jpg', target_size=(64, 64))
test_image = image.img_to_array(test_image)
test_image = np.expand_dims(test_image, axis = 0)
result = classifier.predict(test_image)
training_set.class_indices
if result[0][0]>= 0.5:
prediction = 'dog'
else:
prediction = 'cat'
print(prediction)
I get the following error:
C:\Users\labadmin\Miniconda3\envs\tensorflow\lib\site-packages\ipykernel_launcher.py:26: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(32, (3, 3), input_shape=(64, 64, 3..., activation="relu")`
C:\Users\labadmin\Miniconda3\envs\tensorflow\lib\site-packages\ipykernel_launcher.py:35: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(activation="relu", units=128)`
C:\Users\labadmin\Miniconda3\envs\tensorflow\lib\site-packages\ipykernel_launcher.py:36: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(activation="sigmoid", units=1)`
Found 0 images belonging to 0 classes.
Found 0 images belonging to 0 classes.
Epoch 1/10
---------------------------------------------------------------------------
ZeroDivisionError Traceback (most recent call last)
<ipython-input-5-393aaba195e9> in <module>
82 epochs=10,
83 validation_data = test_set,
---> 84 validation_steps = 800)
85
86 # Our image we now send through to test
~\Miniconda3\envs\tensorflow\lib\site-packages\keras\legacy\interfaces.py in wrapper(*args, **kwargs)
89 warnings.warn('Update your `' + object_name + '` call to the ' +
90 'Keras 2 API: ' + signature, stacklevel=2)
---> 91 return func(*args, **kwargs)
92 wrapper._original_function = func
93 return wrapper
~\Miniconda3\envs\tensorflow\lib\site-packages\keras\engine\training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
1416 use_multiprocessing=use_multiprocessing,
1417 shuffle=shuffle,
-> 1418 initial_epoch=initial_epoch)
1419
1420 @interfaces.legacy_generator_methods_support
~\Miniconda3\envs\tensorflow\lib\site-packages\keras\engine\training_generator.py in fit_generator(model, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
179 batch_index = 0
180 while steps_done < steps_per_epoch:
--> 181 generator_output = next(output_generator)
182
183 if not hasattr(generator_output, '__len__'):
~\Miniconda3\envs\tensorflow\lib\site-packages\keras\utils\data_utils.py in get(self)
707 "`use_multiprocessing=False, workers > 1`."
708 "For more information see issue #1638.")
--> 709 six.reraise(*sys.exc_info())
~\Miniconda3\envs\tensorflow\lib\site-packages\six.py in reraise(tp, value, tb)
691 if value.__traceback__ is not tb:
692 raise value.with_traceback(tb)
--> 693 raise value
694 finally:
695 value = None
~\Miniconda3\envs\tensorflow\lib\site-packages\keras\utils\data_utils.py in get(self)
683 try:
684 while self.is_running():
--> 685 inputs = self.queue.get(block=True).get()
686 self.queue.task_done()
687 if inputs is not None:
~\Miniconda3\envs\tensorflow\lib\multiprocessing\pool.py in get(self, timeout)
642 return self._value
643 else:
--> 644 raise self._value
645
646 def _set(self, i, obj):
~\Miniconda3\envs\tensorflow\lib\multiprocessing\pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
117 job, i, func, args, kwds = task
118 try:
--> 119 result = (True, func(*args, **kwds))
120 except Exception as e:
121 if wrap_exception and func is not _helper_reraises_exception:
~\Miniconda3\envs\tensorflow\lib\site-packages\keras\utils\data_utils.py in next_sample(uid)
624 The next value of generator `uid`.
625 """
--> 626 return six.next(_SHARED_SEQUENCES[uid])
627
628
~\Miniconda3\envs\tensorflow\lib\site-packages\keras_preprocessing\image\iterator.py in __next__(self, *args, **kwargs)
98
99 def __next__(self, *args, **kwargs):
--> 100 return self.next(*args, **kwargs)
101
102 def next(self):
~\Miniconda3\envs\tensorflow\lib\site-packages\keras_preprocessing\image\iterator.py in next(self)
107 """
108 with self.lock:
--> 109 index_array = next(self.index_generator)
110 # The transformation of images is not under thread lock
111 # so it can be done in parallel
~\Miniconda3\envs\tensorflow\lib\site-packages\keras_preprocessing\image\iterator.py in _flow_index(self)
83 self._set_index_array()
84
---> 85 current_index = (self.batch_index * self.batch_size) % self.n
86 if self.n > current_index + self.batch_size:
87 self.batch_index += 1
ZeroDivisionError: integer division or modulo by zero
Thank you for your time.
Did you populate your data\\training and data\\test directories? From the output:
Found 0 images belonging to 0 classes.
Found 0 images belonging to 0 classes.
Epoch 1/10
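it appears that your data generator did not find any images and the resulting dataset is empty; consequently, when Keras tries to run fit_generator, you get the division-by-zero error as it tries to iterate through your empty image set. Note that flow_from_directory expects one subdirectory per class (for example data\\training\\cats and data\\training\\dogs); images placed directly inside data\\training are not picked up, which is consistent with "Found 0 images belonging to 0 classes."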

Resources