How can I implement VGG-net on a dataset of different shape?

How can I implement VGG-net on a dataset of different shape? - keras

I am trying to use a part of the VGG16 model for transfer learning using the Fashion MNIST dataset. The data is processed and the model is specified as per below:
# Load Fashion MNIST (28x28 grayscale images with integer class labels).
data = keras.datasets.fashion_mnist
(train_img, train_labels), (test_img, test_labels) = data.load_data()
train_img.shape, train_labels.shape, test_img.shape, test_labels.shape
#((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))
# transform to rgb as required by VGG: add a channel axis, then expand it to 3 channels
train_img = tf.image.grayscale_to_rgb(tf.expand_dims(train_img, axis=3))
test_img = tf.image.grayscale_to_rgb(tf.expand_dims(test_img, axis=3))
# resize to the minimum size of (32x32)
train_img = tf.image.resize_with_pad(train_img, 32, 32)
# Fixed: resize test_img here. The original passed train_img, which silently
# replaced the test set with a copy of the training images.
test_img = tf.image.resize_with_pad(test_img, 32, 32)
# NOTE(review): VGG16's preprocess_input presumably expects pixel values in
# [0, 255]; scaling to [0, 1] first may be unintended -- confirm against the
# Keras documentation.
train_img = train_img / 255.
test_img = test_img / 255.
from keras.applications.vgg16 import preprocess_input
# NOTE(review): this prepends an axis, so train_img becomes
# (1, 60000, 32, 32, 3) and Keras sees a single sample -- confirm this is
# intended (it is the step discussed in the answer below).
train_img = tf.expand_dims(train_img, axis=0)
test_img = tf.expand_dims(test_img, axis=0)
# preprocessing as required by VGG16
train_img = preprocess_input(train_img)
test_img = preprocess_input(test_img)
# using model without last layers
vgg16 = tf.keras.applications.VGG16(include_top=False, weights='imagenet', input_shape=(32, 32, 3))
# Map layer names to layer objects so an intermediate layer can be looked up by name.
layer_dict = {layer.name: layer for layer in vgg16.layers}
# stop at block3_pool and get output
output = layer_dict['block3_pool'].output
x = keras.layers.Flatten()(output)
# ...add some fully connected layers here...
x = keras.layers.Dense(10, activation='softmax')(x)
# Fixed: the model's output is the tensor `x` built above; the original
# referenced an undefined name `model`.
final = keras.models.Model(inputs=vgg16.input, outputs=x)
# Freeze the first seven layers so their pretrained weights are not updated.
for layer in final.layers[:7]:
    layer.trainable = False
# NOTE(review): no compile() call is shown in this snippet; the model must be
# compiled (with a loss matching the integer labels) before fit().
final.fit(train_img, train_labels, epochs=50, validation_split=0.2)
When I try to fit the model I get the following error:
UnboundLocalError Traceback (most recent call last)
<ipython-input-65-6a0b99b56337> in <module>()
1 early_stopping_cb=keras.callbacks.EarlyStopping(patience=3, verbose=1,restore_best_weights=True)
----> 2 vgg16_1.fit(train_img, train_labels, epochs=50, validation_split=0.2, callbacks=[early_stopping_cb])
1 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
857 logs = tmp_logs # No error, now safe to assign to logs.
858 callbacks.on_train_batch_end(step, logs)
--> 859 epoch_logs = copy.copy(logs)
860
861 # Run validation.
UnboundLocalError: local variable 'logs' referenced before assignment
I thought this might be due to the training set shape being faulty, but then if I use train_img[0] instead, which has shape (60000,32,32,3), then I get the following error instead:
ValueError Traceback (most recent call last)
<ipython-input-66-2b893ccd9ac9> in <module>()
1 early_stopping_cb=keras.callbacks.EarlyStopping(patience=3, verbose=1,restore_best_weights=True)
----> 2 vgg16_1.fit(train_img[0], train_labels, epochs=50, validation_split=0.2, callbacks=[early_stopping_cb])
10 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
64 def _method_wrapper(self, *args, **kwargs):
65 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
---> 66 return method(self, *args, **kwargs)
67
68 # Running inside `run_distribute_coordinator` already.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
849 batch_size=batch_size):
850 callbacks.on_train_batch_begin(step)
--> 851 tmp_logs = train_function(iterator)
852 # Catch OutOfRangeError for Datasets of unknown size.
853 # This blocks until the batch has finished executing.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
578 xla_context.Exit()
579 else:
--> 580 result = self._call(*args, **kwds)
581
582 if tracing_count == self._get_tracing_count():
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
625 # This is the first call of __call__, so we have to initialize.
626 initializers = []
--> 627 self._initialize(args, kwds, add_initializers_to=initializers)
628 finally:
629 # At this point we know that the initialization is complete (or less
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
504 self._concrete_stateful_fn = (
505 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 506 *args, **kwds))
507
508 def invalid_creator_scope(*unused_args, **unused_kwds):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2444 args, kwargs = None, None
2445 with self._lock:
-> 2446 graph_function, _, _ = self._maybe_define_function(args, kwargs)
2447 return graph_function
2448
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
2775
2776 self._function_cache.missed.add(call_context_key)
-> 2777 graph_function = self._create_graph_function(args, kwargs)
2778 self._function_cache.primary[cache_key] = graph_function
2779 return graph_function, args, kwargs
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
2665 arg_names=arg_names,
2666 override_flat_arg_shapes=override_flat_arg_shapes,
-> 2667 capture_by_value=self._capture_by_value),
2668 self._function_attributes,
2669 # Tell the ConcreteFunction to clean up its graph once it goes out of
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
979 _, original_func = tf_decorator.unwrap(python_func)
980
--> 981 func_outputs = python_func(*func_args, **func_kwargs)
982
983 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
439 # __wrapped__ allows AutoGraph to swap in a converted function. We give
440 # the function a weak reference to itself to avoid a reference cycle.
--> 441 return weak_wrapped_fn().__wrapped__(*args, **kwds)
442 weak_wrapped_fn = weakref.ref(wrapped_fn)
443
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
966 except Exception as e: # pylint:disable=broad-except
967 if hasattr(e, "ag_error_metadata"):
--> 968 raise e.ag_error_metadata.to_exception(e)
969 else:
970 raise
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:571 train_function *
outputs = self.distribute_strategy.run(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:951 run **
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:533 train_step **
y, y_pred, sample_weight, regularization_losses=self.losses)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/compile_utils.py:204 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:143 __call__
losses = self.call(y_true, y_pred)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:246 call
return self.fn(y_true, y_pred, **self._fn_kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:1527 categorical_crossentropy
return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py:4561 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/tensor_shape.py:1117 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (32, 1) and (32, 10) are incompatible
Any clues where these errors come from and what I am doing wrong? It feels like I might have missed something obvious, but being a Keras novice I can't get my head around what it is. Help much appreciated.

You need to comment out the two lines that expand the dimensions, as shown below. They change the shape of train_img to (1, 60000, 32, 32, 3), so model.fit complains that you are using a single image for training.
#train_img = tf.expand_dims(train_img, axis=0)
#test_img = tf.expand_dims(test_img, axis=0)
I updated your code and shared it here. You will need to update the architecture to get better accuracy. Follow the transfer learning approach mentioned here and update your code accordingly. Thanks!

It seems the issue was that I had a dense output layer of size 10, while each label has size 1 (an integer class index). The solution was to use the sparse categorical cross-entropy loss function instead of plain categorical cross-entropy.

Related

ValueError: The channel dimension of the inputs should be defined. Found `None`

I am extremely new to TensorFlow, so I am not sure exactly what you will need in order to solve my issue. Do let me know if you need any additional information.
Basically I'm trying to run images through Sequential. Based on the tutorial on https://www.tensorflow.org/tutorials/images/classification, I am trying to plug and play onto my own dataset.
I'm currently stuck at running my model using model.fit(), which gives me the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-90-85c03bda7f8f> in <module>
16
17 epochs=1
---> 18 history = model.fit(
19 train_data,
20 validation_data=test_data,
~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1132 _r=1):
1133 callbacks.on_train_batch_begin(step)
-> 1134 tmp_logs = self.train_function(iterator)
1135 if data_handler.should_sync:
1136 context.async_wait()
~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
816 tracing_count = self.experimental_get_tracing_count()
817 with trace.Trace(self._name) as tm:
--> 818 result = self._call(*args, **kwds)
819 compiler = "xla" if self._jit_compile else "nonXla"
820 new_tracing_count = self.experimental_get_tracing_count()
~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
860 # This is the first call of __call__, so we have to initialize.
861 initializers = []
--> 862 self._initialize(args, kwds, add_initializers_to=initializers)
863 finally:
864 # At this point we know that the initialization is complete (or less
~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
701 self._graph_deleter = FunctionDeleter(self._lifted_initializer_graph)
702 self._concrete_stateful_fn = (
--> 703 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
704 *args, **kwds))
705
~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
3018 args, kwargs = None, None
3019 with self._lock:
-> 3020 graph_function, _ = self._maybe_define_function(args, kwargs)
3021 return graph_function
3022
~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
3412
3413 self._function_cache.missed.add(call_context_key)
-> 3414 graph_function = self._create_graph_function(args, kwargs)
3415 self._function_cache.primary[cache_key] = graph_function
3416
~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3247 arg_names = base_arg_names + missing_arg_names
3248 graph_function = ConcreteFunction(
-> 3249 func_graph_module.func_graph_from_py_func(
3250 self._name,
3251 self._python_function,
~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
996 _, original_func = tf_decorator.unwrap(python_func)
997
--> 998 func_outputs = python_func(*func_args, **func_kwargs)
999
1000 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
610 xla_context.Exit()
611 else:
--> 612 out = weak_wrapped_fn().__wrapped__(*args, **kwds)
613 return out
614
~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
983 except Exception as e: # pylint:disable=broad-except
984 if hasattr(e, "ag_error_metadata"):
--> 985 raise e.ag_error_metadata.to_exception(e)
986 else:
987 raise
ValueError: in user code:
/Users/mongchanghsi/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:839 train_function *
return step_function(self, iterator)
/Users/mongchanghsi/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:829 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/Users/mongchanghsi/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1262 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/Users/mongchanghsi/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2734 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/Users/mongchanghsi/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3423 _call_for_each_replica
return fn(*args, **kwargs)
/Users/mongchanghsi/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:822 run_step **
outputs = model.train_step(data)
/Users/mongchanghsi/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:788 train_step
y_pred = self(x, training=True)
/Users/mongchanghsi/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:1032 __call__
outputs = call_fn(inputs, *args, **kwargs)
/Users/mongchanghsi/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/sequential.py:398 call
outputs = layer(inputs, **kwargs)
/Users/mongchanghsi/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:1028 __call__
self._maybe_build(inputs)
/Users/mongchanghsi/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:2722 _maybe_build
self.build(input_shapes) # pylint:disable=not-callable
/Users/mongchanghsi/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/layers/convolutional.py:188 build
input_channel = self._get_input_channel(input_shape)
/Users/mongchanghsi/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/layers/convolutional.py:367 _get_input_channel
raise ValueError('The channel dimension of the inputs '
ValueError: The channel dimension of the inputs should be defined. Found `None`.
Here is my code for the model:
# Small CNN classifier: three Conv2D + MaxPooling2D stages, then a dense head.
# NOTE(review): no input shape is declared on the first layer, so the model is
# built from whatever shape the dataset provides when fit() is called.
model = Sequential([
layers.Conv2D(16, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(32, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(64, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Flatten(),
layers.Dense(128, activation='relu'),
# Final layer has 4 units and no activation, i.e. it outputs raw logits.
layers.Dense(4)
])
# from_logits=True matches the activation-free final Dense layer above.
model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
epochs=10
# train_data / test_data are the batched tf.data datasets built further below.
history = model.fit(
train_data,
validation_data=test_data,
epochs=epochs
)
I understand that the tutorial uses a built-in preprocessing function; however, I tried to build my own preprocessing function to facilitate my learning as well.
def preprocessing(image, target_size):
    """Load one image file and derive its label from its parent directory.

    Args:
        image: Path of the image file (a string tensor). The second-to-last
            path component is used as the label.
        target_size: Height and width, in pixels, of the square crop taken
            from the decoded image.

    Returns:
        A ``(image, label)`` tuple: the cropped image divided by 255, and the
        label as taken from the path.
    """
    # Extracting labels
    parts = tf.strings.split(image, os.sep)
    label = parts[-2]
    # Decoding image file
    raw_bytes = tf.io.read_file(image)
    # NOTE: `channels` is not passed here, so the channel dimension of the
    # decoded tensor is not statically known (this is the call the answer
    # below changes to channels=1).
    decoded = tf.image.decode_jpeg(raw_bytes)
    # Cropping: honour the target_size argument. The original ignored it and
    # read the global image_size instead.
    cropped = tf.image.crop_to_bounding_box(
        decoded,
        offset_height=25,
        offset_width=25,
        target_height=target_size,
        target_width=target_size,
    )
    # Normalizing
    normalized = cropped / 255
    return normalized, label
# Collect every file matching DATA_DIR/<subdirectory>/<file>.
list_ds = tf.data.Dataset.list_files(DATA_DIR + '/*/*')
# Bind target_size so the mapped function takes only the file path.
preprocess_function = partial(preprocessing, target_size=image_size)
processed_data = list_ds.map(preprocess_function)
# First 8000 elements for training, the remainder for testing, both batched.
# NOTE(review): list_files presumably shuffles the file order by default, so
# take/skip may not give a stable, disjoint split across iterations -- confirm.
train_data = processed_data.take(8000).batch(batch_size)
test_data = processed_data.skip(8000).batch(batch_size)
Other information I can provide: the images are grayscale (hence 1 channel), I normalize them by dividing by 255 in my preprocessing function, image_size is 300 and batch_size is 100.

Try this:
# Request a single (grayscale) channel explicitly so the channel dimension of the decoded tensor is defined.
image = tf.image.decode_jpeg(path, channels=1)

Implementing Wide and Deep Neural Network using Functional API

I am trying to build a Wide and Deep Neural Network using the Keras Functional API. I am getting a shape mismatch error and I don't understand where I am going wrong. I am implementing this on the Fashion MNIST dataset. The X_train shape is (60000,28,28) and the Y_train shape is (60000,). I am guessing that the error is caused by the line `input_ = keras.layers....`, but I don't understand how to resolve it.
Code :
# Building a non-sequential model using the Functional API; one use of it is in Wide and Deep neural networks
input_ = keras.layers.Input(shape=X_train.shape[1:]) # Input shape is X_train's shape without the sample axis ([28,28] here); remember we don't have to set it to the number of neurons in the layer
# NOTE(review): the (28, 28) input is fed to Dense without flattening; the error below shows the resulting output keeps a 28-sized axis, (32, 28, 10).
hidden1 = keras.layers.Dense(100,activation = "relu")(input_) # A layer object is called like a function on its input tensor
hidden2 = keras.layers.Dense(100,activation = "relu")(hidden1)
# Wide path: concatenate the raw input with the output of the deep path.
concat_layer = keras.layers.concatenate([input_,hidden2])
output = keras.layers.Dense(10,activation="softmax")(concat_layer)
model = keras.models.Model(inputs=[input_], outputs=[output])
# Sparse categorical cross-entropy takes integer class labels (Y_train has shape (60000,)).
model.compile(loss = keras.losses.sparse_categorical_crossentropy,optimizer = keras.optimizers.SGD(lr = 0.8),metrics= ["accuracy"])
# Path_Tensor is the TensorBoard log directory (defined outside this snippet).
Tensorboard_cb = keras.callbacks.TensorBoard(Path_Tensor)
model.fit(X_train,Y_train,validation_split=0.2,epochs=100,callbacks=[Tensorboard_cb])
Error :
Epoch 1/100
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-97-675a2b302d27> in <module>
----> 1 model.fit(X_train,Y_train,validation_split=0.2,epochs=100,callbacks=[Tensorboard_cb])
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\training.py in _method_wrapper(self, *args, **kwargs)
106 def _method_wrapper(self, *args, **kwargs):
107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
--> 108 return method(self, *args, **kwargs)
109
110 # Running inside `run_distribute_coordinator` already.
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1096 batch_size=batch_size):
1097 callbacks.on_train_batch_begin(step)
-> 1098 tmp_logs = train_function(iterator)
1099 if data_handler.should_sync:
1100 context.async_wait()
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
778 else:
779 compiler = "nonXla"
--> 780 result = self._call(*args, **kwds)
781
782 new_tracing_count = self._get_tracing_count()
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
821 # This is the first call of __call__, so we have to initialize.
822 initializers = []
--> 823 self._initialize(args, kwds, add_initializers_to=initializers)
824 finally:
825 # At this point we know that the initialization is complete (or less
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\eager\def_function.py in _initialize(self, args, kwds, add_initializers_to)
695 self._concrete_stateful_fn = (
696 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 697 *args, **kwds))
698
699 def invalid_creator_scope(*unused_args, **unused_kwds):
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2853 args, kwargs = None, None
2854 with self._lock:
-> 2855 graph_function, _, _ = self._maybe_define_function(args, kwargs)
2856 return graph_function
2857
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\eager\function.py in _maybe_define_function(self, args, kwargs)
3211
3212 self._function_cache.missed.add(call_context_key)
-> 3213 graph_function = self._create_graph_function(args, kwargs)
3214 self._function_cache.primary[cache_key] = graph_function
3215 return graph_function, args, kwargs
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3073 arg_names=arg_names,
3074 override_flat_arg_shapes=override_flat_arg_shapes,
-> 3075 capture_by_value=self._capture_by_value),
3076 self._function_attributes,
3077 function_spec=self.function_spec,
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
984 _, original_func = tf_decorator.unwrap(python_func)
985
--> 986 func_outputs = python_func(*func_args, **func_kwargs)
987
988 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\eager\def_function.py in wrapped_fn(*args, **kwds)
598 # __wrapped__ allows AutoGraph to swap in a converted function. We give
599 # the function a weak reference to itself to avoid a reference cycle.
--> 600 return weak_wrapped_fn().__wrapped__(*args, **kwds)
601 weak_wrapped_fn = weakref.ref(wrapped_fn)
602
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\framework\func_graph.py in wrapper(*args, **kwargs)
971 except Exception as e: # pylint:disable=broad-except
972 if hasattr(e, "ag_error_metadata"):
--> 973 raise e.ag_error_metadata.to_exception(e)
974 else:
975 raise
ValueError: in user code:
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\training.py:806 train_function *
return step_function(self, iterator)
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\training.py:796 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1211 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2585 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2945 _call_for_each_replica
return fn(*args, **kwargs)
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\training.py:789 run_step **
outputs = model.train_step(data)
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\training.py:749 train_step
y, y_pred, sample_weight, regularization_losses=self.losses)
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\compile_utils.py:204 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\losses.py:149 __call__
losses = ag_call(y_true, y_pred)
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\losses.py:253 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
return target(*args, **kwargs)
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\losses.py:1567 sparse_categorical_crossentropy
y_true, y_pred, from_logits=from_logits, axis=axis)
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
return target(*args, **kwargs)
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\backend.py:4783 sparse_categorical_crossentropy
labels=target, logits=output)
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
return target(*args, **kwargs)
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\ops\nn_ops.py:4176 sparse_softmax_cross_entropy_with_logits_v2
labels=labels, logits=logits, name=name)
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
return target(*args, **kwargs)
c:\users\na462\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\ops\nn_ops.py:4091 sparse_softmax_cross_entropy_with_logits
logits.get_shape()))
ValueError: Shape mismatch: The shape of labels (received (32, 1)) should equal the shape of logits except for the last dimension (received (32, 28, 10)).

You forgot to add a Flatten layer. Your input is 3D, (None, 28, 28), but your output should be only 2D, (None, 10). Since you did not flatten the input, you are getting (None, 28, 10) as the output shape, which is wrong.
also i think you did not use one hot encoding, because i can see (32,1) as your output shape rather then (32, 10). you need to change your last Dense layer neuron to 1, or use one hot encoding.
if you want to use Dense(1) then you have to make sure that your mnist dataset output is numeric, such as 0,1,2,3,4..9. but if you want to use Dense(10) then you have to use onehot encoding, your output will be [1,0,0,0,0,0,0,0,0,0] this is equal to 0 in decimal number system. [0,1,0,0,0,0,0,0,0,0] this is equal to 1 in decimal number system. it will have ten columns, each represent single decimal number, if a number in a column is 1 which mean it is that decimal number.
1st column mean 0, [1,0,0,0,0,0,0,0,0,0]
2nd column mean 1 [0,1,0,0,0,0,0,0,0,0]
3rd column mean 2 [0,0,1,0,0,0,0,0,0,0]
so on
last column mean 9 [0,0,0,0,0,0,0,0,0,1]
As for the Flatten layer: if your input is an image, it has width, height and color, which are 3 dimensions. But as you can see above, your output is either a single number or 10 columns of 0s and 1s. To get from the multi-dimensional input to that 1-dimensional output, you have to use a Flatten layer. There is also another, hidden dimension that represents your data samples, i.e. the batch size; in your case it is 32. So your input is (32, 28, 28) and your output is (32, 10).
Another thing: you can also put the Flatten layer first, right after the input, or give the input in the form (None, 784), where 784 is 28 multiplied by 28.

Code crashes with message : Failed to get convolution algorithm

I am clueless as to why I keep getting this error. I used the same CNN model to train MNIST dataset, but I did not face any issue previously. Out of nowhere, I start getting this issue. I haven't installed any libraries during that time frame, my gpu drivers are up to date.
I also did a fresh install of CUDA 10.1 with cuDNN v8.0.4 (for cuda 10.1), using tensorflow version 2.3.0 and Anaconda version 2020.07
This is the model:
# Fully convolutional classifier: five Conv2D layers, global average pooling, softmax.
model=Sequential()
# input_shape=(None,None,1): variable height/width, one channel, channels-last layout.
model.add(Conv2D(64,filter_size1,strides=(1,1),input_shape=(None,None,1), data_format='channels_last'))
# NOTE(review): input_shape on the layers below is presumably redundant, since
# Keras infers it from the previous layer's output -- confirm.
model.add(Conv2D(43,filter_size2,input_shape=(None,None,64), data_format='channels_last'))
model.add(Conv2D(29,filter_size2,input_shape=(None,None,43), data_format='channels_last'))
model.add(Conv2D(19,filter_size2,input_shape=(None,None,29), data_format='channels_last'))
# The last convolution has 10 filters, one per output class.
model.add(Conv2D(10,filter_size2, input_shape=(None,None,19), data_format='channels_last'))
# Average each of the 10 feature maps to a single value, then apply softmax.
model.add(GlobalAveragePooling2D())
model.add(Activation(activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
# to_categorical one-hot encodes the labels, as categorical_crossentropy expects.
model.fit(X_train, to_categorical(y_train), epochs=5)
This is the error I've been getting:
UnknownError Traceback (most recent call last)
<ipython-input-23-8765eb732021> in <module>
----> 1 model.fit(X_train, to_categorical(y_train), epochs=5)
~\.conda\envs\GPUEnv\lib\site-packages\tensorflow\python\keras\engine\training.py in _method_wrapper(self, *args, **kwargs)
106 def _method_wrapper(self, *args, **kwargs):
107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
--> 108 return method(self, *args, **kwargs)
109
110 # Running inside `run_distribute_coordinator` already.
~\.conda\envs\GPUEnv\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1096 batch_size=batch_size):
1097 callbacks.on_train_batch_begin(step)
-> 1098 tmp_logs = train_function(iterator)
1099 if data_handler.should_sync:
1100 context.async_wait()
~\.conda\envs\GPUEnv\lib\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
778 else:
779 compiler = "nonXla"
--> 780 result = self._call(*args, **kwds)
781
782 new_tracing_count = self._get_tracing_count()
~\.conda\envs\GPUEnv\lib\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
838 # Lifting succeeded, so variables are initialized and we can run the
839 # stateless function.
--> 840 return self._stateless_fn(*args, **kwds)
841 else:
842 canon_args, canon_kwds = \
~\.conda\envs\GPUEnv\lib\site-packages\tensorflow\python\eager\function.py in __call__(self, *args, **kwargs)
2827 with self._lock:
2828 graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 2829 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
2830
2831 #property
~\.conda\envs\GPUEnv\lib\site-packages\tensorflow\python\eager\function.py in _filtered_call(self, args, kwargs, cancellation_manager)
1841 `args` and `kwargs`.
1842
-> 1843 return self._call_flat(
1844 [t for t in nest.flatten((args, kwargs), expand_composites=True)
1845 if isinstance(t, (ops.Tensor,
~\.conda\envs\GPUEnv\lib\site-packages\tensorflow\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1921 and executing_eagerly):
1922 # No tape is watching; skip to running the function.
-> 1923 return self._build_call_outputs(self._inference_function.call(
1924 ctx, args, cancellation_manager=cancellation_manager))
1925 forward_backward = self._select_forward_and_backward_functions(
~\.conda\envs\GPUEnv\lib\site-packages\tensorflow\python\eager\function.py in call(self, ctx, args, cancellation_manager)
543 with _InterpolateFunctionError(self):
544 if cancellation_manager is None:
--> 545 outputs = execute.execute(
546 str(self.signature.name),
547 num_outputs=self._num_outputs,
~\.conda\envs\GPUEnv\lib\site-packages\tensorflow\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
57 try:
58 ctx.ensure_initialized()
---> 59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
[[node sequential_2/conv2d_9/Conv2D (defined at <ipython-input-23-8765eb732021>:1) ]] [Op:__inference_train_function_4211]
Function call stack:
train_function
Any help would be greatly appreciated!
Edit:
I loaded a model that I previously saved, and it seems to be working fine. But besides that model, no model is being executed.

I searched for quite a while on GitHub pages, and found out this chunk of code:
import tensorflow as tf

# Build the session config with GPU allow_growth already switched on,
# then open a TF1-compat session that uses it.
config = tf.compat.v1.ConfigProto(
    gpu_options=tf.compat.v1.GPUOptions(allow_growth=True)
)
sess = tf.compat.v1.Session(config=config)
As far as I understand, this makes TensorFlow allocate GPU memory on demand rather than reserving it all up front, which limits the amount of GPU memory being used. Please correct me if I'm wrong; it might help someone else facing the same issue.

Function call stack: train_function -> train_function -> train_function

My training data is a list of different length numpy arrays. For example,
x_train[0] = [[ 0.67836523 0.39823654 0.9661108 ... 0.19785157 0.1766675
0.6182588 ]
[-1.664766 -0.360997 0.096446 ... -0.635498 0.300886
-0.045028 ]
[-0.615297 -0.190688 -0.226994 ... 1.648792 -1.691676
-0.411259 ]
...
[-1.380328 -0.231574 -0.078576 ... 1.54852 -1.323094
1.493816 ]
[-2.35968 -4.016114 1.077576 ... -1.23973 -0.65608
1.095033 ]
[ 0.551824 0.115759 -0.163607 ... -0.285045 0.472944
-0.664072 ]]
Here are examples of the x_train and y_train dimensions:
x_train[0].shape = (1136, 512) x_train[1].shape = (650, 512)...etc
y_train[0].shape = (1136, 19) y_train[1].shape = (650, 19)...etc
Here is my model:
# Two stacked GRU layers that both return full sequences, followed by a
# per-timestep 19-way softmax. The time dimension is left unspecified (None).
model = Sequential([
    GRU(128, return_sequences=True, input_shape=(None, 512)),
    GRU(128, return_sequences=True),
    TimeDistributed(Dense(19, activation='softmax')),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()
# Render the architecture left-to-right into model.png, with shapes shown
# and layer names hidden.
plot_model(model, to_file='model.png', show_shapes=True,
           show_layer_names=False, rankdir='LR')
And a data generator for creating batches of sequences of similar length:
def train_generator(x, y):
    """Yield endless training batches of equal-length sequences.

    Each batch is built around one randomly chosen anchor sequence. Every
    other sequence that is at least as long as the anchor and at most 200
    steps longer is truncated to the anchor's length and added to the batch.

    Args:
        x: Indexable collection of per-sequence arrays; ``len(x[i])`` is the
            sequence length.
        y: Targets aligned with ``x``; ``y[i]`` is truncated to the length of
            the anchor's targets.

    Yields:
        Tuple ``(Xb, yb)`` of arrays stacked along a new leading batch axis.
        The anchor is the last entry; the other members precede it in
        reverse index order (the order produced by prepending each match).
    """
    while True:
        index = np.random.randint(len(x))
        anchor_len = len(x[index])
        target_len = len(y[index])

        # Collect rows in a list and stack once, instead of re-copying the
        # whole batch with np.append for every match.
        x_rows = [x[index]]
        y_rows = [y[index]]
        for i, seq in enumerate(x):
            # Logical `and` (not bitwise `&`) for combining two boolean tests.
            if i != index and 0 <= len(seq) - anchor_len <= 200:
                x_rows.append(seq[:anchor_len])
                y_rows.append(y[i][:target_len])

        # Reverse so the layout matches prepending: newest match first,
        # anchor last.
        yield (np.stack(x_rows[::-1]), np.stack(y_rows[::-1]))
Then fit:
# NOTE(review): train_generator loops forever (`while True`), so the epoch
# length presumably has to be given via steps_per_epoch — confirm. The
# traceback for this call also passes through TensorFlow's deprecation
# wrapper, so fit_generator appears to be deprecated in this version.
model.fit_generator(train_generator(x_train, y_train), epochs=10, verbose=1)
Here is the error I'm getting:
Epoch 1/10
---------------------------------------------------------------------------
UnknownError Traceback (most recent call last)
<ipython-input-324-9e3ade034201> in <module>
----> 1 model.fit_generator(train_generator(x_train, y_train), epochs=10, verbose=1)
~\miniconda3\lib\site-packages\tensorflow\python\util\deprecation.py in new_func(*args, **kwargs)
322 'in a future version' if date is None else ('after %s' % date),
323 instructions)
--> 324 return func(*args, **kwargs)
325 return tf_decorator.make_decorator(
326 func, new_func, 'deprecated',
~\miniconda3\lib\site-packages\tensorflow\python\keras\engine\training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
1827 use_multiprocessing=use_multiprocessing,
1828 shuffle=shuffle,
-> 1829 initial_epoch=initial_epoch)
1830
1831 @deprecation.deprecated(
~\miniconda3\lib\site-packages\tensorflow\python\keras\engine\training.py in _method_wrapper(self, *args, **kwargs)
106 def _method_wrapper(self, *args, **kwargs):
107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
--> 108 return method(self, *args, **kwargs)
109
110 # Running inside `run_distribute_coordinator` already.
~\miniconda3\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1096 batch_size=batch_size):
1097 callbacks.on_train_batch_begin(step)
-> 1098 tmp_logs = train_function(iterator)
1099 if data_handler.should_sync:
1100 context.async_wait()
~\miniconda3\lib\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
778 else:
779 compiler = "nonXla"
--> 780 result = self._call(*args, **kwds)
781
782 new_tracing_count = self._get_tracing_count()
~\miniconda3\lib\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
838 # Lifting succeeded, so variables are initialized and we can run the
839 # stateless function.
--> 840 return self._stateless_fn(*args, **kwds)
841 else:
842 canon_args, canon_kwds = \
~\miniconda3\lib\site-packages\tensorflow\python\eager\function.py in __call__(self, *args, **kwargs)
2827 with self._lock:
2828 graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 2829 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
2830
2831 @property
~\miniconda3\lib\site-packages\tensorflow\python\eager\function.py in _filtered_call(self, args, kwargs, cancellation_manager)
1846 resource_variable_ops.BaseResourceVariable))],
1847 captured_inputs=self.captured_inputs,
-> 1848 cancellation_manager=cancellation_manager)
1849
1850 def _call_flat(self, args, captured_inputs, cancellation_manager=None):
~\miniconda3\lib\site-packages\tensorflow\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1922 # No tape is watching; skip to running the function.
1923 return self._build_call_outputs(self._inference_function.call(
-> 1924 ctx, args, cancellation_manager=cancellation_manager))
1925 forward_backward = self._select_forward_and_backward_functions(
1926 args,
~\miniconda3\lib\site-packages\tensorflow\python\eager\function.py in call(self, ctx, args, cancellation_manager)
548 inputs=args,
549 attrs=attrs,
--> 550 ctx=ctx)
551 else:
552 outputs = execute.execute_with_cancellation(
~\miniconda3\lib\site-packages\tensorflow\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
58 ctx.ensure_initialized()
59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
62 if name is not None:
UnknownError: Fail to find the dnn implementation.
[[{{node CudnnRNN}}]]
[[sequential_18/gru_36/PartitionedCall]] [Op:__inference_train_function_54574]
Function call stack:
train_function -> train_function -> train_function
I've tried googling the error myself, but didn't find any solution.
Any help would be massively appreciated :)

Could not create cudnn handle: CUDNN_STATUS_ALLOC_FAILED

HERE IS THE MODEL I AM USING:
#import tensorflow as tf
def create_model():
    """Build the Sequential CNN: four conv layers, three max-pools, three dense layers.

    The first layer declares an input shape of (156, 256, 3) and the final
    layer is an 8-unit softmax. All conv/pool layers use 'valid' padding and
    the 'channels_last' data format; every kernel initializer has a fixed seed
    and every layer an explicit name.

    Returns:
        An uncompiled ``tf.keras.models.Sequential`` instance.
    """
    layers = tf.keras.layers
    initializers = tf.keras.initializers
    channels_last = 'channels_last'

    def conv(name, filters, strides, seed, **extra):
        # 3x3 ReLU convolution with a seeded He-normal kernel initializer.
        return layers.Conv2D(
            filters=filters,
            kernel_size=(3, 3),
            strides=strides,
            padding='valid',
            data_format=channels_last,
            activation='relu',
            kernel_initializer=initializers.he_normal(seed=seed),
            name=name,
            **extra
        )

    def pool(name, strides):
        # 2x2 max-pooling with the given strides.
        return layers.MaxPool2D(
            pool_size=(2, 2),
            strides=strides,
            padding='valid',
            data_format=channels_last,
            name=name,
        )

    def dense(name, units, activation, seed):
        # Fully connected layer with a seeded Glorot-normal kernel initializer.
        return layers.Dense(
            units=units,
            activation=activation,
            kernel_initializer=initializers.glorot_normal(seed=seed),
            name=name,
        )

    stack = [
        conv('Conv1', 32, (1, 1), 0, input_shape=(156, 256, 3)),
        pool('Pool1', (2, 2)),
        conv('Conv2', 64, (1, 1), 3),
        conv('Conv3', 64, (2, 2), 5),
        pool('Pool2', (1, 1)),
        conv('Conv4', 128, (2, 2), 9),
        pool('Pool3', (2, 2)),
        layers.Flatten(data_format=channels_last, name='Flatten'),
        dense('FC1', 30, 'relu', 32),
        dense('FC2', 15, 'relu', 33),
        dense('Output', 8, 'softmax', 3),
    ]
    return tf.keras.models.Sequential(stack)
HERE IS THE ERROR I AM GETTING :
UnknownError Traceback (most recent call last)
<ipython-input-47-264c0fcc37e1> in <module>
1 ##fitting generator
----> 2 model.fit_generator(ImageGenerator,steps_per_epoch=216,epochs=3)
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
1295 shuffle=shuffle,
1296 initial_epoch=initial_epoch,
-> 1297 steps_name='steps_per_epoch')
1298
1299 def evaluate_generator(self,
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\training_generator.py in model_iteration(model, data, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch, mode, batch_size, steps_name, **kwargs)
263
264 is_deferred = not model._is_compiled
--> 265 batch_outs = batch_function(*batch_data)
266 if not isinstance(batch_outs, list):
267 batch_outs = [batch_outs]
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\training.py in train_on_batch(self, x, y, sample_weight, class_weight, reset_metrics)
971 outputs = training_v2_utils.train_on_batch(
972 self, x, y=y, sample_weight=sample_weight,
--> 973 class_weight=class_weight, reset_metrics=reset_metrics)
974 outputs = (outputs['total_loss'] + outputs['output_losses'] +
975 outputs['metrics'])
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in train_on_batch(model, x, y, sample_weight, class_weight, reset_metrics)
262 y,
263 sample_weights=sample_weights,
--> 264 output_loss_metrics=model._output_loss_metrics)
265
266 if reset_metrics:
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in train_on_batch(model, inputs, targets, sample_weights, output_loss_metrics)
309 sample_weights=sample_weights,
310 training=True,
--> 311 output_loss_metrics=output_loss_metrics))
312 if not isinstance(outs, list):
313 outs = [outs]
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _process_single_batch(model, inputs, targets, output_loss_metrics, sample_weights, training)
250 output_loss_metrics=output_loss_metrics,
251 sample_weights=sample_weights,
--> 252 training=training))
253 if total_loss is None:
254 raise ValueError('The model cannot be run '
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
125 inputs = nest.map_structure(ops.convert_to_tensor, inputs)
126
--> 127 outs = model(inputs, **kwargs)
128 outs = nest.flatten(outs)
129
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py in __call__(self, inputs, *args, **kwargs)
889 with base_layer_utils.autocast_context_manager(
890 self._compute_dtype):
--> 891 outputs = self.call(cast_inputs, *args, **kwargs)
892 self._handle_activity_regularization(inputs, outputs)
893 self._set_mask_metadata(inputs, outputs, input_masks)
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\sequential.py in call(self, inputs, training, mask)
254 if not self.built:
255 self._init_graph_network(self.inputs, self.outputs, name=self.name)
--> 256 return super(Sequential, self).call(inputs, training=training, mask=mask)
257
258 outputs = inputs # handle the corner case where self.layers is empty
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\network.py in call(self, inputs, training, mask)
706 return self._run_internal_graph(
707 inputs, training=training, mask=mask,
--> 708 convert_kwargs_to_constants=base_layer_utils.call_context().saving)
709
710 def compute_output_shape(self, input_shape):
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\network.py in _run_internal_graph(self, inputs, training, mask, convert_kwargs_to_constants)
858
859 # Compute outputs.
--> 860 output_tensors = layer(computed_tensors, **kwargs)
861
862 # Update tensor_dict.
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py in __call__(self, inputs, *args, **kwargs)
889 with base_layer_utils.autocast_context_manager(
890 self._compute_dtype):
--> 891 outputs = self.call(cast_inputs, *args, **kwargs)
892 self._handle_activity_regularization(inputs, outputs)
893 self._set_mask_metadata(inputs, outputs, input_masks)
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\layers\convolutional.py in call(self, inputs)
195
196 def call(self, inputs):
--> 197 outputs = self._convolution_op(inputs, self.kernel)
198
199 if self.use_bias:
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in __call__(self, inp, filter)
1132 call_from_convolution=False)
1133 else:
-> 1134 return self.conv_op(inp, filter)
1135 # copybara:strip_end
1136 # copybara:insert return self.conv_op(inp, filter)
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in __call__(self, inp, filter)
637
638 def __call__(self, inp, filter): # pylint: disable=redefined-builtin
--> 639 return self.call(inp, filter)
640
641
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in __call__(self, inp, filter)
236 padding=self.padding,
237 data_format=self.data_format,
--> 238 name=self.name)
239
240
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in conv2d(input, filter, strides, padding, use_cudnn_on_gpu, data_format, dilations, name, filters)
2008 data_format=data_format,
2009 dilations=dilations,
-> 2010 name=name)
2011
2012
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\ops\gen_nn_ops.py in conv2d(input, filter, strides, padding, use_cudnn_on_gpu, explicit_paddings, data_format, dilations, name)
1029 input, filter, strides=strides, use_cudnn_on_gpu=use_cudnn_on_gpu,
1030 padding=padding, explicit_paddings=explicit_paddings,
-> 1031 data_format=data_format, dilations=dilations, name=name, ctx=_ctx)
1032 except _core._SymbolicException:
1033 pass # Add nodes to the TensorFlow graph.
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\ops\gen_nn_ops.py in conv2d_eager_fallback(input, filter, strides, padding, use_cudnn_on_gpu, explicit_paddings, data_format, dilations, name, ctx)
1128 explicit_paddings, "data_format", data_format, "dilations", dilations)
1129 _result = _execute.execute(b"Conv2D", 1, inputs=_inputs_flat, attrs=_attrs,
-> 1130 ctx=_ctx, name=name)
1131 _execute.record_gradient(
1132 "Conv2D", _inputs_flat, _attrs, _result, name)
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
65 else:
66 message = e.message
---> 67 six.raise_from(core._status_to_exception(e.code, message), None)
68 except TypeError as e:
69 keras_symbolic_tensors = [
D:\anaconda\envs\tf_gpu\lib\site-packages\six.py in raise_from(value, from_value)
UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above. [Op:Conv2D]
I am using TensorFlow 2.0 installed with Anaconda, with CUDA version 10.2.
Can anyone please help me with this? The same installation works fine when I am not using a CNN.
Is it because I am using Conv2D, or is it because I am using a generator?
I am on a Windows 10 machine with 16 GB of RAM and a 4 GB NVIDIA 1650 graphics card.

I got the same error and resolved it with the code below:
gpus = tf.config.experimental.list_physical_devices('GPU')
# Only configure a device when one is actually visible: indexing gpus[0] on an
# empty list would raise IndexError on a machine without a usable GPU.
if gpus:
    # Expose the first GPU as a single virtual device with memory_limit=4000
    # (megabytes, per the TensorFlow documentation).
    tf.config.experimental.set_virtual_device_configuration(
        gpus[0],
        [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4000)],
    )
(with GTX 1660, 6G memory)

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

How can I implement VGG-net on a dataset of different shape? - keras

It seems the issue was that I had a dense output layer of size 10, while the labels have size 1. The solution was to use the sparse categorical cross-entropy loss function instead of plain categorical cross-entropy.

Related

ValueError: The channel dimension of the inputs should be defined. Found `None`

Implementing Wide and Deep Neural Network using Functional API

Code crashes with message : Failed to get convolution algorithm

Function call stack: train_function -> train_function -> train_function

Could not create cudnn handle: CUDNN_STATUS_ALLOC_FAILED

Categories

Resources