Related
I am clueless as to why I keep getting this error. I previously used the same CNN model to train on the MNIST dataset and did not face any issue. Out of nowhere, I started getting this error. I haven't installed any libraries during that time frame, and my GPU drivers are up to date.
I also did a fresh install of CUDA 10.1 with cuDNN v8.0.4 (for cuda 10.1), using tensorflow version 2.3.0 and Anaconda version 2020.07
This is the model:
# Fully convolutional classifier: five Conv2D layers, a global average over
# the spatial axes, then a softmax over the remaining 10 channels.
# Spatial dimensions are left unspecified (None) in every input_shape.
model = Sequential([
    Conv2D(64, filter_size1, strides=(1, 1),
           input_shape=(None, None, 1), data_format='channels_last'),
    Conv2D(43, filter_size2,
           input_shape=(None, None, 64), data_format='channels_last'),
    Conv2D(29, filter_size2,
           input_shape=(None, None, 43), data_format='channels_last'),
    Conv2D(19, filter_size2,
           input_shape=(None, None, 29), data_format='channels_last'),
    Conv2D(10, filter_size2,
           input_shape=(None, None, 19), data_format='channels_last'),
    GlobalAveragePooling2D(),
    Activation(activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Integer labels are one-hot encoded to match the categorical loss.
model.fit(X_train, to_categorical(y_train), epochs=5)
This is the error I've been getting:
UnknownError Traceback (most recent call last)
<ipython-input-23-8765eb732021> in <module>
----> 1 model.fit(X_train, to_categorical(y_train), epochs=5)
~\.conda\envs\GPUEnv\lib\site-packages\tensorflow\python\keras\engine\training.py in _method_wrapper(self, *args, **kwargs)
106 def _method_wrapper(self, *args, **kwargs):
107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
--> 108 return method(self, *args, **kwargs)
109
110 # Running inside `run_distribute_coordinator` already.
~\.conda\envs\GPUEnv\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1096 batch_size=batch_size):
1097 callbacks.on_train_batch_begin(step)
-> 1098 tmp_logs = train_function(iterator)
1099 if data_handler.should_sync:
1100 context.async_wait()
~\.conda\envs\GPUEnv\lib\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
778 else:
779 compiler = "nonXla"
--> 780 result = self._call(*args, **kwds)
781
782 new_tracing_count = self._get_tracing_count()
~\.conda\envs\GPUEnv\lib\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
838 # Lifting succeeded, so variables are initialized and we can run the
839 # stateless function.
--> 840 return self._stateless_fn(*args, **kwds)
841 else:
842 canon_args, canon_kwds = \
~\.conda\envs\GPUEnv\lib\site-packages\tensorflow\python\eager\function.py in __call__(self, *args, **kwargs)
2827 with self._lock:
2828 graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 2829 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
2830
2831 @property
~\.conda\envs\GPUEnv\lib\site-packages\tensorflow\python\eager\function.py in _filtered_call(self, args, kwargs, cancellation_manager)
1841 `args` and `kwargs`.
1842
-> 1843 return self._call_flat(
1844 [t for t in nest.flatten((args, kwargs), expand_composites=True)
1845 if isinstance(t, (ops.Tensor,
~\.conda\envs\GPUEnv\lib\site-packages\tensorflow\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1921 and executing_eagerly):
1922 # No tape is watching; skip to running the function.
-> 1923 return self._build_call_outputs(self._inference_function.call(
1924 ctx, args, cancellation_manager=cancellation_manager))
1925 forward_backward = self._select_forward_and_backward_functions(
~\.conda\envs\GPUEnv\lib\site-packages\tensorflow\python\eager\function.py in call(self, ctx, args, cancellation_manager)
543 with _InterpolateFunctionError(self):
544 if cancellation_manager is None:
--> 545 outputs = execute.execute(
546 str(self.signature.name),
547 num_outputs=self._num_outputs,
~\.conda\envs\GPUEnv\lib\site-packages\tensorflow\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
57 try:
58 ctx.ensure_initialized()
---> 59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
[[node sequential_2/conv2d_9/Conv2D (defined at <ipython-input-23-8765eb732021>:1) ]] [Op:__inference_train_function_4211]
Function call stack:
train_function
Any help would be greatly appreciated!
Edit:
I loaded a model that I previously saved, and it seems to be working fine. But besides that model, no model is being executed.
I searched for quite a while on GitHub pages, and found this chunk of code:
import tensorflow as tf
# Build a TF1-compatibility session configuration object.
config = tf.compat.v1.ConfigProto()
# NOTE(review): allow_growth presumably makes TensorFlow allocate GPU memory
# on demand instead of reserving it up front — confirm against the TF GPU guide.
config.gpu_options.allow_growth = True
# Create a session that carries the configuration above.
sess = tf.compat.v1.Session(config=config)
As far as I can understand, it limits the amount of GPU memory being used. Please do correct me if I'm wrong; it might help someone else facing the same issue.
My training data is a list of NumPy arrays of different lengths. For example,
x_train[0] = [[ 0.67836523 0.39823654 0.9661108 ... 0.19785157 0.1766675
0.6182588 ]
[-1.664766 -0.360997 0.096446 ... -0.635498 0.300886
-0.045028 ]
[-0.615297 -0.190688 -0.226994 ... 1.648792 -1.691676
-0.411259 ]
...
[-1.380328 -0.231574 -0.078576 ... 1.54852 -1.323094
1.493816 ]
[-2.35968 -4.016114 1.077576 ... -1.23973 -0.65608
1.095033 ]
[ 0.551824 0.115759 -0.163607 ... -0.285045 0.472944
-0.664072 ]]
Here are examples of the x_train and y_train dimensions:
x_train[0].shape = (1136, 512) x_train[1].shape = (650, 512)...etc
y_train[0].shape = (1136, 19) y_train[1].shape = (650, 19)...etc
Here is my model:
# Two stacked GRUs that return the full output sequence, followed by a
# Dense softmax over 19 classes wrapped in TimeDistributed. The first input
# dimension is left unspecified (None); each step has 512 features.
model = Sequential([
    GRU(128, return_sequences=True, input_shape=(None, 512)),
    GRU(128, return_sequences=True),
    TimeDistributed(Dense(19, activation='softmax')),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

# Write a diagram of the model to model.png.
plot_model(
    model,
    to_file='model.png',
    show_shapes=True,
    show_layer_names=False,
    rankdir='LR',
)
And a data generator for creating batches of sequences of similar length:
def train_generator(x, y, max_len_diff=200):
    """Endlessly yield batches of sequences cropped to one common length.

    Each batch is built around one randomly chosen anchor sample. Every
    other sample that is at least as long as the anchor and at most
    ``max_len_diff`` steps longer is cropped to the anchor's length and
    added to the batch.

    Args:
        x: Sequence of input arrays; axis 0 of each array is the axis that
            is compared and cropped.
        y: Sequence of target arrays, indexed in parallel with ``x``.
        max_len_diff: Largest allowed length surplus over the anchor for a
            sample to join the batch. Defaults to 200.

    Yields:
        A tuple ``(Xb, yb)`` of stacked arrays with a new leading batch
        axis. The anchor sample is the last entry of each batch.

    Raises:
        ValueError: If ``x`` is empty (raised on the first ``next()``).
    """
    if len(x) == 0:
        raise ValueError("train_generator needs at least one training sample")

    while True:
        index = np.random.randint(len(x))
        x_len = len(x[index])
        y_len = len(y[index])

        # Plain `and` short-circuits and states the intent; the original
        # combined the two boolean tests with bitwise `&`.
        chosen = [
            i for i, seq in enumerate(x)
            if i != index and 0 <= len(seq) - x_len <= max_len_diff
        ]

        # Stack once rather than re-copying the whole batch for each added
        # sample. The original prepended each match, so the order is kept:
        # matches in reverse index order, anchor last.
        Xb = np.stack([x[i][:x_len] for i in reversed(chosen)] + [x[index]])
        yb = np.stack([y[i][:y_len] for i in reversed(chosen)] + [y[index]])
        yield (Xb, yb)
Then fit:
# train_generator loops forever (`while True`), so the epoch length has to
# be stated explicitly; one step per training sequence is used here.
model.fit_generator(
    train_generator(x_train, y_train),
    steps_per_epoch=len(x_train),
    epochs=10,
    verbose=1,
)
Here is the error I'm getting:
Epoch 1/10
---------------------------------------------------------------------------
UnknownError Traceback (most recent call last)
<ipython-input-324-9e3ade034201> in <module>
----> 1 model.fit_generator(train_generator(x_train, y_train), epochs=10, verbose=1)
~\miniconda3\lib\site-packages\tensorflow\python\util\deprecation.py in new_func(*args, **kwargs)
322 'in a future version' if date is None else ('after %s' % date),
323 instructions)
--> 324 return func(*args, **kwargs)
325 return tf_decorator.make_decorator(
326 func, new_func, 'deprecated',
~\miniconda3\lib\site-packages\tensorflow\python\keras\engine\training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
1827 use_multiprocessing=use_multiprocessing,
1828 shuffle=shuffle,
-> 1829 initial_epoch=initial_epoch)
1830
1831 @deprecation.deprecated(
~\miniconda3\lib\site-packages\tensorflow\python\keras\engine\training.py in _method_wrapper(self, *args, **kwargs)
106 def _method_wrapper(self, *args, **kwargs):
107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
--> 108 return method(self, *args, **kwargs)
109
110 # Running inside `run_distribute_coordinator` already.
~\miniconda3\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1096 batch_size=batch_size):
1097 callbacks.on_train_batch_begin(step)
-> 1098 tmp_logs = train_function(iterator)
1099 if data_handler.should_sync:
1100 context.async_wait()
~\miniconda3\lib\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
778 else:
779 compiler = "nonXla"
--> 780 result = self._call(*args, **kwds)
781
782 new_tracing_count = self._get_tracing_count()
~\miniconda3\lib\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
838 # Lifting succeeded, so variables are initialized and we can run the
839 # stateless function.
--> 840 return self._stateless_fn(*args, **kwds)
841 else:
842 canon_args, canon_kwds = \
~\miniconda3\lib\site-packages\tensorflow\python\eager\function.py in __call__(self, *args, **kwargs)
2827 with self._lock:
2828 graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 2829 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
2830
2831 @property
~\miniconda3\lib\site-packages\tensorflow\python\eager\function.py in _filtered_call(self, args, kwargs, cancellation_manager)
1846 resource_variable_ops.BaseResourceVariable))],
1847 captured_inputs=self.captured_inputs,
-> 1848 cancellation_manager=cancellation_manager)
1849
1850 def _call_flat(self, args, captured_inputs, cancellation_manager=None):
~\miniconda3\lib\site-packages\tensorflow\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1922 # No tape is watching; skip to running the function.
1923 return self._build_call_outputs(self._inference_function.call(
-> 1924 ctx, args, cancellation_manager=cancellation_manager))
1925 forward_backward = self._select_forward_and_backward_functions(
1926 args,
~\miniconda3\lib\site-packages\tensorflow\python\eager\function.py in call(self, ctx, args, cancellation_manager)
548 inputs=args,
549 attrs=attrs,
--> 550 ctx=ctx)
551 else:
552 outputs = execute.execute_with_cancellation(
~\miniconda3\lib\site-packages\tensorflow\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
58 ctx.ensure_initialized()
59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
62 if name is not None:
UnknownError: Fail to find the dnn implementation.
[[{{node CudnnRNN}}]]
[[sequential_18/gru_36/PartitionedCall]] [Op:__inference_train_function_54574]
Function call stack:
train_function -> train_function -> train_function
I've tried googling the error myself, but didn't find any solution.
Any help would be massively appreciated :)
HERE IS THE MODEL I AM USING:
#import tensorflow as tf
def create_model():
    """Build the uncompiled image-classification CNN.

    The network takes ``(156, 256, 3)`` channels-last inputs and stacks four
    3x3 ReLU convolutions (He-normal initialised with fixed seeds) with
    three max-pooling layers. It then flattens into two ReLU dense layers
    and an 8-unit softmax output.

    Returns:
        A ``tf.keras.models.Sequential`` instance.
    """
    layers = tf.keras.layers
    init = tf.keras.initializers

    # Settings shared by every convolution and every pooling layer.
    conv_common = dict(kernel_size=(3, 3), padding='valid',
                       data_format='channels_last', activation='relu')
    pool_common = dict(pool_size=(2, 2), padding='valid',
                       data_format='channels_last')

    stack = [
        layers.Conv2D(filters=32, strides=(1, 1), input_shape=(156, 256, 3),
                      kernel_initializer=init.he_normal(seed=0),
                      name='Conv1', **conv_common),
        layers.MaxPool2D(strides=(2, 2), name='Pool1', **pool_common),
        layers.Conv2D(filters=64, strides=(1, 1),
                      kernel_initializer=init.he_normal(seed=3),
                      name='Conv2', **conv_common),
        layers.Conv2D(filters=64, strides=(2, 2),
                      kernel_initializer=init.he_normal(seed=5),
                      name='Conv3', **conv_common),
        layers.MaxPool2D(strides=(1, 1), name='Pool2', **pool_common),
        layers.Conv2D(filters=128, strides=(2, 2),
                      kernel_initializer=init.he_normal(seed=9),
                      name='Conv4', **conv_common),
        layers.MaxPool2D(strides=(2, 2), name='Pool3', **pool_common),
        layers.Flatten(data_format='channels_last', name='Flatten'),
        layers.Dense(units=30, activation='relu',
                     kernel_initializer=init.glorot_normal(seed=32),
                     name='FC1'),
        layers.Dense(units=15, activation='relu',
                     kernel_initializer=init.glorot_normal(seed=33),
                     name='FC2'),
        layers.Dense(units=8, activation='softmax',
                     kernel_initializer=init.glorot_normal(seed=3),
                     name='Output'),
    ]
    return tf.keras.models.Sequential(stack)
HERE IS THE ERROR I AM GETTING :
UnknownError Traceback (most recent call last)
<ipython-input-47-264c0fcc37e1> in <module>
1 ##fitting generator
----> 2 model.fit_generator(ImageGenerator,steps_per_epoch=216,epochs=3)
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
1295 shuffle=shuffle,
1296 initial_epoch=initial_epoch,
-> 1297 steps_name='steps_per_epoch')
1298
1299 def evaluate_generator(self,
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\training_generator.py in model_iteration(model, data, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch, mode, batch_size, steps_name, **kwargs)
263
264 is_deferred = not model._is_compiled
--> 265 batch_outs = batch_function(*batch_data)
266 if not isinstance(batch_outs, list):
267 batch_outs = [batch_outs]
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\training.py in train_on_batch(self, x, y, sample_weight, class_weight, reset_metrics)
971 outputs = training_v2_utils.train_on_batch(
972 self, x, y=y, sample_weight=sample_weight,
--> 973 class_weight=class_weight, reset_metrics=reset_metrics)
974 outputs = (outputs['total_loss'] + outputs['output_losses'] +
975 outputs['metrics'])
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in train_on_batch(model, x, y, sample_weight, class_weight, reset_metrics)
262 y,
263 sample_weights=sample_weights,
--> 264 output_loss_metrics=model._output_loss_metrics)
265
266 if reset_metrics:
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in train_on_batch(model, inputs, targets, sample_weights, output_loss_metrics)
309 sample_weights=sample_weights,
310 training=True,
--> 311 output_loss_metrics=output_loss_metrics))
312 if not isinstance(outs, list):
313 outs = [outs]
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _process_single_batch(model, inputs, targets, output_loss_metrics, sample_weights, training)
250 output_loss_metrics=output_loss_metrics,
251 sample_weights=sample_weights,
--> 252 training=training))
253 if total_loss is None:
254 raise ValueError('The model cannot be run '
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
125 inputs = nest.map_structure(ops.convert_to_tensor, inputs)
126
--> 127 outs = model(inputs, **kwargs)
128 outs = nest.flatten(outs)
129
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py in __call__(self, inputs, *args, **kwargs)
889 with base_layer_utils.autocast_context_manager(
890 self._compute_dtype):
--> 891 outputs = self.call(cast_inputs, *args, **kwargs)
892 self._handle_activity_regularization(inputs, outputs)
893 self._set_mask_metadata(inputs, outputs, input_masks)
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\sequential.py in call(self, inputs, training, mask)
254 if not self.built:
255 self._init_graph_network(self.inputs, self.outputs, name=self.name)
--> 256 return super(Sequential, self).call(inputs, training=training, mask=mask)
257
258 outputs = inputs # handle the corner case where self.layers is empty
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\network.py in call(self, inputs, training, mask)
706 return self._run_internal_graph(
707 inputs, training=training, mask=mask,
--> 708 convert_kwargs_to_constants=base_layer_utils.call_context().saving)
709
710 def compute_output_shape(self, input_shape):
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\network.py in _run_internal_graph(self, inputs, training, mask, convert_kwargs_to_constants)
858
859 # Compute outputs.
--> 860 output_tensors = layer(computed_tensors, **kwargs)
861
862 # Update tensor_dict.
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py in __call__(self, inputs, *args, **kwargs)
889 with base_layer_utils.autocast_context_manager(
890 self._compute_dtype):
--> 891 outputs = self.call(cast_inputs, *args, **kwargs)
892 self._handle_activity_regularization(inputs, outputs)
893 self._set_mask_metadata(inputs, outputs, input_masks)
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\keras\layers\convolutional.py in call(self, inputs)
195
196 def call(self, inputs):
--> 197 outputs = self._convolution_op(inputs, self.kernel)
198
199 if self.use_bias:
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in __call__(self, inp, filter)
1132 call_from_convolution=False)
1133 else:
-> 1134 return self.conv_op(inp, filter)
1135 # copybara:strip_end
1136 # copybara:insert return self.conv_op(inp, filter)
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in __call__(self, inp, filter)
637
638 def __call__(self, inp, filter): # pylint: disable=redefined-builtin
--> 639 return self.call(inp, filter)
640
641
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in __call__(self, inp, filter)
236 padding=self.padding,
237 data_format=self.data_format,
--> 238 name=self.name)
239
240
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in conv2d(input, filter, strides, padding, use_cudnn_on_gpu, data_format, dilations, name, filters)
2008 data_format=data_format,
2009 dilations=dilations,
-> 2010 name=name)
2011
2012
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\ops\gen_nn_ops.py in conv2d(input, filter, strides, padding, use_cudnn_on_gpu, explicit_paddings, data_format, dilations, name)
1029 input, filter, strides=strides, use_cudnn_on_gpu=use_cudnn_on_gpu,
1030 padding=padding, explicit_paddings=explicit_paddings,
-> 1031 data_format=data_format, dilations=dilations, name=name, ctx=_ctx)
1032 except _core._SymbolicException:
1033 pass # Add nodes to the TensorFlow graph.
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\ops\gen_nn_ops.py in conv2d_eager_fallback(input, filter, strides, padding, use_cudnn_on_gpu, explicit_paddings, data_format, dilations, name, ctx)
1128 explicit_paddings, "data_format", data_format, "dilations", dilations)
1129 _result = _execute.execute(b"Conv2D", 1, inputs=_inputs_flat, attrs=_attrs,
-> 1130 ctx=_ctx, name=name)
1131 _execute.record_gradient(
1132 "Conv2D", _inputs_flat, _attrs, _result, name)
D:\anaconda\envs\tf_gpu\lib\site-packages\tensorflow_core\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
65 else:
66 message = e.message
---> 67 six.raise_from(core._status_to_exception(e.code, message), None)
68 except TypeError as e:
69 keras_symbolic_tensors = [
D:\anaconda\envs\tf_gpu\lib\site-packages\six.py in raise_from(value, from_value)
UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above. [Op:Conv2D]
I am using TensorFlow 2.0 installed with Anaconda; the CUDA version is 10.2.
Can anyone please help me? The same installation works fine when I am not using a CNN.
Is it because I am using Conv2D, or is it because I am using a generator?
I am on a Windows 10 machine with 16 GB of RAM and a 4 GB NVIDIA 1650 graphics card.
I got the same error and resolved it with the code below:
gpus = tf.config.experimental.list_physical_devices('GPU')
# Only configure a memory cap when a GPU is actually visible; indexing
# gpus[0] on an empty list would raise IndexError.
if gpus:
    # Restrict TensorFlow to a single virtual device with a memory_limit of
    # 4000 on the first GPU (the value is in MB per the TensorFlow docs).
    tf.config.experimental.set_virtual_device_configuration(
        gpus[0],
        [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4000)],
    )
(on a GTX 1660 with 6 GB of memory)
I am trying to train my network and ran into this error in model.fit_generator.
# Train for NUM_EPOCHS, using len() of each generator as its step count.
model.fit_generator(
    train_gen,
    steps_per_epoch=len(train_gen),
    epochs=NUM_EPOCHS,
    validation_data=valid_gen,
    validation_steps=len(valid_gen),
)
The error `IndexError: list index out of range` is reported; I will attach the traceback at the end since it is too long.
train_gen and valid_gen are created by flow_from_directory. I've inspected len(train_gen) and len(valid_gen), but I can't see where the problem lies.
If any further information needed, please let me know, thanks a lot!
Traceback attached
IndexError Traceback (most recent call last)
<ipython-input-21-d09c3e50a215> in <module>
1 history = model.fit_generator(train_gen, validation_data=valid_gen,
2 epochs=NUM_EPOCHS, steps_per_epoch=len(train_gen),
----> 3 validation_steps=len(valid_gen))
~/anaconda3/envs/fastai/lib/python3.7/site-packages/keras/legacy/interfaces.py in wrapper(*args, **kwargs)
89 warnings.warn('Update your `' + object_name + '` call to the ' +
90 'Keras 2 API: ' + signature, stacklevel=2)
---> 91 return func(*args, **kwargs)
92 wrapper._original_function = func
93 return wrapper
~/anaconda3/envs/fastai/lib/python3.7/site-packages/keras/engine/training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
1416 use_multiprocessing=use_multiprocessing,
1417 shuffle=shuffle,
-> 1418 initial_epoch=initial_epoch)
1419
1420 @interfaces.legacy_generator_methods_support
~/anaconda3/envs/fastai/lib/python3.7/site-packages/keras/engine/training_generator.py in fit_generator(model, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
179 batch_index = 0
180 while steps_done < steps_per_epoch:
--> 181 generator_output = next(output_generator)
182
183 if not hasattr(generator_output, '__len__'):
~/anaconda3/envs/fastai/lib/python3.7/site-packages/keras/utils/data_utils.py in get(self)
707 "`use_multiprocessing=False, workers > 1`."
708 "For more information see issue #1638.")
--> 709 six.reraise(*sys.exc_info())
~/anaconda3/envs/fastai/lib/python3.7/site-packages/six.py in reraise(tp, value, tb)
691 if value.__traceback__ is not tb:
692 raise value.with_traceback(tb)
--> 693 raise value
694 finally:
695 value = None
~/anaconda3/envs/fastai/lib/python3.7/site-packages/keras/utils/data_utils.py in get(self)
683 try:
684 while self.is_running():
--> 685 inputs = self.queue.get(block=True).get()
686 self.queue.task_done()
687 if inputs is not None:
~/anaconda3/envs/fastai/lib/python3.7/multiprocessing/pool.py in get(self, timeout)
681 return self._value
682 else:
--> 683 raise self._value
684
685 def _set(self, i, obj):
~/anaconda3/envs/fastai/lib/python3.7/multiprocessing/pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
119 job, i, func, args, kwds = task
120 try:
--> 121 result = (True, func(*args, **kwds))
122 except Exception as e:
123 if wrap_exception and func is not _helper_reraises_exception:
~/anaconda3/envs/fastai/lib/python3.7/site-packages/keras/utils/data_utils.py in next_sample(uid)
624 The next value of generator `uid`.
625 """
--> 626 return six.next(_SHARED_SEQUENCES[uid])
627
628
~/anaconda3/envs/fastai/lib/python3.7/site-packages/keras_preprocessing/image/iterator.py in __next__(self, *args, **kwargs)
102
103 def __next__(self, *args, **kwargs):
--> 104 return self.next(*args, **kwargs)
105
106 def next(self):
~/anaconda3/envs/fastai/lib/python3.7/site-packages/keras_preprocessing/image/iterator.py in next(self)
114 # The transformation of images is not under thread lock
115 # so it can be done in parallel
--> 116 return self._get_batches_of_transformed_samples(index_array)
117
118 def _get_batches_of_transformed_samples(self, index_array):
~/anaconda3/envs/fastai/lib/python3.7/site-packages/keras_preprocessing/image/iterator.py in _get_batches_of_transformed_samples(self, index_array)
225 filepaths = self.filepaths
226 for i, j in enumerate(index_array):
--> 227 img = load_img(filepaths[j],
228 color_mode=self.color_mode,
229 target_size=self.target_size,
IndexError: list index out of range
This problem does not arise when I am using Colab.
When I run the fit statement, the following problems arise on different
systems:
1. In Colab there is no error, but it is far too slow, as this is just one
part of my project.
2. On an Nvidia Titan X 12 GB GPU I am getting a Resource Exhausted error.
3. On a 33 GB GPU workstation I am getting a different error; I am attaching
the error output with this question. Also, only this code shows the problem on this system.
# Convolutional encoder/decoder on single-channel 256x256 inputs.
input_sh = Input(shape=(256, 256, 1))

# Encoder: four 'same'-padded convolutions, then two 2x2 max-pooling stages.
x = Conv2D(128, (3, 3), padding='same', activation='relu', strides=1)(input_sh)
x = Conv2D(64, (3, 3), padding='same', activation='relu')(x)
x = Conv2D(64, (3, 3), padding='same', activation='relu', strides=1)(x)
mapp = Conv2D(32, (3, 3), padding='same', activation='relu')(x)
x = MaxPooling2D((2, 2))(mapp)
x = Conv2D(10, (3, 3), padding='same', activation='relu', name='sal')(x)
encod = MaxPooling2D((2, 2), name='ecod')(x)

# Decoder: three 2x upsampling stages (one more than the two pooling
# stages), followed later by a single stride-2 convolution.
x = UpSampling2D((2, 2))(encod)
x = Conv2D(10, (3, 3), padding='same', activation='relu')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(32, (3, 3), padding='same', activation='relu', strides=1)(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(64, (3, 3), padding='same', activation='relu', strides=1)(x)
x = Conv2D(16, (3, 3), padding='same', activation='relu', strides=1)(x)
x = Conv2D(16, (3, 3), padding='same', activation='relu', strides=1)(x)
x = Conv2D(16, (3, 3), padding='same', activation='relu', strides=1)(x)
x = Conv2D(16, (3, 3), padding='same', activation='relu', strides=2)(x)

# Single-channel sigmoid output.
decod = Conv2D(1, (3, 3), padding='same', activation='sigmoid')(x)

saliency = Model(input_sh, decod)
saliency.summary()
from skimage.color import rgb2lab, lab2rgb, rgb2gray
folder = r"F:\INTERN\Subset16k"
onlyfiles = [
    f for f in os.listdir(folder)
    if os.path.isfile(os.path.join(folder, f))
]

# The loop below consumes exactly 1050 files (800 train + 200 validation +
# 50 test). Fail up front with a clear message rather than with an
# IndexError part-way through loading.
if len(onlyfiles) < 1050:
    raise ValueError(
        "expected at least 1050 image files in {!r}, found {}".format(
            folder, len(onlyfiles)))

# Pre-allocated float32 buffers: x* arrays hold channel 0 of each converted
# image, y* arrays hold the remaining two channels.
xdataset = np.ndarray(shape=(800, 256, 256, 1), dtype=np.float32)
ydataset = np.ndarray(shape=(800, 256, 256, 2), dtype=np.float32)
vxdataset = np.ndarray(shape=(200, 256, 256, 1), dtype=np.float32)
vydataset = np.ndarray(shape=(200, 256, 256, 2), dtype=np.float32)
xtestset = np.ndarray(shape=(50, 256, 256, 1), dtype=np.float32)
ytestset = np.ndarray(shape=(50, 256, 256, 2), dtype=np.float32)

for i in range(1050):
    if i == 1:
        print('pass')
    # Build the path the same way the file filter above does.
    ft = os.path.join(folder, onlyfiles[i])
    im = load_img(ft)
    x = img_to_array(im)
    # Scale pixel values by 1/255 before converting RGB to Lab.
    lab_image = rgb2lab(1.0 / 255 * x)
    # Shift and scale the three Lab channels; presumably this maps them to
    # roughly [0, 1] — confirm against skimage's rgb2lab output ranges.
    x = (lab_image + [0, 128, 128]) / [100, 255, 255]
    if i < 800:
        # Files 0-799: training split.
        xdataset[i] = x[:, :, 0].reshape(256, 256, 1)
        ydataset[i] = x[:, :, 1:]
    elif i < 1000:
        # Files 800-999: validation split.
        vxdataset[i - 800] = x[:, :, 0].reshape(256, 256, 1)
        vydataset[i - 800] = x[:, :, 1:]
    else:
        # Files 1000-1049: test split.
        xtestset[i - 1000] = x[:, :, 0].reshape(256, 256, 1)
        ytestset[i - 1000] = x[:, :, 1:]
# Train the network to reproduce its own input (xdataset is both the input
# and the target) under a mean-squared-error loss.
saliency.compile(loss='mse', optimizer='adam')
saliency.fit(
    xdataset,
    xdataset,
    epochs=10,
    verbose=1,
)
UnknownError Traceback (most recent call
last)
<ipython-input-16-a5e8d074b015> in <module>
----> 1 saliency.fit(tx,tx,epochs=5,verbose=1)
~\Anaconda3\envs\Suvidha\lib\site-packages\keras\engine\training.py in
fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split,
validation_data, shuffle, class_weight, sample_weight, initial_epoch,
steps_per_epoch, validation_steps, **kwargs)
1037 initial_epoch=initial_epoch,
1038
steps_per_epoch=steps_per_epoch,
-> 1039
validation_steps=validation_steps)
1040
1041 def evaluate(self, x=None, y=None,
~\Anaconda3\envs\Suvidha\lib\site-
packages\keras\engine\training_arrays.py in fit_loop(model, f, ins,
out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins,
shuffle, callback_metrics, initial_epoch, steps_per_epoch,
validation_steps)
197 ins_batch[i] = ins_batch[i].toarray()
198
--> 199 outs = f(ins_batch)
200 outs = to_list(outs)
201 for l, o in zip(out_labels, outs):
~\Anaconda3\envs\Suvidha\lib\site-
packages\keras\backend\tensorflow_backend.py in __call__(self, inputs)
2713 return self._legacy_call(inputs)
2714
-> 2715 return self._call(inputs)
2716 else:
2717 if py_any(is_tensor(x) for x in inputs):
~\Anaconda3\envs\Suvidha\lib\site-
packages\keras\backend\tensorflow_backend.py in _call(self, inputs)
2673 fetched = self._callable_fn(*array_vals,
run_metadata=self.run_metadata)
2674 else:
-> 2675 fetched = self._callable_fn(*array_vals)
2676 return fetched[:len(self.outputs)]
2677
~\Anaconda3\envs\Suvidha\lib\site-
packages\tensorflow\python\client\session.py in __call__(self, *args,
**kwargs)
1437 ret = tf_session.TF_SessionRunCallable(
1438 self._session._session, self._handle, args, status,
-> 1439 run_metadata_ptr)
1440 if run_metadata:
1441 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
~\Anaconda3\envs\Suvidha\lib\site-
packages\tensorflow\python\framework\errors_impl.py in __exit__(self,
type_arg, value_arg, traceback_arg)
526 None, None,
527 compat.as_text(c_api.TF_Message(self.status.status)),
--> 528 c_api.TF_GetCode(self.status.status))
529 # Delete the underlying status object from memory otherwise it
stays alive
530 # as there is a reference to status from this from the traceback
due to
UnknownError: Failed to get convolution algorithm. This is probably
because cuDNN failed to initialize, so try looking to see if a warning
log message was printed above.
[[{{node conv2d_1/convolution}}]]
[[{{node loss/mul}}]]