Multiple Keras GridSearchCV with multiprocessing: TypeError: cannot pickle '_thread.RLock' object - keras

Edit: I figured out that leaving out the early stopping solves the problem, but that is not an option for me. Is there a way to include early stopping in a grid search together with multiprocessing?
I am trying to run GridSearchCV in a Jupyter Notebook for multiple datasets, one after another, using multiprocessing for each grid search. This works fine for the first grid search, but after that I keep getting errors for all other grid searches. I even get errors when I re-run the same cell that worked initially.
import numpy as np
from math import ceil, floor
from sklearn.model_selection import GridSearchCV
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor

def create_model(n_weights=1000, hidden_layers=1, learning_rate=0.001):
    # formulas derived from n_weights = sum over all layers l of (d(l-1) + 1) * d(l),
    # where d(l) is the output dimension of layer l
    if hidden_layers == 1:    # 100% of neurons in first hidden layer
        neurons = [ceil((n_weights - 1) / 3)]
    elif hidden_layers == 2:  # 70% / 30% split of neurons
        x = 1/7 * (np.sqrt(21 * n_weights + 79) - 10)
        neurons = list(map(floor, [7/3 * x, x]))
    elif hidden_layers == 3:  # 50% / 30% / 20% split
        x = 1/21 * (np.sqrt(84 * n_weights + 205) - 17)
        neurons = list(map(floor, [5/2 * x, 3/2 * x, x]))
    else:
        raise Exception('Only 1, 2 or 3 layers allowed')
    model = Sequential([Dense(neurons[0], activation='relu', input_dim=1)])
    for n in neurons[1:]:
        model.add(Dense(n, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss='mse')
    return model
batch_size = [64, 128]
learning_rate = [0.01, 0.001]
hidden_layers = [1, 2, 3]
n_weights = [10, 30, 60]
p_grid = dict(n_weights=n_weights, hidden_layers=hidden_layers, batch_size=batch_size, learning_rate=learning_rate)
earlyStop = keras.callbacks.EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)
epochs = 5000

# first grid search
model1 = KerasRegressor(create_model, epochs=epochs, verbose=0)
grid1 = GridSearchCV(estimator=model1, param_grid=p_grid, n_jobs=-1, cv=4, verbose=1)
result1 = grid1.fit(X_train1, y_train1, callbacks=[earlyStop])

# second grid search
model2 = KerasRegressor(create_model, epochs=epochs, verbose=0)
grid2 = GridSearchCV(estimator=model2, param_grid=p_grid, n_jobs=-1, cv=4, verbose=1)
result2 = grid2.fit(X_train2, y_train2, callbacks=[earlyStop])
The above code runs two identical grid searches on two different data sets. The first one works fine, but the second one fails at this line
grid2 = GridSearchCV(estimator=model2, param_grid=p_grid, n_jobs=-1, cv=4, verbose=1)
with the error
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Users\oli-w\anaconda3\envs\tensorflow\lib\site-packages\joblib\externals\loky\backend\queues.py", line 153, in _feed
obj_ = dumps(obj, reducers=reducers)
File "C:\Users\oli-w\anaconda3\envs\tensorflow\lib\site-packages\joblib\externals\loky\backend\reduction.py", line 271, in dumps
dump(obj, buf, reducers=reducers, protocol=protocol)
File "C:\Users\oli-w\anaconda3\envs\tensorflow\lib\site-packages\joblib\externals\loky\backend\reduction.py", line 264, in dump
_LokyPickler(file, reducers=reducers, protocol=protocol).dump(obj)
File "C:\Users\oli-w\anaconda3\envs\tensorflow\lib\site-packages\joblib\externals\cloudpickle\cloudpickle_fast.py", line 602, in dump
return Pickler.dump(self, obj)
TypeError: cannot pickle '_thread.RLock' object
"""
The above exception was the direct cause of the following exception:
PicklingError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_9700/1799096090.py in <module>
1 # second grid search
2 model2 = KerasRegressor(create_model, epochs=epochs, verbose=0)
----> 3 grid2 = GridSearchCV(estimator=model, param_grid=p_grid, n_jobs=-1, cv=4, verbose=1)
4 result2 = grid2.fit(X_train1, y_train1, callbacks=[earlyStop])
~\anaconda3\envs\tensorflow\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
889 return results
890
--> 891 self._run_search(evaluate_candidates)
892
893 # multimetric is determined here because in the case of a callable
~\anaconda3\envs\tensorflow\lib\site-packages\sklearn\model_selection\_search.py in _run_search(self, evaluate_candidates)
1390 def _run_search(self, evaluate_candidates):
1391 """Search all candidates in param_grid"""
-> 1392 evaluate_candidates(ParameterGrid(self.param_grid))
1393
1394
~\anaconda3\envs\tensorflow\lib\site-packages\sklearn\model_selection\_search.py in evaluate_candidates(candidate_params, cv, more_results)
836 )
837
--> 838 out = parallel(
839 delayed(_fit_and_score)(
840 clone(base_estimator),
~\anaconda3\envs\tensorflow\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1054
1055 with self._backend.retrieval_context():
-> 1056 self.retrieve()
1057 # Make sure that we get a last message telling us we are done
1058 elapsed_time = time.time() - self._start_time
~\anaconda3\envs\tensorflow\lib\site-packages\joblib\parallel.py in retrieve(self)
933 try:
934 if getattr(self._backend, 'supports_timeout', False):
--> 935 self._output.extend(job.get(timeout=self.timeout))
936 else:
937 self._output.extend(job.get())
~\anaconda3\envs\tensorflow\lib\site-packages\joblib\_parallel_backends.py in wrap_future_result(future, timeout)
540 AsyncResults.get from multiprocessing."""
541 try:
--> 542 return future.result(timeout=timeout)
543 except CfTimeoutError as e:
544 raise TimeoutError from e
~\anaconda3\envs\tensorflow\lib\concurrent\futures\_base.py in result(self, timeout)
437 raise CancelledError()
438 elif self._state == FINISHED:
--> 439 return self.__get_result()
440 else:
441 raise TimeoutError()
~\anaconda3\envs\tensorflow\lib\concurrent\futures\_base.py in __get_result(self)
386 def __get_result(self):
387 if self._exception:
--> 388 raise self._exception
389 else:
390 return self._result
PicklingError: Could not pickle the task to send it to the workers.
I would understand if the grid search didn't work at all, but it confuses me that it works once and then starts throwing errors. I was only able to run another grid search after I restarted the kernel. What can I do in order to make multiple grid searches possible in one sitting without restarting the kernel?
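One thing worth trying (an assumption on my part, not a verified fix): after the first fit, the shared earlyStop callback still holds a reference to a fitted Keras model, and fitted TensorFlow models contain '_thread.RLock' objects that joblib cannot pickle, so the reused callback can no longer be sent to the workers. A minimal sketch that builds a fresh EarlyStopping object for every grid search:

def make_early_stop():
    # hypothetical helper: a new callback per search, so no fitted model is dragged along
    return keras.callbacks.EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

result1 = grid1.fit(X_train1, y_train1, callbacks=[make_early_stop()])
result2 = grid2.fit(X_train2, y_train2, callbacks=[make_early_stop()])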

Related

PyTorch: "TypeError: Caught TypeError in DataLoader worker process 0."

I am trying to implement a RoBERTa model for sentiment analysis. First, I declared GPReviewDataset to create a PyTorch Dataset.
import torch
from torch.utils.data import Dataset, DataLoader

MAX_LEN = 160

class GPReviewDataset(Dataset):
    def __init__(self, reviews, targets, tokenizer, max_len):
        self.reviews = reviews
        self.targets = targets
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.reviews)

    def __getitem__(self, item):
        review = str(self.reviews[item])
        target = self.targets[item]
        encoding = self.tokenizer.encode_plus(
            review,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            pad_to_max_length=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'review_text': review,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'targets': torch.tensor(target, dtype=torch.long)
        }
Next, I implement create_data_loader to create a couple of data loaders. Here’s a helper function to do it:
def create_data_loader(df, tokenizer, max_len, batch_size):
    ds = GPReviewDataset(
        reviews=df.text.to_numpy(),
        targets=df.sentiment.to_numpy(),
        tokenizer=tokenizer,
        max_len=max_len
    )
    return DataLoader(
        ds,
        batch_size=batch_size,
        num_workers=4
    )
BATCH_SIZE = 16
train_data_loader = create_data_loader(df_train, tokenizer, MAX_LEN, BATCH_SIZE)
val_data_loader = create_data_loader(df_val, tokenizer, MAX_LEN, BATCH_SIZE)
test_data_loader = create_data_loader(df_test, tokenizer, MAX_LEN, BATCH_SIZE)
dt = next(iter(train_data_loader))
However, when I run this code, it stops and gives me these errors:
TypeError Traceback (most recent call last)
<ipython-input-35-a673c0794f60> in <module>()
----> 1 dt = next(iter(train_data_loader))
3 frames
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in __next__(self)
433 if self._sampler_iter is None:
434 self._reset()
--> 435 data = self._next_data()
436 self._num_yielded += 1
437 if self._dataset_kind == _DatasetKind.Iterable and \
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in _next_data(self)
1083 else:
1084 del self._task_info[idx]
-> 1085 return self._process_data(data)
1086
1087 def _try_put_index(self):
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in _process_data(self, data)
1109 self._try_put_index()
1110 if isinstance(data, ExceptionWrapper):
-> 1111 data.reraise()
1112 return data
1113
/usr/local/lib/python3.6/dist-packages/torch/_utils.py in reraise(self)
426 # have message field
427 raise self.exc_type(message=msg)
--> 428 raise self.exc_type(msg)
429
430
TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/worker.py", line 198, in _worker_loop
data = fetcher.fetch(index)
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "<ipython-input-18-1e537ce5a428>", line 25, in __getitem__
'targets': torch.tensor(target, dtype=torch.long)
TypeError: new(): invalid data type 'str'
I do not understand why this happens; can anyone please help me and explain?
You need to define your classes as integers. I assume you are working on a classification problem, but it looks like you have defined your classes as strings. You need to convert your classes from strings to integers in a new column: for example, if df.sentiment is positive, represent it with 0, and if it is negative, represent it with 1.
def to_int_sentiment(label):
    if label == "positive":
        return 0
    elif label == "negative":
        return 1

df['int_sentiment'] = df.sentiment.apply(to_int_sentiment)
Then you should use the column df.int_sentiment instead of df.sentiment, so you have to change the create_data_loader function as follows.
def create_data_loader(df, tokenizer, max_len, batch_size):
    ds = GPReviewDataset(
        reviews=df.text.to_numpy(),
        targets=df.int_sentiment.to_numpy(),
        tokenizer=tokenizer,
        max_len=max_len
    )
    return DataLoader(
        ds,
        batch_size=batch_size,
        num_workers=4
    )
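A shorter alternative (my own addition, not from the original answer) is to let pandas assign the integer codes automatically; note that the particular integer given to each class may then differ from the manual mapping above:

# Not from the original answer: pandas generates the integer labels itself.
df['int_sentiment'] = df['sentiment'].astype('category').cat.codes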

How to manage multilabel segmentation map for Active contour loss?

I am using Active contour loss (https://github.com/xuuuuuuchen/Active-Contour-Loss/blob/master/Active-Contour-Loss.py) which is as follows:
from keras import backend as K
import numpy as np
def Active_Contour_Loss(y_true, y_pred):
    # y_pred = K.cast(y_pred, dtype='float64')

    x = y_pred[:, :, 1:, :] - y_pred[:, :, :-1, :]  # horizontal and vertical directions
    y = y_pred[:, :, :, 1:] - y_pred[:, :, :, :-1]

    delta_x = x[:, :, 1:, :-2]**2
    delta_y = y[:, :, :-2, 1:]**2
    delta_u = K.abs(delta_x + delta_y)

    epsilon = 0.00000001  # small parameter to avoid a zero under the square root in practice
    w = 1
    lenth = w * K.sum(K.sqrt(delta_u + epsilon))  # equ.(11) in the paper

    """
    region term
    """
    C_1 = np.ones((256, 256))
    C_2 = np.zeros((256, 256))

    region_in = K.abs(K.sum(y_pred[:, 0, :, :] * ((y_true[:, 0, :, :] - C_1)**2)))       # equ.(12) in the paper
    region_out = K.abs(K.sum((1 - y_pred[:, 0, :, :]) * ((y_true[:, 0, :, :] - C_2)**2)))  # equ.(12) in the paper

    lambdaP = 1  # the lambda parameter could be varied
    loss = lenth + lambdaP * (region_in + region_out)

    return loss
However, when I use it with my U-Net model, I get the error below while compiling.
InvalidArgumentError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
1606 try:
-> 1607 c_op = c_api.TF_FinishOperation(op_desc)
1608 except errors.InvalidArgumentError as e:
InvalidArgumentError: Dimensions must be equal, but are 4 and 256 for 'loss_2/activation_57_loss/mul_1' (op: 'Mul') with input shapes: [?,256,4], [?,256,256].
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
12 frames
<ipython-input-33-b98b233ef3b2> in <module>()
50 # model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
51
---> 52 model.compile(optimizer='adam', loss=Active_Contour_Loss, metrics=['accuracy'])
/usr/local/lib/python3.6/dist-packages/keras/engine/training.py in compile(self, optimizer, loss, metrics, loss_weights, sample_weight_mode, weighted_metrics, target_tensors, **kwargs)
343 with K.name_scope(self.output_names[i] + '_loss'):
344 output_loss = weighted_loss(y_true, y_pred,
--> 345 sample_weight, mask)
346 if len(self.outputs) > 1:
347 self.metrics_tensors.append(output_loss)
/usr/local/lib/python3.6/dist-packages/keras/engine/training_utils.py in weighted(y_true, y_pred, weights, mask)
426 """
427 # score_array has ndim >= 2
--> 428 score_array = fn(y_true, y_pred)
429 if mask is not None:
430 # Cast the mask to floatX to avoid float64 upcasting in Theano
<ipython-input-32-b273672af934> in Active_Contour_Loss(y_true, y_pred)
28 C_2 = np.zeros((256, 256))
29
---> 30 region_in = K.abs(K.sum( y_pred[:,0,:,:] * ((y_true[:,0,:,:] - C_1)**2) ) ) # equ.(12) in the paper
31 region_out = K.abs(K.sum( (1-y_pred[:,0,:,:]) * ((y_true[:,0,:,:] - C_2)**2) )) # equ.(12) in the paper
32
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/math_ops.py in binary_op_wrapper(x, y)
897 with ops.name_scope(None, op_name, [x, y]) as name:
898 if isinstance(x, ops.Tensor) and isinstance(y, ops.Tensor):
--> 899 return func(x, y, name=name)
900 elif not isinstance(y, sparse_tensor.SparseTensor):
901 try:
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/math_ops.py in _mul_dispatch(x, y, name)
1204 is_tensor_y = isinstance(y, ops.Tensor)
1205 if is_tensor_y:
-> 1206 return gen_math_ops.mul(x, y, name=name)
1207 else:
1208 assert isinstance(y, sparse_tensor.SparseTensor) # Case: Dense * Sparse.
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/gen_math_ops.py in mul(x, y, name)
6699 # Add nodes to the TensorFlow graph.
6700 _, _, _op = _op_def_lib._apply_op_helper(
-> 6701 "Mul", x=x, y=y, name=name)
6702 _result = _op.outputs[:]
6703 _inputs_flat = _op.inputs
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
792 op = g.create_op(op_type_name, inputs, dtypes=None, name=scope,
793 input_types=input_types, attrs=attr_protos,
--> 794 op_def=op_def)
795
796 # Conditionally invoke tfdbg v2's op callback(s).
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/util/deprecation.py in new_func(*args, **kwargs)
505 'in a future version' if date is None else ('after %s' % date),
506 instructions)
--> 507 return func(*args, **kwargs)
508
509 doc = _add_deprecated_arg_notice_to_docstring(
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py in create_op(***failed resolving arguments***)
3355 raise TypeError("Input #%d is not a tensor: %s" % (idx, a))
3356 return self._create_op_internal(op_type, inputs, dtypes, input_types, name,
-> 3357 attrs, op_def, compute_device)
3358
3359 def _create_op_internal(
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py in _create_op_internal(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_device)
3424 input_types=input_types,
3425 original_op=self._default_original_op,
-> 3426 op_def=op_def)
3427 self._create_op_helper(ret, compute_device=compute_device)
3428 return ret
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py in __init__(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def)
1768 op_def, inputs, node_def.attr)
1769 self._c_op = _create_c_op(self._graph, node_def, grouped_inputs,
-> 1770 control_input_ops)
1771 # pylint: enable=protected-access
1772
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
1608 except errors.InvalidArgumentError as e:
1609 # Convert to ValueError for backwards compatibility.
-> 1610 raise ValueError(str(e))
1611
1612 return c_op
ValueError: Dimensions must be equal, but are 4 and 256 for 'loss_2/activation_57_loss/mul_1' (op: 'Mul') with input shapes: [?,256,4], [?,256,256].
I tried assigning C_1 and C_2 the shape (256, 256, 4), but the segmentation mask is not generated. Am I missing something?
The source code you shared for Active Contours seems fine. You are probably feeding the loss function inputs of the wrong size.
Since the code works with tensors, you should feed the function 4-D arrays. Try something like this:
a0 = np.zeros([1,256,256,1], dtype='float64')
a0[0,...] = np.expand_dims(C_1, -1).astype(np.float64)
a1 = np.zeros([1,256,256,1], dtype='float64')
a1[0,...] = np.expand_dims(C_2, -1).astype(np.float64)
los_ac = Active_Contour_Loss(a0, a1)
print(sess.run(los_ac))
I wrote this code quickly but I suppose it'll work.
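As a quick sanity check (my own sketch, assuming the channels-first layout of shape (batch, 1, 256, 256) implied by the y_pred[:, 0, :, :] slicing inside the loss), you can verify that the loss evaluates to a single scalar when both inputs have matching 4-D shapes:

from keras import backend as K
import numpy as np

# Sketch only: dummy channels-first masks with matching 4-D shapes.
yt = K.constant(np.random.rand(2, 1, 256, 256).astype('float32'))
yp = K.constant(np.random.rand(2, 1, 256, 256).astype('float32'))
print(K.eval(Active_Contour_Loss(yt, yp)))  # should print one scalar loss value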

How to use SHAP with a linear SVC model from sklearn using Pipeline?

I am doing text classification using a linear SVC model from sklearn. Now I want to visualize which words/tokens have the highest impact on the classification decision by using SHAP (https://github.com/slundberg/shap).
Right now this does not work because I am getting an error that seems to originate from the vectorizer step in the pipeline I have defined - what's wrong here?
Is my general approach on how to use SHAP in this case correct?
x_Train, x_Test, y_Train, y_Test = train_test_split(df_all['PDFText'], df_all['class'], test_size = 0.2, random_state = 1234)
pipeline = Pipeline([
    (
        'tfidv',
        TfidfVectorizer(
            ngram_range=(1, 3),
            analyzer='word',
            strip_accents='ascii',
            use_idf=True,
            sublinear_tf=True,
            max_features=6000,
            min_df=2,
            max_df=1.0
        )
    ),
    (
        'lin_svc',
        svm.SVC(
            C=1.0,
            probability=True,
            kernel='linear'
        )
    )
])
pipeline.fit(x_Train, y_Train)
shap.initjs()
explainer = shap.KernelExplainer(pipeline.predict_proba, x_Train)
shap_values = explainer.shap_values(x_Test, nsamples=100)
shap.force_plot(explainer.expected_value[0], shap_values[0][0,:], x_Test.iloc[0,:])
This is the error message I get:
Provided model function fails when applied to the provided data set.
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-81-4bca63616b3b> in <module>
3
4 # use Kernel SHAP to explain test set predictions
----> 5 explainer = shap.KernelExplainer(pipeline.predict_proba, x_Train)
6 shap_values = explainer.shap_values(x_Test, nsamples=100)
7
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\shap\explainers\kernel.py in __init__(self, model, data, link, **kwargs)
95 self.keep_index_ordered = kwargs.get("keep_index_ordered", False)
96 self.data = convert_to_data(data, keep_index=self.keep_index)
---> 97 model_null = match_model_to_data(self.model, self.data)
98
99 # enforce our current input type limitations
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\shap\common.py in match_model_to_data(model, data)
80 out_val = model.f(data.convert_to_df())
81 else:
---> 82 out_val = model.f(data.data)
83 except:
84 print("Provided model function fails when applied to the provided data set.")
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\metaestimators.py in <lambda>(*args, **kwargs)
116
117 # lambda, but not partial, allows help() to work with update_wrapper
--> 118 out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
119 # update the docstring of the returned function
120 update_wrapper(out, self.fn)
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\pipeline.py in predict_proba(self, X)
379 for name, transform in self.steps[:-1]:
380 if transform is not None:
--> 381 Xt = transform.transform(Xt)
382 return self.steps[-1][-1].predict_proba(Xt)
383
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\feature_extraction\text.py in transform(self, raw_documents, copy)
1631 check_is_fitted(self, '_tfidf', 'The tfidf vector is not fitted')
1632
-> 1633 X = super(TfidfVectorizer, self).transform(raw_documents)
1634 return self._tfidf.transform(X, copy=False)
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\feature_extraction\text.py in transform(self, raw_documents)
1084
1085 # use the same matrix-building strategy as fit_transform
-> 1086 _, X = self._count_vocab(raw_documents, fixed_vocab=True)
1087 if self.binary:
1088 X.data.fill(1)
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\feature_extraction\text.py in _count_vocab(self, raw_documents, fixed_vocab)
940 for doc in raw_documents:
941 feature_counter = {}
--> 942 for feature in analyze(doc):
943 try:
944 feature_idx = vocabulary[feature]
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\feature_extraction\text.py in <lambda>(doc)
326 tokenize)
327 return lambda doc: self._word_ngrams(
--> 328 tokenize(preprocess(self.decode(doc))), stop_words)
329
330 else:
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\feature_extraction\text.py in <lambda>(x)
254
255 if self.lowercase:
--> 256 return lambda x: strip_accents(x.lower())
257 else:
258 return strip_accents
AttributeError: 'numpy.ndarray' object has no attribute 'lower'
KernelExplainer expects to receive a classification model as the first argument. Please check the use of Pipeline with Shap following the link.
In your case, you can use the Pipeline as follows:
x_Train = pipeline.named_steps['tfidv'].fit_transform(x_Train)
explainer = shap.KernelExplainer(pipeline.named_steps['lin_svc'].predict_proba, x_Train)
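A likely follow-up step (my own sketch, not part of the original answer): the test documents also have to go through the same fitted vectorizer before computing SHAP values, and depending on the SHAP version you may need dense arrays instead of the sparse matrices TfidfVectorizer returns:

# Sketch under the same assumptions as above: transform the test set with the fitted
# vectorizer and densify it, since KernelExplainer may not accept sparse input.
x_Test_vec = pipeline.named_steps['tfidv'].transform(x_Test).toarray()
shap_values = explainer.shap_values(x_Test_vec, nsamples=100)
shap.force_plot(explainer.expected_value[0], shap_values[0][0, :], x_Test_vec[0, :])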

How can I create a path to my data for my CNN in Jupyter Notebook?

Intro and setup
So I have been trying for some time now to make a simple Convolutional Neural Network. I followed a simple tutorial, which can be found here.
It is a simple cat vs dog test (2 categories).
I have set my jupyter/tensorflow/keras environment up in
C:\Users\labadmin
What I have understood is that I just have to provide the path from labadmin in order to point the code at my data for testing and training.
Since I am not sure what is causing the error, I have pasted the whole code and error below; I think the system is not finding the data.
The folder with the data is set up as follows: labadmin has a folder called data, and within that there are two folders:
training
test
Both cat images and dog images are shuffled together in both folders. There are 10,000+ pictures in each folder, so there should be enough.
This is my code:
from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense

classifier = Sequential()
classifier.add(Convolution2D(32, 3, 3, input_shape = (64, 64, 3), activation = 'relu'))
classifier.add(MaxPooling2D(pool_size = (2,2)))
classifier.add(Flatten())
classifier.add(Dense(output_dim = 128, activation = 'relu'))
classifier.add(Dense(output_dim = 1, activation = 'sigmoid'))
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics=['accuracy'])

import pandas as pd
from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

test_datagen = ImageDataGenerator(rescale=1./255)

training_set = train_datagen.flow_from_directory(
    'data\\training',
    target_size=(64, 64),
    batch_size=32,
    class_mode='categorical',
    shuffle=False)

test_set = test_datagen.flow_from_directory(
    'data\\test',
    target_size=(64, 64),
    batch_size=32,
    class_mode='categorical',
    shuffle=False)

from IPython.display import display
from PIL import Image

classifier.fit_generator(
    training_set,
    steps_per_epoch=8000,
    epochs=10,
    validation_data = test_set,
    validation_steps = 800)

import numpy as np
from keras_preprocessing import image

test_image = image.load_img('data\\random.jpg', target_size=(64, 64))
test_image = image.img_to_array(test_image)
test_image = np.expand_dims(test_image, axis = 0)
result = classifier.predict(test_image)
training_set.class_indices

if result[0][0] >= 0.5:
    prediction = 'dog'
else:
    prediction = 'cat'
print(prediction)
I get the following error:
C:\Users\labadmin\Miniconda3\envs\tensorflow\lib\site-packages\ipykernel_launcher.py:26: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(32, (3, 3), input_shape=(64, 64, 3..., activation="relu")`
C:\Users\labadmin\Miniconda3\envs\tensorflow\lib\site-packages\ipykernel_launcher.py:35: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(activation="relu", units=128)`
C:\Users\labadmin\Miniconda3\envs\tensorflow\lib\site-packages\ipykernel_launcher.py:36: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(activation="sigmoid", units=1)`
Found 0 images belonging to 0 classes.
Found 0 images belonging to 0 classes.
Epoch 1/10
---------------------------------------------------------------------------
ZeroDivisionError Traceback (most recent call last)
<ipython-input-5-393aaba195e9> in <module>
82 epochs=10,
83 validation_data = test_set,
---> 84 validation_steps = 800)
85
86 # Our image we now send through to test
~\Miniconda3\envs\tensorflow\lib\site-packages\keras\legacy\interfaces.py in wrapper(*args, **kwargs)
89 warnings.warn('Update your `' + object_name + '` call to the ' +
90 'Keras 2 API: ' + signature, stacklevel=2)
---> 91 return func(*args, **kwargs)
92 wrapper._original_function = func
93 return wrapper
~\Miniconda3\envs\tensorflow\lib\site-packages\keras\engine\training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
1416 use_multiprocessing=use_multiprocessing,
1417 shuffle=shuffle,
-> 1418 initial_epoch=initial_epoch)
1419
1420 #interfaces.legacy_generator_methods_support
~\Miniconda3\envs\tensorflow\lib\site-packages\keras\engine\training_generator.py in fit_generator(model, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
179 batch_index = 0
180 while steps_done < steps_per_epoch:
--> 181 generator_output = next(output_generator)
182
183 if not hasattr(generator_output, '__len__'):
~\Miniconda3\envs\tensorflow\lib\site-packages\keras\utils\data_utils.py in get(self)
707 "`use_multiprocessing=False, workers > 1`."
708 "For more information see issue #1638.")
--> 709 six.reraise(*sys.exc_info())
~\Miniconda3\envs\tensorflow\lib\site-packages\six.py in reraise(tp, value, tb)
691 if value.__traceback__ is not tb:
692 raise value.with_traceback(tb)
--> 693 raise value
694 finally:
695 value = None
~\Miniconda3\envs\tensorflow\lib\site-packages\keras\utils\data_utils.py in get(self)
683 try:
684 while self.is_running():
--> 685 inputs = self.queue.get(block=True).get()
686 self.queue.task_done()
687 if inputs is not None:
~\Miniconda3\envs\tensorflow\lib\multiprocessing\pool.py in get(self, timeout)
642 return self._value
643 else:
--> 644 raise self._value
645
646 def _set(self, i, obj):
~\Miniconda3\envs\tensorflow\lib\multiprocessing\pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
117 job, i, func, args, kwds = task
118 try:
--> 119 result = (True, func(*args, **kwds))
120 except Exception as e:
121 if wrap_exception and func is not _helper_reraises_exception:
~\Miniconda3\envs\tensorflow\lib\site-packages\keras\utils\data_utils.py in next_sample(uid)
624 The next value of generator `uid`.
625 """
--> 626 return six.next(_SHARED_SEQUENCES[uid])
627
628
~\Miniconda3\envs\tensorflow\lib\site-packages\keras_preprocessing\image\iterator.py in __next__(self, *args, **kwargs)
98
99 def __next__(self, *args, **kwargs):
--> 100 return self.next(*args, **kwargs)
101
102 def next(self):
~\Miniconda3\envs\tensorflow\lib\site-packages\keras_preprocessing\image\iterator.py in next(self)
107 """
108 with self.lock:
--> 109 index_array = next(self.index_generator)
110 # The transformation of images is not under thread lock
111 # so it can be done in parallel
~\Miniconda3\envs\tensorflow\lib\site-packages\keras_preprocessing\image\iterator.py in _flow_index(self)
83 self._set_index_array()
84
---> 85 current_index = (self.batch_index * self.batch_size) % self.n
86 if self.n > current_index + self.batch_size:
87 self.batch_index += 1
ZeroDivisionError: integer division or modulo by zero
Thank you for your time.
Did you populate your data\\training and data\\test directories? From the output:
Found 0 images belonging to 0 classes.
Found 0 images belonging to 0 classes.
Epoch 1/10
it appears that your data augmentation generator did not find any images, so the resulting dataset is empty; consequently, when Keras tries to run fit_generator, you get the division-by-zero error as it tries to iterate through an empty image set.
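A common cause of "Found 0 images belonging to 0 classes" (my assumption based on how flow_from_directory discovers classes, not something stated above) is that the images are not sorted into one sub-folder per class; the sub-folder name is what defines the class label. A sketch of the expected layout and a quick check:

# Hypothetical layout: one sub-folder per class inside each split, e.g.
#   C:\Users\labadmin\data\training\cats\cat001.jpg
#   C:\Users\labadmin\data\training\dogs\dog001.jpg
#   C:\Users\labadmin\data\test\cats\...
#   C:\Users\labadmin\data\test\dogs\...
import os

train_dir = os.path.join(r'C:\Users\labadmin', 'data', 'training')
training_set = train_datagen.flow_from_directory(
    train_dir,
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary')  # 'binary' matches the single sigmoid output used above

# If the folders are found, this should no longer report 0 images / 0 classes.
print(training_set.samples, training_set.class_indices)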

Keras error when finetuning InceptionV3

I am trying to follow the "Fine-tune InceptionV3 on a new set of classes" sample code to freeze the first 172 layers and re-train the last layers on the cats/dogs dataset. I keep getting an error, which I have noted at the bottom. Please help. I am using Ubuntu 16.04, keras 1.2.1, theano 0.9.0beta1.dev, numpy 1.12.0 and python 3.5.
from PIL import Image
import os
import matplotlib.pyplot as plt
import numpy as np

data_root_dir = "/home/ubuntu/ML/data/dogscats/"
train_dir = os.path.join(data_root_dir, "sample", "train")
valid_dir = os.path.join(data_root_dir, "valid")

from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K

# create the base pre-trained model
base_model = InceptionV3(weights='imagenet', include_top=True)

# add a global spatial average pooling layer
x = base_model.output
#x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
x = Dense(1024, activation='relu')(x)
# and a logistic layer -- let's say we have 200 classes
predictions = Dense(2, activation='softmax')(x)

# this is the model we will train
model = Model(input=base_model.input, output=predictions)

for layer in model.layers[:172]:
    layer.trainable = False
for layer in model.layers[172:]:
    layer.trainable = True

from keras.optimizers import SGD
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy')

from sklearn.preprocessing import OneHotEncoder

def get_data(path, target_size=(299,299)):
    batches = get_batches(path, shuffle=False, batch_size=1, class_mode=None, target_size=target_size)
    return np.concatenate([batches.next() for i in range(batches.nb_sample)])

def get_batches(dirname, gen=image.ImageDataGenerator(), shuffle=True, batch_size=2, class_mode='categorical',
                target_size=(299,299)):
    return gen.flow_from_directory(dirname, target_size=target_size,
                                   class_mode=class_mode, shuffle=shuffle, batch_size=batch_size)

def onehot(x):
    return np.array(OneHotEncoder().fit_transform(x.reshape(-1,1)).todense())

# Use batch size of 1 since we're just doing preprocessing on the CPU
val_batches = get_batches(valid_dir, shuffle=False, batch_size=10)
train_batches = get_batches(train_dir, shuffle=False, batch_size=10)

val_classes = val_batches.classes
trn_classes = train_batches.classes
val_labels = onehot(val_classes)
trn_labels = onehot(trn_classes)

model.fit_generator(train_batches, samples_per_epoch=train_batches.n, nb_epoch=10,
                    validation_data=val_batches, nb_val_samples=val_batches.n)
The exception is: padding must be zero for average_exc_pad
Here is the full stack-trace:
ValueError Traceback (most recent call last)
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs)
883 outputs =\
--> 884 self.fn() if output_subset is None else\
885 self.fn(output_subset=output_subset)
ValueError: padding must be zero for average_exc_pad
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-4-369d7760ec6e> in <module>()
34
35 model.fit_generator(train_batches, samples_per_epoch=train_batches.n, nb_epoch=10,
---> 36 validation_data=val_batches, nb_val_samples=val_batches.n)
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/engine/training.py in fit_generator(self, generator, samples_per_epoch, nb_epoch, verbose, callbacks, validation_data, nb_val_samples, class_weight, max_q_size, nb_worker, pickle_safe, initial_epoch)
1551 outs = self.train_on_batch(x, y,
1552 sample_weight=sample_weight,
-> 1553 class_weight=class_weight)
1554
1555 if not isinstance(outs, list):
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/engine/training.py in train_on_batch(self, x, y, sample_weight, class_weight)
1314 ins = x + y + sample_weights
1315 self._make_train_function()
-> 1316 outputs = self.train_function(ins)
1317 if len(outputs) == 1:
1318 return outputs[0]
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/backend/theano_backend.py in __call__(self, inputs)
957 def __call__(self, inputs):
958 assert isinstance(inputs, (list, tuple))
--> 959 return self.function(*inputs)
960
961
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs)
896 node=self.fn.nodes[self.fn.position_of_error],
897 thunk=thunk,
--> 898 storage_map=getattr(self.fn, 'storage_map', None))
899 else:
900 # old-style linkers raise their own exceptions
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/gof/link.py in raise_with_op(node, thunk, exc_info, storage_map)
323 # extra long error message in that case.
324 pass
--> 325 reraise(exc_type, exc_value, exc_trace)
326
327
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/six.py in reraise(tp, value, tb)
683 value = tp()
684 if value.__traceback__ is not tb:
--> 685 raise value.with_traceback(tb)
686 raise value
687
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs)
882 try:
883 outputs =\
--> 884 self.fn() if output_subset is None else\
885 self.fn(output_subset=output_subset)
886 except Exception:
ValueError: padding must be zero for average_exc_pad
Apply node that caused the error: AveragePoolGrad{ignore_border=True, mode='average_exc_pad', ndim=2}(Join.0, IncSubtensor{InplaceInc;::, ::, :int64:, :int64:}.0, TensorConstant{(2,) of 3}, TensorConstant{(2,) of 1}, TensorConstant{(2,) of 1})
Toposort index: 5270
Inputs types: [TensorType(float32, 4D), TensorType(float32, 4D), TensorType(int64, vector), TensorType(int64, vector), TensorType(int64, vector)]
Inputs shapes: [(10, 2048, 8, 8), (10, 2048, 8, 8), (2,), (2,), (2,)]
Inputs strides: [(524288, 256, 32, 4), (524288, 256, 32, 4), (8,), (8,), (8,)]
Inputs values: ['not shown', 'not shown', array([3, 3]), array([1, 1]), array([1, 1])]
Outputs clients: [[Elemwise{add,no_inplace}(CorrMM_gradInputs{half, (1, 1), (1, 1)}.0, CorrMM_gradInputs{half, (1, 1), (1, 1)}.0, CorrMM_gradInputs{half, (1, 1), (1, 1)}.0, AveragePoolGrad{ignore_border=True, mode='average_exc_pad', ndim=2}.0)]]
Fine-tuning in this situation presumably means using the convolutional layers as pre-trained feature extractors, so you don't really want the top (densely connected) layers of the Inception network.
Changing
base_model = InceptionV3(weights='imagenet', include_top=True)
to
base_model = InceptionV3(weights='imagenet', include_top=False)
should work.
Also, if you have 200 classes, you should change
# and a logistic layer -- let's say we have 200 classes
predictions = Dense(2, activation='softmax')(x)
to
predictions = Dense(200, activation='softmax')(x)
So your last layer will have the desired 200 elements.
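Putting both suggestions together, a minimal sketch of the corrected model head in the Keras 1 style used above (the 2-unit output matches the cats/dogs data actually used here; swap in 200 units if you really have 200 classes):

from keras.applications.inception_v3 import InceptionV3
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model

# Sketch only: drop the Inception top, add pooling and a fresh classification head.
base_model = InceptionV3(weights='imagenet', include_top=False)
x = GlobalAveragePooling2D()(base_model.output)  # needed because include_top=False returns a 4-D feature map
x = Dense(1024, activation='relu')(x)
predictions = Dense(2, activation='softmax')(x)  # or Dense(200, ...) for 200 classes
model = Model(input=base_model.input, output=predictions)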
