Data for a Keras generator is randomly fetched from a DB once during initialization and again at the end of each epoch via on_epoch_end(). While training with multiple workers, I've noticed multiple calls to on_epoch_end() at the end of each epoch instead of the expected single call.
Using the traceback module, I found that the extra calls are all made from worker threads spawned by threading.py:
[custom_generator.py] on_epoch_end called
File "/opt/conda/lib/python3.9/threading.py", line 930, in _bootstrap
self._bootstrap_inner()
File "/opt/conda/lib/python3.9/threading.py", line 973, in _bootstrap_inner
self.run()
File "/opt/conda/lib/python3.9/threading.py", line 910, in run
self._target(*self._args, **self._kwargs)
File "/opt/conda/lib/python3.9/site-packages/keras/utils/data_utils.py", line 761, in _run
self.sequence.on_epoch_end()
File "/home/user/custom_generator.py", line 110, in on_epoch_end
traceback.print_stack()
Generator
import traceback

import tensorflow as tf

class CustomGenerator(tf.keras.utils.Sequence):
    def __init__(self, sampler, processor, batch_size, steps_per_epoch):
        # Input size: (512, 512, 3) RGB images
        self.sampler = sampler
        self.processor = processor
        self.batch_size = batch_size
        self.steps_per_epoch = steps_per_epoch
        self.samples = sampler.fetch(batch_size * steps_per_epoch)

    def __len__(self):
        return self.steps_per_epoch

    def __getitem__(self, index):
        return self.processor(
            self.samples[index * self.batch_size:(index + 1) * self.batch_size]
        )

    def on_epoch_end(self):
        self.samples = self.sampler.fetch(self.batch_size * self.steps_per_epoch)
        traceback.print_stack()

gen = CustomGenerator(Sampler(), Processor(), batch_size=1, steps_per_epoch=1500)
I'm using TensorFlow 2.8.0; the same issue occurred with TF 2.3.0 as well. Is there any way to reduce the number of calls to one without reducing the number of workers? Is this behaviour documented anywhere? Strangely, I am unable to reproduce the issue with dummy data.
EDIT
I've managed to reproduce the problem. It happens only when both a training and a validation generator are supplied to model.fit(). The validation generator receives the expected number of on_epoch_end() calls; the training generator, however, receives at least twice as many.
Reproducible code
import numpy as np
import tensorflow as tf

class Generator(tf.keras.utils.Sequence):
    def __init__(self, steps_per_epoch, batch_size):
        self.oee_counter = 0
        self.steps_per_epoch = steps_per_epoch
        self.batch_size = batch_size

    def __getitem__(self, idx):
        self.X = np.random.random((self.batch_size, 512))
        self.y = np.random.randint(low=0, high=2, size=self.batch_size)
        return self.X, self.y

    def on_epoch_end(self):
        self.oee_counter += 1
        print('Called OEE')

    def __len__(self):
        return self.steps_per_epoch

gen1 = Generator(1500, 1)
gen2 = Generator(2000, 1)

inp = tf.keras.layers.Input((512,))
out = tf.keras.layers.Dense(1, activation='sigmoid')(inp)
m = tf.keras.Model(inp, out)
m.compile(optimizer=tf.keras.optimizers.Adam(),
          loss=tf.keras.losses.BinaryCrossentropy())
m.fit(
    x=gen1,
    validation_data=gen2,
    validation_freq=1,
    epochs=3,
    workers=10,
    max_queue_size=50,
    use_multiprocessing=False,
)
print(gen1.oee_counter)  # 6 calls (expected 3)
print(gen2.oee_counter)  # 3 calls (correct)
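Until the root cause is clear, the workaround I'm leaning towards is making on_epoch_end() idempotent by debouncing duplicate calls at the epoch boundary. This is only a sketch of the idea, and it assumes the duplicate calls arrive back-to-back before any batches of the next epoch are requested (which may not hold with a large max_queue_size):
import threading

import tensorflow as tf

class DebouncedGenerator(tf.keras.utils.Sequence):
    def __init__(self, sampler, processor, batch_size, steps_per_epoch):
        self.sampler = sampler
        self.processor = processor
        self.batch_size = batch_size
        self.steps_per_epoch = steps_per_epoch
        self.samples = sampler.fetch(batch_size * steps_per_epoch)
        self._lock = threading.Lock()
        self._fetched_at_boundary = False

    def __len__(self):
        return self.steps_per_epoch

    def __getitem__(self, index):
        # A batch request means a new epoch is under way, so the next
        # on_epoch_end() call should trigger a fresh fetch again.
        self._fetched_at_boundary = False
        return self.processor(
            self.samples[index * self.batch_size:(index + 1) * self.batch_size]
        )

    def on_epoch_end(self):
        # Duplicate calls from Keras worker threads at the same epoch
        # boundary become no-ops; only the first one refetches data.
        with self._lock:
            if self._fetched_at_boundary:
                return
            self._fetched_at_boundary = True
        self.samples = self.sampler.fetch(self.batch_size * self.steps_per_epoch)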
Related
I'm training a model in PyTorch 1.13.0 (I have also tried this on the nightly build torch-1.14.0.dev20221207 to no avail) on my M1 Mac and would like to use MPS hardware acceleration. I have the following relevant code in my project to send the model and input tensors to MPS:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu") # This always results in MPS
model.to(device)
... And in my Dataset subclass:
import torch
from torch.utils.data import Dataset

class MyDataset(Dataset):
    def __init__(self, df, window_size):
        self.df = df
        self.window_size = window_size
        self.data = []
        self.labels = []
        for i in range(len(df) - window_size):
            # `device` is the module-level torch.device defined above
            x = torch.tensor(df.iloc[i:i+window_size].values, dtype=torch.float, device=device)
            y = torch.tensor(df.iloc[i+window_size].values, dtype=torch.float, device=device)
            self.data.append(x)
            self.labels.append(y)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]
This results in the following traceback during my first training step:
Traceback (most recent call last):
  File "lstm_model.py", line 263, in <module>
    train_losses, val_losses = train_model(model, criterion, optimizer, train_loader, val_loader, epochs=100)
  File "lstm_model.py", line 212, in train_model
    train_loss += train_step(model, criterion, optimizer, x, y)
  File "lstm_model.py", line 191, in train_step
    y_pred = model(x)
  File "miniconda3/envs/pytenv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "lstm_model.py", line 182, in forward
    out, _ = self.lstm(x, (h0, c0))
  File "miniconda3/envs/pytenv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "miniconda3/envs/pytenv/lib/python3.10/site-packages/torch/nn/modules/rnn.py", line 774, in forward
    result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
RuntimeError: Placeholder storage has not been allocated on MPS device!
I've tried creating tensors in my Dataset subclass without a device specified and then calling .to(device) on them:
x = torch.tensor(df.iloc[i:i+window_size].values, dtype=torch.float)
x = x.to(device)
y = torch.tensor(df.iloc[i+window_size].values, dtype=torch.float)
y = y.to(device)
I've also tried creating the tensors without a device specified in my Dataset subclass and sending tensors to device in both the forward method of my model and in my train_step function.
How can I resolve my error?
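One detail worth double-checking, since the forward code is not shown above: the h0 and c0 tensors created inside forward must also live on the MPS device, otherwise the LSTM call mixes CPU and MPS storage. A sketch of that pattern (n_layers, hidden_dim and fc are placeholder names, not necessarily the real attributes of my model):
def forward(self, x):
    # Allocate the initial hidden/cell states on the same device as the
    # input batch; torch.zeros defaults to CPU otherwise.
    h0 = torch.zeros(self.n_layers, x.size(0), self.hidden_dim, device=x.device)
    c0 = torch.zeros(self.n_layers, x.size(0), self.hidden_dim, device=x.device)
    out, _ = self.lstm(x, (h0, c0))
    return self.fc(out[:, -1])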
I am working on a GRU, and when I try to make predictions I get an error indicating that I need to define h for forward(). I have tried several things and ran out of patience after googling and scouring Stack Overflow for hours.
This is the class:
import torch
import torch.nn as nn

class GRUNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, drop_prob=0.2):
        super(GRUNet, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.gru = nn.GRU(input_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x, h):
        out, h = self.gru(x, h)
        out = self.fc(self.relu(out[:, -1]))
        return out, h

    def init_hidden(self, batch_size):
        # `device` is defined at module scope
        weight = next(self.parameters()).data
        hidden = weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device)
        return hidden
and then this is where I load the model and try to make a prediction. Both of these are in the same script.
inputs = np.load('.//Pred//input_list.npy')
print(inputs.ndim, inputs.shape)
Gmodel = GRUNet(24,256,1,2)
Gmodel = torch.load('.//GRU//GRU_1028_48.pkl')
Gmodel.eval()
pred = Gmodel(inputs)
Without any other arguments to Gmodel I get the following:
Traceback (most recent call last):
  File ".\grunet.py", line 136, in <module>
    pred = Gmodel(inputs)
  File "C:\Users\ryang\Anaconda-3\envs\tf-gpu\lib\site-packages\torch\nn\modules\module.py", line 547, in __call__
    result = self.forward(*input, **kwargs)
TypeError: forward() missing 1 required positional argument: 'h'
You need to provide the hidden state as well, which initially is usually all zeros or simply None!
That is, you either need to explicitly provide one, like this:
hidden_state = torch.zeros(size=(num_layers*direction, batch_size, hidden_dim)).to(device)
pred = Gmodel(inputs, hidden_state)
or simply do:
hidden_state = None
pred = Gmodel(inputs, hidden_state)
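One more detail for the script in the question: inputs comes from np.load() and is a NumPy array, and forward returns both the output and the hidden state, so the call would look roughly like this (a sketch, assuming the array already has the (batch, seq_len, 24) shape the GRU expects):
inputs_t = torch.from_numpy(inputs).float()  # nn.Module expects tensors, not ndarrays
pred, h = Gmodel(inputs_t, None)             # forward returns (out, h)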
I am trying to use ELMo from TensorFlow Hub with tf.keras to perform NER. Training is fine and the loss is decreasing; the test set also gives good results. But I am unable to predict, as I get the following error:
2019-05-02 15:41:42.785946: I tensorflow/stream_executor/dso_loader.cc:152] successfully opened CUDA library libcublas.so.10.0 locally
Traceback (most recent call last):
  File "elmo_eva_brain.py", line 668, in <module>
    np.array([['hello', 'world'] + ['--PAD--'] * 18])))
  File "/home/ashwanipandey/eva_ml/experimental/eva_brain/venv/lib64/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 1113, in predict
    self, x, batch_size=batch_size, verbose=verbose, steps=steps)
  File "/home/ashwanipandey/eva_ml/experimental/eva_brain/venv/lib64/python3.6/site-packages/tensorflow/python/keras/engine/training_arrays.py", line 329, in model_iteration
    batch_outs = f(ins_batch)
  File "/home/ashwanipandey/eva_ml/experimental/eva_brain/venv/lib64/python3.6/site-packages/tensorflow/python/keras/backend.py", line 3076, in __call__
    run_metadata=self.run_metadata)
  File "/home/ashwanipandey/eva_ml/experimental/eva_brain/venv/lib64/python3.6/site-packages/tensorflow/python/client/session.py", line 1439, in __call__
    run_metadata_ptr)
  File "/home/ashwanipandey/eva_ml/experimental/eva_brain/venv/lib64/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 528, in __exit__
    c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: len(seq_lens) != input.dims(0), (256 vs. 1)
    [[{{node Embed/elmo/elmo_module_apply_tokens/bilm/ReverseSequence}}]]
    [[{{node Tag/t_output/transpose_1}}]]
256 is my batch size during training, and I am trying to predict just one sentence.
I have searched a lot on the internet, but all in vain. Any help is much appreciated.
I can definitely get predictions if I repeat my vector 256 times and set batch_size to 256 during prediction, but as you can see this is a highly inefficient workaround.
Here is the code for the custom layer:
import tensorflow as tf
import tensorflow_hub as hub
import keras
from keras import backend as K

class ElmoEmbeddingLayer(keras.layers.Layer):
    def __init__(self, dimensions=1024, batch_size=512, word_size=20, **kwargs):
        self.dimensions = dimensions
        self.trainable = True
        self.batch_size = batch_size
        self.word_size = word_size
        super().__init__(**kwargs)

    def build(self, input_shape):
        self.elmo = hub.Module('https://tfhub.dev/google/elmo/2', trainable=self.trainable,
                               name=f"{self.name}_module")
        super().build(input_shape)

    def call(self, x, mask=None):
        result = self.elmo(
            inputs={
                "tokens": K.cast(x, tf.string),
                "sequence_len": K.constant(self.batch_size * [self.word_size], dtype=tf.int32)
            },
            as_dict=True,
            signature='tokens',
        )['elmo']
        return result

    def compute_mask(self, inputs, mask=None):
        return K.not_equal(inputs, '--PAD--')

    def compute_output_shape(self, input_shape):
        return (None, self.word_size, self.dimensions)

    def get_config(self):
        config = {
            'dimensions': self.dimensions,
            'trainable': self.trainable,
            'batch_size': self.batch_size,
            'word_size': self.word_size
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
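An observation on the layer above: sequence_len is hard-coded to batch_size entries, which is exactly what the len(seq_lens) != input.dims(0), (256 vs. 1) message complains about. A sketch of a call() that derives the length from the runtime batch instead (my assumption, not a tested fix):
def call(self, x, mask=None):
    # Build one sequence-length entry per row actually present in the
    # batch, so predict() works for any batch size, including one sentence.
    seq_lens = tf.fill([tf.shape(x)[0]], tf.constant(self.word_size, dtype=tf.int32))
    return self.elmo(
        inputs={"tokens": K.cast(x, tf.string), "sequence_len": seq_lens},
        as_dict=True,
        signature='tokens',
    )['elmo']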
Here is my model architecture:
[model architecture diagram]
I had the same problem as you, working on an RNN ELMo POS-tagger model. In the end I followed the solution of predicting in batches and keeping only the test samples I want:
model.predict([X_test[:split_te]], batch_size=256)[0]
For more ideas (like copying weights) look here!
The number of samples (in both the train and the test set) must be divisible by the batch_size; otherwise the last batch in Keras will break the architecture.
So, for example, a solution is to use samples up to split_tr for training and up to split_te for predicting:
split_tr = (X_train.shape[0] // BATCH_SIZE) * BATCH_SIZE
split_te = (X_test.shape[0] // BATCH_SIZE) * BATCH_SIZE
model.fit(X_train_text[:split_tr], y_train[:split_tr],
          batch_size=BATCH_SIZE, epochs=15,
          validation_data=(X_test_text[:split_te], y_test[:split_te]),
          verbose=1)
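As a follow-up on the single-sentence case from the question: with sequence_len baked in at the training batch size, you can also pad the batch with all-PAD dummy rows instead of repeating the real sentence 256 times, and keep only the first prediction. A sketch under that assumption:
import numpy as np

BATCH_SIZE = 256  # must match the batch size baked into the ELMo layer
sent = np.array([['hello', 'world'] + ['--PAD--'] * 18])     # shape (1, 20)
dummy = np.full((BATCH_SIZE - 1, sent.shape[1]), '--PAD--')  # filler rows
batch = np.concatenate([sent, dummy], axis=0)                # shape (256, 20)
pred = model.predict(batch, batch_size=BATCH_SIZE)[0]        # keep only the real row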
I am trying to load a Keras model for prediction only (i.e. I do not have to compile the model, per Pepslee's post here).
When I try to use model.predict_generator(), I get:
Using TensorFlow backend.
Exception in thread Thread-1:
Traceback (most recent call last):
  File "/user/pkgs/anaconda2/lib/python2.7/threading.py", line 801, in __bootstrap_inner
    self.run()
  File "/user/pkgs/anaconda2/lib/python2.7/threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "/user/pkgs/anaconda2/lib/python2.7/site-packages/keras/utils/data_utils.py", line 559, in _run
    sequence = list(range(len(self.sequence)))
ValueError: __len__() should return >= 0
I am working with Tensorflow version 1.12.0, Keras version 2.2.4. I need to use these versions to ensure compatibility with my cuDNN version, which I have no control over.
How can I get around this error?
EDIT
I was asked for an example. Unfortunately there's too much proprietary info here for me to give much detail, but here are the bare bones (note the model is not actually an LSTM):
import glob

import keras
import numpy as np
from keras.models import load_model

class LSTMmodel():
    def __init__(self, hid1=10, batch_size=32, mode='test'):
        self.hid_dim_1 = hid1
        self.batch_size = batch_size
        self.t_per_e, self.test_generator = self.read_data()
        # Load the entire fitted model
        model_name = ''.join(glob.glob('*model.h5'))
        self.__model = load_model(model_name, compile=False)

    def read_data(self):
        num_test_minibatches = 10
        test_IDs = range(111, 111 + 10)
        params = {'list_IDs': test_IDs, 'batch_size': self.batch_size, 'n_vars': 354}
        test_generator = DataGenerator(**params)
        t_per_e = int(len(test_IDs) - self.batch_size + 1)
        return t_per_e, test_generator

    def lstm_model(self):
        # Model building here. Not needed, since not compiling the model
        return 0

    def lstm_predict(self):
        pred = self.__model.predict_generator(self.test_generator, self.t_per_e)
        return pred


class DataGenerator(keras.utils.Sequence):
    # Other methods in here as necessary
    def __len__(self):
        'Denotes the number of batches per epoch'
        batches_per_epoch = int(np.floor(len(self.list_IDs) - self.batch_size + 1))
        return batches_per_epoch

    def __data_generation(self, other_params_here):
        'Generates data containing batch_size samples'
        return preprocessed_data


def test_lstm():
    test_inst = LSTMmodel(hid1=10)  # hid1 is a hyperparameter
    test_prediction = test_inst.lstm_predict()
    return test_prediction


if __name__ == '__main__':
    testvals = test_lstm()
Basically, the workflow is:
1) test_lstm() creates an instance of the LSTMmodel class, and then calls lstm_predict.
2) lstm_predict uses predict_generator which takes in the generator for the test set and the number of examples to generate (steps from here).
3) The generator for the test set is created as an instance of class DataGenerator() in the read_data() method of class LSTMmodel(). Importantly, the test data generator is created in the same way as the training data generator and the validation data generator.
4) self.__model is created by loading a fully trained model in the init method of class LSTMmodel().
How can I get rid of the error?
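While writing this up, I noticed that with the bare-bones numbers above, __len__ evaluates to 10 - 32 + 1 = -21, which seems to match the ValueError. Presumably the length has to be kept non-negative, along these lines (just a sketch of what I mean; with zero batches there is still nothing to predict, so the batch size probably also has to be made to fit the test set):
def __len__(self):
    'Denotes the number of batches per epoch, clamped so it is never negative'
    return max(0, int(np.floor(len(self.list_IDs) - self.batch_size + 1)))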
I am using Keras 2.0.8 with the TensorFlow 1.3.0 backend.
I load a model in the class __init__ and then use it to predict from multiple threads.
import tensorflow as tf
from keras import backend as K
from keras.models import load_model
class CNN:
    def __init__(self, model_path):
        self.cnn_model = load_model(model_path)
        self.session = K.get_session()
        self.graph = tf.get_default_graph()

    def query_cnn(self, data):
        X = self.preproccesing(data)
        with self.session.as_default():
            with self.graph.as_default():
                return self.cnn_model.predict(X)
I initialize the CNN once, and the query_cnn method is called from multiple threads.
The exception I get in my log is:
File "/home/*/Similarity/CNN.py", line 43, in query_cnn
return self.cnn_model.predict(X)
File "/usr/local/lib/python3.5/dist-packages/keras/models.py", line 913, in predict
return self.model.predict(x, batch_size=batch_size, verbose=verbose)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 1713, in predict
verbose=verbose, steps=steps)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 1269, in _predict_loop
batch_outs = f(ins_batch)
File "/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py", line 2273, in __call__
**self.session_kwargs)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 895, in run
run_metadata_ptr)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1124, in _run
feed_dict_tensor, options, run_metadata)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1321, in _do_run
options, run_metadata)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1340, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.NotFoundError: PruneForTargets: Some target nodes not found: group_deps
The code works fine most of the time; it's probably some problem with the multithreading.
How can I fix it?
Make sure you finish creating the graph before spawning the other threads.
Calling finalize() on the graph may help you with that.
def __init__(self, model_path):
    self.cnn_model = load_model(model_path)
    self.session = K.get_session()
    self.graph = tf.get_default_graph()
    self.graph.finalize()
Update 1: finalize() makes your graph read-only so it can safely be used from multiple threads. As a side effect, it helps you find unintentional behavior, and sometimes memory leaks, because it throws an exception whenever you try to modify the graph.
Imagine that you have a thread that does, for instance, one-hot encoding of your inputs (bad example):
def preprocessing(self, data):
    one_hot_data = tf.one_hot(data, depth=self.num_classes)
    return self.session.run(one_hot_data)
If you print the number of nodes in the graph, you will notice that it increases over time:
# amount of nodes in tf graph
print(len(list(tf.get_default_graph().as_graph_def().node)))
But if you define the graph first that won't be the case (slightly better code):
def preprocessing(self, data):
    # run the pre-created operation with self.input as placeholder
    return self.session.run(self.one_hot_data, feed_dict={self.input: data})
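For completeness, the self.input placeholder and the self.one_hot_data op referenced above would be created once, before the graph is finalized, e.g. in __init__. A minimal sketch under that assumption (TF 1.x API, matching the versions in the question; num_classes is an extra constructor argument I made up for the example):
def __init__(self, model_path, num_classes):
    self.cnn_model = load_model(model_path)
    self.session = K.get_session()
    self.graph = tf.get_default_graph()
    # Build the preprocessing ops once; later calls to preprocessing()
    # only feed data instead of adding new nodes to the graph.
    self.num_classes = num_classes
    self.input = tf.placeholder(tf.int32, shape=[None])
    self.one_hot_data = tf.one_hot(self.input, depth=self.num_classes)
    self.graph.finalize()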
Update 2: According to this thread you need to call model._make_predict_function() on a keras model before doing multithreading.
Keras builds the GPU function the first time you call predict(). That
way, if you never call predict, you save some time and resources.
However, the first time you call predict is slightly slower than every
other time.
The updated code:
def __init__(self, model_path):
    self.cnn_model = load_model(model_path)
    self.cnn_model._make_predict_function()  # have to initialize before threading
    self.session = K.get_session()
    self.graph = tf.get_default_graph()
    self.graph.finalize()  # make graph read-only
Update 3: I did a proof of concept of warming up, because _make_predict_function() doesn't seem to work as expected.
First I created a dummy model:
import tensorflow as tf
from keras.layers import *
from keras.models import *
model = Sequential()
model.add(Dense(256, input_shape=(2,)))
model.add(Dense(1, activation='softmax'))
model.compile(loss='mean_squared_error', optimizer='adam')
model.save("dummymodel")
Then, in another script, I loaded that model and ran it on multiple threads:
import tensorflow as tf
from keras import backend as K
from keras.models import load_model
import threading as t
import numpy as np
K.clear_session()
class CNN:
    def __init__(self, model_path):
        self.cnn_model = load_model(model_path)
        self.cnn_model.predict(np.array([[0, 0]]))  # warmup
        self.session = K.get_session()
        self.graph = tf.get_default_graph()
        self.graph.finalize()  # finalize

    def preproccesing(self, data):
        # dummy
        return data

    def query_cnn(self, data):
        X = self.preproccesing(data)
        with self.session.as_default():
            with self.graph.as_default():
                prediction = self.cnn_model.predict(X)
                print(prediction)
                return prediction
cnn = CNN("dummymodel")
th = t.Thread(target=cnn.query_cnn, kwargs={"data": np.random.random((500, 2))})
th2 = t.Thread(target=cnn.query_cnn, kwargs={"data": np.random.random((500, 2))})
th3 = t.Thread(target=cnn.query_cnn, kwargs={"data": np.random.random((500, 2))})
th4 = t.Thread(target=cnn.query_cnn, kwargs={"data": np.random.random((500, 2))})
th5 = t.Thread(target=cnn.query_cnn, kwargs={"data": np.random.random((500, 2))})
th.start()
th2.start()
th3.start()
th4.start()
th5.start()
th2.join()
th.join()
th3.join()
th5.join()
th4.join()
By commenting out the warm-up and finalize() lines, I was able to reproduce your first issue.