how to call a pre-trained decoder model in custom loss function in keras? - keras

I previously trained an auto-encoder and saved its decoder model. I am now training a new model, labelled 'netA', and I want to use the saved decoder inside a custom loss function. I tried the following, but got an error. Here are my code and the error output:
def custom_loss(y_true, y_pred):
    """Return the mean squared error between decoded targets and predictions.

    Both tensors are passed through ``decoder`` before being compared, so the
    error is measured on the decoder's output rather than on the raw codes.

    ``decoder`` and ``K`` are module-level names that are not defined in this
    snippet -- presumably the previously saved decoder model and the Keras
    backend module; confirm against the full script.

    Args:
        y_true: Target tensor supplied by Keras.
        y_pred: Prediction tensor produced by the model being trained.

    Returns:
        The mean of the element-wise squared difference between
        ``decoder(y_pred)`` and ``decoder(y_true)``.
    """
    a = decoder(y_pred)
    b = decoder(y_true)
    c = K.mean(K.square(a-b))
    return c
input_feature = 409
output_feature = 256
model = Sequential()
model.add(Dense(256, activation = 'relu',input_shape=(input_feature,)))
model.add(Dense(128, activation = 'relu'))
model.add(Dense(64))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dense(128,activation='relu'))
model.add(Dense(output_feature,activation='sigmoid'))
model.summary()
model.compile(optimizer = Adam(lr = 1e-4),loss=custom_loss, metrics = ['mse'])
history = model.fit(x_train_pca_scale, y_train_scale_coding, epochs = 200, batch_size = 32, verbose= 2,validation_data = (x_test_pca_scale, y_test_scale_coding))
the error is :
AssertionError Traceback (most recent call last)
in
23
24 model.summary()
---> 25 model.compile(optimizer = Adam(lr = 1e-4),loss=custom_loss, metrics = ['mse'])
26 #checkpointer = ModelCheckpoint(filepath='/home/lidan/3DFacePrediction/gene.face.autoencoder/gene.face.min.val_loss.hd5', monitor='val_loss',verbose=1,mode='min',save_best_only=True)
27
~/software/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py
in symbolic_fn_wrapper(*args, **kwargs)
73 if _SYMBOLIC_SCOPE.value:
74 with get_graph().as_default():
---> 75 return func(*args, **kwargs)
76 else:
77 return func(*args, **kwargs)
~/software/anaconda3/lib/python3.7/site-packages/keras/engine/training.py
in compile(self, optimizer, loss, metrics, loss_weights,
sample_weight_mode, weighted_metrics, target_tensors, **kwargs)
227 # loss_weight_2 * output_2_loss_fn(...) +
228 # layer losses.
--> 229 self.total_loss = self._prepare_total_loss(masks)
230
231 # Functions for train, test and predict will
~/software/anaconda3/lib/python3.7/site-packages/keras/engine/training.py
in _prepare_total_loss(self, masks)
690
691 output_loss = loss_fn(
--> 692 y_true, y_pred, sample_weight=sample_weight)
693
694 if len(self.outputs) > 1:
~/software/anaconda3/lib/python3.7/site-packages/keras/losses.py in
call(self, y_true, y_pred, sample_weight)
69 scope_name = 'lambda' if self.name == '' else self.name
70 with K.name_scope(scope_name):
---> 71 losses = self.call(y_true, y_pred)
72 return losses_utils.compute_weighted_loss(
73 losses, sample_weight, reduction=self.reduction)
~/software/anaconda3/lib/python3.7/site-packages/keras/losses.py in
call(self, y_true, y_pred)
130 Loss values per sample.
131 """
--> 132 return self.fn(y_true, y_pred, **self._fn_kwargs)
133
134 def get_config(self):
in custom_loss(y_true, y_pred)
3 def custom_loss(y_true,y_pred):
4 a = decoder(y_pred)
----> 5 b = decoder(y_true)
6 c = K.mean(K.square(a-b))
7 return c
~/software/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py
in symbolic_fn_wrapper(*args, **kwargs)
73 if _SYMBOLIC_SCOPE.value:
74 with get_graph().as_default():
---> 75 return func(*args, **kwargs)
76 else:
77 return func(*args, **kwargs)
~/software/anaconda3/lib/python3.7/site-packages/keras/engine/base_layer.py
in call(self, inputs, **kwargs)
487 # Actually call the layer,
488 # collecting output(s), mask(s), and shape(s).
--> 489 output = self.call(inputs, **kwargs)
490 output_mask = self.compute_mask(inputs, previous_mask)
491
~/software/anaconda3/lib/python3.7/site-packages/keras/engine/network.py
in call(self, inputs, mask)
581 return self._output_tensor_cache[cache_key]
582 else:
--> 583 output_tensors, _, _ = self.run_internal_graph(inputs, masks)
584 return output_tensors
585
~/software/anaconda3/lib/python3.7/site-packages/keras/engine/network.py
in run_internal_graph(self, inputs, masks)
796 input_shapes = unpack_singleton(
797 [x._keras_shape for x in computed_tensors])
--> 798 shapes = to_list(layer.compute_output_shape(input_shapes))
799 uses_learning_phase = any(
800 [x._uses_learning_phase for x in computed_tensors])
~/software/anaconda3/lib/python3.7/site-packages/keras/layers/core.py
in compute_output_shape(self, input_shape)
915 def compute_output_shape(self, input_shape):
916 assert input_shape and len(input_shape) >= 2
--> 917 assert input_shape[-1]
918 output_shape = list(input_shape)
919 output_shape[-1] = self.units
AssertionError:
I am confused by the error, because the decoder model works when it is applied to y_pred but fails when it is applied to y_true.
Could someone help me solve it, or suggest another way to use a saved decoder model inside a loss function? Thank you very much!

Related

layers compatibility between attention layer and CONV1D in keras

I am building a model in a BiLSTM -> attention -> Conv1D fashion (I want to use multiple Conv1D layers with different kernel sizes). I am facing a layer incompatibility issue between the attention layer and the Conv1D layer. I have tried the Reshape layer, but it is not working.
My model is as follows:
sequence_input = Input(shape=(maxlen,), dtype="int32")
embedded_sequences = Embedding(50000, output_dim=output_dim)(sequence_input)
lstm = Bidirectional(LSTM(RNN_CELL_SIZE, return_sequences = True), name="bi_lstm_0")(embedded_sequences)
# Getting our LSTM outputs
(lstm, forward_h, forward_c, backward_h, backward_c) = Bidirectional(LSTM(RNN_CELL_SIZE, return_sequences=True, return_state=True),
name="bi_lstm_1")(lstm)
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])
context_vector, attention_weights = Attention(10)(lstm, state_h)
x = Reshape((maxlen, output_dim, 1))(context_vector)
kernel_sizes = [1,2,3,4,5]
convs = []
for kernel_size in range(len(kernel_sizes)):
conv = Conv1D(128, kernel_size, activation='relu')(x)
convs.append(conv)
avg_pool = GlobalAveragePooling1D()(convs)
max_pool = GlobalMaxPooling1D()(convs)
conc = concatenate([avg_pool, max_pool])
output = Dense(50, activation="sigmoid")(conc)
model = keras.Model(inputs=sequence_input, outputs=output)
print(model.summary())
my code gives me the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-114-8e5c0c75e84a> in <module>()
13 context_vector, attention_weights = Attention(10)(lstm, state_h)
14
---> 15 x = Reshape((maxlen, output_dim, 1))(context_vector)
16
17
6 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
950 if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
951 return self._functional_construction_call(inputs, args, kwargs,
--> 952 input_list)
953
954 # Maintains info about the `Layer.call` stack.
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
1089 # Check input assumptions set after layer building, e.g. input shape.
1090 outputs = self._keras_tensor_symbolic_call(
-> 1091 inputs, input_masks, args, kwargs)
1092
1093 if outputs is None:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in _keras_tensor_symbolic_call(self, inputs, input_masks, args, kwargs)
820 return nest.map_structure(keras_tensor.KerasTensor, output_signature)
821 else:
--> 822 return self._infer_output_signature(inputs, args, kwargs, input_masks)
823
824 def _infer_output_signature(self, inputs, args, kwargs, input_masks):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in _infer_output_signature(self, inputs, args, kwargs, input_masks)
861 # TODO(kaftan): do we maybe_build here, or have we already done it?
862 self._maybe_build(inputs)
--> 863 outputs = call_fn(inputs, *args, **kwargs)
864
865 self._handle_activity_regularization(inputs, outputs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in call(self, inputs)
555 # Set the static shape for the result since it might lost during array_ops
556 # reshape, eg, some `None` dim in the result could be inferred.
--> 557 result.set_shape(self.compute_output_shape(inputs.shape))
558 return result
559
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in compute_output_shape(self, input_shape)
546 output_shape = [input_shape[0]]
547 output_shape += self._fix_unknown_dimension(input_shape[1:],
--> 548 self.target_shape)
549 return tensor_shape.TensorShape(output_shape)
550
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in _fix_unknown_dimension(self, input_shape, output_shape)
534 output_shape[unknown] = original // known
535 elif original != known:
--> 536 raise ValueError(msg)
537 return output_shape
538
ValueError: total size of new array must be unchanged, input_shape = [256], output_shape = [2500, 100, 1]
kindly help me

A simple linear regression model with a DenseVariational layer in Tensorflow-Probability returns: TypeError: 'NoneType' object is not callable

This is an attempt to use TensorFlow Probability, and more specifically its DenseVariational layer, but it fails for some reason. How can I correct the code?
x_train = np.linspace(-1, 1, 100)[:, np.newaxis]
y_train = x_train + 0.3*np.random.randn(100)[:, np.newaxis]
# Prior over the layer's weights; passed to DenseVariational as make_prior_fn.
def prior(kernel_size, bias_size, dtype = None):
# n is the total number of parameters (kernel plus bias) the distribution covers.
n = kernel_size + bias_size
# One-layer model whose output is a diagonal multivariate normal with zero
# location and unit scale in each of the n dimensions; the lambda ignores its
# input t.
prior_model = Sequential([
tfpl.DistributionLambda(
lambda t: tfd.MultivariateNormalDiag(loc = tf.zeros(n) , scale_diag = tf.ones(n)
))
])
# NOTE(review): there is no `return prior_model` here, so prior() returns None.
def posterior(kernel_size, bias_size, dtype=None):
    """Build the variational posterior model for a DenseVariational layer.

    The posterior is a multivariate normal over all ``kernel_size + bias_size``
    parameters of the layer (event size ``n``). It is parameterized by a
    location vector and a lower-triangular scale matrix, so it can represent
    correlations between the weights.

    Args:
        kernel_size: Number of kernel parameters of the layer.
        bias_size: Number of bias parameters of the layer.
        dtype: Dtype forwarded to the trainable ``VariableLayer``.

    Returns:
        A ``Sequential`` model whose output is a ``MultivariateNormalTriL``
        distribution.
    """
    n = kernel_size + bias_size
    posterior_model = Sequential([
        # Trainable variables holding the distribution's parameters. Their
        # count is whatever MultivariateNormalTriL needs for event size n.
        tfpl.VariableLayer(tfpl.MultivariateNormalTriL.params_size(n), dtype=dtype),
        # Turns the variables above into the distribution object returned to
        # the variational layer.
        tfpl.MultivariateNormalTriL(n),
    ])
    return posterior_model
model = Sequential([
tfpl.DenseVariational(
input_shape = (1, ), # The input is of dimensionality 1, a series
units = 1, # A linear regression is represented by a Dense layer with one single unit
make_prior_fn = prior, # We pass the function we have defined which returns the prior distribution on the weights
make_posterior_fn = posterior, # We pass the function we have defined which returns the variational approximation of the posterior distribution on the weights
kl_weight = 1/ x_train.shape[0], # Tensorflow scales the likelihood loss calculated using the mini-batch to become an unbiased estimator of the true loss but does not do the
# same for the DL divergence loss. Here we instruct it to do the necessary scaling.
kl_use_exact = True # Unless there is a closed form equation for the KL divergence in the library of Tensorflow setting True will return error. By setting False instead
# the KL Divergence will be approxiated using Sampling
)
])
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-14-e7cf0bfd5902> in <module>
17 # same for the DL divergence loss. Here we instruct it to do the necessary scaling.
18
---> 19 kl_use_exact = True # Unless there is a closed form equation for the KL divergence in the library of Tensorflow setting True will return error. By setting False instead
20 # the KL Divergence will be approxiated using Sampling
21
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\sequential.py in __init__(self, layers, name)
140 layers = [layers]
141 for layer in layers:
--> 142 self.add(layer)
143
144 @property
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\sequential.py in add(self, layer)
204 # and create the node connecting the current layer
205 # to the input layer we just created.
--> 206 layer(x)
207 set_inputs = True
208
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\base_layer.py in __call__(self, *args, **kwargs)
924 if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
925 return self._functional_construction_call(inputs, args, kwargs,
--> 926 input_list)
927
928 # Maintains info about the `Layer.call` stack.
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
1115 try:
1116 with ops.enable_auto_cast_variables(self._compute_dtype_object):
-> 1117 outputs = call_fn(cast_inputs, *args, **kwargs)
1118
1119 except errors.OperatorNotAllowedInGraphError as e:
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\autograph\impl\api.py in wrapper(*args, **kwargs)
253 try:
254 with conversion_ctx:
--> 255 return converted_call(f, args, kwargs, options=options)
256 except Exception as e: # pylint:disable=broad-except
257 if hasattr(e, 'ag_error_metadata'):
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\autograph\impl\api.py in converted_call(f, args, kwargs, caller_fn_scope, options)
455 if conversion.is_in_whitelist_cache(f, options):
456 logging.log(2, 'Whitelisted %s: from cache', f)
--> 457 return _call_unconverted(f, args, kwargs, options, False)
458
459 if ag_ctx.control_status_ctx().status == ag_ctx.Status.DISABLED:
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\autograph\impl\api.py in _call_unconverted(f, args, kwargs, options, update_cache)
337
338 if kwargs is not None:
--> 339 return f(*args, **kwargs)
340 return f(*args)
341
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow_probability\python\layers\dense_variational_v2.py in call(self, inputs)
120
121 q = self._posterior(inputs)
--> 122 r = self._prior(inputs)
123 self.add_loss(self._kl_divergence_fn(q, r))
124
TypeError: 'NoneType' object is not callable
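Did you forget to return the model? Your prior function builds prior_model but never returns it, so the call returns None, and DenseVariational then fails when it calls self._prior(inputs). Add the return statement: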
def prior(kernel_size, bias_size, dtype=None):
    """Build the prior model for a DenseVariational layer.

    The prior is a diagonal multivariate normal with zero location and unit
    scale over all ``kernel_size + bias_size`` parameters of the layer.

    Args:
        kernel_size: Number of kernel parameters of the layer.
        bias_size: Number of bias parameters of the layer.
        dtype: Accepted for interface compatibility; not used here.

    Returns:
        A ``tf.keras.Sequential`` model whose output is the prior
        distribution. Returning it is essential: without the ``return`` the
        function yields ``None`` and the layer cannot call it.
    """
    n = kernel_size + bias_size
    prior_model = tf.keras.Sequential([
        tfp.layers.DistributionLambda(
            # The prior is fixed, so the layer input t is ignored.
            lambda t: tfd.MultivariateNormalDiag(loc=tf.zeros(n), scale_diag=tf.ones(n))
        )
    ])
    return prior_model

Non-Stateful LSTM Issues with Keras

Good day,
I am trying to create a LSTM model (stateful or non-stateful) but running into several issues.
I am attempting to add a layer using:
model = Sequential()
...
model.add(LSTM(c['num_rnn_unit'],
activation=c['rnn_activation'],
dropout=c['dropout_rnn_input'],
recurrent_dropout=c['dropout_rnn_recurrent'],
return_sequences=True,
stateful=False,
#batch_input_shape=(c['batch_size'],c['num_steps'], c['input_dim'])
))
where:
'num_rnn_unit': np.random.choice([16, 32, 64, 128, 256, 512, 1024])
'rnn_activation': np.random.choice(['tanh', 'sigmoid'])
'dropout_rnn_input': 0
'batch_size': np.random.choice([64, 128])
'num_steps':np.random.choice([5, 10, 15])
'input_dim': 64
I experimented with "stateful=True" and the commented-out "batch_input_shape", but this caused additional errors; others have run into them as well, and I found no workable solution.
So I stuck with trying to make "stateful=False" work, but it yields the error below.
Any thoughts on why this error is coming up? Thanks in advance!
Here is the traceback:
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\engine\sequential.py in add(self, layer)
180 self.inputs = network.get_source_inputs(self.outputs[0])
181 elif self.outputs:
--> 182 output_tensor = layer(self.outputs[0])
183 if isinstance(output_tensor, list):
184 raise TypeError('All layers in a Sequential model '
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\layers\recurrent.py in __call__(self, inputs, initial_state, constants, **kwargs)
539
540 if initial_state is None and constants is None:
--> 541 return super(RNN, self).__call__(inputs, **kwargs)
542
543 # If any of `initial_state` or `constants` are specified and are Keras
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\backend\tensorflow_backend.py in symbolic_fn_wrapper(*args, **kwargs)
73 if _SYMBOLIC_SCOPE.value:
74 with get_graph().as_default():
---> 75 return func(*args, **kwargs)
76 else:
77 return func(*args, **kwargs)
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\engine\base_layer.py in __call__(self, inputs, **kwargs)
487 # Actually call the layer,
488 # collecting output(s), mask(s), and shape(s).
--> 489 output = self.call(inputs, **kwargs)
490 output_mask = self.compute_mask(inputs, previous_mask)
491
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\layers\recurrent.py in call(self, inputs, mask, training, initial_state)
1689 mask=mask,
1690 training=training,
-> 1691 initial_state=initial_state)
1692
1693 @property
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\layers\recurrent.py in call(self, inputs, mask, training, initial_state, constants)
635 mask = mask[0]
636
--> 637 if len(initial_state) != len(self.states):
638 raise ValueError('Layer has ' + str(len(self.states)) +
639 ' states but was passed ' +
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\layers\recurrent.py in states(self)
436 num_states = 1
437 else:
--> 438 num_states = len(self.cell.state_size)
439 return [None for _ in range(num_states)]
440 return self._states
TypeError: object of type 'numpy.int32' has no len()
Would this be the first layer "input_shape", with batch_normalization=True:
if c['batch_normalization']:
model.add(BatchNormalization(input_shape=(c['num_steps'], c['input_dim'])))
model.add(TimeDistributed(Dropout(c['dropout_input']),
input_shape=(c['num_steps'], c['input_dim'])))

Why am I getting a Pytorch Runtime Error on Test Set

I have a binary image classification model built on ResNeXt. I keep getting a runtime error when it reaches the test set. The error message is:
RuntimeError: Expected object of backend CPU but got backend CUDA for argument #2 'weight'
I am sending my test set tensors to my GPU like my train model. I've looked at the following and I'm doing what was suggested here as stated above.
Here is my model code:
resnext = models.resnext50_32x4d(pretrained=True)
resnext = resnext.to(device)
for param in resnext.parameters():
param.requires_grad = True
resnext.classifier = nn.Sequential(nn.Linear(2048, 1000),
nn.ReLU(),
nn.Dropout(0.4),
nn.Linear(1000, 2),
nn.Softmax(dim = 1))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnext.classifier.parameters(), lr=0.001)
import time
start_time = time.time()
epochs = 1
max_trn_batch = 5
max_tst_batch = 156
y_val_list = []
policy_list = []
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
for i in tqdm(range(0, max_trn_batch)):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train, policy) in enumerate(train_loader):
#print(y_train, policy)
X_train = X_train.to(device)
y_train = y_train.to(device)
if b == max_trn_batch:
break
b+=1
# Apply the model
y_pred = resnext(X_train)
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%1 == 0:
print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/63610] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()/(100*b):7.3f}%')
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test, policy) in enumerate(test_loader):
policy_list.append(policy)
X_test.to(device)
y_test.to(device)
if b == max_tst_batch:
break
# Apply the model
y_val = resnext(X_test)
y_val_list.append(y_val.data)
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
Here is the full traceback:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-84-48bce2e8d4fa> in <module>
60
61 # Apply the model
---> 62 y_val = resnext(X_test)
63 y_val_list.append(y_val.data)
64 # Tally the number of correct predictions
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
545 result = self._slow_forward(*input, **kwargs)
546 else:
--> 547 result = self.forward(*input, **kwargs)
548 for hook in self._forward_hooks.values():
549 hook_result = hook(self, input, result)
C:\ProgramData\Anaconda3\lib\site-packages\torchvision\models\resnet.py in forward(self, x)
194
195 def forward(self, x):
--> 196 x = self.conv1(x)
197 x = self.bn1(x)
198 x = self.relu(x)
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
545 result = self._slow_forward(*input, **kwargs)
546 else:
--> 547 result = self.forward(*input, **kwargs)
548 for hook in self._forward_hooks.values():
549 hook_result = hook(self, input, result)
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\conv.py in forward(self, input)
341
342 def forward(self, input):
--> 343 return self.conv2d_forward(input, self.weight)
344
345 class Conv3d(_ConvNd):
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\conv.py in conv2d_forward(self, input, weight)
338 _pair(0), self.dilation, self.groups)
339 return F.conv2d(input, weight, self.bias, self.stride,
--> 340 self.padding, self.dilation, self.groups)
341
342 def forward(self, input):
RuntimeError: Expected object of backend CPU but got backend CUDA for argument #2 'weight'
Again, my tensors and the model are sent to the GPU so I'm not sure what is going on. Does anyone see my mistake?
[...] my tensors and the model are sent to the GPU [...]
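Not the test tensors. Calling .to(device) on a tensor is not an in-place operation: it returns a new tensor on the target device and leaves the original where it was, so the result has to be assigned back. It is a simple mistake: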
X_test.to(device)
y_test.to(device)
should be
X_test = X_test.to(device)
y_test = y_test.to(device)

Problem in GridSearching a LSTM network - Batch_size issue

I wrote code to apply the grid-search method to an LSTM network built with Keras. Everything seems to work fine, but I have a problem with passing the batch_size.
I tried to change the format of batch_size but, as I understand it, it must be a tuple.
#LSTM ok
from Methods.LSTM_1HL import LSTM_method
Yhat_train_LSTM, Yhat_test_LSTM = LSTM_method(X_train, X_test, Y_train,
Y_test)
def create_model(optimizer, hl1_nodes, input_shape):
    """Build and compile the single-hidden-layer LSTM network (electric load).

    Args:
        optimizer: Optimizer passed straight to ``model.compile``.
        hl1_nodes: Number of units in the LSTM hidden layer.
        input_shape: Input shape passed to the LSTM layer.

    Returns:
        The compiled ``Sequential`` model. Its summary is printed as a side
        effect.
    """
    model = Sequential()
    # An LSTM layer that is followed by another LSTM layer must set
    # return_sequences=True; this is the only LSTM layer, so it stays False.
    model.add(LSTM(units=hl1_nodes, input_shape=input_shape, return_sequences=False))
    model.add(Dense(1, activation="linear"))  # output layer
    model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['accuracy'])
    model.summary()
    return model
# Scale the data, grid-search a KerasRegressor built by create_model, and
# return the de-normalized predictions for the training and test sets.
def LSTM_method(X_train, X_test, Y_train, Y_test):
# normalize X and Y data
mmsx = MinMaxScaler()
mmsy = MinMaxScaler()
X_train = mmsx.fit_transform(X_train)
X_test = mmsx.transform(X_test)
Y_train = mmsy.fit_transform(Y_train)
Y_test = mmsy.transform(Y_test)
# Insert a middle axis of length 1: (samples, features) -> (samples, 1, features).
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
# NN for Electric Load
# LSTM Input Shape
time_steps = 1 # number of time-steps you are feeding a sequence (?)
# NOTE(review): X_train was already reshaped above, so shape[1] is the
# length-1 middle axis here, not the feature count -- shape[2] may have been
# intended; confirm.
inputs_numb = X_train.shape[1] # number of inputs
input_shape=(time_steps, inputs_numb)
model = KerasRegressor(build_fn=create_model,verbose=1)
#GridSearch code
start=time()
optimizers = ['rmsprop', 'adam']
epochs = np.array([100, 500, 1000])
hl1_nodes = np.array([1, 10, 50])
btcsz = np.array([1,X_train.shape[0]])
# NOTE(review): GridSearchCV treats every param_grid value as a sequence of
# candidate settings, so the input_shape tuple is offered as two separate
# integer candidates rather than as one shape; wrapping it in a list,
# [(time_steps, inputs_numb)], keeps it as a single candidate.
param_grid = dict(optimizer=optimizers, hl1_nodes=hl1_nodes, input_shape=input_shape, nb_epoch=epochs,batch_size=btcsz)
# NOTE(review): accuracy_score is a classification metric; presumably a
# regression metric is wanted for this KerasRegressor -- confirm.
scoring = make_scorer(accuracy_score) #in order to use a metric as a scorer
grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring = scoring)
grid_result = grid.fit(X_train, Y_train)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
# NOTE(review): grid_scores_ may not exist on newer scikit-learn releases
# (cv_results_ replaced it) -- confirm against the installed version.
for params, mean_score, scores in grid_result.grid_scores_:
print("%f (%f) with: %r" % (scores.mean(), scores.std(), params))
print("total time:",time()-start)
# Predictions - Electric Load
Yhat_train = grid_result.predict(X_train, verbose=0)
# The test set gets the same 3-D reshape as the training set before predicting.
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
Yhat_test = grid_result.predict(X_test, verbose=0)
# Denormalization - Electric Load
Yhat_train = mmsy.inverse_transform(Yhat_train)
Yhat_test = mmsy.inverse_transform(Yhat_test)
Y_train = mmsy.inverse_transform(Y_train)
Y_test = mmsy.inverse_transform(Y_test)
return Yhat_train, Yhat_test
Below the error I get:
TypeError Traceback (most recent call last)
in
10 #from Methods.LSTM_1HL import create_model
11
---> 12 Yhat_train_LSTM, Yhat_test_LSTM = LSTM_method(X_train, X_test, Y_train, Y_test)
c:\Users\ER180124\Code\LoadForecasting\Methods\LSTM_1HL.py in LSTM_method(X_train, X_test, Y_train, Y_test)
62 scoring = make_scorer(accuracy_score) #in order to use a metric as a scorer
63 grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring = scoring)
---> 64 grid_result = grid.fit(X_train, Y_train)
65
66 print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
~\.conda\envs\PierEnv\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
720 return results_container[0]
721
--> 722 self._run_search(evaluate_candidates)
723
724 results = results_container[0]
~\.conda\envs\PierEnv\lib\site-packages\sklearn\model_selection\_search.py in _run_search(self, evaluate_candidates)
1189 def _run_search(self, evaluate_candidates):
1190 """Search all candidates in param_grid"""
-> 1191 evaluate_candidates(ParameterGrid(self.param_grid))
1192
1193
~\.conda\envs\PierEnv\lib\site-packages\sklearn\model_selection\_search.py in evaluate_candidates(candidate_params)
709 for parameters, (train, test)
710 in product(candidate_params,
--> 711 cv.split(X, y, groups)))
712
713 all_candidate_params.extend(candidate_params)
~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
915 # remaining jobs.
916 self._iterating = False
--> 917 if self.dispatch_one_batch(iterator):
918 self._iterating = self._original_iterator is not None
919
~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
757 return False
758 else:
--> 759 self._dispatch(tasks)
760 return True
761
~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
714 with self._lock:
715 job_idx = len(self._jobs)
--> 716 job = self._backend.apply_async(batch, callback=cb)
717 # A job can complete so quickly than its callback is
718 # called before we get here, causing self._jobs to
~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
547 # Don't delay the application, to avoid keeping the input
548 # arguments in memory
--> 549 self.results = batch()
550
551 def get(self):
~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\parallel.py in (.0)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
~\.conda\envs\PierEnv\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
526 estimator.fit(X_train, **fit_params)
527 else:
--> 528 estimator.fit(X_train, y_train, **fit_params)
529
530 except Exception as e:
~\.conda\envs\PierEnv\lib\site-packages\keras\wrappers\scikit_learn.py in fit(self, x, y, **kwargs)
139 **self.filter_sk_params(self.build_fn.__call__))
140 else:
--> 141 self.model = self.build_fn(**self.filter_sk_params(self.build_fn))
142
143 loss_name = self.model.loss
c:\Users\ER180124\Code\LoadForecasting\Methods\LSTM_1HL.py in create_model(optimizer, hl1_nodes, input_shape)
19 # LSTM layers followed by other LSTM layer must have the parameter "return_sequences" set at True
20 model = Sequential()
---> 21 model.add(LSTM(units = hl1_nodes , input_shape=input_shape, return_sequences=False))
22 model.add(Dense(1, activation="linear")) # output layer
23 model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['accuracy'])
~\.conda\envs\PierEnv\lib\site-packages\keras\legacy\interfaces.py in wrapper(*args, **kwargs)
89 warnings.warn('Update your `' + object_name + '` call to the ' +
90 'Keras 2 API: ' + signature, stacklevel=2)
---> 91 return func(*args, **kwargs)
92 wrapper._original_function = func
93 return wrapper
~\.conda\envs\PierEnv\lib\site-packages\keras\layers\recurrent.py in __init__(self, units, activation, recurrent_activation, use_bias, kernel_initializer, recurrent_initializer, bias_initializer, unit_forget_bias, kernel_regularizer, recurrent_regularizer, bias_regularizer, activity_regularizer, kernel_constraint, recurrent_constraint, bias_constraint, dropout, recurrent_dropout, implementation, return_sequences, return_state, go_backwards, stateful, unroll, **kwargs)
2183 stateful=stateful,
2184 unroll=unroll,
-> 2185 **kwargs)
2186 self.activity_regularizer = regularizers.get(activity_regularizer)
2187
~\.conda\envs\PierEnv\lib\site-packages\keras\layers\recurrent.py in __init__(self, cell, return_sequences, return_state, go_backwards, stateful, unroll, **kwargs)
406 '(tuple of integers, '
407 'one integer per RNN state).')
--> 408 super(RNN, self).__init__(**kwargs)
409 self.cell = cell
410 self.return_sequences = return_sequences
~\.conda\envs\PierEnv\lib\site-packages\keras\engine\base_layer.py in __init__(self, **kwargs)
145 batch_size = None
146 batch_input_shape = (
--> 147 batch_size,) + tuple(kwargs['input_shape'])
148 self.batch_input_shape = batch_input_shape
149
TypeError: 'int' object is not iterable
I do not understand why, in the last part of the error message, I get "batch_size = None" although I define a batch size that is a tuple.
Well, I think I got your problem.
When you run a grid search, the parameter grid is generated from your parameter dictionary by taking the cross product of the candidate values listed for each key, so every value in the dictionary is treated as a sequence of candidates. Your dictionary has input_shape set to (time_steps, inputs_numb), which is itself a sequence of two integers, so the candidates for input_shape become the single integers time_steps and inputs_numb. The last frame of the stack trace then evaluates (None,) + tuple(time_steps) or (None,) + tuple(inputs_numb), and calling tuple() on an int raises "TypeError: 'int' object is not iterable". Instead, you want your configuration space to contain exactly one candidate for input_shape: the whole tuple.
What you should do is to convert this line
input_shape=(time_steps, inputs_numb)
to this:
input_shape=[(time_steps, inputs_numb)]

Resources