I am trying to implement a feature extractor LSTM network. First, I have implemented the input LSTM layer with different input and output shapes. The input was 18 which is the features(column) numbers of data tensor and output was 256 nodes which is the hidden layer nodes used in next layers. But, the shape was incompatible in the first layer. So, I changed the first layer to have both 18 for input and output nodes, the problem did not solved.
I also checked with squeezed and unsqueezed (unsqueeze(0)) tensors and both were incompatible with the mentioned input layers.
How should I set parameters of input LSTM layer?
The main architecture of my network was with 18 input nodes in input and 256 output in the first layer:
FeatureExtractorNetworkLSTM(
(fenet): ModuleList(
(0): LSTM(18, 256)
(1): Dropout(p=0.3, inplace=False)
(2): LSTM(256, 256)
(3): Dropout(p=0.3, inplace=False)
(4): LSTM(256, 256)
(5): Dropout(p=0.3, inplace=False)
(6): LSTM(256, 256)
(7): Dropout(p=0.3, inplace=False)
(8): LSTM(256, 256)
(9): Dropout(p=0.3, inplace=False)
(10): LSTM(256, 256)
)
)
And then changed the architecture to below, which has 18 nodes both in input and output:
FeatureExtractorNetworkLSTM(
(fenet): ModuleList(
(0): LSTM(18, 18)
(1): Dropout(p=0.3, inplace=False)
(2): LSTM(256, 256)
(3): Dropout(p=0.3, inplace=False)
(4): LSTM(256, 256)
(5): Dropout(p=0.3, inplace=False)
(6): LSTM(256, 256)
(7): Dropout(p=0.3, inplace=False)
(8): LSTM(256, 256)
(9): Dropout(p=0.3, inplace=False)
(10): LSTM(256, 256)
)
)
but again this error is shown:
RuntimeError : input.size(-1) must be equal to input_size. Expected 18, got 1
Note that using batch is not necessity.
The code of network:
class FeatureExtractorNetworkLSTM(nn.Module):
def __init__(self,
input_size=256, hidden_size=256, output_size=64,
alpha=0.001,
n_layers=3, dropout_prob=0,
optimizer='Adam', chkpt_dir='runs/ppo'):
super(FeatureExtractorNetworkLSTM, self).__init__()
self.checkpoint_file = os.path.join(chkpt_dir, 'fe_LSTM_ppo')
self.fenet = nn.ModuleList()
for n in range(n_layers):
if n == 0:
self.fenet.append(nn.LSTM(input_size, input_size, 1))
#input_size = input_size if n == 0 else hidden_size
elif n == 1:
self.fenet.append(nn.LSTM(input_size, hidden_size, 1))
if dropout_prob > 0:
self.fenet.append(nn.Dropout(dropout_prob))
else:
self.fenet.append(nn.LSTM(hidden_size, hidden_size, 1))
Sample of input tensor:
tensor([[ 1.0658e+00, 1.0675e+00, 1.0637e+00, 1.0652e+00, 1.4717e+04,
6.2318e+01, 6.0329e+01, 5.8142e+01, 1.0639e+00, 1.0592e+00,
1.0549e+00, 6.2538e+01, 2.6001e-03, 2.6311e-04, 2.3370e-03,
1.0681e+00, 1.0558e+00, 1.0619e+00],
[ 1.0652e+00, 1.0667e+00, 1.0647e+00, 1.0665e+00, 1.1889e+04,
6.5088e+01, 6.2078e+01, 5.9319e+01, 1.0643e+00, 1.0595e+00,
1.0550e+00, 6.3402e+01, 2.6151e-03, 2.2250e-04, 2.3926e-03,
1.0684e+00, 1.0564e+00, 1.0624e+00],
[ 1.0665e+00, 1.0668e+00, 1.0645e+00, 1.0648e+00, 7.0910e+03,
5.8982e+01, 5.8555e+01, 5.7163e+01, 1.0644e+00, 1.0597e+00,
1.0551e+00, 5.8190e+01, 2.4615e-03, 5.5073e-05, 2.4064e-03,
1.0685e+00, 1.0569e+00, 1.0627e+00],
[ 1.0648e+00, 1.0651e+00, 1.0630e+00, 1.0641e+00, 7.8530e+03,
5.6691e+01, 5.7200e+01, 5.6324e+01, 1.0643e+00, 1.0599e+00,
1.0552e+00, 5.7425e+01, 2.2588e-03, -1.1809e-04, 2.3769e-03,
1.0685e+00, 1.0576e+00, 1.0630e+00],
[ 1.0641e+00, 1.0642e+00, 1.0621e+00, 1.0626e+00, 6.8300e+03,
5.1931e+01, 5.4325e+01, 5.4523e+01, 1.0641e+00, 1.0600e+00,
1.0553e+00, 5.3252e+01, 1.9554e-03, -3.3719e-04, 2.2926e-03,
1.0684e+00, 1.0580e+00, 1.0632e+00],
[ 1.0626e+00, 1.0628e+00, 1.0610e+00, 1.0623e+00, 6.1390e+03,
5.0850e+01, 5.3664e+01, 5.4107e+01, 1.0638e+00, 1.0601e+00,
1.0553e+00, 4.8368e+01, 1.6675e-03, -5.0007e-04, 2.1676e-03,
1.0681e+00, 1.0587e+00, 1.0634e+00]], device='cuda:0')
and the current input tensor shape is:
torch.Size([6, 18])
Complete traceback
RuntimeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_4592/1425274335.py in <module>
7 for t in range(len(env.data)): # while not env.done
8 # Select and perform an action
----> 9 action, probs, value = agent.choose_action(state)
10 reward, done, _ = env.step(action)
11
c:\analytics_ai\analytics_ai\trading_bots\fe_ppo_agent.py in choose_action(self, state)
146 self.actor.device)
147
--> 148 fe_states = self.fe_net(state)
149
150 dist = self.actor(fe_states)
C:\Anaconda3\envs\torchgpu\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
c:\analytics_ai\analytics_ai\trading_bots\models.py in forward(self, state)
418 #dist = self.actor(state)
419 for i, l in enumerate(self.fenet):
--> 420 f_state = l(state)
421 #dist = torch.distributions.categorical.Categorical(dist)
422
C:\Anaconda3\envs\torchgpu\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
C:\Anaconda3\envs\torchgpu\lib\site-packages\torch\nn\modules\rnn.py in forward(self, input, hx)
687 hx = self.permute_hidden(hx, sorted_indices)
688
--> 689 self.check_forward_args(input, hx, batch_sizes)
690 if batch_sizes is None:
691 result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
C:\Anaconda3\envs\torchgpu\lib\site-packages\torch\nn\modules\rnn.py in check_forward_args(self, input, hidden, batch_sizes)
630 batch_sizes: Optional[Tensor],
631 ):
--> 632 self.check_input(input, batch_sizes)
633 self.check_hidden_size(hidden[0], self.get_expected_hidden_size(input, batch_sizes),
634 'Expected hidden[0] size {}, got {}')
C:\Anaconda3\envs\torchgpu\lib\site-packages\torch\nn\modules\rnn.py in check_input(self, input, batch_sizes)
203 expected_input_dim, input.dim()))
204 if self.input_size != input.size(-1):
--> 205 raise RuntimeError(
206 'input.size(-1) must be equal to input_size. Expected {}, got {}'.format(
207 self.input_size, input.size(-1)))
RuntimeError: input.size(-1) must be equal to input_size. Expected 18, got 1
I also checked it with torch.unsqueeze(0) which converts the shape to:
torch.Size([1, 6, 18])
but did not worked.
Related
I have a trained LSTM-AE, of which the architecture is as follows:
In brief, I have an LSTM-AE of depth 3, the number of cells on the LSTM layers on the encoder side are [120, 80, 50] (and symmetric for the decoder). I built the model using the code shown on this page. For information, because I want to train the LSTM-AT directly on variable-length time series, so I didn't specify the timestamps in the input layer, which means the model is trained on batches of size 1 (one time series per batch).
I can extract the encoder just fine, but I cannot do the same for the decoder :-(... My goal is to check, given a vector of 50 features (which are extracted by the encoder), whether the decoder can reconstruct the input series.
Here's my attempt so far:
# load the full autoencoder
model = load_model(path_to_model)
# reconstruct the decoder
in_layer = Input(shape=(None, 50))
time_dist = model.layers[-1]
dec_1 = model.layers[-2]
dec_2 = model.layers[-3]
dec_3 = model.layers[-4]
rep_vec = model.layers[-5]
out_layer = time_dist(dec_1(dec_2(dec_3(rep_vec(in_layer)))))
decoder = Model(in_layer, out_layer, name='decoder')
res = decoder(input_feature) # input_feature has shape (50,)
I obtained this error:
InvalidArgumentError: slice index 1 of dimension 0 out of bounds. [Op:StridedSlice] name: decoder/repeat/strided_slice/
If you are interested in the full error log...
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
Input In [86], in <module>
13 out_layer = time_dist(dec_1(dec_2(dec_3(rep_vec(in_layer)))))
14 decoder = Model(in_layer, out_layer, name='decoder')
---> 15 res = decoder(input_feature)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:1030, in Layer.__call__(self, *args, **kwargs)
1026 inputs = self._maybe_cast_inputs(inputs, input_list)
1028 with autocast_variable.enable_auto_cast_variables(
1029 self._compute_dtype_object):
-> 1030 outputs = call_fn(inputs, *args, **kwargs)
1032 if self._activity_regularizer:
1033 self._handle_activity_regularization(inputs, outputs)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/functional.py:420, in Functional.call(self, inputs, training, mask)
401 #doc_controls.do_not_doc_inheritable
402 def call(self, inputs, training=None, mask=None):
403 """Calls the model on new inputs.
404
405 In this case `call` just reapplies
(...)
418 a list of tensors if there are more than one outputs.
419 """
--> 420 return self._run_internal_graph(
421 inputs, training=training, mask=mask)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/functional.py:556, in Functional._run_internal_graph(self, inputs, training, mask)
553 continue # Node is not computable, try skipping.
555 args, kwargs = node.map_arguments(tensor_dict)
--> 556 outputs = node.layer(*args, **kwargs)
558 # Update tensor_dict.
559 for x_id, y in zip(node.flat_output_ids, nest.flatten(outputs)):
File ~/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:1030, in Layer.__call__(self, *args, **kwargs)
1026 inputs = self._maybe_cast_inputs(inputs, input_list)
1028 with autocast_variable.enable_auto_cast_variables(
1029 self._compute_dtype_object):
-> 1030 outputs = call_fn(inputs, *args, **kwargs)
1032 if self._activity_regularizer:
1033 self._handle_activity_regularization(inputs, outputs)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/keras/layers/core.py:919, in Lambda.call(self, inputs, mask, training)
915 return var
917 with backprop.GradientTape(watch_accessed_variables=True) as tape,\
918 variable_scope.variable_creator_scope(_variable_creator):
--> 919 result = self.function(inputs, **kwargs)
920 self._check_variables(created_variables, tape.watched_variables())
921 return result
File D:/PhD/Code/feature_learning/train_models/train_lstmae.py:30, in repeat_vector(args)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206, in add_dispatch_support.<locals>.wrapper(*args, **kwargs)
204 """Call target, and fall back on dispatchers if there is a TypeError."""
205 try:
--> 206 return target(*args, **kwargs)
207 except (TypeError, ValueError):
208 # Note: convert_to_eager_tensor currently raises a ValueError, not a
209 # TypeError, when given unexpected types. So we need to catch both.
210 result = dispatch(wrapper, args, kwargs)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/ops/array_ops.py:1040, in _slice_helper(tensor, slice_spec, var)
1038 var_empty = constant([], dtype=dtypes.int32)
1039 packed_begin = packed_end = packed_strides = var_empty
-> 1040 return strided_slice(
1041 tensor,
1042 packed_begin,
1043 packed_end,
1044 packed_strides,
1045 begin_mask=begin_mask,
1046 end_mask=end_mask,
1047 shrink_axis_mask=shrink_axis_mask,
1048 new_axis_mask=new_axis_mask,
1049 ellipsis_mask=ellipsis_mask,
1050 var=var,
1051 name=name)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206, in add_dispatch_support.<locals>.wrapper(*args, **kwargs)
204 """Call target, and fall back on dispatchers if there is a TypeError."""
205 try:
--> 206 return target(*args, **kwargs)
207 except (TypeError, ValueError):
208 # Note: convert_to_eager_tensor currently raises a ValueError, not a
209 # TypeError, when given unexpected types. So we need to catch both.
210 result = dispatch(wrapper, args, kwargs)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/ops/array_ops.py:1213, in strided_slice(input_, begin, end, strides, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask, var, name)
1210 if strides is None:
1211 strides = ones_like(begin)
-> 1213 op = gen_array_ops.strided_slice(
1214 input=input_,
1215 begin=begin,
1216 end=end,
1217 strides=strides,
1218 name=name,
1219 begin_mask=begin_mask,
1220 end_mask=end_mask,
1221 ellipsis_mask=ellipsis_mask,
1222 new_axis_mask=new_axis_mask,
1223 shrink_axis_mask=shrink_axis_mask)
1225 parent_name = name
1227 if var is not None:
File ~/venv/lib/python3.8/site-packages/tensorflow/python/ops/gen_array_ops.py:10505, in strided_slice(input, begin, end, strides, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask, name)
10503 return _result
10504 except _core._NotOkStatusException as e:
> 10505 _ops.raise_from_not_ok_status(e, name)
10506 except _core._FallbackException:
10507 pass
File ~/venv/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:6897, in raise_from_not_ok_status(e, name)
6895 message = e.message + (" name: " + name if name is not None else "")
6896 # pylint: disable=protected-access
-> 6897 six.raise_from(core._status_to_exception(e.code, message), None)
File <string>:3, in raise_from(value, from_value)
InvalidArgumentError: slice index 1 of dimension 0 out of bounds. [Op:StridedSlice] name: decoder/repeat/strided_slice/
I appreciate very much any advice you would give me!
Edit
Here is the code I used to build the mode:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.initializers import GlorotUniform
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.backend import shape
def repeat_vector(args):
"""Builds the repeat vector layer dynamically by the size of the input series"""
layer_to_repeat = args[0]
sequence_layer = args[1]
return RepeatVector(shape(sequence_layer)[1])(layer_to_repeat)
n_atts = 3 # time series of 3 measurements
n_units = [120, 80, 50] # encoder - 1st layer: 120, 2nd layer: 80, 3rd layer: 50 (and symmetric for decoder)
n_layers = len(n_units)
init = GlorotUniform(seed=420)
reg = None
optimizer = Adam(learning_rate=0.0001)
activ = 'tanh'
loss_metric = 'mse'
inputs = Input(shape=(None, n_atts), name='input_layer')
# the encoder
encoded = LSTM(n_units[0], name='encoder_1', return_sequences=(n_layers != 1), kernel_initializer=init,
kernel_regularizer=reg, activation=activ)(inputs)
for i in range(1, n_layers):
if i != n_layers - 1:
encoded = LSTM(n_units[i], name='encoder_{}'.format(i + 1), return_sequences=(n_layers != 1),
kernel_initializer=init, kernel_regularizer=reg, activation=activ)(encoded)
else:
encoded = LSTM(n_units[i], name='encoder_{}'.format(i + 1), return_sequences=False,
kernel_initializer=init, kernel_regularizer=reg, activation=activ)(encoded)
# repeat the vector (plug the encoder to the decoder)
repeated = Lambda(repeat_vector, output_shape=(None, n_units[-1]), name='repeat')([encoded, inputs])
# the decoder
decoded = LSTM(n_units[n_layers - 1], return_sequences=True, name='decoder_1',
kernel_initializer=init, kernel_regularizer=reg, activation=activ)(repeated) # first layer
for i in range(1, n_layers):
decoded = LSTM(n_units[n_layers - 1 - i], return_sequences=True, name='decoder_{}'.format(i + 1),
kernel_initializer=init, kernel_regularizer=reg, activation=activ)(decoded)
# last layer
tdist = TimeDistributed(Dense(n_atts))(decoded)
# compile the model
model = Model(inputs, tdist, name='lstm-ae')
model.compile(optimizer=optimizer, loss=loss_metric)
For information, I use tensorflow 2.5.
Because the number of units is read from a config file, I wrote the code this way to add the layers programmatically.
I'm trying to train a multilayer perceptron based on the Iris dataset using TensorFlow in Pycharm with Jupyter Notebook. Every time I run my code it fails on the model.fit() function and gives the following error.
ValueError: Shapes (None, 1) and (None, 3) are incompatible.
I've tried playing around with different values for the hyperparameters but there's obviously something I'm not getting. Any help/pointers that anyone could provide would be much appreciated.
Here's my data setup and preprocessing:
import pandas as pd
import numpy as np
# read iris data into pandas dataframe
iris = pd.read_csv("data/IRIS.csv", header=0)
# apply label to index column
iris.index.name = "id"
# create copy of iris dataframe in which to store normalised values and keep original dataframe for comparison later on
iris_unnormalized = iris
iris_normalized = iris.copy()
# isolate columns with numerical values
iris_num = iris.select_dtypes(include=[np.number])
# find max value in each column
col_maxes = iris_num.max()
# find overall max value among all columns
iris_num_max = col_maxes.max()
# divide all numerical values by overall max value in order to normalize data to a value between 0 and 1
iris_num_norm = iris_num / iris_num_max
# reassign normalised values back to their corresponding columns
iris_normalized[iris_num_norm.columns] = iris_num_norm
# specify seed for reproducibility
np.random.seed(1671)
training = iris_normalized.sample(frac = 0.8)
test = iris_normalized.drop(training.index)
# initialize the training input and output list
# same for testing set
X_train = []
Y_train = []
X_test = []
Y_test = []
# loop through the dataframe and separate inputs and outputs for training and testing
for index, row in training.iterrows():
X_train.append([row['sepal length cm'], row['sepal width cm'], row['petal length cm'], row['petal width cm']])
Y_train.append([row['species']])
for index, row in test.iterrows():
X_test.append([row['sepal length cm'], row['sepal width cm'], row['petal length cm'], row['petal width cm']])
Y_test.append([row['species']])
X_train = np.array(X_train).astype('float32')
Y_train = np.array(Y_train)
X_test = np.array(X_test).astype('float32')
Y_test = np.array(Y_test)
print(X_train.shape, "training samples") # Output: (120, 4) training samples
print(X_test.shape, "test samples") # Output: (30, 4) test samples
Here's where I try to create the neural network:
import tensorflow as tf
from tensorflow import keras
NB_CLASSES = 3 # number of iris varieties
N_HIDDEN = 128
BATCH_SIZE = 10
VERBOSE = 1
VALIDATION_SPLIT = 0.2 # how much of training set to hold for validation
EPOCHS = 200
model = tf.keras.models.Sequential(
[
keras.layers.Dense(N_HIDDEN, input_shape=(10,4,), batch_size=BATCH_SIZE, name="dense_layer1", activation="relu"),
keras.layers.Dense(N_HIDDEN, input_shape=(4,), batch_size=BATCH_SIZE, name="dense_layer2", activation="relu"),
keras.layers.Dense(NB_CLASSES, input_shape=(4,), batch_size=BATCH_SIZE, name="dense_layer3", activation="softmax"),
]
)
model.summary()
################### model summary output: #####################
Layer (type) Output Shape Param #
=================================================================
dense_layer1 (Dense) (10, 10, 128) 640
_________________________________________________________________
dense_layer2 (Dense) (10, 10, 128) 16512
_________________________________________________________________
dense_layer3 (Dense) (10, 10, 3) 387
=================================================================
Total params: 17,539
Trainable params: 17,539
Non-trainable params: 0
# compiling the model
model.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['accuracy'])
#train the model
model.fit(X_train, Y_train,
batch_size=BATCH_SIZE,
epochs = EPOCHS,
verbose = VERBOSE,
validation_split = VALIDATION_SPLIT)
The data setup and normalization work fine but when I run the code to create the neural network I get the below error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_8888/2571387568.py in <module>
38
39 #train the model
---> 40 model.fit(X_train, Y_train,
41 batch_size=BATCH_SIZE,
42 epochs = EPOCHS,
\venv\lib\site-packages\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1182 _r=1):
1183 callbacks.on_train_batch_begin(step)
-> 1184 tmp_logs = self.train_function(iterator)
1185 if data_handler.should_sync:
1186 context.async_wait()
\venv\lib\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
883
884 with OptionalXlaContext(self._jit_compile):
--> 885 result = self._call(*args, **kwds)
886
887 new_tracing_count = self.experimental_get_tracing_count()
\venv\lib\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
931 # This is the first call of __call__, so we have to initialize.
932 initializers = []
--> 933 self._initialize(args, kwds, add_initializers_to=initializers)
934 finally:
935 # At this point we know that the initialization is complete (or less
\venv\lib\site-packages\tensorflow\python\eager\def_function.py in _initialize(self, args, kwds, add_initializers_to)
757 self._graph_deleter = FunctionDeleter(self._lifted_initializer_graph)
758 self._concrete_stateful_fn = (
--> 759 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
760 *args, **kwds))
761
\venv\lib\site-packages\tensorflow\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
3064 args, kwargs = None, None
3065 with self._lock:
-> 3066 graph_function, _ = self._maybe_define_function(args, kwargs)
3067 return graph_function
3068
\venv\lib\site-packages\tensorflow\python\eager\function.py in _maybe_define_function(self, args, kwargs)
3461
3462 self._function_cache.missed.add(call_context_key)
-> 3463 graph_function = self._create_graph_function(args, kwargs)
3464 self._function_cache.primary[cache_key] = graph_function
3465
\venv\lib\site-packages\tensorflow\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3296 arg_names = base_arg_names + missing_arg_names
3297 graph_function = ConcreteFunction(
-> 3298 func_graph_module.func_graph_from_py_func(
3299 self._name,
3300 self._python_function,
\venv\lib\site-packages\tensorflow\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes, acd_record_initial_resource_uses)
1005 _, original_func = tf_decorator.unwrap(python_func)
1006
-> 1007 func_outputs = python_func(*func_args, **func_kwargs)
1008
1009 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
\venv\lib\site-packages\tensorflow\python\eager\def_function.py in wrapped_fn(*args, **kwds)
666 # the function a weak reference to itself to avoid a reference cycle.
667 with OptionalXlaContext(compile_with_xla):
--> 668 out = weak_wrapped_fn().__wrapped__(*args, **kwds)
669 return out
670
\venv\lib\site-packages\tensorflow\python\framework\func_graph.py in wrapper(*args, **kwargs)
992 except Exception as e: # pylint:disable=broad-except
993 if hasattr(e, "ag_error_metadata"):
--> 994 raise e.ag_error_metadata.to_exception(e)
995 else:
996 raise
ValueError: in user code:
\venv\lib\site-packages\keras\engine\training.py:853 train_function *
return step_function(self, iterator)
\venv\lib\site-packages\keras\engine\training.py:842 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1286 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2849 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3632 _call_for_each_replica
return fn(*args, **kwargs)
\venv\lib\site-packages\keras\engine\training.py:835 run_step **
outputs = model.train_step(data)
\venv\lib\site-packages\keras\engine\training.py:788 train_step
loss = self.compiled_loss(
\venv\lib\site-packages\keras\engine\compile_utils.py:201 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
\venv\lib\site-packages\keras\losses.py:141 __call__
losses = call_fn(y_true, y_pred)
\venv\lib\site-packages\keras\losses.py:245 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
\venv\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
return target(*args, **kwargs)
\venv\lib\site-packages\keras\losses.py:1665 categorical_crossentropy
return backend.categorical_crossentropy(
\venv\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
return target(*args, **kwargs)
\venv\lib\site-packages\keras\backend.py:4839 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
\venv\lib\site-packages\tensorflow\python\framework\tensor_shape.py:1161 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 1) and (None, 3) are incompatible
Code to build a model, The issue I am having is when I attempt to load model and implement to test dataset, I get the error:
learning_rate=0.001
epochs = 10
decay_rate = learning_rate / epochs
def scheduler(epochs, lr):
if epochs == 15:
lr = 0.001
return lr
else:
lr = lr * tensorflow.math.exp(-0.1)
return lr
callback = keras.callbacks.LearningRateScheduler(scheduler)
wv_model = Sequential()
# Add embedding layer
# No of output dimenstions is 100 as we embedded with Word2Vec 100d
Embed_Layer = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=(MAX_SEQUENCE_LENGTH,), trainable=True)
# define Inputs
review_input = Input(shape=(MAX_SEQUENCE_LENGTH,),dtype= 'int32',name = 'review_input')
review_embedding = Embed_Layer(review_input)
Flatten_Layer = Flatten()
review_flatten = Flatten_Layer(review_embedding)
output_size = 2
dense1 = Dense(100,activation='relu')(review_flatten)
dense2 = Dense(32,activation='relu')(dense1)
predict = Dense(5, activation='softmax')(dense2)
wv_model = Model(inputs=[review_input],outputs=[predict])
# wv_model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['acc'])
opt = keras.optimizers.SGD(lr = 0.01, momentum=0.8, decay=0.0)
wv_model.compile(loss='mean_squared_error', optimizer=opt, metrics=['mean_squared_error'])
tensorboard = TensorBoard(
log_dir="logs",
histogram_freq=1,
write_graph=True,
write_images=False,
update_freq="epoch",
profile_batch=2,
embeddings_freq=0,
embeddings_metadata=None)
keras_callbacks = [tensorboard]
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', mode='min', verbose=1, save_best_only=True)
stp = keras.callbacks.EarlyStopping(patience=4)
callbacks_list = [checkpoint,stp, tensorboard,callback]
wv_model.fit(X_train, y_train, validation_data=(X_test, y_test),
epochs=epochs, batch_size=256,
verbose=1, callbacks=callbacks_list)
eval = wv_model.evaluate(X_test, y_test)[1]
print(eval)
wv_model.load_weights('./models/best_model.h5')
print(wv_model.summary())
Out:
Layer (type) Output Shape Param #
=================================================================
review_input (InputLayer) [(None, 100)] 0
_________________________________________________________________
embedding_8 (Embedding) (None, 100, 100) 22228800
_________________________________________________________________
flatten_8 (Flatten) (None, 10000) 0
_________________________________________________________________
dense_24 (Dense) (None, 100) 1000100
_________________________________________________________________
dense_25 (Dense) (None, 32) 3232
_________________________________________________________________
dense_26 (Dense) (None, 5) 165
=================================================================
Total params: 23,232,297
Trainable params: 23,232,297
Non-trainable params: 0
_________________________________________________________________
None
To validate the dataset:
predictions = load_model('./models/best_model.h5').predict(X12_test)
print("y_test", y_test)
print("predictions", predictions)
print("validation set RMSE ", rmse2(predictions, y_test))
y_test = y_test.overall.values
Out:
WARNING:tensorflow:Model was constructed with shape (None, 100) for input Tensor("review_input_13:0", shape=(None, 100), dtype=int32), but it was called on an input with incompatible shape (None, 6000).
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-80-82850281ff1c> in <module>
----> 1 predictions_o = load_model('./models/best_model.h5').predict(X12_test)
2
3 print("y1_test_truth", y1_test)
4 print("predictions_o", predictions_o)
5 print("validation set RMSE ", rmse2(predictions_o, y1_test))
~/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
128 raise ValueError('{} is not supported in multi-worker mode.'.format(
129 method.__name__))
--> 130 return method(self, *args, **kwargs)
131
132 return tf_decorator.make_decorator(
~/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py in predict(self, x, batch_size, verbose, steps, callbacks, max_queue_size, workers, use_multiprocessing)
1597 for step in data_handler.steps():
1598 callbacks.on_predict_batch_begin(step)
-> 1599 tmp_batch_outputs = predict_function(iterator)
1600 if data_handler.should_sync:
1601 context.async_wait()
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
778 else:
779 compiler = "nonXla"
--> 780 result = self._call(*args, **kwds)
781
782 new_tracing_count = self._get_tracing_count()
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
821 # This is the first call of __call__, so we have to initialize.
822 initializers = []
--> 823 self._initialize(args, kwds, add_initializers_to=initializers)
824 finally:
825 # At this point we know that the initialization is complete (or less
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
694 self._graph_deleter = FunctionDeleter(self._lifted_initializer_graph)
695 self._concrete_stateful_fn = (
--> 696 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
697 *args, **kwds))
698
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2853 args, kwargs = None, None
2854 with self._lock:
-> 2855 graph_function, _, _ = self._maybe_define_function(args, kwargs)
2856 return graph_function
2857
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
3211
3212 self._function_cache.missed.add(call_context_key)
-> 3213 graph_function = self._create_graph_function(args, kwargs)
3214 self._function_cache.primary[cache_key] = graph_function
3215 return graph_function, args, kwargs
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3063 arg_names = base_arg_names + missing_arg_names
3064 graph_function = ConcreteFunction(
-> 3065 func_graph_module.func_graph_from_py_func(
3066 self._name,
3067 self._python_function,
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
984 _, original_func = tf_decorator.unwrap(python_func)
985
--> 986 func_outputs = python_func(*func_args, **func_kwargs)
987
988 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
598 # __wrapped__ allows AutoGraph to swap in a converted function. We give
599 # the function a weak reference to itself to avoid a reference cycle.
--> 600 return weak_wrapped_fn().__wrapped__(*args, **kwds)
601 weak_wrapped_fn = weakref.ref(wrapped_fn)
602
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
971 except Exception as e: # pylint:disable=broad-except
972 if hasattr(e, "ag_error_metadata"):
--> 973 raise e.ag_error_metadata.to_exception(e)
974 else:
975 raise
ValueError: in user code:
/home/x/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1462 predict_function *
return step_function(self, iterator)
/home/x/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1452 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/home/x/.local/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1211 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/home/x/.local/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2585 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/home/x/.local/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2945 _call_for_each_replica
return fn(*args, **kwargs)
/home/x/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1445 run_step **
outputs = model.predict_step(data)
/home/x/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1418 predict_step
return self(x, training=False)
/home/x/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:985 __call__
outputs = call_fn(inputs, *args, **kwargs)
/home/x/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/functional.py:385 call
return self._run_internal_graph(
/home/x/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/functional.py:508 _run_internal_graph
outputs = node.layer(*args, **kwargs)
/home/x/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:975 __call__
input_spec.assert_input_compatibility(self.input_spec, inputs,
/home/x/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/input_spec.py:212 assert_input_compatibility
raise ValueError(
ValueError: Input 0 of layer dense_24 is incompatible with the layer: expected axis -1 of input shape to have value 10000 but received input with shape [None, 600000]
I'm trying to work out where and what I need to change to ensure the dimensions are working correctly, however I haven't managed to work out what exactly I need to change. Any help would be greatly appreciated.
Updates:
shape of data:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, y, test_size=0.2, random_state = 40)
[nSamp, inpShape] = X_train.shape
print("X train shape ", X_train.shape)
print("X test shape ", X_test.shape)
print("y train shape ",y_train.shape)
print("y test shape ",y_test.shape)
print(nSamp, inpShape)
Out:
X train shape (160000, 100)
X test shape (40000, 100)
y train shape (160000, 5)
y test shape (40000, 5)
160000 100
From the warning in the first line it seems that X12_test is not of correct shape, according to the warning you have your model is built to take an input of shape (None, 100) while you are calling using an input of shape (None, 6000)
I am trying to build a simple Model using the IAM Handwritten dataset from Kaggle and some sample code from a textbook I'm using, but I keep getting an error when I try to fit the model.
The error says ValueError: Layer sequential_2 expects 1 inputs, but it received 2 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, None, None, None) dtype=float32>, <tf.Tensor 'IteratorGetNext:1' shape=(None, None) dtype=float32>]
full source code :
from __future__ import division
import numpy as np
import os
import glob
import tensorflow as tf
from random import *
from PIL import Image
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.image as mpimg
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Lambda, ELU, Activation, BatchNormalization
from keras.layers.convolutional import Convolution2D, Cropping2D, ZeroPadding2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD, Adam, RMSprop
d = {}
from subprocess import check_output
with open('./forms_for_parsing.txt') as f:
for line in f:
key = line.split(' ')[0]
writer = line.split(' ')[1]
d[key] = writer
print(len(d.keys()))
tmp = []
target_list = []
path_to_files = os.path.join('./input/data_subset/data_subset', '*')
for filename in sorted(glob.glob(path_to_files)):
# print(filename)
tmp.append(filename)
image_name = filename.split('/')[-1]
file, ext = os.path.splitext(image_name)
parts = file.split('-')
p = parts[0].split('\\')
form = p[1] + '-' + parts[1]
for key in d:
if key == form:
target_list.append(str(d[form]))
# print(d)
# print(parts[0])
# p = parts[0].split('\\')
# print(p[1])
# print(form)
img_files = np.asarray(tmp)
img_targets = np.asarray(target_list)
print(img_files.shape)
print(img_targets.shape)
for filename in img_files[:20]:
img=mpimg.imread(filename)
plt.figure(figsize=(10,10))
plt.imshow(img, cmap ='gray')
encoder = LabelEncoder()
encoder.fit(img_targets)
encoded_Y = encoder.transform(img_targets)
print(img_files[:5], img_targets[:5], encoded_Y[:5])
train_files, rem_files, train_targets, rem_targets = train_test_split(
img_files, encoded_Y, train_size=0.66, random_state=52, shuffle= True)
validation_files, test_files, validation_targets, test_targets = train_test_split(
rem_files, rem_targets, train_size=0.5, random_state=22, shuffle=True)
print(train_files.shape, validation_files.shape, test_files.shape)
print(train_targets.shape, validation_targets.shape, test_targets.shape)
batch_size = 16 # 8
num_classes = 50
# Start with train generator shared in the class and add image augmentations
def generate_data(samples, target_files, batch_size=batch_size, factor = 0.1 ):
num_samples = len(samples)
from sklearn.utils import shuffle
while 1: # Loop forever so the generator never terminates
for offset in range(0, num_samples, batch_size):
batch_samples = samples[offset:offset+batch_size]
batch_targets = target_files[offset:offset+batch_size]
images = []
targets = []
for i in range(len(batch_samples)):
batch_sample = batch_samples[i]
batch_target = batch_targets[i]
im = Image.open(batch_sample)
cur_width = im.size[0]
cur_height = im.size[1]
# print(cur_width, cur_height)
height_fac = 113 / cur_height
new_width = int(cur_width * height_fac)
size = new_width, 113
imresize = im.resize((size), Image.ANTIALIAS) # Resize so height = 113 while keeping aspect ratio
now_width = imresize.size[0]
now_height = imresize.size[1]
# Generate crops of size 113x113 from this resized image and keep random 10% of crops
avail_x_points = list(range(0, now_width - 113 ))# total x start points are from 0 to width -113
# Pick random x%
pick_num = int(len(avail_x_points)*factor)
# Now pick
random_startx = sample(avail_x_points, pick_num)
for start in random_startx:
imcrop = imresize.crop((start, 0, start+113, 113))
images.append(np.asarray(imcrop))
targets.append(batch_target)
# trim image to only see section with road
X_train = np.array(images)
y_train = np.array(targets)
#reshape X_train for feeding in later
X_train = X_train.reshape(X_train.shape[0], 113, 113, 1)
#convert to float and normalize
X_train = X_train.astype('float32')
X_train /= 255
#One hot encode y
y_train = to_categorical(y_train, num_classes)
yield shuffle(X_train, y_train)
train_generator = generate_data(train_files, train_targets, batch_size=batch_size, factor = 0.3)
validation_generator = generate_data(validation_files, validation_targets, batch_size=batch_size, factor = 0.3)
test_generator = generate_data(test_files, test_targets, batch_size=batch_size, factor = 0.1)
def resize_image(image):
return tf.image.resize(image,[56,56])
# Function to resize image to 64x64
row, col, ch = 113, 113, 1
model = Sequential()
model.add(ZeroPadding2D((1, 1), input_shape=(row, col, ch)))
# Resise data within the neural network
model.add(Lambda(resize_image)) #resize images to allow for easy computation
# CNN model - Building the model suggested in paper
model.add(Convolution2D(filters= 32, kernel_size =(5,5), strides= (2,2), padding='same', name='conv1')) #96
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2), name='pool1'))
model.add(Convolution2D(filters= 64, kernel_size =(3,3), strides= (1,1), padding='same', name='conv2')) #256
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2), name='pool2'))
model.add(Convolution2D(filters= 128, kernel_size =(3,3), strides= (1,1), padding='same', name='conv3')) #256
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2), name='pool3'))
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(512, name='dense1')) #1024
# model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(256, name='dense2')) #1024
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes,name='output'))
model.add(Activation('softmax')) #softmax since output is within 50 classes
model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])
model.summary()
nb_epoch = 8
samples_per_epoch = 3268
nb_val_samples = 842
# #save every model using Keras checkpoint
from keras.callbacks import ModelCheckpoint
#filepath="check-{epoch:02d}-{val_loss:.4f}.hdf5"
filepath="low_loss.hdf5"
checkpoint = ModelCheckpoint(filepath= filepath, verbose=1, save_best_only=False)
callbacks_list = [checkpoint]
# #Model fit generator
history_object = model.fit_generator(train_generator, steps_per_epoch = (samples_per_epoch/batch_size),
validation_data=validation_generator,
validation_steps=nb_val_samples, epochs=nb_epoch, verbose=1, callbacks=callbacks_list)
and this is error i got :
ValueError Traceback (most recent call last)
<ipython-input-79-99c01bc062d8> in <module>
12
13 # #Model fit generator
---> 14 history_object = model.fit_generator(train_generator, steps_per_epoch = (samples_per_epoch/batch_size),
15 validation_data=validation_generator,
16 validation_steps=nb_val_samples, epochs=nb_epoch, verbose=1, callbacks=callbacks_list)
~\anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py in new_func(*args, **kwargs)
322 'in a future version' if date is None else ('after %s' % date),
323 instructions)
--> 324 return func(*args, **kwargs)
325 return tf_decorator.make_decorator(
326 func, new_func, 'deprecated',
~\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
1813 """
1814 _keras_api_gauge.get_cell('fit_generator').set(True)
-> 1815 return self.fit(
1816 generator,
1817 steps_per_epoch=steps_per_epoch,
~\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py in _method_wrapper(self, *args, **kwargs)
106 def _method_wrapper(self, *args, **kwargs):
107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
--> 108 return method(self, *args, **kwargs)
109
110 # Running inside `run_distribute_coordinator` already.
~\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1096 batch_size=batch_size):
1097 callbacks.on_train_batch_begin(step)
-> 1098 tmp_logs = train_function(iterator)
1099 if data_handler.should_sync:
1100 context.async_wait()
~\anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
778 else:
779 compiler = "nonXla"
--> 780 result = self._call(*args, **kwds)
781
782 new_tracing_count = self._get_tracing_count()
~\anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
812 # In this case we have not created variables on the first call. So we can
813 # run the first trace but we should fail if variables are created.
--> 814 results = self._stateful_fn(*args, **kwds)
815 if self._created_variables:
816 raise ValueError("Creating variables on a non-first call to a function"
~\anaconda3\lib\site-packages\tensorflow\python\eager\function.py in __call__(self, *args, **kwargs)
2826 """Calls a graph function specialized to the inputs."""
2827 with self._lock:
-> 2828 graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
2829 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
2830
~\anaconda3\lib\site-packages\tensorflow\python\eager\function.py in _maybe_define_function(self, args, kwargs)
3208 and self.input_signature is None
3209 and call_context_key in self._function_cache.missed):
-> 3210 return self._define_function_with_shape_relaxation(args, kwargs)
3211
3212 self._function_cache.missed.add(call_context_key)
~\anaconda3\lib\site-packages\tensorflow\python\eager\function.py in _define_function_with_shape_relaxation(self, args, kwargs)
3139 expand_composites=True)
3140
-> 3141 graph_function = self._create_graph_function(
3142 args, kwargs, override_flat_arg_shapes=relaxed_arg_shapes)
3143 self._function_cache.arg_relaxed[rank_only_cache_key] = graph_function
~\anaconda3\lib\site-packages\tensorflow\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3063 arg_names = base_arg_names + missing_arg_names
3064 graph_function = ConcreteFunction(
-> 3065 func_graph_module.func_graph_from_py_func(
3066 self._name,
3067 self._python_function,
~\anaconda3\lib\site-packages\tensorflow\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
984 _, original_func = tf_decorator.unwrap(python_func)
985
--> 986 func_outputs = python_func(*func_args, **func_kwargs)
987
988 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
~\anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py in wrapped_fn(*args, **kwds)
598 # __wrapped__ allows AutoGraph to swap in a converted function. We give
599 # the function a weak reference to itself to avoid a reference cycle.
--> 600 return weak_wrapped_fn().__wrapped__(*args, **kwds)
601 weak_wrapped_fn = weakref.ref(wrapped_fn)
602
~\anaconda3\lib\site-packages\tensorflow\python\framework\func_graph.py in wrapper(*args, **kwargs)
971 except Exception as e: # pylint:disable=broad-except
972 if hasattr(e, "ag_error_metadata"):
--> 973 raise e.ag_error_metadata.to_exception(e)
974 else:
975 raise
ValueError: in user code:
C:\Users\subha\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:806 train_function *
return step_function(self, iterator)
C:\Users\subha\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:796 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
C:\Users\subha\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1211 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
C:\Users\subha\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2585 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
C:\Users\subha\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2945 _call_for_each_replica
return fn(*args, **kwargs)
C:\Users\subha\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:789 run_step **
outputs = model.train_step(data)
C:\Users\subha\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:747 train_step
y_pred = self(x, training=True)
C:\Users\subha\anaconda3\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:975 __call__
input_spec.assert_input_compatibility(self.input_spec, inputs,
C:\Users\subha\anaconda3\lib\site-packages\tensorflow\python\keras\engine\input_spec.py:155 assert_input_compatibility
raise ValueError('Layer ' + layer_name + ' expects ' +
ValueError: Layer sequential_2 expects 1 inputs, but it received 2 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, None, None, None) dtype=float32>, <tf.Tensor 'IteratorGetNext:1' shape=(None, None) dtype=float32>]
i couldn't understand the error message so kindly somebody help me out!
thank u
I am trying to follow the "Fine-tune InceptionV3 on a new set of classes" sample code to freeze the first 172 layers and re-train the last layers on cats/dogs dataset. I keep getting an error which I have noted at the bottom. Please help. I am using Ubuntu 16.04, keras 1.2.1, theano 0.9.0beta1.dev, numpy 1.12.0 and python 3.5.
from PIL import Image
import os
import matplotlib.pyplot as plt
import numpy as np
data_root_dir = "/home/ubuntu/ML/data/dogscats/"
train_dir = os.path.join(data_root_dir,"sample", "train")
valid_dir = os.path.join(data_root_dir, "valid")
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
# create the base pre-trained model
base_model = InceptionV3(weights='imagenet', include_top=True)
# add a global spatial average pooling layer
x = base_model.output
#x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
x = Dense(1024, activation='relu')(x)
# and a logistic layer -- let's say we have 200 classes
predictions = Dense(2, activation='softmax')(x)
# this is the model we will train
model = Model(input=base_model.input, output=predictions)
for layer in model.layers[:172]:
layer.trainable = False
for layer in model.layers[172:]:
layer.trainable = True
from keras.optimizers import SGD
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy')
from sklearn.preprocessing import OneHotEncoder
def get_data(path, target_size=(299,299)):
batches = get_batches(path, shuffle=False, batch_size=1, class_mode=None, target_size=target_size)
return np.concatenate([batches.next() for i in range(batches.nb_sample)])
def get_batches(dirname, gen=image.ImageDataGenerator(), shuffle=True, batch_size=2, class_mode='categorical',
target_size=(299,299)):
return gen.flow_from_directory(dirname, target_size=target_size,
class_mode=class_mode, shuffle=shuffle, batch_size=batch_size)
def onehot(x): return np.array(OneHotEncoder().fit_transform(x.reshape(-1,1)).todense())
# Use batch size of 1 since we're just doing preprocessing on the CPU
val_batches = get_batches(valid_dir, shuffle=False, batch_size=10)
train_batches = get_batches(train_dir, shuffle=False, batch_size=10)
val_classes = val_batches.classes
trn_classes = train_batches.classes
val_labels = onehot(val_classes)
trn_labels = onehot(trn_classes)
model.fit_generator(train_batches, samples_per_epoch=train_batches.n, nb_epoch=10,
validation_data=val_batches, nb_val_samples=val_batches.n)
The exception is: padding must be zero for average_exc_pad
Here is the full stack-trace:
ValueError Traceback (most recent call last)
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs)
883 outputs =\
--> 884 self.fn() if output_subset is None else\
885 self.fn(output_subset=output_subset)
ValueError: padding must be zero for average_exc_pad
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-4-369d7760ec6e> in <module>()
34
35 model.fit_generator(train_batches, samples_per_epoch=train_batches.n, nb_epoch=10,
---> 36 validation_data=val_batches, nb_val_samples=val_batches.n)
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/engine/training.py in fit_generator(self, generator, samples_per_epoch, nb_epoch, verbose, callbacks, validation_data, nb_val_samples, class_weight, max_q_size, nb_worker, pickle_safe, initial_epoch)
1551 outs = self.train_on_batch(x, y,
1552 sample_weight=sample_weight,
-> 1553 class_weight=class_weight)
1554
1555 if not isinstance(outs, list):
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/engine/training.py in train_on_batch(self, x, y, sample_weight, class_weight)
1314 ins = x + y + sample_weights
1315 self._make_train_function()
-> 1316 outputs = self.train_function(ins)
1317 if len(outputs) == 1:
1318 return outputs[0]
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/backend/theano_backend.py in __call__(self, inputs)
957 def __call__(self, inputs):
958 assert isinstance(inputs, (list, tuple))
--> 959 return self.function(*inputs)
960
961
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs)
896 node=self.fn.nodes[self.fn.position_of_error],
897 thunk=thunk,
--> 898 storage_map=getattr(self.fn, 'storage_map', None))
899 else:
900 # old-style linkers raise their own exceptions
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/gof/link.py in raise_with_op(node, thunk, exc_info, storage_map)
323 # extra long error message in that case.
324 pass
--> 325 reraise(exc_type, exc_value, exc_trace)
326
327
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/six.py in reraise(tp, value, tb)
683 value = tp()
684 if value.__traceback__ is not tb:
--> 685 raise value.with_traceback(tb)
686 raise value
687
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs)
882 try:
883 outputs =\
--> 884 self.fn() if output_subset is None else\
885 self.fn(output_subset=output_subset)
886 except Exception:
ValueError: padding must be zero for average_exc_pad
Apply node that caused the error: AveragePoolGrad{ignore_border=True, mode='average_exc_pad', ndim=2}(Join.0, IncSubtensor{InplaceInc;::, ::, :int64:, :int64:}.0, TensorConstant{(2,) of 3}, TensorConstant{(2,) of 1}, TensorConstant{(2,) of 1})
Toposort index: 5270
Inputs types: [TensorType(float32, 4D), TensorType(float32, 4D), TensorType(int64, vector), TensorType(int64, vector), TensorType(int64, vector)]
Inputs shapes: [(10, 2048, 8, 8), (10, 2048, 8, 8), (2,), (2,), (2,)]
Inputs strides: [(524288, 256, 32, 4), (524288, 256, 32, 4), (8,), (8,), (8,)]
Inputs values: ['not shown', 'not shown', array([3, 3]), array([1, 1]), array([1, 1])]
Outputs clients: [[Elemwise{add,no_inplace}(CorrMM_gradInputs{half, (1, 1), (1, 1)}.0, CorrMM_gradInputs{half, (1, 1), (1, 1)}.0, CorrMM_gradInputs{half, (1, 1), (1, 1)}.0, AveragePoolGrad{ignore_border=True, mode='average_exc_pad', ndim=2}.0)]]
Fine-tuning in that situation possibly means using the convolutional layers as pre-trained feature extractors. So you don't really want the top layers (densely connected layers) of the Inception network.
Changing
base_model = InceptionV3(weights='imagenet', include_top=True)
to
base_model = InceptionV3(weights='imagenet', include_top=False)
should work.
Also, if you have 200 classes you should change
# and a logistic layer -- let's say we have 200 classes
predictions = Dense(2, activation='softmax')(x)
to
predictions = Dense(200, activation='softmax')(x)
So your last layer will have the desired 200 elements.