Error raised in model.fit() with validation_data: ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all() - keras

I'm trying to run a simple autoencoder model. I'm reading training data from a CSV file that consists of word embeddings. The error in the title is raised in the model.fit() call and seems to be connected with my validation data. I've tried many things, but the error remains. I'm new to NLP, so maybe my logic is totally wrong. I'd appreciate any help. Here is my code:
def train_predict(df):
    X_train, X_validation = train_test_split(df, test_size=0.3, random_state=42, shuffle=True)
    X = X_train.iloc[:, :-1].to_numpy()            # shape is (1880, 220) in here
    X = tf.expand_dims(X, axis=-1)                 # shape is (1880, 220, 1)
    X_val = X_validation.iloc[:, :-1].to_numpy()   # shape is (300, 220)
    X_val = tf.expand_dims(X_val, axis=-1)         # shape is (300, 220, 1)
    inputs, decoder_output, visualization = autoEncoder(X)
    model = Model(inputs=inputs, outputs=decoder_output)
    encoder_model = Model(inputs=inputs, outputs=visualization)
    batch_size = 128
    train_steps = len(X) // batch_size
    val_steps = len(X_val) // batch_size
    model.summary()
    model.compile(optimizer='adam', metrics=['accuracy'], loss='mean_squared_error')
    model.fit(X, steps_per_epoch=train_steps, validation_data=X_val, validation_steps=val_steps, epochs=100)
    result = model.evaluate(X_val, steps=10)
The autoEncoder function is as follows:
def autoEncoder(X_train):
    inputs = tf.keras.layers.Input(shape=(X_train.shape[1], 1))
    # parameters
    conv_1 = Conv1D(filters=64, kernel_size=3, activation='relu', padding='same')(inputs)
    max_pool_1 = MaxPool1D(pool_size=2)(conv_1)
    conv_2 = Conv1D(filters=128, kernel_size=3, activation='relu', padding='same')(max_pool_1)
    max_pool_2 = MaxPool1D(pool_size=2)(conv_2)
    # BOTTLE NECK
    bottle_neck = Conv1D(filters=256, kernel_size=3, activation='relu', padding='same')(max_pool_2)
    visualization = Conv1D(filters=1, kernel_size=3, activation='sigmoid', padding='same')(bottle_neck)
    # DECODER
    conv_3 = Conv1D(filters=128, kernel_size=3, activation='relu', padding='same')(bottle_neck)
    upsample_1 = UpSampling1D(size=2)(conv_3)
    conv_4 = Conv1D(filters=64, kernel_size=3, activation='relu', padding='same')(upsample_1)
    upsample_2 = UpSampling1D(size=2)(conv_4)
    decoder_output = Conv1D(filters=1, kernel_size=3, activation='sigmoid', padding='same')(upsample_2)
    return inputs, decoder_output, visualization

It'd be excellent if you could copy-paste the entire error stack trace that your code produces; posting it is something everyone should do for error-related questions, because it makes debugging that much easier.
Here's an attempt to reproduce the same error using a dummy dataset:
import numpy as np
import tensorflow as tf
np.random.seed(11)
np.set_printoptions(precision=2)
def autoEncoder(X_train):
    inputs = tf.keras.layers.Input(shape=(X_train.shape[1], 1))
    conv_1 = tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu', padding='same')(inputs)
    max_pool_1 = tf.keras.layers.MaxPool1D(pool_size=2)(conv_1)
    conv_2 = tf.keras.layers.Conv1D(filters=128, kernel_size=3, activation='relu', padding='same')(max_pool_1)
    max_pool_2 = tf.keras.layers.MaxPool1D(pool_size=2)(conv_2)
    bottle_neck = tf.keras.layers.Conv1D(filters=256, kernel_size=3, activation='relu', padding='same')(max_pool_2)
    visualization = tf.keras.layers.Conv1D(filters=1, kernel_size=3, activation='sigmoid', padding='same')(bottle_neck)
    conv_3 = tf.keras.layers.Conv1D(filters=128, kernel_size=3, activation='relu', padding='same')(bottle_neck)
    upsample_1 = tf.keras.layers.UpSampling1D(size=2)(conv_3)
    conv_4 = tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu', padding='same')(upsample_1)
    upsample_2 = tf.keras.layers.UpSampling1D(size=2)(conv_4)
    decoder_output = tf.keras.layers.Conv1D(filters=1, kernel_size=3, activation='sigmoid', padding='same')(upsample_2)
    return inputs, decoder_output, visualization
X = np.random.randn(1880, 220)
X_val = np.random.randn(300, 220)
X = np.expand_dims(X, axis=-1)
X = tf.convert_to_tensor(X) # (1880, 220, 1)
X_val = np.expand_dims(X_val, axis=-1)
X_val = tf.convert_to_tensor(X_val) # (300, 220, 1)
inputs, decoder_output, visualization = autoEncoder(X)
model = tf.keras.Model(inputs=inputs, outputs=decoder_output)
encoder_model = tf.keras.Model(inputs=inputs, outputs=visualization)
batch_size = 128
train_steps = len(X) // batch_size
val_steps = len(X_val) // batch_size
model.compile(optimizer='adam', metrics=['accuracy'], loss='mean_squared_error')
model.fit(X, steps_per_epoch=train_steps, validation_data = X_val, validation_steps=val_steps, epochs=100)
On Google Colab this gives the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-29-a889c5a46f35> in <module>()
3 val_steps = len(X_val) // batch_size
4 model.compile(optimizer='adam', metrics=['accuracy'], loss='mean_squared_error')
----> 5 model.fit(X, steps_per_epoch=train_steps, validation_data = X_val, validation_steps=val_steps, epochs=100)
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1041 (x, y, sample_weight), validation_split=validation_split))
1042
-> 1043 if validation_data:
1044 val_x, val_y, val_sample_weight = (
1045 data_adapter.unpack_x_y_sample_weight(validation_data))
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py in __bool__(self)
990
991 def __bool__(self):
--> 992 return bool(self._numpy())
993
994 __nonzero__ = __bool__
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
which is identical to the error in your OP. The reason it's better to post the error stack is that the answer is hidden in those lines, specifically:
1043 if validation_data:
1044 val_x, val_y, val_sample_weight = (
1045 data_adapter.unpack_x_y_sample_weight(validation_data))
The format of validation_data is identical to that of (x, y, sample_weight). Here's what the fit method documentation has to say:
validation_data will override validation_split. validation_data could be:
- tuple (x_val, y_val) of Numpy arrays or tensors
- tuple (x_val, y_val, val_sample_weights) of Numpy arrays
- dataset
For the first two cases, batch_size must be provided. For the last case, validation_steps could be provided.
I think you can now see why you're getting the error: there is no y for your autoencoder. That shouldn't be a concern, since your X itself is your y. Here's a line from an autoencoder tutorial that helps in this situation:
Train the model using x_train as both the input and the target. The encoder will learn to compress the dataset from 784 dimensions to the latent space, and the decoder will learn to reconstruct the original images.
So, what you need to write is the following:
model.fit(X, X, steps_per_epoch=train_steps, validation_data=(X_val, X_val), validation_steps=val_steps, epochs=100)
which indeed starts the training!
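As a side note, the docs quoted above also accept a dataset for validation_data. Here's a minimal alternative sketch (my own, untested, keeping your steps_per_epoch and validation_steps) that wraps the tensors in tf.data pipelines which already yield (input, target) pairs:
# Sketch: build (X, X) pairs so Keras receives both inputs and targets
train_ds = tf.data.Dataset.from_tensor_slices((X, X)).batch(batch_size).repeat()
val_ds = tf.data.Dataset.from_tensor_slices((X_val, X_val)).batch(batch_size).repeat()
model.fit(train_ds, steps_per_epoch=train_steps, validation_data=val_ds, validation_steps=val_steps, epochs=100)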

Related

How to fix error validation split in MLP using Keras?

I'm a newbie in neural networks. I'm doing text classification research using an MLP model with Keras: an input layer of 900 nodes, 2 hidden layers, and 2 outputs.
The code I use is as follows:
#Split data training & testing (90:10)
Train_X, Test_X, Train_Y, Test_Y = model_selection.train_test_split(dataset['review'],dataset['sentimen'],test_size=0.2, random_state=8)
Encoder = LabelEncoder()
Train_Y = Encoder.fit_transform(Train_Y)
Test_Y = Encoder.fit_transform(Test_Y)
Tfidf_vect = TfidfVectorizer(max_features=None)
Tfidf_vect.fit(dataset['review'])
Train_X_Tfidf = Tfidf_vect.transform(Train_X)
Test_X_Tfidf = Tfidf_vect.transform(Test_X)
#ANN Architecture
model = Sequential()
model.add(Dense(units = 100, activation = 'sigmoid', input_shape=(32, 900)))
model.add(Dense(units = 100, activation = 'sigmoid'))
model.add(Dense(units = 2, activation = 'sigmoid'))
opt = Adam (learning_rate=0.001)
model.compile(loss = 'binary_crossentropy', optimizer = opt,
              metrics = ['accuracy'])
print(model.summary())
#Hyperparameter
epochs= 100
batch_size= 32
es = EarlyStopping(monitor="val_loss",mode='min',patience=10)
model_prediction = model.fit(Train_X_Tfidf, Train_Y, epochs=epochs,
                             batch_size=batch_size, verbose=1,
                             validation_split=0.1, callbacks=[es])
But I'm getting this error:
/usr/local/lib/python3.8/dist-packages/keras/engine/data_adapter.py in train_validation_split(arrays, validation_split)
1478 unsplitable = [type(t) for t in flat_arrays if not _can_split(t)]
1479 if unsplitable:
-> 1480 raise ValueError(
1481 "`validation_split` is only supported for Tensors or NumPy "
1482 "arrays, found following types in the input: {}".format(unsplitable))
ValueError: `validation_split` is only supported for Tensors or NumPy arrays, found following types in the input: [<class 'scipy.sparse.csr.csr_matrix'>]
How can I fix it? Thank you so much.
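The error message itself points at a likely fix: validation_split only supports tensors or NumPy arrays, while TfidfVectorizer.transform returns a scipy.sparse matrix. A minimal sketch of the conversion (assuming the densified matrices fit in memory):
# Densify the sparse TF-IDF matrices before handing them to Keras
Train_X_Tfidf = Tfidf_vect.transform(Train_X).toarray()
Test_X_Tfidf = Tfidf_vect.transform(Test_X).toarray()
model_prediction = model.fit(Train_X_Tfidf, Train_Y, epochs=epochs,
                             batch_size=batch_size, verbose=1,
                             validation_split=0.1, callbacks=[es])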

ValueError: Unknown initializer: my_filter

I built my CNN using the following code:
def arbitrary_functionality(tensor):
    return tf.abs(tensor)

def my_filter(shape, dtype=None):
    f = np.array([
        [[[-1]], [[2]], [[-2]], [[2]], [[-1]]],
        [[[2]], [[-6]], [[8]], [[-6]], [[2]]],
        [[[-2]], [[8]], [[-12]], [[8]], [[-2]]],
        [[[2]], [[-6]], [[8]], [[-6]], [[2]]],
        [[[-1]], [[2]], [[-2]], [[2]], [[-1]]]])
    assert f.shape == shape
    return K.variable(f, dtype='float32')
input_layer = Input(shape=(256, 256, 1))
conv = Conv2D(1, [5, 5], kernel_initializer=my_filter, input_shape=(256, 256, 1), trainable=True, padding='same')(input_layer)
conv = Conv2D(8, (5, 5), padding='same', strides=1, use_bias=False)(conv)
lambda_layer = Lambda(arbitrary_functionality)(conv)
output_layer = Activation(activation='tanh')(lambda_layer)
output_layer = AveragePooling2D(pool_size= (5, 5), strides=2)(output_layer)
hidden = Dense(256)(output_layer)
hidden = LeakyReLU(alpha=0.2)(hidden)
output = Dense(2, activation='softmax')(hidden)
model = Model(inputs=input_layer, outputs=output)
# Callback for loss logging per epoch
class LossHistory(Callback):
    def on_train_begin(self, logs={}):
        self.losses = []
        self.val_losses = []

    def on_epoch_end(self, batch, logs={}):
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
history = LossHistory()
tensorboard = TensorBoard (log_dir='E:/logs/trail' , histogram_freq=0, write_graph=True , write_images=False)
adam = keras.optimizers.Adam(lr= lrate, beta_1= 0.9, beta_2= 0.999, epsilon= 1e-08, decay= decay)
model.compile(loss = 'binary_crossentropy', optimizer = adam, metrics = ['accuracy', 'mse'])
batch_si = 64
fitted_model = model.fit(X_train, y_train, batch_size= batch_si, callbacks=[tensorboard], epochs=epochs, verbose=1, validation_split= 0.2 , shuffle=True)
# Save Model
model.save('E:/models/trail.h5', overwrite = True)
model.save_weights('E:/models/weights_trail.hdf5', overwrite=True)
# Evaluate the model
scores = model.evaluate(X_test, y_test, batch_size=batch_si, verbose=1)
print("Model Accuracy: {:5.2f}%".format(100*scores[1]))
# Load and Evaluate the Model
new_model = tf.keras.models.load_model('E:/models/trail.h5', custom_objects={'tf': tf})
new_model.load_weights('E:/models/trail.hdf5')
new_model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy', 'mse'])
scores = new_model.evaluate(X_test, y_test, verbose=1)
print("Accuracy After Model Reloaded: {:5.2f}%".format(100*scores[1]))
Now the problem is, I can evaluate my output successfully before saving and reloading the model. But when I reload the trained model file and try to evaluate the output, I get the following error:
ValueError: Unknown initializer: my_filter
You have to register the custom function name (see https://www.tensorflow.org/guide/keras/save_and_serialize#custom_objects):
new_model = tf.keras.models.load_model('E:/models/trail.h5', custom_objects={'my_filter': my_filter, 'tf': tf})
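As an alternative (a sketch of my own, not part of the original answer), you can register the custom objects with a custom_object_scope around the load:
with tf.keras.utils.custom_object_scope({'my_filter': my_filter, 'tf': tf}):
    new_model = tf.keras.models.load_model('E:/models/trail.h5')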

assertion failed: [Condition x == y did not hold element-wise:]

I have built a BiLSTM model with an attention layer for a sentence classification task, but I am getting an error that an assertion failed due to a shape mismatch. The attention layer code is here and the error is below the code.
class attention(Layer):
    def __init__(self, return_sequences=True):
        self.return_sequences = return_sequences
        super(attention, self).__init__()

    def build(self, input_shape):
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
                                 initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1),
                                 initializer="zeros")
        super(attention, self).build(input_shape)

    def call(self, x):
        e = K.tanh(K.dot(x, self.W) + self.b)
        a = K.softmax(e, axis=1)
        output = x * a
        if self.return_sequences:
            return output
        return K.sum(output, axis=1)
When I train the model with the attention layer included, it gives the assertion failed error.
Epoch 1/10
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-45-ac310033130c> in <module>()
1 #Early stopping, Adam, dropout = 0.3, 0.5, 0.5
2 #history = model.fit(sequences_matrix, Y_train, batch_size=256, epochs=5, validation_split=0.1, callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.0001)])
----> 3 history = model.fit(sequences_matrix, Y_train, batch_size=32, epochs=10, validation_split=0.1)
8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
58 ctx.ensure_initialized()
59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
62 if name is not None:
InvalidArgumentError: assertion failed: [Condition x == y did not hold element-wise:] [x (sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/Shape_1:0) = ] [32 1] [y (sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/strided_slice:0) = ] [32 758]
[[node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert (defined at <ipython-input-45-ac310033130c>:3) ]] [Op:__inference_train_function_19854]
Function call stack:
train_function
My model is
model = Sequential()
model.add(Embedding(max_words, 768, input_length=max_len, weights=[embedding]))
model.add(BatchNormalization())
model.add(Activation('tanh'))
model.add(SpatialDropout1D(0.1))
model.add(Conv1D(16, kernel_size=11, activation='relu'))
model.add(Bidirectional(LSTM(16, return_sequences=True)))
model.add(attention(return_sequences=True))
model.add(BatchNormalization())
model.add(Activation('tanh'))
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax', use_bias=True, kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4), bias_regularizer=regularizers.l2(1e-4),
                activity_regularizer=regularizers.l2(1e-5)))
model.summary()
Here is my preprocessing; the shape of Y_train is printed at the end:
max_words = 48369
max_len = 768
tok = Tokenizer(num_words = max_words)
tok.fit_on_texts(X_train)
sequences = tok.texts_to_sequences(X_train)
sequences_matrix = sequence.pad_sequences(sequences, maxlen = max_len)
Y_train = np.array(Y_train)
Y_test = np.array(Y_test)
print(Y_train.shape)
(43532, 1)
Your target is 2D, so you need to set return_sequences=False in the last attention layer in order to return the output in 2D format.
Alternatively, add a Flatten layer before the Dropout and then run it again:
model.add(Flatten())
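For reference, a minimal sketch of the tail of the model with the first suggestion applied (the layers before the Bidirectional LSTM stay exactly as in the question; attention is the custom layer defined above):
model.add(Bidirectional(LSTM(16, return_sequences=True)))
model.add(attention(return_sequences=False))  # K.sum over the time axis, output shape (batch, features)
model.add(BatchNormalization())
model.add(Activation('tanh'))
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))
The model then outputs shape (batch, 2), which matches targets of shape (batch, 1) holding class indices, as sparse_categorical_crossentropy expects.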

Got Very Different Scores After Translating Simple Test Model from Keras to PyTorch

I'm trying to transition from Keras to PyTorch.
After reading tutorials and similar questions, I came up with the following simple models to test. However, the two models below give me very different scores: Keras (0.9), PyTorch (0.03).
Could someone give me guidance?
Basically, my dataset has 120 features and multilabel targets with 3 classes that look like the example below.
[
[1,1,1],
[0,1,1],
[1,0,0],
...
]
def score(true, pred):
    lrl = label_ranking_loss(true, pred)
    lrap = label_ranking_average_precision_score(true, pred)
    print('LRL:', round(lrl), 'LRAP:', round(lrap))
#Keras:
model= Sequential()
model.add(Dense(60, activation="relu", input_shape=(120,)))
model.add(Dense(30, activation="relu"))
model.add(Dense(3, activation="sigmoid"))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=32, epochs=100)
pred = model.predict(x_test)
score(y_test, pred)
#PyTorch
model = torch.nn.Sequential(
    torch.nn.Linear(120, 60),
    torch.nn.ReLU(),
    torch.nn.Linear(60, 30),
    torch.nn.ReLU(),
    torch.nn.Linear(30, 3),
    torch.nn.Sigmoid())
loss_fn = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 100
batch_size = 32
n_batch = int(x_train.shape[0]/batch_size)
for epoch in range(epochs):
    avg_cost = 0
    for i in range(n_batch):
        x_batch = x_train[i*batch_size:(i+1)*batch_size]
        y_batch = y_train[i*batch_size:(i+1)*batch_size]
        x, y = Variable(torch.from_numpy(x_batch).float()), Variable(torch.from_numpy(y_batch).float(), requires_grad=False)
        pred = model(x)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()
        avg_cost += loss.item()/n_batch
    print(epoch, avg_cost)
x, y = Variable(torch.from_numpy(x_test).float()), Variable(torch.from_numpy(y_test).float(), requires_grad=False)
pred = model(x)
score(y_test, pred.data.numpy())
You need to call optimizer.zero_grad() at the start of each iteration, otherwise the gradients from different batches just keep getting accumulated.
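For reference, a sketch of the corrected training loop (it also drops the deprecated Variable wrapper, since plain tensors work in current PyTorch):
for epoch in range(epochs):
    avg_cost = 0
    for i in range(n_batch):
        x = torch.from_numpy(x_train[i*batch_size:(i+1)*batch_size]).float()
        y = torch.from_numpy(y_train[i*batch_size:(i+1)*batch_size]).float()
        optimizer.zero_grad()  # clear gradients accumulated from the previous batch
        pred = model(x)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()
        avg_cost += loss.item() / n_batch
    print(epoch, avg_cost)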

keras: cnn+lstm model using time distributed layer runtime error

I am using a CNN + LSTM model with a TimeDistributed layer for image classification. Although I have compiled the model, it still shows:
RuntimeError: You must compile your model before using it.
I searched multiple sites, but I cannot find a solution to my problem.
Here is my code:
from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import TimeDistributed
from keras.layers import LSTM
import warnings
warnings.filterwarnings('ignore')
# Initialising the CNN
classifier = Sequential()
# Step 1 - Convolution
classifier.add(TimeDistributed(Convolution2D(32, (3, 3), padding = 'same', input_shape = (128, 128, 3),
                                             activation = 'relu')))
# Step 2 -
classifier.add(TimeDistributed(MaxPooling2D(pool_size = (2, 2))))
# Adding a second convolutional layer
classifier.add(TimeDistributed(Convolution2D(64, (3, 3), padding = 'same', activation = 'relu')))
classifier.add(TimeDistributed(MaxPooling2D(pool_size = (2, 2))))
# Adding a third conolutional layer
classifier.add(TimeDistributed(Convolution2D(64, (3, 3), padding = 'same', activation = 'relu')))
classifier.add(TimeDistributed(MaxPooling2D(pool_size = (2, 2))))
# Step 3 - Flattening
classifier.add(TimeDistributed(Flatten()))
classifier.add(Dropout(rate = 0.5))
# Step 4 - Full connection
classifier.add(LSTM(256, return_sequences=False, dropout=0.5))
classifier.add(Dense(output_dim = 8, activation = 'softmax'))
# Compiling the CNN
classifier.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
# Part 2 - Fitting the CNN to the images
from keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(rescale = 1./255,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   height_shift_range = 0.1,
                                   width_shift_range = 0.1,
                                   channel_shift_range = 10)
test_datagen = ImageDataGenerator(rescale = 1./255)
training_set = train_datagen.flow_from_directory('dataset/mel/train/',
                                                 target_size = (128, 128),
                                                 batch_size = 32,
                                                 class_mode = 'categorical')
test_set = test_datagen.flow_from_directory('dataset/mel/test/',
                                            target_size = (128, 128),
                                            batch_size = 32,
                                            class_mode = 'categorical')
classifier.fit_generator(training_set,
                         samples_per_epoch = 1088,
                         nb_epoch = 1,
                         validation_data = test_set,
                         nb_val_samples = 352)
Here is the complete output message:
Found 1088 images belonging to 8 classes.
Found 352 images belonging to 8 classes.
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-8-6a3839aea8f8> in <module>()
81 nb_epoch = 1,
82 validation_data = test_set,
---> 83 nb_val_samples = 352)
~/.local/lib/python3.5/site-packages/keras/legacy/interfaces.py in wrapper(*args, **kwargs)
89 warnings.warn('Update your `' + object_name +
90 '` call to the Keras 2 API: ' + signature, stacklevel=2)
---> 91 return func(*args, **kwargs)
92 wrapper._original_function = func
93 return wrapper
~/.local/lib/python3.5/site-packages/keras/engine/training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
1424 use_multiprocessing=use_multiprocessing,
1425 shuffle=shuffle,
-> 1426 initial_epoch=initial_epoch)
1427
1428 #interfaces.legacy_generator_methods_support
~/.local/lib/python3.5/site-packages/keras/engine/training_generator.py in fit_generator(model, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
35
36 do_validation = bool(validation_data)
---> 37 model._make_train_function()
38 if do_validation:
39 model._make_test_function()
~/.local/lib/python3.5/site-packages/keras/engine/training.py in _make_train_function(self)
482 def _make_train_function(self):
483 if not hasattr(self, 'train_function'):
--> 484 raise RuntimeError('You must compile your model before using it.')
485 self._check_trainable_weights_consistency()
486 if self.train_function is None:
RuntimeError: You must compile your model before using it.
What could the possible mistakes be?
Thanks
In order to use TimeDistributed as the first layer, you have to specify input_shape in the TimeDistributed constructor, not in the Convolution2D one (or whatever layer you want to distribute). Keep in mind that you have to provide the number of timesteps (frames) in this constructor. In your case it would look like this:
num_frames = 10 # e.g.
# Step 1 - Convolution
classifier.add(TimeDistributed(Convolution2D(32, (3, 3), padding = 'same', activation = 'relu'),
                               input_shape = (num_frames, 128, 128, 3)))
