I am trying to train a 1D-CNN on tabular data to then use LRP on it (as it has been done here). I am stuck at the model.fit() part of the implementation. Up to that point everything seems to work.
Here is the error I get:
model_history = model.fit(X_train_smote, y_train_smote, batch_size=100, epochs=100, validation_split = 0.2)
Epoch 1/100
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [60], in <cell line: 1>()
----> 1 model_history = model.fit(X_train_smote, y_train_smote, batch_size=100, epochs=100, validation_split = 0.2)
File D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\utils\traceback_utils.py:67, in filter_traceback.<locals>.error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
File ~\AppData\Local\Temp\__autograph_generated_filelswqetod.py:15, in outer_factory.<locals>.inner_factory.<locals>.tf__train_function(iterator)
13 try:
14 do_return = True
---> 15 retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
16 except:
17 do_return = False
ValueError: in user code:
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 1051, in train_function *
return step_function(self, iterator)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 1040, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 1030, in run_step **
outputs = model.train_step(data)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 890, in train_step
loss = self.compute_loss(x, y, y_pred, sample_weight)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 948, in compute_loss
return self.compiled_loss(
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\compile_utils.py", line 201, in __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\losses.py", line 139, in __call__
losses = call_fn(y_true, y_pred)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\losses.py", line 243, in call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\losses.py", line 1787, in categorical_crossentropy
return backend.categorical_crossentropy(
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\backend.py", line 5119, in categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
ValueError: Shapes (None,) and (None, 6, 2) are incompatible
Yet, for the sake of clarity here is the rest of my current implementation (adapted from a Kaggle notebook). The model structure is based on the feedback of my previous question here on SO.
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import ListedColormap
from sklearn.metrics import plot_confusion_matrix
from scipy.stats import norm, boxcox
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from collections import Counter
from scipy import stats
import tensorflow as tf
import warnings
warnings.simplefilter(action='ignore', category=Warning)
dataset = pd.read_csv('F:/Programmieren/this_one/data/Churn_Modelling.csv')
# split into variables and target
X = dataset.iloc[:, 3:-1].values
y = dataset.iloc[:, -1].values
# here gender is encoded
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
X[:, 2] = le.fit_transform(X[:, 2])
# one hot encoding the country (as explained in Ullah et al.)
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
X = np.array(ct.fit_transform(X))
# split the data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= .2)
# upsample minority class with SMOTE
import imblearn
from imblearn.over_sampling import SMOTENC
#get original class distribution
counter = Counter(y)
print(counter)
#SMOTENC is used instead of SMOTE because there are multiple categorical features in the dataset
oversample = SMOTENC(categorical_features=[0, 1, 2, 4, 9, 10])
X_train_smote, y_train_smote = oversample.fit_resample(X_train, y_train)
#get new class distribution
counter = Counter(y_train_smote)
print(counter)
# normalize values to range 0-1 as explained in Ullah et al.
from sklearn.preprocessing import MinMaxScaler
mms = MinMaxScaler()
X_train_smote = mms.fit_transform(X_train_smote)
X_test = mms.transform(X_test)
#record-wise normalization for relative value comparison as stated by one of the authors I was in contact with
from sklearn.preprocessing import normalize
X_train_smote = normalize(X_train_smote, axis=1, norm='l1')
X_test = normalize(X_test, axis=1, norm='l1')
#reshape data for CNN
sample_size = X_train_smote.shape[0] # number of samples in train set
time_steps = X_train_smote.shape[1] # number of features in train set
input_dimension = 1
train_data_reshaped = X_train_smote.reshape(sample_size,time_steps,input_dimension)
print("After reshape train data set shape:\n", train_data_reshaped.shape)
print("1 Sample shape:\n",train_data_reshaped[0].shape)
print("An example sample:\n", train_data_reshaped[0])
#reshape test data as well
test_data_reshaped = X_test.reshape(X_test.shape[0],X_test.shape[1],1)
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv1D
#create model as explained in the paper
model = Sequential()
model.add(Conv1D(filters=25, kernel_size=3, activation='relu', input_shape=(12,1)))
model.add(Conv1D(50, 3))
model.add(Conv1D(100, 3))
model.add(Dense(2200, activation='relu'))
model.add(Dense(2, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.summary()
#output of model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv1d (Conv1D) (None, 10, 25) 100
conv1d_1 (Conv1D) (None, 8, 50) 3800
conv1d_2 (Conv1D) (None, 6, 100) 15100
dense (Dense) (None, 6, 2200) 222200
dense_1 (Dense) (None, 6, 2) 4402
dense_2 (Dense) (None, 6, 2) 6
=================================================================
Total params: 245,608
Trainable params: 245,608
Non-trainable params: 0
_________________________________________________________________
Are there any major flaws in my current approach that I am not aware of?
You should add a Flatten() layer before the output layer as the output shape is (6,2) and not (2,).
# Flatten must be imported explicitly; the question only imports Dense and Conv1D.
from keras.layers import Flatten

# Collapse the per-sample (6, 2) activations into a flat vector so that the
# final softmax layer emits shape (None, 2) instead of (None, 6, 2).
model.add(Flatten())
model.add(Dense(2, activation='softmax'))
Also, please change the loss to sparse_categorical_crossentropy, because you are using integers (0, 1) as the labels.
#change loss to sparse_categorical_crossentropy
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])
Please refer to the gist for working code. Thank you!
Related
This was the code for building the artificial neural network and the classifier. It was simple churn modelling for determining whether a customer will leave a bank or not.
# Build the ANN
import keras
from keras.models import Sequential
from keras.layers import Dense
# Initialize the ANN
classifier = Sequential()
# Add the input layer and the first hidden layer to the ANN
classifier.add(Dense(6, kernel_initializer = 'glorot_uniform', activation = 'relu', input_shape =
(11,)))
# Add the second hidden layer
classifier.add(Dense(6, kernel_initializer = 'glorot_uniform', activation = 'relu'))
# Add the output/final layer
classifier.add(Dense(1, kernel_initializer = 'glorot_uniform', activation = 'sigmoid'))
# Compile the ANN
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics=('accuracy'))
# Fit the ANN on the training set using the fit method
classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)
# Make predictions on the test set
y_prediction = classifier.predict(X_test)
# Convert the probabilities into definite class labels for model validation
y_prediction = (y_prediction > 0.5)
# Build the confusion matrix to evaluate the results
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_prediction)
#Evaluating, improving and tuning the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from keras.models import Sequential
from keras.layers import Dense
def build_classifier():
    """Create and compile the churn-prediction network.

    The network takes 11 input features, has two hidden ReLU layers of
    6 units each and a single sigmoid output unit. It is compiled with
    the Adam optimizer and binary cross-entropy loss.

    Returns:
        The compiled ``Sequential`` model.
    """
    hidden_kwargs = {'kernel_initializer': 'glorot_uniform', 'activation': 'relu'}
    network = Sequential([
        Dense(6, input_shape=(11,), **hidden_kwargs),
        Dense(6, **hidden_kwargs),
        Dense(1, kernel_initializer='glorot_uniform', activation='sigmoid'),
    ])
    # NOTE: ('accuracy') is a plain string, not a 1-tuple; it is passed
    # through exactly as in the original call.
    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=('accuracy'))
    return network
classifier = KerasClassifier(build_fn = build_classifier(), batch_size = 10, epochs = 100)
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10, n_jobs = -1)
---
And this was the error
RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Users\BSNL\anaconda3\lib\site-packages\joblib\externals\loky\backend\queues.py", line 153, in _ feed
obj = dumps(obj, reducers=reducers)
File "C:\Users\BSNL\anaconda3\lib\site-packages\joblib\externals\loky\backend\reduction.py", line 271,
in dumps
dump(obj, buf, reducers=reducers, protocol=protocol)
File "C:\Users\BSNL\anaconda3\lib\site-packages\joblib\externals\loky\backend\reduction.py", line 264,
in dump
_LokyPickler(file, reducers=reducers, protocol=protocol).dump(obj)
File "C:\Users\BSNL\anaconda3\lib\site-packages\joblib\externals\cloudpickle\cloudpickle_fast.py",
line 563, in dump
return Pickler.dump(self, obj)
TypeError: cannot pickle 'weakref' object
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\BSNL\Documents\Deep_Learning_A_Z\Volume 1 - Supervised Deep Learning\Part 1 -
Artificial Neural Networks (ANN)\Section 4 - Building an ANN\ANN.py", line 78, in
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10, n_jobs = -1)
File "C:\Users\BSNL\anaconda3\lib\site-packages\sklearn\utils\validation.py", line 72, in inner_f
return f(**kwargs)
File "C:\Users\BSNL\anaconda3\lib\site-packages\sklearn\model_selection_validation.py", line 401, in
cross_val_score
cv_results = cross_validate(estimator=estimator, X=X, y=y, groups=groups,
File "C:\Users\BSNL\anaconda3\lib\site-packages\sklearn\utils\validation.py", line 72, in inner_f
return f(**kwargs)
File "C:\Users\BSNL\anaconda3\lib\site-packages\sklearn\model_selection_validation.py", line 242, in
cross_validate
scores = parallel(
File "C:\Users\BSNL\anaconda3\lib\site-packages\joblib\parallel.py", line 1061, in call
self.retrieve()
File "C:\Users\BSNL\anaconda3\lib\site-packages\joblib\parallel.py", line 940, in retrieve
self._output.extend(job.get(timeout=self.timeout))
File "C:\Users\BSNL\anaconda3\lib\site-packages\joblib_parallel_backends.py", line 542, in
wrap_future_result
return future.result(timeout=timeout)
File "C:\Users\BSNL\anaconda3\lib\concurrent\futures_base.py", line 432, in result
return self.__get_result()
File "C:\Users\BSNL\anaconda3\lib\concurrent\futures_base.py", line 388, in __get_result
raise self._exception
PicklingError: Could not pickle the task to send it to the workers.
Putting n_jobs = 1 worked. I had also made the mistake of putting brackets in KerasClassifier's build_fn argument, i.e. use build_classifier instead of build_classifier().
Running into an issue trying to get a custom metric callback to work with Tensorflow. I've created a minimal working example below to help troubleshoot. I'm running:
Windows 10
Python 3.6
scikit-learn==0.23.2
pandas==0.25.3
numpy==1.18.5
tensorflow==2.3.0
Using the breast cancer binary dataset, I'm trying to invoke the custom metric that was shown as a solution here, but running into the above error, probably because I'm not using it right.
This code...
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import Callback
# Get binary classification dataset
data = load_breast_cancer(as_frame=True)
print(data)
df = data['data']
df['target'] = data['target']
# Train Test split
train, test = train_test_split(data, test_size = 0.10, shuffle=False)
# Define features and labels
x_train = train.iloc[:, :-1]
y_train = train.iloc[:, -1]
x_test = test.iloc[:, :-1]
y_test = test.iloc[:, -1]
# https://github.com/keras-team/keras/issues/10472#issuecomment-472543538
class Metrics(Callback):
    """Callback that records validation F1, precision and recall after every epoch.

    The per-epoch scores are appended to ``val_f1s``, ``val_precisions`` and
    ``val_recalls`` on the callback instance.

    NOTE(review): ``on_epoch_end`` calls both ``len()`` and ``next()`` directly
    on ``validation_data``, so ``val_data`` has to be an iterator that also
    reports its number of batches. A plain list or tuple such as
    ``[x_test, y_test]`` is not an iterator and makes ``next()`` raise
    ``TypeError``.
    """

    def __init__(self, val_data, batch_size=20):
        """Store the validation source and the batch size used to size the buffers."""
        super().__init__()
        self.validation_data = val_data
        self.batch_size = batch_size

    def on_train_begin(self, logs={}):
        """Reset the per-epoch score histories at the start of training."""
        # NOTE(review): ``logs={}`` is a mutable default argument; it is never
        # mutated here, but ``logs=None`` would be the safer signature.
        # print(self.validation_data)
        self.val_f1s = []
        self.val_recalls = []
        self.val_precisions = []

    def on_epoch_end(self, epoch, logs={}):
        """Predict on every validation batch and append F1/precision/recall."""
        # Number of batches is taken from len() of the validation source.
        batches = len(self.validation_data)
        # Buffers are sized assuming every batch holds exactly batch_size samples.
        total = batches * self.batch_size
        val_pred = np.zeros((total,1))
        val_true = np.zeros((total))
        for batch in range(batches):
            # Requires validation_data to be an iterator yielding (x, y) pairs.
            xVal, yVal = next(self.validation_data)
            # Round the model outputs to the nearest integer to get class labels.
            # The slice is batch_size long, so a batch of a different size
            # cannot be broadcast into it.
            val_pred[batch * self.batch_size : (batch+1) * self.batch_size] = np.asarray(self.model.predict(xVal)).round()
            val_true[batch * self.batch_size : (batch+1) * self.batch_size] = yVal
        # Drop the trailing singleton axis so predictions match val_true's shape.
        val_pred = np.squeeze(val_pred)
        _val_f1 = f1_score(val_true, val_pred)
        _val_precision = precision_score(val_true, val_pred)
        _val_recall = recall_score(val_true, val_pred)
        self.val_f1s.append(_val_f1)
        self.val_recalls.append(_val_recall)
        self.val_precisions.append(_val_precision)
        return
# Define a function that creates a basic model
def make_deep_learning_classifier():
    """Build and compile a small fully connected binary classifier.

    The input width is read from the module-level ``x_train`` (its number
    of columns). The network has 64 and 32 ReLU units followed by one
    sigmoid unit, and is compiled with binary cross-entropy, Adam and the
    accuracy metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    n_features = x_train.shape[1]
    layers = [
        Dense(64, activation='relu', input_dim=n_features, kernel_initializer='normal'),
        # input_dim is also passed to the second layer, exactly as before.
        Dense(32, activation='relu', input_dim=n_features, kernel_initializer='normal'),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ]
    net = Sequential()
    for layer in layers:
        net.add(layer)
    net.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])
    return net
# Get our model
model = make_deep_learning_classifier()
print(model.summary())
# Define some params
batch_size = 32
# Call our custom callback
callback = [Metrics(val_data=[x_test, y_test], batch_size=batch_size)] # < Issue here?
# Start training
model.fit(x_train, y_train, epochs=1000, batch_size=batch_size, verbose=1, callbacks=callback, validation_data=(x_test, y_test))
print(Metrics.val_precisions) # < Issue here?
...produces this traceback...
File "test.py", line 91, in <module>
model.fit(x_train, y_train, epochs=1000, batch_size=batch_size, verbose=1, callbacks=callback, validation_data=(x_test, y_test))
File "C:\Users\...\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\training.py", line 108, in _method_wrapper
return method(self, *args, **kwargs)
File "C:\Users\...\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\training.py", line 1137, in fit
callbacks.on_epoch_end(epoch, epoch_logs)
File "C:\Users\...\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\callbacks.py", line 416, in on_epoch_end
callback.on_epoch_end(epoch, numpy_logs)
File "test.py", line 54, in on_epoch_end
xVal, yVal = next(self.validation_data)
TypeError: 'list' object is not an iterator
When I change val_data=[x_test, y_test] to val_data=(x_test, y_test) in the callback variable, I get...
TypeError: 'tuple' object is not an iterator
The user who proposed this callback solution mentions something about generators, but I'm not sure how those work. Just trying to define my own custom metric for Tensorflow/Keras. I won't be using this exact callback, but once I get this one running, I can modify it to my own. Just providing it as an example that seemed to work in that GitHub post that I hope someone will be able to point out what I'm doing wrong.
Thanks!
UPDATE
Using the solution below, I try to properly call my iterator function on my val_data by using
iter_val_data = iter(self.validation_data)
for batch in range(batches):
xVal, yVal = next(iter_val_data)
But then I get a too many values to unpack error, so I change it to:
iter_val_data = iter(self.validation_data)
for batch in range(batches):
xVal = next(iter_val_data)
yVal = next(iter_val_data)
Then I get the error:
Traceback (most recent call last):
File "test.py", line 89, in <module>
model.fit(x_train, y_train, epochs=1000, batch_size=batch_size, verbose=1, callbacks=callback, validation_data=(x_test, y_test))
File "C:\Users\...\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\training.py", line 108, in _method_wrapper
return method(self, *args, **kwargs)
File "C:\Users\...\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\training.py", line 1137, in fit
callbacks.on_epoch_end(epoch, epoch_logs)
File "C:\Users\...\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\callbacks.py", line 416, in on_epoch_end
callback.on_epoch_end(epoch, numpy_logs)
File "test.py", line 53, in on_epoch_end
val_pred[batch * self.batch_size : (batch+1) * self.batch_size] = np.asarray(self.model.predict(xVal)).round()
ValueError: could not broadcast input array from shape (57,1) into shape (32,1)
Ideas from here? Try and run the code in the same environment as described above if you can. Thanks!
As you can see here, and as your error message states, you need to use next() with an iterator. You are calling next() on a list; how should next() know which element comes next? For that you need an iterator, which keeps track of that state. So this should fix your issue:
iter_val_data = iter(self.validation_data)
for batch in range(batches):
xVal, yVal = next(iter_val_data)
I am trying to create a neural net model that returns the similarity score of two sentences using a Manhattan LSTM (e.g. https://medium.com/mlreview/implementing-malstm-on-kaggles-quora-question-pairs-competition-8b31b0b16a07 ). I have used the Quora question pairs dataset and generated the embeddings using Google BERT. Now I want to create an LSTM model like the one in the example above and use it, but I am getting the following error:
Using TensorFlow backend.
(100000, 1, 768)
(100000, 1, 768)
(100000,)
(100000, 100)
Traceback (most recent call last):
File "train_model_manhattan.py", line 151, in <module>
model = Model(inputs=[inp1,inp2], outputs=[malstm_distance])
File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/engine/network.py", line 93, in __init__
self._init_graph_network(*args, **kwargs)
File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/engine/network.py", line 231, in _init_graph_network
self.inputs, self.outputs)
File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/engine/network.py", line 1366, in _map_graph_network
tensor_index=tensor_index)
File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/engine/network.py", line 1353, in build_map
node_index, tensor_index)
File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/engine/network.py", line 1353, in build_map
node_index, tensor_index)
File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/engine/network.py", line 1325, in build_map
node = layer._inbound_nodes[node_index]
AttributeError: 'NoneType' object has no attribute '_inbound_nodes'
Here is what I have already tried. Note that embeddings returned has shape(768) i.e. is a vector of size 768 like this [1.2e+05 2.7e-01 7.8 .... 8.9]
print(np.shape(train_vec1)) => (100000, 1, 768)
print(np.shape(train_vec2)) => (100000, 1, 768)
print(np.shape(train_label))
#################################################
def exponent_neg_manhattan_distance(left, right):
    """Return ``exp(-d)``, where ``d`` is the L1 distance along axis 1.

    The absolute differences between ``left`` and ``right`` are summed
    over axis 1 (the reduced axis is kept with length 1) and the negated
    sum is exponentiated with NumPy.
    """
    abs_diff = np.abs(left - right)
    l1_distance = np.sum(abs_diff, axis=1, keepdims=True)
    return np.exp(-l1_distance)
def manhattan_distance(left, right):
    """Helper for the similarity estimate of the two LSTM outputs.

    Prints the shape of ``left``, then returns the sum of absolute
    differences between ``left`` and ``right`` over axis 1, computed with
    the Keras backend and keeping the reduced axis.
    """
    print(np.shape(left))
    abs_diff = K.abs(left - right)
    return K.sum(abs_diff, axis=1, keepdims=True)
#################################################
import keras
from keras.layers import Input, LSTM, Dense
from keras.models import Model
inp1= Input(shape=(768,))
inp2= Input(shape=(768,))
x = keras.layers.concatenate([inp1, inp2],axis=-1)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5) (x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5) (x)
x = Dense(64, activation='relu')(x)
out=Dense(1)(x)
# Since this is a siamese network, both sides share the same LSTM
shared_lstm = LSTM(100)
left_output = shared_lstm(train_vec1_tensor)
right_output = shared_lstm(train_vec2_tensor)
# Calculates the distance as defined by the MaLSTM model
malstm_distance = Lambda(function=lambda x: manhattan_distance(x[0], x[1]),output_shape=lambda x: (x[0][0], 1))([left_output, right_output])
#######################
# Getting an error when the code flow reaches the following line
#######################
model = Model(inputs=[inp1,inp2], outputs=[malstm_distance])
This is my entire code
import os
data_file='quora_duplicate_questions.tsv'
# 0 means dont load, 1 means fetch from file
LOAD_ENCODING_FROM_FILE=1
encoding_data_file_quest1='encoding_quest1'
encoding_data_file_quest2='encoding_quest2'
encoding_data_file_label='quest_label'
#################################################
import numpy as np
import pandas as pd
import tensorflow as tf
import re
from bert_serving.client import BertClient
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import numpy as np
import pickle
from keras import models
from keras import layers
from keras import optimizers
from keras.layers import Dropout
from keras import backend as K
from keras.layers import Lambda
#################################################
maxlen = 125 # We will cut reviews after 125 words
# The next step is to tranform all sentences to fixed length encoding using bert embeddings
# [0.1 0.4 0.4] [0.9 0.6 0.1] 2.4
# [0.4 0.1 0.3] [0.5 0.6 0.1] 1.0
# Save the encodings in a file
if LOAD_ENCODING_FROM_FILE == 1:
with open(encoding_data_file_quest1, "rb") as fp:
vec1=pickle.load(fp)
with open(encoding_data_file_quest2, "rb") as fp:
vec2=pickle.load(fp)
with open(encoding_data_file_label, "rb") as fp:
label=pickle.load(fp)
train_vec1 = np.asarray(vec1, np.float32)
train_vec2 = np.asarray(vec2, np.float32)
train_vec1 = train_vec1.reshape((100000,1,768))
train_vec2 = train_vec2.reshape((100000,1,768))
train_vec1_tensor = K.cast(train_vec1,dtype='float32')
train_vec2_tensor = K.cast(train_vec2,dtype='float32')
train_label = np.asarray(label,np.float32)
print(np.shape(train_vec1))
print(np.shape(train_vec2))
print(np.shape(train_label))
#################################################
def exponent_neg_manhattan_distance(left, right):
    """Return ``exp(-d)``, where ``d`` is the L1 distance along axis 1.

    The absolute differences between ``left`` and ``right`` are summed
    over axis 1 (the reduced axis is kept with length 1) and the negated
    sum is exponentiated with NumPy.
    """
    abs_diff = np.abs(left - right)
    l1_distance = np.sum(abs_diff, axis=1, keepdims=True)
    return np.exp(-l1_distance)
def manhattan_distance(left, right):
    """Helper for the similarity estimate of the two LSTM outputs.

    Prints the shape of ``left``, then returns the sum of absolute
    differences between ``left`` and ``right`` over axis 1, computed with
    the Keras backend and keeping the reduced axis.
    """
    print(np.shape(left))
    abs_diff = K.abs(left - right)
    return K.sum(abs_diff, axis=1, keepdims=True)
#################################################
import keras
from keras.layers import Input, LSTM, Dense
from keras.models import Model
inp1= Input(shape=(768,))
inp2= Input(shape=(768,))
x = keras.layers.concatenate([inp1, inp2],axis=-1)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5) (x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5) (x)
x = Dense(64, activation='relu')(x)
out=Dense(1)(x)
# Since this is a siamese network, both sides share the same LSTM
shared_lstm = LSTM(100)
left_output = shared_lstm(train_vec1_tensor)
right_output = shared_lstm(train_vec2_tensor)
# Calculates the distance as defined by the MaLSTM model
malstm_distance = Lambda(function=lambda x: manhattan_distance(x[0], x[1]),output_shape=lambda x: (x[0][0], 1))([left_output, right_output])
#######################
# Getting an error when the code flow reaches the following line
#######################
model = Model(inputs=[inp1,inp2], outputs=[malstm_distance])
model.summary()
optimizer = optimizers.Adadelta(clipnorm=gradient_clipping_norm)
model.compile(optimizer,
loss='mean_squared_error',
metrics=['accuracy'])
history=model.fit([train_vec1, train_vec2], train_label,
epochs=30,batch_size=200,
validation_split=0.2)
I want the model to take two embeddings, calculate the manhattan distance of the embeddings and return the distance.
left_output and right_output are obtained from the LSTM layer. The inputs are fed to the Input layer and through a series of Dense layers. However, note that there is no connection anywhere between the set of Dense layers and the LSTM. The Model expects the output from the LSTM layer which is not possible. This line keras.layers.concatenate should use the outputs from the shared_lstm rather than using the outputs of input layers directly. Like this
keras.layers.concatenate([left_output, right_output],axis=-1)
Only then can this be a Siamese network.
So I have this error message that ruins all the fun with my work:
Traceback (most recent call last):
File "C:\Python\Python36\Scripts\Masterarbeit-1308\CNN - Kopie.py", line 97, in <module>
model.fit(np.asarray(X_train), np.asarray(Y_train), batch_size=32, epochs=100, verbose=1, validation_data=(np.asarray(X_test), np.asarray(Y_test)))
File "C:\Users\\****\AppData\Roaming\Python\Python36\site-packages\numpy\core\numeric.py", line 492, in asarray
return array(a, dtype, copy=False, order=order)
MemoryError
Does anyone have a solution for this?
I work on a machine i7 7th generation with 16 GB RAM.
To explain further, here is my code. It takes a list of arrays (.npy files) that were converted from sound spectrograms and saved in Input-CNN:
import os, numpy as np
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, Activation, Flatten, Conv2D, Dropout, Dense
from keras.layers.normalization import BatchNormalization
import tensorflow as tf
from sklearn.utils import shuffle
from sklearn.cross_validation import train_test_split
from keras.utils import to_categorical
folder = 'D:\InputCNN - Copie'
folder1 = 'C:\Python\Python36\Scripts\Masterarbeit-1308\Data'
from keras import backend as K
My_Data = os.listdir(folder)
num_data= len(My_Data)
Classnames = os.listdir(folder1)
class_num = len(Classnames)
arr =[np.load(os.path.join(folder, filename), fix_imports=True) for filename in os.listdir(folder)]
labels = np.ones((num_data,))
labels[0:31]= 0
labels[31:80] = 1
labels[80:128] = 2
labels[128:131] = 3
labels[131:143] = 4
labels[143:157] = 5
labels[157:209] = 6
labels[209:] = 7
Y = to_categorical(labels,class_num)
x, y = shuffle(arr, Y, random_state=2)
dataset = tf.data.Dataset.from_tensor_slices(My_Data)
X_train, X_test, Y_train, Y_test = train_test_split(x, Y, test_size=0.2)
##
def build_model(idx,X,Y,nb_classes):
    """Build (without compiling) a 2D CNN classifier for the sample ``X[idx]``.

    The input shape is taken from axes 1-3 of ``X[idx]``, so each element of
    ``X`` is expected to have at least four axes. ``Y`` is not used in the body.

    NOTE(review): the indentation of this listing was lost. The extent of
    the ``for`` loop body below (here: through the Dropout(0.5) call) is a
    reconstruction and must be confirmed against the original script.
    """
    K.set_image_data_format('channels_last')
    nb_filters = 64 # number of convolutional filters to use
    pool_size = (2, 2) # size of pooling area for max pooling
    kernel_size = (3, 3) # convolution kernel size
    nb_layers = 4
    # Per-sample shape: axis 0 of X[idx] is skipped.
    input_shape = (X[idx].shape[1], X[idx].shape[2], X[idx].shape[3])
    model = Sequential()
    model.add(Conv2D(nb_filters, kernel_size, padding='valid', input_shape=input_shape))
    # NOTE(review): axis=1 is used together with 'channels_last'; confirm that
    # normalizing over axis 1 rather than the channel axis is intended.
    model.add(BatchNormalization(axis=1))
    model.add(Activation('relu'))
    # Remaining nb_layers-1 convolutional stages.
    for layer in range(nb_layers-1):
        model.add(Conv2D(nb_filters, kernel_size, padding='valid', input_shape=input_shape))
        model.add(BatchNormalization(axis=1))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=pool_size))
        model.add(Dropout(0.5))
    # Classification head.
    model.add(Flatten())
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dropout(0.6))
    # NOTE(review): the caller compiles with categorical_crossentropy; a
    # sigmoid output is unusual for that loss (softmax is typical) - confirm.
    model.add(Dense(nb_classes, activation='sigmoid'))
    return model
for idx in range(len(X_train)-1):
model = build_model(idx,X_train,Y_train, class_num)
model.summary()
model.compile(loss='categorical_crossentropy',
optimizer='adadelta',
metrics=['accuracy'])
model.fit(np.array(X_train), np.array(Y_train), batch_size=8, epochs=100, verbose=1, validation_data=(np.array(X_test), np.array(Y_test))) #Here I have the problem
score = model.evaluate(np.array(X_test), np.array(Y_test), verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
The model.fit call is the problem in my code. It should train my preconfigured model and return a History object (a record of the training). I tried both np.array and np.asarray and got the same error message.
If someone thinks that the model's summary would be helpful, I'll post it.
I solved this issue. Actually I changed the shape of my data in the list "X_train" (from (218,128,740,1) to (128,740,1)).
I found that, thanks to Keras, it will add automatically another axis with the number of my data injected to the network, and np.asarray works well even with more data.
I am getting into a similar problem with reshaping data for 1-D CNN:
I am loading data (training and testing data sets ) from a csv file with 24,325 lines. Each line is a vector of 256 numbers - independent variables plus 11 numbers of expected outcome ( labels ) [0,0,0,0,1,0,0,0,0,0,0]
I am using TensorFlow backend.
The code looks like that:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Import the training set: the first 20000 rows.
training_set = pd.read_csv("Data30.csv")
X_train = training_set.iloc[:20000, 3:-11].values
# The last 11 columns hold the one-hot labels. Slice with -11: so that all
# 11 are kept (-11:-1 silently dropped the final label column).
y_train = training_set.iloc[:20000, -11:].values
# Import the test set: every remaining row, starting at row 20000 so that no
# row is skipped between the training and the test split.
test_set = pd.read_csv("Data30.csv")
X_test = test_set.iloc[20000:, 3:-11].values
y_test = test_set.iloc[20000:, -11:].values
# Normalise data to the [0, 1] range. A new array is created instead of
# dividing in place, so integer-typed columns do not raise a casting error.
# NOTE(review): the test set is scaled by its own maximum, not the training
# maximum; confirm that this is intended.
X_train = X_train / np.max(X_train)
X_test = X_test / np.max(X_test)
print("X_train.shape[0] = " + str(X_train.shape[0]))
print("X_train.shape[1] = " + str(X_train.shape[1]))
print("y_train.shape[0] = " + str(y_train.shape[0]))
print("y_train.shape[1] = " + str(y_train.shape[1]))
print("X_test.shape[0] = " + str(X_test.shape[0]))
print("X_test.shape[1] = " + str(X_test.shape[1]))
This is what I get:
X_train.shape[0] = 20000
X_train.shape[1] = 256
y_train.shape[0] = 20000
y_train.shape[1] = 11
X_test.shape[0] = 4325
X_test.shape[1] = 256
#Convert data into 3d tensor
# Old Version
# X_train = np.reshape(X_train,(1,X_train.shape[0],X_train.shape[1]))
# X_test = np.reshape(X_test,(1,X_test.shape[0],X_test.shape[1]))
# New correct version based on the answer:
X_train = np.reshape(X_train,( X_train.shape[0],X_train.shape[1], 1 ))
X_test = np.reshape(X_test,( X_test.shape[0],X_test.shape[1], 1 ))
print("X_train.shape[0] = " + str(X_train.shape[0]))
print("X_train.shape[1] = " + str(X_train.shape[1]))
print("X_test.shape[0] = " + str(X_test.shape[0]))
print("X_test.shape[1] = " + str(X_test.shape[1]))
This is the result of the reshaping (output from the old version above):
X_train.shape[0] = 1
X_train.shape[1] = 20000
X_test.shape[0] = 1
X_test.shape[1] = 4325
#Importing convolutional layers
from keras.models import Sequential
from keras.layers import Convolution1D
from keras.layers import MaxPooling1D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers.normalization import BatchNormalization
#Initialising the CNN
classifier = Sequential()
#1.Multiple convolution and max pooling
classifier.add(Convolution1D(filters=8, kernel_size=11, activation="relu", input_shape=( 256, 1 )))
classifier.add(MaxPooling1D(strides=4))
classifier.add(BatchNormalization())
classifier.add(Convolution1D(filters=16, kernel_size=11, activation='relu'))
classifier.add(MaxPooling1D(strides=4))
classifier.add(BatchNormalization())
classifier.add(Convolution1D(filters=32, kernel_size=11, activation='relu'))
classifier.add(MaxPooling1D(strides=4))
classifier.add(BatchNormalization())
#classifier.add(Convolution1D(filters=64, kernel_size=11,activation='relu'))
#classifier.add(MaxPooling1D(strides=4))
#2.Flattening
classifier.add(Flatten())
#3.Full Connection
classifier.add(Dropout(0.5))
classifier.add(Dense(64, activation='relu'))
classifier.add(Dropout(0.25))
classifier.add(Dense(64, activation='relu'))
classifier.add(Dense(1, activation='sigmoid'))
#Configure the learning process
classifier.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
#Train!
classifier.fit_generator(training_set,
steps_per_epoch= 100,
nb_epoch = 200,
validation_data = (X_test,y_test),
validation_steps = 40)
score = classifier.evaluate(X_test, y_test)
This is the error I get:
Traceback (most recent call last):
File "C:/Conda/ML_Folder/CNN Data30.py", line 85, in
classifier.fit_generator(X_train, steps_per_epoch=10, epochs=10, validation_data=(X_test,y_test))
File "C:\Conda\lib\site-packages\keras\legacy\interfaces.py", line 87, in wrapper
return func(*args, **kwargs)
File "C:\Conda\lib\site-packages\keras\models.py", line 1121, in fit_generator
initial_epoch=initial_epoch)
File "C:\Conda\lib\site-packages\keras\legacy\interfaces.py", line 87, in wrapper
return func(*args, **kwargs)
File "C:\Conda\lib\site-packages\keras\engine\training.py", line 1978, in fit_generator
val_x, val_y, val_sample_weight)
File "C:\Conda\lib\site-packages\keras\engine\training.py", line 1378, in _standardize_user_data
exception_prefix='input')
File "C:\Conda\lib\site-packages\keras\engine\training.py", line 144, in _standardize_input_data
str(array.shape))
ValueError: Error when checking input: expected conv1d_1_input to have shape (None, 256, 1) but got array with shape (1, 4325, 256)
Can you please help me to fix the code?
Shapes should be (batchSize, length, channels)
So: (20000,256,1) and (20000,11)
Detail: your last Dense must output 11, so: Dense(11,...)