Training issue in keras - python-3.x

I am trying to train my lstm model for sentiment analysis but the program doesnt proceed at all after displaying the following output:
F:\Softwares\Anaconda\lib\site-packages\h5py\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
from ._conv import register_converters as _register_converters
Using TensorFlow backend.
Extracting features & training batches
Training...
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_1 (Embedding) (None, 134, 70) 42481880
_________________________________________________________________
dropout_1 (Dropout) (None, 134, 70) 0
_________________________________________________________________
lstm_1 (LSTM) (None, 128) 101888
_________________________________________________________________
dense_1 (Dense) (None, 64) 8256
_________________________________________________________________
dropout_2 (Dropout) (None, 64) 0
_________________________________________________________________
activation_1 (Activation) (None, 64) 0
_________________________________________________________________
dense_2 (Dense) (None, 1) 65
_________________________________________________________________
activation_2 (Activation) (None, 1) 0
=================================================================
Total params: 42,592,089
Trainable params: 42,592,089
Non-trainable params: 0
_________________________________________________________________
None
Train on 360000 samples, validate on 90000 samples
Epoch 1/8
2018-12-08 15:56:04.680836: I T:\src\github\tensorflow\tensorflow\core\platform\cpu_feature_guard.cc:140] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
The code below has some commented out since it was used to save some textual data on disk beforehand. Now, the code only trains the lstm model using that training and testing textual data. It is given below:
import pandas as pd
import Preprocessing as pre
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.utils import shuffle
import pickle
import numpy as np
import sys
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.layers import LSTM
from keras.preprocessing.sequence import pad_sequences
from keras.models import model_from_json
from keras.preprocessing.text import Tokenizer
import os
# fileDir = os.path.dirname(os.path.realpath('__file__'))
# df = pd.read_csv(os.path.join(fileDir, '../Dataset/tweets.csv'),header=None,encoding = "ISO-8859-1")
# df=shuffle(df)
# length=df.size
#
# train=[]
# test=[]
# Y=[]
# Y2=[]
#
# count=450000
# for a in range(450000): #loading data
# b=pre.preprocess_tweet(df[1][a])
# label=int(df[0][a])
# train.append(b)
# Y.append(label)
# count-=1
# print("Loading training data...", count)
#
# with open('training_data(latest).obj', 'wb') as fp:
# pickle.dump(train, fp)
# with open('training_labels(latest).obj', 'wb') as fp:
# pickle.dump(Y, fp)
with open ('training_data(latest).obj', 'rb') as fp:
train = pickle.load(fp)
with open ('training_labels(latest).obj', 'rb') as fp:
Y = pickle.load(fp)
# count=156884
# for a in range(450000,606884): #loading testin data
# b = pre.preprocess_tweet(df[1][a])
# label=int(df[0][a])
# test.append(b)
# Y2.append(label)
# count-=1
# print("Loading testing data...", count)
#
# with open('testing_data(latest).obj', 'wb') as fp:
# pickle.dump(test, fp)
# with open('testing_labels(latest).obj', 'wb') as fp:
# pickle.dump(Y2, fp)
with open ('testing_data(latest).obj', 'rb') as fp:
test = pickle.load(fp)
with open ('testing_labels(latest).obj', 'rb') as fp:
Y2 = pickle.load(fp)
# vectorizer = CountVectorizer(analyzer = "word",tokenizer = None, preprocessor = None, stop_words = None, max_features = 2000)
# # # fit_transform() does two functions: First, it fits the model
# # # and learns the vocabulary; second, it transforms our training data
# # # into feature vectors. The input to fit_transform should be a list of
# # # strings.
#
# train = vectorizer.fit_transform(train)
# test = vectorizer.transform(test)
tokenizer = Tokenizer(split=' ')
tokenizer.fit_on_texts(train)
train = tokenizer.texts_to_sequences(train)
max_words = 134
train = pad_sequences(train, maxlen=max_words)
tokenizer.fit_on_texts(test)
test = tokenizer.texts_to_sequences(test)
test = pad_sequences(test, maxlen=max_words)
print('Extracting features & training batches')
print("Training...")
embedding_size=32
model = Sequential()
model.add(Embedding(606884, 70, input_length=134))
model.add(Dropout(0.4))
model.add(LSTM(128))
model.add(Dense(64))
model.add(Dropout(0.5))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation('sigmoid'))
print(model.summary())
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
batch_size = 100
num_epochs = 8
model.fit(train, np.array(Y), batch_size=batch_size, epochs=num_epochs ,validation_split=0.2,shuffle=True,verbose=2)
# Save the weights
model.save_weights('LSTM_model_weights_updated.h5')
# Save the model architecture
with open('LSTM_model_updated.json', 'w') as f:
f.write(model.to_json())
# #
# Model reconstruction from JSON file
# with open(os.path.join(fileDir, '../Dataset/LSTM_model.json'), 'r') as f:
# model = model_from_json(f.read())
#
# # Load weights into the new model
# model.load_weights(os.path.join(fileDir, '../Dataset/LSTM_model_weights.h5'))
# model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
scores = model.evaluate(test, np.array(Y2))
print('Evaluation Test accuracy:', scores[1])
count=0
sum=0
#
#
b=model.predict(test)
for a in b:
print(count)
if a<0.5:
sum = sum + abs(Y2[count] - 0) # error finding
else:
sum=sum+ abs(Y2[count]-1) #error finding
count+=1
acc=100-((sum/156884)*100)
print ("Accuracy=",acc,"count",count)

Total params: 42,592,089
Trainable params: 42,592,089
Non-trainable params: 0
Your model has more than 42 million trainable parameters which is too much for your machine's configuration (CPU, RAM, etc.), thus it can't handle it. What are the options?
Use smaller model
Use a better more powerful computer (with GPU of course)
Consider using an online cloud solution like crestle or paperspace

Related

Discrepancy between tensorflow model accuracy and actual accuracy

I am trying to do categorical classification of data. The data consists of 3 text variables, and one real value. I split the data into three sets - training, validation and testing. I am using tensorflow and python. For the test data I get the following stats: test data statistics
A categorical accuracy of 0.9919, however when I perform a prediction on the same test data, and evaluate the accuracy with the sci-kit classification_report function, I get an accuracy of 0.60, as seen here: classification report.
df.info() looks like this ('category' being the Y value): info. I don't think that the sci-kit learn accuracy statistic is misrepresenting the prediction, since the confusion matrix looks like this.
I have re-built the model multiple times, and tried balancing classes with class weights just in case, however this still wouldn't explain the discrepancy between tensorflows accuracy, and the sci-kit one (gained by prediction).
The code:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text
import tensorflow_addons as tfa
import tensorflow_datasets as tfds
df = pd.read_csv("procData_nosub.csv")
df = df.sample(frac=1).reset_index(drop=True) # Shuffling
df = df[(df.category != 'Pictures')] # Removing small categories
df = df[(df.category != 'Software')]
df = df.drop("fileAmount", axis=1)
df = df.drop("more100Files", axis=1)
train, val, test = np.split(df.sample(frac=1), [int(0.8 * len(df)), int(0.9 * len(df))])
# Function to convert dataframe to dataset (from https://www.tensorflow.org/tutorials/structured_data/preprocessing_layers)
def df_to_dataset(dataframe, shuffle=True, batch_size=64):
df = dataframe.copy()
labels = df.pop('category')
df = {key: value[:,tf.newaxis] for key, value in df.items()}
ds = tf.data.Dataset.from_tensor_slices((dict(df), labels))
if shuffle:
ds = ds.shuffle(buffer_size=len(df))
ds = ds.batch(batch_size)
ds = ds.prefetch(batch_size)
return ds
train_data = df_to_dataset(train)
test_data = df_to_dataset(test)
validation_data = df_to_dataset(val)
# Function to convert text (for the Y) to one-hot-encoding
catVals = np.unique(df['category'])
table = tf.lookup.StaticHashTable(
initializer = tf.lookup.KeyValueTensorInitializer(
keys = tf.constant(catVals),
values = list(range(len(catVals)))
),
default_value = -1,
name = "target_encoding"
)
#tf.function
def target(x):
return table.lookup(x)
def fetch(features, labels):
return features, tf.one_hot(target(labels), len(catVals))
# Applying the text (Y) -> one-hot-encoding
train_data_f = train_data.map(fetch)
test_data_f = test_data.map(fetch)
validation_data_f = validation_data.map(fetch)
# Using an encoder
embedding = "https://tfhub.dev/google/universal-sentence-encoder-multilingual/3"
hub_layer = hub.KerasLayer(embedding, output_shape = 512, input_shape = [], dtype=tf.string, trainable=True)
# Normalizing real values (from https://www.tensorflow.org/tutorials/structured_data/preprocessing_layers)
def get_normalization_layer(name, dataset):
normalizer = tf.keras.layers.Normalization(axis=None)
feature_ds = dataset.map(lambda x, y: x[name])
normalizer.adapt(feature_ds)
return normalizer
all_inputs = []
encoded_features = []
# Adding filesize to features
for header in ['fileSize']: # fileAmount
numeric_col = tf.keras.Input(shape=(1,), name=header)
normalization_layer = get_normalization_layer(header, train_data_f)
encoded_numeric_col = normalization_layer(numeric_col)
all_inputs.append(numeric_col)
encoded_features.append(encoded_numeric_col)
# Adding title, description, files to features
for header in ['title', 'description', 'files']:
text_col = tf.keras.Input(shape=(), name=header, dtype='string')
encoded_text_col = hub_layer(text_col)
all_inputs.append(text_col)
encoded_features.append(encoded_text_col)
# Describing the model
all_features = tf.keras.layers.concatenate(encoded_features)
x = all_features
for i in range(3):
x = tf.keras.layers.Dense(16, activation='relu')(x)
# x = tf.keras.layers.Dropout(0.2)(x)
output = tf.keras.layers.Dense(len(catVals), activation='softmax')(x)
model = tf.keras.Model(all_inputs, output)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
loss=tf.keras.losses.CategoricalCrossentropy(), # deleted from_logits=True
metrics=["categorical_accuracy",
tf.keras.metrics.Precision(),
tf.keras.metrics.Recall(),
tfa.metrics.F1Score(num_classes=len(catVals),
average='macro',
threshold=0.5)])
history = model.fit(train_data_f, epochs=5, validation_data=validation_data_f) # removed class weights
model.evaluate(test_data_f)
model.evaluate(validation_data_f)
# Taking x and y from the test data
test_x = test_data_f.unbatch().map(lambda x, y: x)
test_y = test_data_f.unbatch().map(lambda x, y: y)
test_predicted = model.predict(test_x)
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
# converting test_y to a numpy array (from a tensorflow Dataset)
test_y = np.array([x for x in test_y])
print(classification_report(test_y.argmax(1), test_predicted.argmax(1)))
print(tf.math.confusion_matrix(test_y.argmax(1), test_predicted.argmax(1)))
As a TLDR: I am using universal-sentence-encoder-multilingual/3 from tensorflow hub, and the model looks like this: model build, fit history. Model summary:
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
fileSize (InputLayer) [(None, 1)] 0 []
title (InputLayer) [(None,)] 0 []
description (InputLayer) [(None,)] 0 []
files (InputLayer) [(None,)] 0 []
normalization (Normalization) (None, 1) 3 ['fileSize[0][0]']
keras_layer (KerasLayer) (None, 512) 68927232 ['title[0][0]',
'description[0][0]',
'files[0][0]']
concatenate (Concatenate) (None, 1537) 0 ['normalization[0][0]',
'keras_layer[0][0]',
'keras_layer[1][0]',
'keras_layer[2][0]']
dense (Dense) (None, 16) 24608 ['concatenate[0][0]']
dense_1 (Dense) (None, 16) 272 ['dense[0][0]']
dense_2 (Dense) (None, 16) 272 ['dense_1[0][0]']
dense_3 (Dense) (None, 4) 68 ['dense_2[0][0]']
==================================================================================================
Total params: 68,952,455
Trainable params: 68,952,452
Non-trainable params: 3
__________________________________________________________________________________________________

model.fit() error message when trying to train 1D CNN

I am trying to train a 1D-CNN on tabular data to then use LRP on it (as it has been done here). I am stuck at the model.fit() part of the implementation. Up to that point everything seems to work.
Here is the error I get:
model_history = model.fit(X_train_smote, y_train_smote, batch_size=100, epochs=100, validation_split = 0.2)
Epoch 1/100
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [60], in <cell line: 1>()
----> 1 model_history = model.fit(X_train_smote, y_train_smote, batch_size=100, epochs=100, validation_split = 0.2)
File D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\utils\traceback_utils.py:67, in filter_traceback.<locals>.error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
File ~\AppData\Local\Temp\__autograph_generated_filelswqetod.py:15, in outer_factory.<locals>.inner_factory.<locals>.tf__train_function(iterator)
13 try:
14 do_return = True
---> 15 retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
16 except:
17 do_return = False
ValueError: in user code:
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 1051, in train_function *
return step_function(self, iterator)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 1040, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 1030, in run_step **
outputs = model.train_step(data)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 890, in train_step
loss = self.compute_loss(x, y, y_pred, sample_weight)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 948, in compute_loss
return self.compiled_loss(
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\compile_utils.py", line 201, in __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\losses.py", line 139, in __call__
losses = call_fn(y_true, y_pred)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\losses.py", line 243, in call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\losses.py", line 1787, in categorical_crossentropy
return backend.categorical_crossentropy(
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\backend.py", line 5119, in categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
ValueError: Shapes (None,) and (None, 6, 2) are incompatible
Yet, for the sake of clarity here is the rest of my current implementation (adapted from a Kaggle notebook). The model structure is based on the feedback of my previous question here on SO.
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import ListedColormap
from sklearn.metrics import plot_confusion_matrix
from scipy.stats import norm, boxcox
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from collections import Counter
from scipy import stats
import tensorflow as tf
import warnings
warnings.simplefilter(action='ignore', category=Warning)
dataset = pd.read_csv('F:/Programmieren/this_one/data/Churn_Modelling.csv')
# split into variables and target
X = dataset.iloc[:, 3:-1].values
y = dataset.iloc[:, -1].values
# here gender is encoded
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
X[:, 2] = le.fit_transform(X[:, 2])
# one hot encoding the country (as explained in Ullah et al.)
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
X = np.array(ct.fit_transform(X))
# split the data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= .2)
# upsample minority class with SMOTE
import imblearn
from imblearn.over_sampling import SMOTENC
#get original class distribution
counter = Counter(y)
print(counter)
#SMOTENC is used instead of SMOTE because there are multiple categorical features in the dataset
oversample = SMOTENC(categorical_features=[0, 1, 2, 4, 9, 10])
X_train_smote, y_train_smote = oversample.fit_resample(X_train, y_train)
#get new class distribution
counter = Counter(y_train_smote)
print(counter)
# normalize values to range 0-1 as explained in Ullah et al.
from sklearn.preprocessing import MinMaxScaler
mms = MinMaxScaler()
X_train_smote = mms.fit_transform(X_train_smote)
X_test = mms.transform(X_test)
#record-wise normalization for relative value comparison as stated by one of the authors I was in contact with
from sklearn.preprocessing import normalize
X_train_smote = normalize(X_train_smote, axis=1, norm='l1')
X_test = normalize(X_test, axis=1, norm='l1')
#reshape data for CNN
sample_size = X_train_smote.shape[0] # number of samples in train set
time_steps = X_train_smote.shape[1] # number of features in train set
input_dimension = 1
train_data_reshaped = X_train_smote.reshape(sample_size,time_steps,input_dimension)
print("After reshape train data set shape:\n", train_data_reshaped.shape)
print("1 Sample shape:\n",train_data_reshaped[0].shape)
print("An example sample:\n", train_data_reshaped[0])
#reshape test data as well
test_data_reshaped = X_test.reshape(X_test.shape[0],X_test.shape[1],1)
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv1D
#create model as explained in the paper
model = Sequential()
model.add(Conv1D(filters=25, kernel_size=3, activation='relu', input_shape=(12,1)))
model.add(Conv1D(50, 3))
model.add(Conv1D(100, 3))
model.add(Dense(2200, activation='relu'))
model.add(Dense(2, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.summary()
#output of model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv1d (Conv1D) (None, 10, 25) 100
conv1d_1 (Conv1D) (None, 8, 50) 3800
conv1d_2 (Conv1D) (None, 6, 100) 15100
dense (Dense) (None, 6, 2200) 222200
dense_1 (Dense) (None, 6, 2) 4402
dense_2 (Dense) (None, 6, 2) 6
=================================================================
Total params: 245,608
Trainable params: 245,608
Non-trainable params: 0
_________________________________________________________________
Are there any major flaws in my current approach that I am not aware of?
You should add a Flatten() layer before the output layer as the output shape is (6,2) and not (2,).
#Add Flatten layer
model.add(Flatten())
model.add(Dense(2, activation='softmax'))
Also kindly change the loss to sparse_categorical_crossentopy because you are using integers(0,1) as the labels.
#change loss to sparse_categorical_crossentropy
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])
Please refer to the gist for working code. Thank you!

Input 0 of layer "dense_14" is incompatible with the layer: expected axis -1 of input shape to have value 148, but received input with shape (32, 21)

I'm trying to run a script using my data, but I get this error:
Input 0 of layer "dense_14" is incompatible with the layer: expected axis -1 of input shape to have value 148, but received input with shape (32, 21).
I understand that the shapes of my data which are ((11598, 1)) aren't compatible with the model shapes. If anyone can help ?
This is the code:
from __future__ import print_function, division
import tensorflow as tf
from keras.layers import Input, Dense, Activation
from keras.layers import Maximum, Concatenate
from keras.models import Model
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, BatchNormalization
from keras.callbacks import TensorBoard
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
class MalGAN():
def __init__(self):
self.apifeature_dims = 128
self.z_dims = 20
self.hide_layers = 256
self.generator_layers = [self.apifeature_dims+self.z_dims, self.hide_layers, self.apifeature_dims]
self.substitute_detector_layers = [self.apifeature_dims, self.hide_layers, 1]
self.blackbox = 'MLP'
optimizer = Adam(lr=0.001)
# Build and Train blackbox_detector
self.blackbox_detector = self.build_blackbox_detector()
# Build and compile the substitute_detector
self.substitute_detector = self.build_substitute_detector()
self.substitute_detector.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
# Build the generator
self.generator = self.build_generator()
# The generator takes malware and noise as input and generates adversarial malware examples
#example = Input(shape=(None,1),name='exampe',dtype='float32')
# noise = Input(shape=(None,1),name='noise',dtype='float32')
example = Input(shape=(self.apifeature_dims,))
noise = Input(shape=(self.z_dims,))
input = [example, noise]
malware_examples = self.generator(input)
# For the combined model we will only train the generator
self.substitute_detector.trainable = True
# The discriminator takes generated images as input and determines validity
validity = self.substitute_detector(malware_examples)
# The combined model (stacked generator and substitute_detector)
# Trains the generator to fool the discriminator
self.combined = Model(input, validity)
self.combined.compile(loss='binary_crossentropy', optimizer=optimizer)
def build_blackbox_detector(self):
if self.blackbox is 'MLP':
blackbox_detector = MLPClassifier(hidden_layer_sizes=(50,), max_iter=10, alpha=1e-4,
solver='sgd', verbose=0, tol=1e-4, random_state=1,
learning_rate_init=.1)
return blackbox_detector
def build_generator(self):
#example = Input(shape=(None,1))
# noise = Input(shape=(None,1))
example = Input(shape=(self.apifeature_dims,))
noise = Input(shape=(self.z_dims,))
#example = Input(shape=(256),name='exampe',dtype='float32')
#noise = Input(shape=(256),name='noise',dtype='float32')
x = Concatenate(axis=1)([example, noise])
for dim in self.generator_layers[1:]:
x = Dense(dim)(x)
x = Activation(activation='sigmoid')(x)
x = Maximum()([example, x])
generator = Model([example, noise], x, name='generator')
generator.summary()
return generator
def build_substitute_detector(self):
input = Input(shape=(self.substitute_detector_layers[0],))
#input = Input(shape=(5,256),dtype='float32')
x = input
for dim in self.substitute_detector_layers[1:]:
x = Dense(dim)(x)
x = Activation(activation='sigmoid')(x)
substitute_detector = Model(input, x, name='substitute_detector')
substitute_detector.summary()
return substitute_detector
def load_data(self, filename):
# data = pd.read_csv("feature_vectors_system calls.csv")
data = np.load(filename)
xmal, ymal, xben, yben = data['arr_0'].reshape(-1,1), data['arr_1'].reshape(-1,1), data['arr_2'].reshape(-1,1), data['arr_3'].reshape(-1,1)
return (xmal, ymal), (xben, yben)
def train(self, epochs, batch_size=128):
# Load the dataset
(xmal, ymal), (xben, yben) = self.load_data('my_data.npz')
xtrain_mal, xtest_mal, ytrain_mal, ytest_mal = train_test_split(xmal, ymal, test_size=0.20)
xtrain_ben, xtest_ben, ytrain_ben, ytest_ben = train_test_split(xben, yben, test_size=0.20)
print("xmal is : ", xmal.shape)
print("ymal is : ", ymal.shape)
print("xben is : ",xben.shape)
print("yben is ", yben.shape)
print("here 1")
# Train blackbox_detctor
self.blackbox_detector.fit(np.concatenate([xmal, xben]),
np.concatenate([ymal, yben]))
ytrain_ben_blackbox = self.blackbox_detector.predict(xtrain_ben)
Original_Train_TRR = self.blackbox_detector.score(xtrain_mal, ytrain_mal)
Original_Test_TRR = self.blackbox_detector.score(xtest_mal, ytest_mal)
print("here 2")
Train_TRR, Test_TRR = [], []
for epoch in range(epochs):
for step in range(1):#range(xtrain_mal.shape[0] // batch_size):
# ---------------------
# Train substitute_detector
# ---------------------
# Select a random batch of malware examples
idx = np.random.randint(0, xtrain_mal.shape[0],batch_size)
xmal_batch = xtrain_mal[idx]
noise = np.random.uniform(0, 1, (batch_size, self.z_dims))
idx = np.random.randint(0, xmal_batch.shape[0], batch_size)
xben_batch = xtrain_ben[idx]
yben_batch = ytrain_ben_blackbox[idx]
# Generate a batch of new malware examples
# noise.shape = (320,2)
#xmal_batch = np.asarray(xmal_batch).astype(np.float32)
#noise = np.asarray(noise).astype(np.float32)
# xmal_batch.shape = (32,1)
# noise.shape= (32,20)
print("xmal is: ", xmal_batch.shape)
print("noise is:", noise.shape)
#xmal_batch = tf.convert_to_tensor(xmal_batch)
#noise = tf.convert_to_tensor(noise)
xmal_batch.shape = (128,1)
noise.shape= (128,20)
#print("xmal is:", xmal)
#print("noise is:", noise)
# xmal_batch = np.asarray(xmal_batch).astype(np.float32)
noise = np.asarray(noise).astype(np.float32)
gen_examples = self.generator.predict([xmal_batch, noise])
# gen_examples = np.vectorize(gen_examples)
# gen_examples = np.array(gen_examples)
# gen_examples = gen_examples.reshape(gen_examples.shape[0],-1)
# print(gen_examples.shape)
gen_examples = gen_examples.reshape(-1,1)
ymal_batch = self.blackbox_detector.predict(np.ones(gen_examples.shape)*(gen_examples > 0.5))
print("ymal shape is:", ymal_batch.shape)
print("gen_examples is:", gen_examples.shape)
# gen_examples.shape = (2688,128)
# Train the substitute_detector
ymal_batch = np.array([0 for _ in range(len(gen_examples)//2)] + [1 for _ in range(len(gen_examples)//2)])
d_loss_real = self.substitute_detector.train_on_batch((gen_examples), ymal_batch)
print("yben shape is:", yben_batch.shape)
print("xben shape is:", xben_batch.shape)
# xben_batch.shape =(1,128)
# yben_batch.shape=(128,1)
print("yben shape is:", yben_batch.shape)
print("xben shape is:", xben_batch.shape)
d_loss_fake = self.substitute_detector.train_on_batch(xben_batch, yben_batch)
d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
# ---------------------
# Train Generator
# ---------------------
idx = np.random.randint(0, xtrain_mal.shape[0], batch_size)
xmal_batch = xtrain_mal[idx]
noise = np.random.uniform(0, 1, (batch_size, self.z_dims))
# Train the generator
g_loss = self.combined.train_on_batch([xmal_batch, noise], np.zeros((batch_size, 1)))
# Compute Train TRR
noise = np.random.uniform(0, 1, (xtrain_mal.shape[0], self.z_dims))
gen_examples = self.generator.predict([xtrain_mal, noise])
TRR = self.blackbox_detector.score(np.ones(gen_examples.shape) * (gen_examples > 0.5), ytrain_mal)
Train_TRR.append(TRR)
# Compute Test TRR
noise = np.random.uniform(0, 1, (xtest_mal.shape[0], self.z_dims))
gen_examples = self.generator.predict([xtest_mal, noise])
TRR = self.blackbox_detector.score(np.ones(gen_examples.shape) * (gen_examples > 0.5), ytest_mal)
Test_TRR.append(TRR)
# Plot the progress
print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))
print('Original_Train_TRR: {0}, Adver_Train_TRR: {1}'.format(Original_Train_TRR, Train_TRR[-1]))
print('Original_Test_TRR: {0}, Adver_Test_TRR: {1}'.format(Original_Test_TRR, Test_TRR[-1]))
# Plot TRR
plt.figure()
plt.plot(range(epochs), Train_TRR, c='r', label='Training Set', linewidth=2)
plt.plot(range(epochs), Test_TRR, c='g', linestyle='--', label='Validation Set', linewidth=2)
plt.xlabel("Epoch")
plt.ylabel("TRR")
plt.legend()
plt.show()
if __name__ == '__main__':
malgan = MalGAN()
malgan.train(epochs=1000, batch_size=128)
The output is:
Model: "substitute_detector"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_16 (InputLayer) [(None, 128)] 0
dense_12 (Dense) (None, 256) 33024
activation_12 (Activation) (None, 256) 0
dense_13 (Dense) (None, 1) 257
activation_13 (Activation) (None, 1) 0
=================================================================
Total params: 33,281
Trainable params: 33,281
Non-trainable params: 0
_________________________________________________________________
Model: "generator"
__________________________________________________________________________________________________
/usr/local/lib/python3.7/dist-packages/keras/optimizer_v2/adam.py:105: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.
super(Adam, self).__init__(name, **kwargs)
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_17 (InputLayer) [(None, 128)] 0 []
input_18 (InputLayer) [(None, 20)] 0 []
concatenate_3 (Concatenate) (None, 148) 0 ['input_17[0][0]',
'input_18[0][0]']
dense_14 (Dense) (None, 256) 38144 ['concatenate_3[0][0]']
activation_14 (Activation) (None, 256) 0 ['dense_14[0][0]']
dense_15 (Dense) (None, 128) 32896 ['activation_14[0][0]']
activation_15 (Activation) (None, 128) 0 ['dense_15[0][0]']
maximum_3 (Maximum) (None, 128) 0 ['input_17[0][0]',
'activation_15[0][0]']
==================================================================================================
Total params: 71,040
Trainable params: 71,040
Non-trainable params: 0
__________________________________________________________________________________________________
xmal is : (11598, 1)
ymal is : (11598, 1)
xben is : (11598, 1)
yben is (11598, 1)
here 1
after which I get
/usr/local/lib/python3.7/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:1109: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
y = column_or_1d(y, warn=True)
/usr/local/lib/python3.7/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:696: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (10) reached and the optimization hasn't converged yet.
ConvergenceWarning,
here 2
xmal is: (128, 1)
noise is: (128, 20)
WARNING:tensorflow:Model was constructed with shape (None, 128) for input KerasTensor(type_spec=TensorSpec(shape=(None, 128), dtype=tf.float32, name='input_17'), name='input_17', description="created by layer 'input_17'"), but it was called on an input with incompatible shape (32, 1).
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-937d6995f5c4> in <module>()
240 if __name__ == '__main__':
241 malgan = MalGAN()
--> 242 malgan.train(epochs=1000, batch_size=128)
2 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in autograph_handler(*args, **kwargs)
1145 except Exception as e: # pylint:disable=broad-except
1146 if hasattr(e, "ag_error_metadata"):
-> 1147 raise e.ag_error_metadata.to_exception(e)
1148 else:
1149 raise
ValueError: in user code:
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1801, in predict_function *
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1790, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1783, in run_step **
outputs = model.predict_step(data)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1751, in predict_step
return self(x, training=False)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/usr/local/lib/python3.7/dist-packages/keras/engine/input_spec.py", line 249, in assert_input_compatibility
f'Input {input_index} of layer "{layer_name}" is '
ValueError: Exception encountered when calling layer "generator" (type Functional).
Input 0 of layer "dense_14" is incompatible with the layer: expected axis -1 of input shape to have value 148, but received input with shape (32, 21)
Call arguments received:
• inputs=('tf.Tensor(shape=(32, 1), dtype=int64)', 'tf.Tensor(shape=(32, 20), dtype=float32)')
• training=False
• mask=None

ValueError while performing 'binary-crossentropy' ==> array shapes not matching

I was using tensorflow + keras while trying to implement a "Text classification" model to classify different types of movie reviews. I am running into a error which tells that the shapes aren't equal.
Because I am not sure where the error might be hidden, I can't produce a reprex example as i am not certain how to isolate the problem. It may be worth your time if the line with variable(x_val) is there as there may be a problem with the partitioning.
note this is not the final code. as I had already encountered an error at this point, i stopped writing it.
from __future__ import absolute_import, division, print_function
import numpy as np
import tensorflow as tf
from tensorflow import keras
imdb = keras.datasets.imdb
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)
#print ("The length if training data: "len(train_data[0]), "And labels: "len(test_data[0]))
word_index = imdb.get_word_index()
word_index = {k: (v+3) for k,v in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNKNOWN>"] = 2
word_index["<END>"] = 3
reverse_word_index = dict([(value, key) for (key, value) in word_index.items() ])
def decode_review(text):
return (' '.join([reverse_word_index.get(i , "?") for i in text ]))
print (decode_review(train_data[0]))
train_data = keras.preprocessing.sequence.pad_sequences(train_data,
value=word_index["<PAD>"],
padding="post",
maxlen=256)
test_data = keras.preprocessing.sequence.pad_sequences(test_data,
value=word_index["<PAD>"],
padding="post",
maxlen=256)
#print ('train length :' ,len(train_data[0]), 'test length: ', len(train_data[1]))
vocab_size = 10000
model = keras.Sequential()
model.add(keras.layers.Embedding(vocab_size , 16))
model.add(keras.layers.GlobalAveragePooling1D())
model.add(keras.layers.Dense(16, activation=tf.nn.relu))
model.add(keras.layers.Dense(16, activation=tf.nn.sigmoid))
print ("the model summary is :======>>" , model.summary())
model.compile(optimizer="adam" , loss="binary_crossentropy", metrics=["acc"])
x_val = train_data[:10000]
partial_x_train = train_data[10000:]
y_val = train_labels[:10000]
partial_y_train = train_labels[10000:]
history = model.fit(partial_x_train , partial_y_train , epochs=40 , batch_size=512,
validation_data=(x_val, y_val), verbose=1)
This is the error message I was getting:-----
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_1 (Embedding) (None, None, 16) 160000
_________________________________________________________________
global_average_pooling1d_1 ( (None, 16) 0
_________________________________________________________________
dense_2 (Dense) (None, 16) 272
_________________________________________________________________
dense_3 (Dense) (None, 16) 272
=================================================================
Total params: 160,544
Trainable params: 160,544
Non-trainable params: 0
_________________________________________________________________
the model summary is :======>> None
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py:180: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-18-02082e1f39d4> in <module>()
57
58 history = model.fit(partial_x_train , partial_y_train , epochs=40 , batch_size=512,
---> 59 validation_data=(x_val, y_val), verbose=1)
ValueError: A target array with shape (15000, 1) was passed for an output of shape (None, 16) while using as loss `binary_crossentropy`. This loss expects targets to have the same shape as the output.
You need to update the final/output layer of your model. Since it's a binary classification problem, the output Dense layer should have one node like follows:
model.add(keras.layers.Dense(1, activation=tf.nn.sigmoid))
You may want to check out this tutorial on text classification using IMDB dataset.

How to graph tf.keras model in Tensorflow-2.0?

I upgraded to Tensorflow 2.0 and there is no tf.summary.FileWriter("tf_graphs", sess.graph). I was looking through some other StackOverflow questions on this and they said to use tf.compat.v1.summary etc. Surely there must be a way to graph and visualize a tf.keras model in Tensorflow version 2. What is it? I'm looking for a tensorboard output like the one below. Thank you!
You can visualize the graph of any tf.function decorated function, but first, you have to trace its execution.
Visualizing the graph of a Keras model means to visualize it's call method.
By default, this method is not tf.function decorated and therefore you have to wrap the model call in a function correctly decorated and execute it.
import tensorflow as tf
model = tf.keras.Sequential(
[
tf.keras.layers.Flatten(input_shape=(28, 28)),
tf.keras.layers.Dense(32, activation="relu"),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(10, activation="softmax"),
]
)
#tf.function
def traceme(x):
return model(x)
logdir = "log"
writer = tf.summary.create_file_writer(logdir)
tf.summary.trace_on(graph=True, profiler=True)
# Forward pass
traceme(tf.zeros((1, 28, 28, 1)))
with writer.as_default():
tf.summary.trace_export(name="model_trace", step=0, profiler_outdir=logdir)
According to the docs, you can use Tensorboard to visualise graphs once your model has been trained.
First, define your model and run it. Then, open Tensorboard and switch to the Graph tab.
Minimal Compilable Example
This example is taken from the docs. First, define your model and data.
# Relevant imports.
%load_ext tensorboard
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from datetime import datetime
from packaging import version
import tensorflow as tf
from tensorflow import keras
# Define the model.
model = keras.models.Sequential([
keras.layers.Flatten(input_shape=(28, 28)),
keras.layers.Dense(32, activation='relu'),
keras.layers.Dropout(0.2),
keras.layers.Dense(10, activation='softmax')
])
model.compile(
optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
(train_images, train_labels), _ = keras.datasets.fashion_mnist.load_data()
train_images = train_images / 255.0
Next, train your model. Here, you will need to define a callback for Tensorboard to use for visualising stats and graphs.
# Define the Keras TensorBoard callback.
logdir="logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
# Train the model.
model.fit(
train_images,
train_labels,
batch_size=64,
epochs=5,
callbacks=[tensorboard_callback])
After training, in your notebook, run
%tensorboard --logdir logs
And switch to the Graph tab in the navbar:
You will see a graph that looks a lot like this:
Here is the solution for tf2.x with Graph visualization of subclassed model/layer
import tensorflow as tf
print("TensorFlow version:", tf.__version__)
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model,Input
class MyModel(Model):
def __init__(self, dim):
super(MyModel, self).__init__()
self.conv1 = Conv2D(16, 3, activation='relu')
self.conv2 = Conv2D(32, 3, activation='relu')
self.conv3 = Conv2D(8, 3, activation='relu')
self.flatten = Flatten()
self.d1 = Dense(128, activation='relu')
self.d2 = Dense(1)
def call(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.flatten(x)
x = self.d1(x)
return self.d2(x)
def build_graph(self):
x = Input(shape=(dim))
return Model(inputs=[x], outputs=self.call(x))
dim = (28, 28, 1)
# Create an instance of the model
model = MyModel((dim))
model.build((None, *dim))
model.build_graph().summary()
tf.keras.utils.plot_model(model.build_graph(), to_file="model.png",
expand_nested=True, show_shapes=True)
the output is
TensorFlow version: 2.5.0
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 28, 28, 1)] 0
_________________________________________________________________
conv2d (Conv2D) (None, 26, 26, 16) 160
_________________________________________________________________
conv2d_1 (Conv2D) (None, 24, 24, 32) 4640
_________________________________________________________________
conv2d_2 (Conv2D) (None, 22, 22, 8) 2312
_________________________________________________________________
flatten (Flatten) (None, 3872) 0
_________________________________________________________________
dense (Dense) (None, 128) 495744
_________________________________________________________________
dense_1 (Dense) (None, 1) 129
=================================================================
Total params: 502,985
Trainable params: 502,985
Non-trainable params: 0
Here is also a graph visualization
Here's what is working for me at the moment (TF 2.0.0), based on the tf.keras.callbacks.TensorBoard code:
# After model has been compiled
from tensorflow.python.ops import summary_ops_v2
from tensorflow.python.keras.backend import get_graph
tb_path = '/tmp/tensorboard/'
tb_writer = tf.summary.create_file_writer(tb_path)
with tb_writer.as_default():
if not model.run_eagerly:
summary_ops_v2.graph(get_graph(), step=0)
Another option is to use this website: https://lutzroeder.github.io/netron/
which generate a graph with a .h5 or .tflite file.
The github repo it's based on may be found here:
https://github.com/lutzroeder/netron

Resources