I'm going through some tutorials using the Keras functional API in Tensorflow 2, and I'm having some trouble including BatchNormalization layers when using the functional API.
Using roughly the same code:
This network trains with the sequential API and batch normalization
This network trains with the functional API, but commenting out the batch normalization layers
This network does not train using the functional API and batch normalization layers
Am I missing a step somewhere? Do I need to set training=True or training=False somewhere in the code?
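For instance, should I be passing the flag explicitly when wiring the blocks in the functional API, something like the line below? (A guess on my part, not something from the tutorials; Keras layers do accept extra call arguments such as training when invoked on a tensor.)
x = CNNBlock(64, kernel_size=4, strides=(2,2))(inputs, training=True)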
Working Sequential Code:
#subclassed layers in keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import SeparableConv2D
from tensorflow.keras.layers import BatchNormalization
import numpy as np
import logging
tf.get_logger().setLevel(logging.ERROR)
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
import matplotlib.pyplot as plt
%matplotlib inline
cifar_dataset = keras.datasets.cifar10
(train_images, train_labels), (test_images,
test_labels) = cifar_dataset.load_data()
EPOCHS = 128
BATCH_SIZE = 128
#standardize dataset
mean = np.mean(train_images)
stdev = np.std(train_images)
train_images = (train_images - mean)/stdev
test_images = (test_images - mean)/stdev
#change labels to one-hot
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)
# Keras model subclassing: build your own layers
#CNN -> batch norm -> Relu
#create a class for this kind of block
class CNNBlock(layers.Layer): #inherits from layers.Layer, which tracks what we need for backpropagation
    def __init__(self, out_channels, kernel_size=3, strides=(1,1)): #needs both __init__ and call methods
        super(CNNBlock, self).__init__() #initialize the parent layers.Layer class
        self.conv = layers.Conv2D(out_channels, kernel_size, strides=strides, #strides must be forwarded here or the argument is silently ignored
                                  padding='same', kernel_initializer='he_normal',
                                  bias_initializer='zeros') #the conv portion of this block
        self.bn = layers.BatchNormalization() #batch normalization for this block
    def call(self, input_tensor, training=False): #the forward pass (like forward() in PyTorch); the training flag switches between training and evaluation behavior
        x = self.conv(input_tensor) #run the convolution
        x = self.bn(x, training=training) #batch norm
        x = tf.nn.relu(x) #activation function for this layer
        return x
class CNNBlock_init(layers.Layer): #same block, but intended as the first layer, so it carries an input shape
    def __init__(self, out_channels, input_size, kernel_size=3):
        super(CNNBlock_init, self).__init__() #make sure the class name here matches
        self.input_size = input_size
        self.conv = layers.Conv2D(out_channels, kernel_size,
                                  input_shape=input_size, #the first layer needs the input shape to build properly
                                  padding='same')
        self.bn = layers.BatchNormalization()
    def call(self, input_tensor, training=False):
        x = self.conv(input_tensor) #input_shape is a constructor argument, not a call argument, so it is not passed here
        x = self.bn(x, training=training)
        x = tf.nn.relu(x)
        return x
#build model with this
model = keras.Sequential([
    CNNBlock(64, kernel_size=4, strides=(2,2)),
    Dropout(0.2),
    CNNBlock(64, kernel_size=2, strides=(2,2)),
    Dropout(0.2),
    CNNBlock(32),
    Dropout(0.2),
    CNNBlock(32),
    MaxPooling2D(pool_size=(2,2), strides=2),
    Dropout(0.2),
    Flatten(),
    Dense(64, activation='relu', #dense layers to combine features
          kernel_initializer='he_normal',
          bias_initializer='zeros'),
    Dropout(0.2),
    Dense(10, activation='softmax', #softmax for classification
          kernel_initializer='glorot_uniform',
          bias_initializer='zeros')
])
#compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
#model.build(input_shape=(32,32,3))
#model.summary()
#train model
history = model.fit(
    train_images, train_labels,
    validation_data=(test_images, test_labels),
    epochs=EPOCHS, batch_size=BATCH_SIZE,
    verbose=1, shuffle=True) #verbose=1 shows progress and per-epoch timing
#evaluate model
import matplotlib.pyplot as plt
%matplotlib inline
def plot_error(history):
    history_dict_vals = history.history #the History object exposes this dict directly
    history_x = history.epoch
    plt.plot(history_x, history_dict_vals['accuracy'], 'r-', label='training accuracy')
    plt.plot(history_x, history_dict_vals['val_accuracy'], 'g-', label='test accuracy')
    plt.axis([0, len(history_x), 0.0, 1])
    plt.xlabel('training epochs')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()
    print(f"Final test accuracy = {history_dict_vals['val_accuracy'][-1]}")
plot_error(history)
Working Functional Code:
# same convolutional structure but with the keras functional API
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import SeparableConv2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
import numpy as np
import logging
tf.get_logger().setLevel(logging.ERROR)
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
import matplotlib.pyplot as plt
%matplotlib inline
cifar_dataset = keras.datasets.cifar10
(train_images, train_labels), (test_images,
test_labels) = cifar_dataset.load_data()
EPOCHS = 128
BATCH_SIZE = 128
#standardize dataset
mean = np.mean(train_images)
stdev = np.std(train_images)
train_images = (train_images - mean)/stdev
test_images = (test_images - mean)/stdev
#change labels to one-hot
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)
# Keras model subclassing: build your own layers
#CNN -> batch norm -> Relu
#create a class for this kind of block
class CNNBlock(layers.Layer): #inherits from layers.Layer, which tracks what we need for backpropagation
    def __init__(self, out_channels, kernel_size=3, strides=(1,1)):
        super(CNNBlock, self).__init__()
        self.conv = layers.Conv2D(out_channels, kernel_size, strides=strides, #strides forwarded here, as in the sequential version
                                  padding='same', kernel_initializer='he_normal',
                                  bias_initializer='zeros')
        #self.bn = layers.BatchNormalization() #commented out: the model only trains without this
    def call(self, input_tensor, training=False): #the training flag switches between training and evaluation behavior
        x = self.conv(input_tensor)
        #x = self.bn(x, training=training) #commented out: the model only trains without this
        x = tf.nn.relu(x)
        return x
class CNNBlock_init(layers.Layer): #same block, intended as the first layer, so it carries an input shape
    def __init__(self, out_channels, input_size, kernel_size=3):
        super(CNNBlock_init, self).__init__()
        self.input_size = input_size
        self.conv = layers.Conv2D(out_channels, kernel_size,
                                  input_shape=input_size, #the first layer needs the input shape to build properly
                                  padding='same')
        #self.bn = layers.BatchNormalization()
    def call(self, input_tensor, training=False):
        x = self.conv(input_tensor) #input_shape belongs in the constructor, not the call
        #x = self.bn(x, training=training)
        x = tf.nn.relu(x)
        return x
#Build the model with the Keras functional API
input_shape = (32,32,3)
chanDim = -1
#define model with first inputs
inputs = Input(shape=input_shape)
#functional API passing layers through
x = CNNBlock(64,kernel_size=4,strides=(2,2))(inputs)
x = Dropout(0.2)(x)
x = CNNBlock(64,kernel_size=2,strides=(2,2))(x)
x = Dropout(0.2)(x)
x = CNNBlock(64)(x)
x = MaxPooling2D(pool_size=(2,2), strides=2)(x)
x = Dropout(0.2)(x)
x = Flatten()(x)
x = Dense(64, activation='relu',#dense layers to combine features
kernel_initializer='he_normal',
bias_initializer='zeros')(x)
x = Dropout(0.2)(x)
y = Dense(10, activation='softmax',#softmax for classification
kernel_initializer='glorot_uniform',
bias_initializer='zeros')(x)
#initialize model with inputs and outputs
model = Model(inputs, y, name='convnet_func')
#compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
#train model
history = model.fit(
    train_images, train_labels,
    validation_data=(test_images, test_labels),
    epochs=EPOCHS, batch_size=BATCH_SIZE,
    verbose=1, shuffle=True) #verbose=1 shows progress and per-epoch timing
#evaluate model
import matplotlib.pyplot as plt
%matplotlib inline
def plot_error(history):
    history_dict_vals = history.history #the History object exposes this dict directly
    history_x = history.epoch
    plt.plot(history_x, history_dict_vals['accuracy'], 'r-', label='training accuracy')
    plt.plot(history_x, history_dict_vals['val_accuracy'], 'g-', label='test accuracy')
    plt.axis([0, len(history_x), 0.0, 1])
    plt.xlabel('training epochs')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()
    print(f"Final test accuracy = {history_dict_vals['val_accuracy'][-1]}")
plot_error(history)
Unfortunately, the model does not train when I uncomment the batch normalization lines.
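A debugging snippet of my own (not from the tutorials) that I've been using to check whether batch norm is the moving part: compare the model's output in training mode and inference mode on the same batch; a large gap points at the batch-norm statistics.
out_train = model(test_images[:8], training=True) #uses per-batch statistics
out_infer = model(test_images[:8], training=False) #uses moving-average statistics
print(np.max(np.abs(out_train.numpy() - out_infer.numpy())))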
I'm running an LSTM network that works fine (TF 2.0). My problem starts when trying to modify the loss function.
I planned to do some data manipulation on 'y_true' and 'y_pred', but since TF forces you to keep the data as tensors (rather than converting it to Pandas or NumPy), this is challenging.
To get better control of the data inside the loss function, I've replicated the tf.keras.losses.mae function.
My goal was to be able to see the data ('y_true' and 'y_pred') so I can make my desired adjustments.
The original function:
def mean_absolute_error(y_true, y_pred):
    y_pred = ops.convert_to_tensor(y_pred)
    y_true = math_ops.cast(y_true, y_pred.dtype)
    return K.mean(math_ops.abs(y_pred - y_true), axis=-1)
And after adjustments for debugging:
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
import tensorflow.keras.backend as K
def mean_absolute_error_test(y_true, y_pred):
    global temp_true
    temp_true = y_true
    print(y_true)
    y_pred = ops.convert_to_tensor(y_pred)
    y_true = math_ops.cast(y_true, y_pred.dtype)
    return K.mean(math_ops.abs(y_pred - y_true), axis=-1)
When I run model.compile and print y_true, I get:
Tensor("dense_target:0", shape=(None, None), dtype=float32)
type=tensorflow.python.framework.ops.Tensor
Does anyone know how I can see 'y_pred' and 'y_true', or what am I missing?
It seems like I can't see samples of y_true, or the data is empty.
The main code part:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dropout, Dense
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, load_model
from tensorflow.python.keras.layers.recurrent import LSTM
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow.keras.backend as K #needed for K.clear_session below
K.clear_session()
model = Sequential()
model.add(LSTM(20,activation='relu',input_shape=(look_back,len(training_columns)),recurrent_dropout=0.4))
model.add(Dropout(0.1))
model.add(Dense(1, activation='linear'))
model.compile(optimizer='adam', loss=mean_absolute_error_test, experimental_run_tf_function=False) #the custom loss defined above; alternatives: mse, mean_squared_logarithmic_error
num_epochs = 20
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=3)
history=model.fit(X_train_lstm, y_train_lstm, epochs = num_epochs, batch_size = 128,shuffle=False,verbose=1,validation_data=[X_test_lstm,y_test_lstm],callbacks=[es])
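One idea I've been considering (a sketch only, assuming TF 2.x; mean_absolute_error_debug is a hypothetical variant of mine): make the model run eagerly so the loss receives concrete values, and use tf.print, which prints actual tensor contents even in graph mode:
def mean_absolute_error_debug(y_true, y_pred):
    tf.print('y_true:', y_true) #unlike Python print on a symbolic tensor, tf.print shows real values at fit time
    tf.print('y_pred:', y_pred)
    y_pred = ops.convert_to_tensor(y_pred)
    y_true = math_ops.cast(y_true, y_pred.dtype)
    return K.mean(math_ops.abs(y_pred - y_true), axis=-1)
model.compile(optimizer='adam', loss=mean_absolute_error_debug)
model.run_eagerly = True #when running eagerly, y_true/y_pred are EagerTensors, so y_true.numpy() also works inside the loss
The symbolic placeholder printed at compile time would be expected either way; concrete values only exist once fit feeds batches through.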
I am loading my pre-trained Keras model and then trying to parallelize predictions over a large number of inputs using Dask. Unfortunately, I'm running into some issues relating to how I'm creating my Dask array. Any guidance would be greatly appreciated!
Setup:
First I cloned from this repo https://github.com/sanchit2843/dlworkshop.git
Reproducible Code Example:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import train_test_split
from keras.models import load_model
import keras
from keras.models import Sequential
from keras.layers import Dense
from dask.distributed import Client
import warnings
import dask.array as da #aliased as "da" to match the da.from_array call below
import keras.backend as K #needed by contrastive_loss below
import mlflow #needed to load the model inside load_and_predict
warnings.filterwarnings('ignore')
dataset = pd.read_csv('data/train.csv')
X = dataset.drop(['price_range'], axis=1).values
y = dataset[['price_range']].values
# scale data
sc = StandardScaler()
X = sc.fit_transform(X)
ohe = OneHotEncoder()
y = ohe.fit_transform(y).toarray()
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2)
# Neural network
model = Sequential()
model.add(Dense(16, input_dim=20, activation="relu"))
model.add(Dense(12, activation="relu"))
model.add(Dense(4, activation="softmax"))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=100, batch_size=64)
# Use dask
client = Client()
def load_and_predict(input_data_chunk):
    def contrastive_loss(y_true, y_pred):
        margin = 1
        square_pred = K.square(y_pred)
        margin_square = K.square(K.maximum(margin - y_pred, 0))
        return K.mean(y_true * square_pred + (1 - y_true) * margin_square)
    mlflow.set_tracking_uri('<uri>')
    mlflow.set_experiment('clean_parties_ml')
    runs = mlflow.search_runs()
    artifact_uri = runs.loc[runs['start_time'].idxmax()]['artifact_uri']
    model = mlflow.keras.load_model(artifact_uri + '/model', custom_objects={'contrastive_loss': contrastive_loss})
    y_pred = model.predict(input_data_chunk)
    return y_pred
da_input_data = da.from_array(X_test, chunks=(100, None))
prediction_results = da_input_data.map_blocks(load_and_predict, dtype=X_test.dtype).compute()
The Error I'm receiving:
AttributeError: '_thread._local' object has no attribute 'value'
Keras/Tensorflow don't play nicely with other threaded systems. There is an ongoing issue on this topic here: https://github.com/dask/dask-examples/issues/35
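One workaround (a sketch, untested against this exact setup): bypass the threaded scheduler and compute the blocks in separate processes, so each worker gets its own private TensorFlow state. Loading the model inside load_and_predict, as above, matters here, since a Keras model generally can't be pickled across process boundaries.
prediction_results = (da_input_data
    .map_blocks(load_and_predict, dtype=X_test.dtype)
    .compute(scheduler='processes')) #'processes' overrides the default threaded/distributed scheduler for this one compute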
I've seen a few similar posts on this topic, but none seem to address my issue.
I have trained a Keras model (CPU only) and want to call the predict function asynchronously using a multiprocessing Pool. However, the call to predict just hangs. There is no exception thrown or anything. Calling it from the main thread works fine. I tried using model._make_predict_function() as suggested before, but this doesn't resolve it for me.
I've set up a Jupyter notebook to reproduce this (Keras==2.2.4, tensorflow==1.11.0):
In [1]: from keras.models import Sequential
from keras.layers import Dense
from multiprocessing.pool import Pool
In [2]: # Create sample model from Keras documentation
model = Sequential()
model.add(Dense(32, activation='relu', input_dim=100))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
# Generate dummy data
import numpy as np
data = np.random.random((1000, 100))
labels = np.random.randint(2, size=(1000, 1))
# Train the model, iterating on the data in batches of 32 samples
model.fit(data, labels, epochs=10, batch_size=32, verbose=0)
In [3]: test_data = np.random.random((1,100))
def predict(model, data):
    return model.predict(data)
def do_predict(_=1):
    print('Prediction:', predict(model, test_data))
    print('Done')
In [4]: do_predict()
Out [4]: Prediction: [[0.5553096]]
Done
In [5]: with Pool(1) as pool:
    pool.apply_async(do_predict, [1]).get()
    pool.close()
    pool.join()
At the last step it just hangs. Can anybody help me finding out what's going on here? Is it not possible to use predict asynchronously?
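One pattern I've seen suggested elsewhere (a sketch only; init_worker and worker_predict are names I made up, and the worker's model here is freshly initialized rather than trained) is to build the model inside each worker process via the pool's initializer, so no Keras/TensorFlow state crosses the fork boundary:
worker_model = None
def init_worker():
    global worker_model
    worker_model = Sequential() #each process builds its own model and TF state; in practice you would load saved weights here
    worker_model.add(Dense(32, activation='relu', input_dim=100))
    worker_model.add(Dense(1, activation='sigmoid'))
    worker_model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
def worker_predict(data):
    return worker_model.predict(data)
with Pool(1, initializer=init_worker) as pool:
    print(pool.apply_async(worker_predict, [test_data]).get())
Is something along those lines the intended approach, or should predict work from a forked worker directly?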
I'm attempting to train multiple keras models with different parameter values using multiple threads (and the tensorflow backend). I've seen a few examples of using the same model within multiple threads, but in this particular case, I run into various errors regarding conflicting graphs, etc. Here's a simple example of what I'd like to be able to do:
from concurrent.futures import ThreadPoolExecutor
import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.layers import Dense
from keras.models import Sequential
sess = tf.Session()
def example_model(size):
    model = Sequential()
    model.add(Dense(size, input_shape=(5,)))
    model.add(Dense(1))
    model.compile(optimizer='sgd', loss='mse')
    return model
if __name__ == '__main__':
    K.set_session(sess)
    X = np.random.random((10, 5))
    y = np.random.random((10, 1))
    models = [example_model(i) for i in range(5, 10)]
    e = ThreadPoolExecutor(4)
    res_list = [e.submit(model.fit, X, y) for model in models]
    for res in res_list:
        print(res.result())
The resulting error is ValueError: Tensor("Variable:0", shape=(5, 5), dtype=float32_ref) must be from the same graph as Tensor("Variable_2/read:0", shape=(), dtype=float32). I've also tried initializing the models within the threads, which gives a similar failure.
Any thoughts on the best way to go about this? I'm not at all attached to this exact structure, but I'd prefer to be able to use multiple threads rather than processes so all the models are trained within the same GPU memory allocation.
TensorFlow graphs are not thread-safe (see https://www.tensorflow.org/api_docs/python/tf/Graph), and when you create a new TensorFlow session, it uses the default graph by default.
You can get around this by creating a new session with a new graph in your parallelized function and constructing your Keras model there.
Here is some code that creates and fits a model on each available gpu in parallel:
import concurrent.futures
import numpy as np
import keras.backend as K
from keras.layers import Dense
from keras.models import Sequential
import tensorflow as tf
from tensorflow.python.client import device_lib
def get_available_gpus():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if x.device_type == 'GPU']
xdata = np.random.randn(100, 8)
ytrue = np.random.randint(0, 2, 100)
def fit(gpu):
    with tf.Session(graph=tf.Graph()) as sess:
        K.set_session(sess)
        with tf.device(gpu):
            model = Sequential()
            model.add(Dense(12, input_dim=8, activation='relu'))
            model.add(Dense(8, activation='relu'))
            model.add(Dense(1, activation='sigmoid'))
            model.compile(loss='binary_crossentropy', optimizer='adam')
            model.fit(xdata, ytrue, verbose=0)
            return model.evaluate(xdata, ytrue, verbose=0)
gpus = get_available_gpus()
with concurrent.futures.ThreadPoolExecutor(len(gpus)) as executor:
    results = [x for x in executor.map(fit, gpus)]
print('results: ', results)