Implementing an l2 loss into a tensorflow Sequential regression model - python-3.x

I created a Keras/TensorFlow model, heavily influenced by
this guide,
which looks like this:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import time
import numpy as np
import sys
from keras import losses
# Regression network: two ReLU hidden layers, 10% dropout, single-unit output.
# `nodes`, `data_initial`, `train_data` and `train_labels` come from outside
# this listing.
model = keras.Sequential([
    layers.Dense(
        nodes,
        activation=tf.keras.activations.relu,
        input_shape=[len(data_initial.keys())],
    ),
    layers.Dense(64, activation=tf.keras.activations.relu),
    layers.Dropout(0.1, noise_shape=None),
    layers.Dense(1),
])

adam = tf.keras.optimizers.Adam(lr=0.01, beta_1=0.01, beta_2=0.001, epsilon=0.03)
model.compile(
    loss='mse',  # <-------- Here we define the loss function
    optimizer=adam,
    metrics=['mae', 'mse'],
)
model.fit(train_data, train_labels, epochs=200)
It is a regression model, and instead of loss='mse' I would like to use the
tf.keras MSE loss together with an L2 regularization term. My questions are:
How can I add a predefined regularizer function (I think it is this one) to the model.compile statement?
How can I write a completely custom loss function and pass it to model.compile?

You can add regularization either as a layer parameter or as a separate layer.
Using it as a layer parameter looks like this:
# Dense layer carrying an L2 kernel regularizer and an L1 activity regularizer.
model.add(
    layers.Dense(
        units=8,
        kernel_regularizer=regularizers.l2(0.01),
        activity_regularizer=regularizers.l1(0.01),
    )
)
Sample code with first dense layer regularized and a custom loss function
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import time
import numpy as np
import sys
from keras import losses
from keras import regularizers
import keras.backend as K
# Small demo network; only the first Dense layer carries regularizers.
model = keras.Sequential([
    layers.Dense(
        8,
        activation=tf.keras.activations.relu,
        input_shape=(8,),
        kernel_regularizer=regularizers.l2(0.01),
        activity_regularizer=regularizers.l1(0.01),
    ),
    layers.Dense(4, activation=tf.keras.activations.relu),
    layers.Dropout(0.1, noise_shape=None),
    layers.Dense(1),
])
def custom_loss(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of model outputs, broadcastable against ``y_true``.

    Returns:
        Scalar tensor: the mean of the element-wise squared residuals.

    Regularization penalties declared on layers (``kernel_regularizer``,
    ``activity_regularizer``) are added to the training loss by Keras itself,
    so they do not need to be repeated here.
    """
    # Square each residual *before* averaging. Squaring the averaged residual
    # instead lets positive and negative errors cancel and can report a loss
    # of zero for a badly wrong model.
    return K.mean(K.square(y_true - y_pred))
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and is rejected by newer Keras optimizer implementations.
model.compile(
    loss=custom_loss,
    optimizer=tf.keras.optimizers.Adam(
        learning_rate=0.01,
        beta_1=0.01,
        beta_2=0.001,
        epsilon=0.03,
    ),
    metrics=['mae', 'mse'],
)
# Smoke test on random data: 10 samples of 8 features, one target per sample.
model.fit(np.random.randn(10, 8), np.random.randn(10, 1), epochs=1)

Related

Questions about Multitask deep neural network modeling using Keras

I'm trying to develop a multitask deep neural network (MTDNN) to predict small-molecule bioactivity against kinase targets. Something is definitely wrong with my model structure, but I can't figure out what.
For my training data (highly imbalanced, with 0 as inactive and 1 as active), I have 423 unique kinase targets (tasks) and over 400k unique compounds. I first calculate the ECFP fingerprint from SMILES, and then I randomly split the input data into train, test, and valid sets in an 8:1:1 ratio using RandomStratifiedSplitter from the deepchem package. After training my model on the train set, I want to make predictions on the test set to check model performance.
Here's what my data looks like (screenshot example):
(https://i.stack.imgur.com/8Hp36.png)
Here's my code:
# Import Packages
import numpy as np
import pandas as pd
import deepchem as dc
from sklearn.metrics import roc_auc_score, roc_curve, auc, confusion_matrix
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import initializers, regularizers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input, Dropout, Reshape
from tensorflow.keras.optimizers import SGD
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors
# Build Model
# Shared trunk: 1024 input features -> 2000 -> 500 -> 846 units, with 25%
# dropout after each of the first two dense layers.
inputs = Input(shape=(1024,))
x = Dense(
    2000,
    activation='relu',
    name="dense2000",
    kernel_initializer=initializers.RandomNormal(stddev=0.02),
    bias_initializer=initializers.Ones(),
    kernel_regularizer=regularizers.L2(l2=.0001),
)(inputs)
x = Dropout(rate=0.25)(x)
x = Dense(500, activation='relu', name='dense500')(x)
x = Dropout(rate=0.25)(x)
# NOTE(review): ReLU here means the values handed to the softmax below are
# never negative -- confirm this is intended.
x = Dense(846, activation='relu', name='output1')(x)
# 846 = 423 * 2: reshape to one pair of values per task, then normalize each
# pair along the last axis.
logits = Reshape([423, 2])(x)
outputs = keras.layers.Softmax(axis=2)(logits)
Model1 = Model(inputs=inputs, outputs=outputs, name='MTDNN')
Model1.summary()
opt = SGD(learning_rate=.0003, momentum=0.9)
def loss_function(output, labels):
    """Apply ``tf.nn.softmax_cross_entropy_with_logits`` to the two arguments.

    Both arguments are forwarded positionally, in the order received.

    NOTE(review): the model built above already ends in a Softmax layer,
    while this op is named for unnormalized logits -- presumably one of the
    two softmaxes should be dropped; confirm.
    NOTE(review): Keras presumably invokes a loss as ``fn(y_true, y_pred)``,
    in which case ``output`` would receive the targets -- verify that the
    parameter names match their contents.
    """
    return tf.nn.softmax_cross_entropy_with_logits(output, labels)
loss_fn = loss_function
# Metrics reported during fit/evaluate.
# NOTE(review): the model emits a pair of softmax values per task; verify that
# these metric classes interpret that output shape the way you expect.
tracked_metrics = [
    keras.metrics.Accuracy(),
    keras.metrics.AUC(),
    keras.metrics.Precision(),
    keras.metrics.Recall(),
]
Model1.compile(loss=loss_fn, optimizer=opt, metrics=tracked_metrics)
# Train on each (train, test, valid) triple from `split2`, then predict on the
# test features.
# NOTE(review): indentation was lost in this listing, so it cannot be
# determined from here which of the statements below belong to the loop body.
for train, test, valid in split2:
# Wrap features and labels in DataFrames and one-hot encode the labels
# with depth 2.
trainX = pd.DataFrame(train.X)
trainy = pd.DataFrame(train.y)
trainy2 = tf.one_hot(trainy,2)
testX = pd.DataFrame(test.X)
testy = pd.DataFrame(test.y)
testy2 = tf.one_hot(testy,2)
validX = pd.DataFrame(valid.X)
validy = pd.DataFrame(valid.y)
validy2 = tf.one_hot(validy,2)
# Fit for 10 epochs in shuffled batches of 100, validating on the valid split.
history = Model1.fit(x=trainX, y=trainy2,
shuffle=True,
epochs=10,
verbose=1,
batch_size=100,
validation_data=(validX, validy2))
y_pred = Model1.predict(testX)
# Keep index 1 of the last axis (presumably the "active" probability --
# confirm against the one-hot encoding) and round it to 0/1.
y_pred2 = y_pred[:, :, 1]
y_pred3 = np.round(y_pred2)
# Check the # of nonzero in assay
(y_pred3!=0).sum () #all 0s
My questions are:
The ROC AUC and precision/recall are all extremely high (>0.99), but the predictions on the test set are all 0s, with no actives at all. I also used a randomized dataset with the same active:inactive ratio for each task to check whether those values are too good to be true, and all values are still above 0.99, including the ROC AUC, which is expected to be 0.5.
Can anyone help me identify what is wrong with my model and how I should fix it?
Can I use the built-in functions in sklearn to calculate ROC/accuracy/precision-recall, or should I calculate the metrics manually from the confusion matrix for each task? Why or why not?

TensorFlow custom loss ValueError: No gradients provided for any variable:

I am implementing a custom loss function as in the code below for a simple classification. However, when I run the code I get the error ValueError: No gradients provided for any variable:
import os
os.environ['KERAS_BACKEND'] = "tensorflow"
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import statistics as st
import tensorflow as tf
from keras.utils import np_utils
# if the probability is greater than 0.75 then set the value to 1 for buy or sell else set it to None
# convert the y_pred to 0 and 1 using argmax function
# add the two matrices y_pred and y_true
# if value is 2 then set that to 0
# multiply by misclassification matrix
# add the losses to give a unique number
def custom_loss(y_true, y_pred):
    """Weighted misclassification score computed with NumPy.

    Each row of ``y_pred`` is replaced by a one-hot vector at its argmax and
    added to ``y_true``; entries equal to 2 are zeroed. The result is
    multiplied by a fixed 3x3 weight matrix and everything is summed into a
    single number, which is returned as a tensor.

    NOTE(review): both inputs leave TensorFlow through ``.numpy()``, so the
    returned value is computed outside the tensor graph -- presumably the
    reason no gradients are reported for this loss.
    """
    probs = y_pred.numpy()
    hard_pred = np.zeros_like(probs)
    row_idx = np.arange(len(probs))
    hard_pred[row_idx, probs.argmax(1)] = 1
    combined = hard_pred + y_true.numpy()
    combined[combined == 2] = 0
    w_array = [[1, 1, 5], [1, 1, 1], [5, 1, 1]]
    total = np.sum(np.dot(combined, w_array))
    return tf.convert_to_tensor(total)
# Three ReLU hidden layers (32, 16, 8 units) feeding a 3-way softmax.
model = keras.Sequential([
    layers.Dense(32, input_dim=4, activation='relu'),
    layers.Dense(16, input_dim=4, activation='relu'),
    layers.Dense(8, input_dim=4, activation='relu'),
    layers.Dense(3, activation='softmax'),
])
# run_eagerly=True is what lets custom_loss call .numpy() on its inputs.
model.compile(loss=custom_loss, optimizer='adam', run_eagerly=True)
I do not understand what I am doing incorrectly over here. I read through the issues on tensorflow and one of the reasons is that the link between the loss function and input variables is broken. But I am using y_true in the loss function
Thanks
You cannot use NumPy within a custom loss function. The function is part of the graph and must operate on tensors, not arrays. NumPy doesn't support backpropagation of gradients.

Tensorflow custom loss function - can't get samples of y_pred and y_true in loss function

I'm running an LSTM network that works fine (TF 2.0). My problem starts when trying to modify the loss function.
I planned to do some data manipulation on 'y_true' and 'y_pred', but since TF forces the data to stay as tensors (and not be converted to Pandas or NumPy), it is challenging.
To get better control of the data inside the loss function, I've replicated the tf.keras.losses.mae function.
My goal was to be able to see the data ('y_true' and 'y_pred') so I can make my desired adjustments.
The original function:
def mean_absolute_error(y_true, y_pred):
    """Mean of ``|y_pred - y_true|`` over the last axis.

    ``y_pred`` is converted to a tensor and ``y_true`` is cast to its dtype
    before the difference is taken.
    """
    predictions = ops.convert_to_tensor(y_pred)
    targets = math_ops.cast(y_true, predictions.dtype)
    abs_error = math_ops.abs(predictions - targets)
    return K.mean(abs_error, axis=-1)
And after adjustments for debugging:
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
import tensorflow.keras.backend as K
def mean_absolute_error_test(y_true, y_pred):
    """Debug variant of mean absolute error.

    Stores the incoming ``y_true`` in the module-level ``temp_true`` and
    prints it, then returns the mean of ``|y_pred - y_true|`` over the last
    axis.
    """
    global temp_true
    # Keep a handle on the raw argument so it can be inspected after the call.
    temp_true = y_true
    print(y_true)
    predictions = ops.convert_to_tensor(y_pred)
    targets = math_ops.cast(y_true, predictions.dtype)
    abs_error = math_ops.abs(predictions - targets)
    return K.mean(abs_error, axis=-1)
when I run model.compile and print y_true I get:
Tensor("dense_target:0", shape=(None, None), dtype=float32)
type=tensorflow.python.framework.ops.Tensor
Does anyone know how I can see 'y_pred' and 'y_true', or what I am missing?
It seems like I can't see samples of y_true, or the data is empty.
The main code part:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dropout,Dense
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, load_model
from tensorflow.python.keras.layers.recurrent import LSTM
from tensorflow.keras.callbacks import EarlyStopping
K.clear_session()
# Single LSTM layer -> dropout -> one linear output unit.
model = Sequential([
    LSTM(
        20,
        activation='relu',
        input_shape=(look_back, len(training_columns)),
        recurrent_dropout=0.4,
    ),
    Dropout(0.1),
    Dense(1, activation='linear'),
])
# NOTE(review): `test2` is not defined anywhere in this listing -- presumably
# it refers to the custom loss shown above; confirm.
model.compile(optimizer='adam', loss=test2, experimental_run_tf_function=False)  # mse,mean_squared_logarithmic_error
num_epochs = 20
# Stop once val_loss has failed to improve for 3 consecutive epochs.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=3)
history = model.fit(
    X_train_lstm,
    y_train_lstm,
    epochs=num_epochs,
    batch_size=128,
    shuffle=False,
    verbose=1,
    validation_data=[X_test_lstm, y_test_lstm],
    callbacks=[es],
)

Keras LSTM to Pytorch

I am using the following code to apply a sequential LSTM to time-series data with one value per time step. It works fine with the Keras version. I am wondering how I could do the same using PyTorch.
import tensorflow
from tensorflow.keras import optimizers
from tensorflow.keras import losses
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input, Dropout, Embedding, LSTM
from tensorflow.keras.optimizers import RMSprop, Adam, Nadam
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.callbacks import TensorBoard
# training_dataset.shape = (303, 24, 1)
time_steps = 24
metric = 'mean_absolute_error'

# LSTM that returns the full sequence, followed by a sigmoid Dense(1).
model = Sequential([
    LSTM(units=32, activation='tanh', input_shape=(time_steps, 1), return_sequences=True),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='mean_absolute_error', metrics=[metric])
print(model.summary())

batch_size = 32
epochs = 20
# The same array is used as input, target and validation data.
# `tensorlog` is defined outside this listing.
model.fit(
    x=training_dataset,
    y=training_dataset,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(training_dataset, training_dataset),
    callbacks=[TensorBoard(log_dir='../logs/{0}'.format(tensorlog))],
)
testing_pred = model.predict(x=testing_dataset)
You can check the PyTorch documentation for that: https://pytorch.org/docs/master/generated/torch.nn.LSTM.html
The simplest code is the following:
import torch
import torch.nn as nn
from torch.optim import Adam


class LSTMRegressor(nn.Module):
    """LSTM followed by a per-time-step linear layer and a sigmoid.

    Mirrors the Keras model ``LSTM(32, return_sequences=True)`` followed by
    ``Dense(1, activation='sigmoid')``.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of LSTM units.
        output_size: Number of values produced per time step.
    """

    def __init__(self, input_size=1, hidden_size=32, output_size=1):
        super().__init__()
        # batch_first=True makes inputs (batch, time, features), as in Keras.
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            batch_first=True)
        # nn.LSTM has no output projection of its own, so add one explicitly.
        self.head = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # nn.LSTM returns (output, (h_n, c_n)); only the per-step output is
        # needed, which is also why it cannot sit inside nn.Sequential.
        hidden_seq, _ = self.lstm(x)
        return torch.sigmoid(self.head(hidden_seq))


model = LSTMRegressor(input_size=1, hidden_size=32, output_size=1)
opt = Adam(model.parameters())
# nn.L1Loss() would match the Keras 'mean_absolute_error' loss exactly.
loss_func = nn.MSELoss()
# `dataloader` must yield (input, target) batches shaped (batch, time, 1).
for (x, y) in dataloader:
    opt.zero_grad()
    pred = model(x)
    # PyTorch losses take (prediction, target), in that order.
    loss = loss_func(pred, y)
    loss.backward()
    opt.step()

How To Do Model Predict Using Distributed Dask With a Pre-Trained Keras Model?

I am loading my pre-trained Keras model and then trying to parallelize prediction over a large amount of input data using Dask. Unfortunately, I'm running into some issues relating to how I'm creating my Dask array. Any guidance would be greatly appreciated!
Setup:
First I cloned from this repo https://github.com/sanchit2843/dlworkshop.git
Reproducible Code Example:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import train_test_split
from keras.models import load_model
import keras
from keras.models import Sequential
from keras.layers import Dense
from dask.distributed import Client
import warnings
import dask.array as DaskArray
warnings.filterwarnings('ignore')
dataset = pd.read_csv('data/train.csv')
# Features are every column except the target; the target stays 2-D so the
# encoder below can consume it.
X = dataset.drop(columns=['price_range']).values
y = dataset[['price_range']].values

# scale data
sc = StandardScaler()
X = sc.fit_transform(X)
ohe = OneHotEncoder()
y = ohe.fit_transform(y).toarray()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Neural network
model = Sequential([
    Dense(16, input_dim=20, activation="relu"),
    Dense(12, activation="relu"),
    Dense(4, activation="softmax"),
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=100, batch_size=64)

# Use dask
client = Client()
def load_and_predict(input_data_chunk):
    """Load the most recently started MLflow run's model and predict a chunk.

    The run with the largest ``start_time`` is selected, its ``/model``
    artifact is loaded as a Keras model (with ``contrastive_loss`` registered
    as a custom object), and that model's predictions for
    ``input_data_chunk`` are returned.

    NOTE(review): ``K`` and ``mlflow`` are not imported in this listing --
    presumably they are imported elsewhere; confirm.
    """
    def contrastive_loss(y_true, y_pred):
        # Needed only so the saved model can be deserialized.
        margin = 1
        pred_term = y_true * K.square(y_pred)
        margin_term = (1 - y_true) * K.square(K.maximum(margin - y_pred, 0))
        return K.mean(pred_term + margin_term)

    mlflow.set_tracking_uri('<uri>')
    mlflow.set_experiment('clean_parties_ml')
    all_runs = mlflow.search_runs()
    newest_run = all_runs.loc[all_runs['start_time'].idxmax()]
    model_uri = newest_run['artifact_uri'] + '/model'
    loaded = mlflow.keras.load_model(
        model_uri,
        custom_objects={'contrastive_loss': contrastive_loss},
    )
    return loaded.predict(input_data_chunk)
# dask.array is imported above as `DaskArray`; the bare name `da` is never
# defined in this script and would raise NameError.
# Split the test set into 100-row blocks, keeping all columns in one block.
da_input_data = DaskArray.from_array(X_test, chunks=(100, None))
# Run load_and_predict on every block and gather the results.
prediction_results = da_input_data.map_blocks(load_and_predict, dtype=X_test.dtype).compute()
The Error I'm receiving:
AttributeError: '_thread._local' object has no attribute 'value'
Keras/Tensorflow don't play nicely with other threaded systems. There is an ongoing issue on this topic here: https://github.com/dask/dask-examples/issues/35

Resources